From f1b961c044947c9d3fe679cac9832e143fee0ea0 Mon Sep 17 00:00:00 2001 From: Hugo Lefeuvre Date: Tue, 23 Nov 2021 15:24:14 +0100 Subject: [PATCH] Initial FlexOS release. Signed-off-by: Hugo Lefeuvre Signed-off-by: Sebastian Rauch Signed-off-by: Stefan Teodorescu Signed-off-by: Vlad-Andrei Badoiu --- Config.uk | 9 + Makefile | 14 + Makefile.uk | 30 + README.md | 516 ++++- README.unikraft.md | 116 ++ arch/x86/x86_64/include/uk/asm/mm.h | 194 ++ arch/x86/x86_64/include/uk/asm/mm_native.h | 85 + arch/x86/x86_64/include/uk/asm/mm_pv.h | 145 ++ arch/x86/x86_64/include/uk/asm/page.h | 117 ++ docker/flexos.dockerfile | 87 + docker/kraftcleanup | 9 + docker/kraftrc.default | 28 + flexos-support/0001-Myshmem.patch | 146 ++ include/uk/mem_layout.h | 154 ++ include/uk/plat/bootstrap.h | 3 +- include/uk/plat/memory.h | 6 +- include/uk/plat/mm.h | 353 ++++ include/uk/sections.h | 133 ++ lib/Makefile.uk | 5 + lib/cpio/Makefile.uk | 7 + lib/cpio/cpio.c | 240 +++ lib/cpio/exportsyms.uk | 1 + lib/cpio/include/uk/cpio.h | 57 + lib/devfs/device.c | 3 +- lib/flexos-core/Config.uk | 80 + lib/flexos-core/Makefile.uk | 48 + .../include/flexos/impl/bitfield.h | 151 ++ .../include/flexos/impl/intelpku-impl.h | 1808 +++++++++++++++++ .../include/flexos/impl/intelpku.h | 182 ++ .../include/flexos/impl/main_annotation.h | 53 + .../include/flexos/impl/typecheck.h | 88 + lib/flexos-core/include/flexos/impl/vmept.h | 705 +++++++ lib/flexos-core/include/flexos/isolation.h | 83 + lib/flexos-core/include/flexos/literals.h | 61 + lib/flexos-core/intelpku.c | 186 ++ lib/flexos-core/vmept.c | 339 ++++ lib/kasan/Config.uk | 14 + lib/kasan/Makefile.uk | 8 + lib/kasan/exportsyms.uk | 39 + lib/kasan/include/uk/kasan.h | 81 + lib/kasan/kasan.c | 333 +++ lib/kasan/kasan_internal.h | 112 + lib/posix-mmap/Config.uk | 4 + lib/posix-mmap/Makefile.uk | 8 + lib/posix-mmap/exportsyms.uk | 9 + lib/posix-mmap/include/sys/mman.h | 59 + lib/posix-mmap/mm.c | 234 +++ lib/ramfs/ramfs_vfsops.c | 3 +- lib/ramfs/ramfs_vnops.c | 43 +- lib/ubsan/Config.uk | 13 + lib/ubsan/Makefile.uk | 5 + lib/ubsan/exportsyms.uk | 21 + lib/ubsan/ubsan.c | 365 ++++ lib/uk9p/9pdev.c | 32 +- lib/uk9p/9preq.c | 31 +- lib/ukalloc/Makefile.uk | 2 + lib/ukalloc/alloc.c | 27 + lib/ukalloc/exportsyms.uk | 1 + lib/ukalloc/include/uk/alloc.h | 35 + lib/ukallocbbuddy/Makefile.uk | 2 + lib/ukblkdev/blkdev.c | 9 +- lib/ukboot/boot.c | 222 +- lib/ukboot/exportsyms.uk | 1 + lib/ukboot/weak_main.c | 6 +- lib/ukdebug/include/uk/print.h | 49 +- lib/uklock/include/uk/mutex.h | 49 +- lib/uklock/include/uk/semaphore.h | 67 +- lib/uklock/mutex.c | 3 +- lib/uklock/semaphore.c | 7 +- lib/uknetdev/include/uk/netdev_core.h | 5 +- lib/uknetdev/netdev.c | 32 +- lib/uksched/exportsyms.uk | 8 +- lib/uksched/include/uk/sched.h | 40 +- lib/uksched/include/uk/thread.h | 104 +- lib/uksched/include/uk/wait.h | 38 +- lib/uksched/include/uk/wait_types.h | 23 +- lib/uksched/sched.c | 299 ++- lib/uksched/thread.c | 225 +- lib/ukschedcoop/schedcoop.c | 25 +- lib/uksignal/include/uk/uk_signal.h | 20 +- lib/uksignal/uk_signal.c | 9 +- lib/uksp/Makefile.uk | 4 - lib/uksp/ssp.c | 5 +- lib/uktime/time.c | 41 + lib/vfscore/Config.uk | 12 + lib/vfscore/dentry.c | 63 +- lib/vfscore/extra.ld | 2 + lib/vfscore/file.c | 3 +- lib/vfscore/include/vfscore/file.h | 12 +- lib/vfscore/main.c | 79 +- lib/vfscore/mount.c | 43 +- lib/vfscore/pipe.c | 98 +- lib/vfscore/rootfs.c | 45 +- lib/vfscore/stdio.c | 15 +- lib/vfscore/syscalls.c | 17 +- lib/vfscore/vnode.c | 46 +- plat/Config.uk | 16 + plat/common/mm.c | 1024 ++++++++++ 
plat/common/sw_ctx.c | 19 +- plat/common/x86/traps.c | 47 + plat/drivers/virtio/virtio_net.c | 15 +- plat/kvm/Makefile.uk | 9 +- plat/kvm/arm/entry64.S | 2 +- plat/kvm/arm/setup.c | 2 +- plat/kvm/io.c | 28 +- plat/kvm/irq.c | 20 + plat/kvm/memory.c | 27 +- plat/kvm/x86/cpu_vectors_x86_64.S | 2 + plat/kvm/x86/link64.lds.S | 37 +- plat/kvm/x86/setup.c | 95 +- plat/kvm/x86/traps.c | 19 +- plat/linuxu/include/linuxu/setup.h | 11 +- plat/linuxu/include/linuxu/syscall-arm_32.h | 1 + plat/linuxu/include/linuxu/syscall-x86_64.h | 1 + plat/linuxu/include/linuxu/syscall.h | 15 + plat/linuxu/memory.c | 105 +- plat/xen/Config.uk | 1 + plat/xen/Makefile.uk | 4 + plat/xen/arm/setup.c | 2 +- plat/xen/drivers/9p/9pfront.c | 9 +- plat/xen/events.c | 9 + plat/xen/include/xen-arm/mm.h | 2 +- plat/xen/include/xen-x86/hypercall64.h | 4 +- plat/xen/include/xen-x86/mm.h | 43 +- plat/xen/memory.c | 2 +- plat/xen/x86/mm.c | 80 +- plat/xen/x86/setup.c | 26 +- plat/xen/xenbus/client.c | 3 +- plat/xen/xenbus/xs_comms.c | 17 +- plat/xen/xenbus/xs_watch.c | 3 +- support/build/Makefile.rules | 34 +- 131 files changed, 10567 insertions(+), 616 deletions(-) create mode 100644 README.unikraft.md create mode 100644 arch/x86/x86_64/include/uk/asm/mm.h create mode 100644 arch/x86/x86_64/include/uk/asm/mm_native.h create mode 100644 arch/x86/x86_64/include/uk/asm/mm_pv.h create mode 100644 arch/x86/x86_64/include/uk/asm/page.h create mode 100644 docker/flexos.dockerfile create mode 100755 docker/kraftcleanup create mode 100644 docker/kraftrc.default create mode 100644 flexos-support/0001-Myshmem.patch create mode 100644 include/uk/mem_layout.h create mode 100644 include/uk/plat/mm.h create mode 100644 include/uk/sections.h create mode 100644 lib/cpio/Makefile.uk create mode 100644 lib/cpio/cpio.c create mode 100644 lib/cpio/exportsyms.uk create mode 100644 lib/cpio/include/uk/cpio.h create mode 100644 lib/flexos-core/Config.uk create mode 100644 lib/flexos-core/Makefile.uk create mode 100644 lib/flexos-core/include/flexos/impl/bitfield.h create mode 100644 lib/flexos-core/include/flexos/impl/intelpku-impl.h create mode 100644 lib/flexos-core/include/flexos/impl/intelpku.h create mode 100644 lib/flexos-core/include/flexos/impl/main_annotation.h create mode 100644 lib/flexos-core/include/flexos/impl/typecheck.h create mode 100644 lib/flexos-core/include/flexos/impl/vmept.h create mode 100644 lib/flexos-core/include/flexos/isolation.h create mode 100644 lib/flexos-core/include/flexos/literals.h create mode 100644 lib/flexos-core/intelpku.c create mode 100644 lib/flexos-core/vmept.c create mode 100644 lib/kasan/Config.uk create mode 100644 lib/kasan/Makefile.uk create mode 100644 lib/kasan/exportsyms.uk create mode 100644 lib/kasan/include/uk/kasan.h create mode 100644 lib/kasan/kasan.c create mode 100644 lib/kasan/kasan_internal.h create mode 100644 lib/posix-mmap/Config.uk create mode 100644 lib/posix-mmap/Makefile.uk create mode 100644 lib/posix-mmap/exportsyms.uk create mode 100644 lib/posix-mmap/include/sys/mman.h create mode 100644 lib/posix-mmap/mm.c create mode 100644 lib/ubsan/Config.uk create mode 100644 lib/ubsan/Makefile.uk create mode 100644 lib/ubsan/exportsyms.uk create mode 100644 lib/ubsan/ubsan.c create mode 100644 plat/common/mm.c diff --git a/Config.uk b/Config.uk index 1d4143ecbf..7c33d098ff 100644 --- a/Config.uk +++ b/Config.uk @@ -168,6 +168,15 @@ config CROSS_COMPILE need to set this unless you want the configured build directory to select the cross-compiler automatically. 
+ +config LLVM_TARGET_ARCH + string "Custom cross-compiler LLVM target (optional)" + help + Same as running 'make LLVM_TARGET_ARCH=target' but stored for + default make runs in this build directory. You don't + need to set this unless you want the configured build + directory to select the cross-compiler automatically. + This is used for LLVM based compilers such as clang or rustc. #config PARALLEL_JOBS # int "Number of jobs to run simultaneously (0 for auto)" # default "0" diff --git a/Makefile b/Makefile index b396b0bcba..2eb3301773 100644 --- a/Makefile +++ b/Makefile @@ -317,6 +317,8 @@ CXXINCLUDES := CXXINCLUDES-y := GOCFLAGS := GOCFLAGS-y := +RUSTCFLAGS := +RUSTCFLAGS-y := GOCINCLUDES := GOCINCLUDES-y := DBGFLAGS := @@ -513,6 +515,7 @@ ifeq ($(sub_make_exec), 1) ifeq ($(UK_HAVE_DOT_CONFIG),y) # Hide troublesome environment variables from sub processes unexport CONFIG_CROSS_COMPILE +unexport CONFIG_LLVM_TARGET_ARCH unexport CONFIG_COMPILER #unexport CC #unexport LD @@ -546,6 +549,10 @@ ifneq ("$(origin CROSS_COMPILE)","undefined") CONFIG_CROSS_COMPILE := $(CROSS_COMPILE:"%"=%) endif +ifneq ("$(origin LLVM_TARGET_ARCH)","undefined") +CONFIG_LLVM_TARGET_ARCH := $(LLVM_TARGET_ARCH:"%"=%) +endif + ifneq ("$(origin COMPILER)","undefined") CONFIG_COMPILER := $(COMPILER:"%"=%) else @@ -561,6 +568,13 @@ CC := $(CONFIG_CROSS_COMPILE)$(CONFIG_COMPILER) CPP := $(CC) CXX := $(CPP) GOC := $(CONFIG_CROSS_COMPILE)gccgo-7 +# We use rustc because the gcc frontend is experimental and missing features such +# as borrowing checking +ifneq ("$(origin LLVM_TARGET_ARCH)","undefined") +RUSTC := rustc --target=$(CONFIG_LLVM_TARGET_ARCH) +else +RUSTC := rustc +endif AS := $(CC) AR := $(CONFIG_CROSS_COMPILE)gcc-ar NM := $(CONFIG_CROSS_COMPILE)gcc-nm diff --git a/Makefile.uk b/Makefile.uk index b5520babee..8cf47242ce 100644 --- a/Makefile.uk +++ b/Makefile.uk @@ -7,6 +7,12 @@ COMPFLAGS += -nostdlib COMPFLAGS += -U __linux__ -U __FreeBSD__ -U __sun__ COMPFLAGS += -fno-omit-frame-pointer +# treat "attribute directive ignored" as error for FlexOS, this is indicative +# that a rewriting pass failed +COMPFLAGS += -Werror=attributes +# because of Coccinelle, a good number of __unused attributes have to be removed +# as a consequence we experience many spurious warnings. 
Disable them for now +COMPFLAGS += -Wno-unused-variable COMPFLAGS-$(call have_gcc) += -fno-tree-sra -fno-split-stack -nostdinc ifneq ($(HAVE_STACKPROTECTOR),y) @@ -21,6 +27,30 @@ CINCLUDES += -I$(CONFIG_UK_BASE)/include CXXINCLUDES += -I$(CONFIG_UK_BASE)/include GOCINCLUDES += -I$(CONFIG_UK_BASE)/include +RUSTCFLAGS-y += --emit=obj --crate-type=rlib --edition=2018 \ + -Cpanic=abort -Cembed-bitcode=n \ + -Zbinary_dep_depinfo=y -Zsymbol-mangling-version=v0 \ + -Cforce-unwind-tables=n -Ccodegen-units=1 \ + -Dunsafe_op_in_unsafe_fn -Drust_2018_idioms + + +RUSTCFLAGS-$(CONFIG_OPTIMIZE_NONE) += -Copt-level="0" +RUSTCFLAGS-$(CONFIG_OPTIMIZE_SIZE) += -Copt-level="s" +RUSTCFLAGS-$(CONFIG_OPTIMIZE_PERF) += -Copt-level="2" + +RUSTCFLAGS-$(CONFIG_DEBUG_SYMBOLS_LVL0) += -Cdebuginfo=0 +RUSTCFLAGS-$(CONFIG_DEBUG_SYMBOLS_LVL1) += -Cdebuginfo=1 +RUSTCFLAGS-$(CONFIG_DEBUG_SYMBOLS_LVL2) += -Cdebuginfo=2 +# NOTE: There is not level 3 in rustc +RUSTCFLAGS-$(CONFIG_DEBUG_SYMBOLS_LVL3) += -Cdebuginfo=2 + +# NOTE: rustc supports LTO only with clang +ifeq ($(call have_clang),y) +RUSTCFLAGS-$(CONFIG_OPTIMIZE_LTO) += -Clinker-plugin-lto +else +RUSTCFLAGS-y += -Cembed-bitcode=n -Clto=n +endif + LIBLDFLAGS += -nostdlib -Wl,-r -Wl,-d -Wl,--build-id=none -no-pie LIBLDFLAGS-$(call have_gcc) += -nostdinc diff --git a/README.md b/README.md index 4031c5579c..a663ae8840 100644 --- a/README.md +++ b/README.md @@ -1,116 +1,422 @@ -# [![Unikraft](http://unikraft.org/assets/imgs/unikraft-logo-small.png)][unikraft-website] +# FlexOS: Making OS Isolation Flexible + +Warning: this README is outdated. We will provide up-to-date documentation +as part of our ASPLOS'22 AE submission. + +## Installing from the Docker container + +Clone this repository and build the Docker container: + +``` +$ git clone git@github.com:project-flexos/unikraft.git +$ pushd unikraft/docker +$ docker build -f flexos.dockerfile --tag flexos-dev . +$ popd +``` + +Run the container as following: + +``` +$ docker run --privileged -ti flexos-dev bash +``` + +## Installing from source + +Should work on a recent enough Debian-based distro. + +Install a recent-enough Coccinelle: + +``` +$ git clone https://github.com/coccinelle/coccinelle +$ apt-get build-dep coccinelle +$ pushd coccinelle +$ git checkout 99ec612852a88fae85dfad863daafccf55b874ce +$ ./autogen +$ ./configure +$ make +$ sudo make install +$ popd +$ # workaround a Coccinelle bug, if this fails don't worry... +$ mkdir /usr/local/bin/lib +$ ln -s /usr/local/lib/coccinelle /usr/local/bin/lib/coccinelle +``` + +Install `kraft` for FlexOS: + +``` +$ git clone git@github.com:project-flexos/kraft.git +$ pushd kraft +$ pip3 install -e . 
+$ popd +``` + +Configure `kraft` for FlexOS: + +``` +$ cat > ~/.kraftrc<< EOF +[fetch] +prioritise_origin = true +mirrors = [ + "https://releases.unikraft.org/mirrors", +] + +[configure] +platform = "kvm" +architecture = "x86_64" + +[list] +origins = [ + "git@github.com:project-flexos/lib-newlib.git", + "git@github.com:project-flexos/lib-tlsf.git", + "git@github.com:project-flexos/app-flexos-example.git", + "git@github.com:project-flexos/app-flexos-microbenchmarks.git", + "git@github.com:project-flexos/lib-flexos-example.git", + "git@github.com:project-flexos/lib-flexos-microbenchmarks.git", + "git@github.com:project-flexos/lib-lwip.git", + "git@github.com:project-flexos/lib-nginx.git", + "git@github.com:project-flexos/app-nginx.git", + "git@github.com:project-flexos/lib-redis.git", + "git@github.com:project-flexos/app-redis.git", + "git@github.com:project-flexos/lib-iperf.git", + "git@github.com:project-flexos/app-iperf.git", + "git@github.com:project-flexos/unikraft.git", + "git@github.com:project-flexos/lib-pthread-embedded.git" +] +EOF +``` + +Setup the FlexOS build environment: + +``` +$ kraft list update +$ kraft -v list pull flexos-microbenchmarks@staging iperf@staging \ + newlib@staging tlsf@staging flexos-example@staging \ + lwip@staging redis@staging unikraft@staging \ + pthread-embedded@staging nginx@staging +$ cd ~/.unikraft/ && ls +apps archs libs plats unikraft +``` + +## Building FlexOS + +Applies to both Docker & installation from source. + +Configure Redis with two compartments: + +``` +$ cd ~/.unikraft/apps/redis +$ cat ./kraft.yaml +--- +specification: '0.6' +name: redis +unikraft: + version: staging + kconfig: + - CONFIG_LIBUK9P=y + - CONFIG_LIB9PFS=y + - CONFIG_LIBDEVFS=y + - CONFIG_LIBDEVFS_AUTOMOUNT=y + - CONFIG_LIBVFSCORE_AUTOMOUNT_ROOTFS=y + - CONFIG_LIBVFSCORE_ROOTFS_RAMFS=y + - CONFIG_LIBUKLIBPARAM=y + - CONFIG_LIBUKDEBUG=y + - CONFIG_LIBUKALLOC=y + - CONFIG_LIBUKSCHED=y + - CONFIG_LIBPOSIX_SYSINFO=y + - CONFIG_LIBPOSIX_LIBDL=y + - CONFIG_LIBFLEXOS=y +targets: + - architecture: x86_64 + platform: kvm +compartments: + - name: comp1 + mechanism: + driver: intel-pku + noisolstack: false + default: true + - name: comp2 + mechanism: + driver: intel-pku + noisolstack: false +libraries: + tlsf: + version: staging + kconfig: + - CONFIG_LIBTLSF=y + pthread-embedded: + version: staging + compartment: comp1 + newlib: + version: staging + kconfig: + - CONFIG_LIBNEWLIBC=y + - CONFIG_LIBNEWLIBC_WANT_IO_C99_FORMATS=y + - CONFIG_LIBNEWLIBC_LINUX_ERRNO_EXTENSIONS=y + compartment: comp1 + lwip: + version: staging + kconfig: + - CONFIG_LWIP_IPV6=y + compartment: comp2 + redis: + version: staging + kconfig: + - CONFIG_LIBREDIS_SERVER=y + - CONFIG_LIBREDIS_COMMON=y + - CONFIG_LIBREDIS_LIBREDIS_LUA=y + - CONFIG_LIBREDIS_SERVER_MAIN_FUNCTION=y + compartment: comp1 +volumes: {} +networks: {} +$ kraft configure +``` + +Now we have a fully set up system. We only have to build and run. The following commands are what you would run as part of your development workflow. 
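+
+Before building, it can be worth sanity-checking that the compartmentalization
+options made it into the generated configuration. A minimal check, assuming
+`kraft configure` generated the usual Unikraft `.config` in the application
+directory (the exact symbols depend on your `kraft.yaml` and the chosen
+isolation driver, but you should at least see `CONFIG_LIBFLEXOS=y`):
+
+```
+$ grep FLEXOS .config
+CONFIG_LIBFLEXOS=y
+CONFIG_LIBFLEXOS_INTELPKU=y
+```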
+ +Build Redis with two compartments: + +``` +$ make prepare && kraft -v build --compartmentalize '-j' +``` + +Run the freshly built image: + +``` +$ kraft run --initrd ./redis.cpio -M 1024 "" +``` + +## FlexOS `kraft.yaml` primer + +### Declaring Compartments + +Declare compartments in `compartments`: + +``` +compartments: + - name: comp1 + mechanism: + driver: intel-pku + noisolstack: false + default: true + - name: comp2 + mechanism: + driver: intel-pku + noisolstack: false +``` + +Each compartment has a `name` and a `mechanism`. + +Each mechanism has a driver (for Intel MPK/PKU it's `intel-pku`, for VM-based `vmept`, for simple function calls `fcalls`), and possibly a number of driver-specific options. Intel MPK/PKU, for instance, can isolate or share the stack (`noisolstack`). + +There is one `default` compartment. All libraries that have not been assigned a specific compartment will go into the default compartment. + +:warning: If you declare a compartment, make sure to actually use it! That is, it should either be default or used by a library (see below). + +### Assigned compartments to libraries + +Libraries can be assigned a compartment using `compartment`: + +``` +libraries: +[...] + lwip: + version: staging + kconfig: + - CONFIG_LWIP_IPV6=y + compartment: comp2 +``` + +The value has to match the name of a previously declared compartment. + +Note that internal libraries (the ones whose source live under `unikraft/`) require an additional `is_core: true` parameter: + +``` +libraries: +[...] + uksched: + is_core: true + compartment: comp2 + ukschedcoop: + is_core: true + compartment: comp2 +``` + +:warning: always put uksched and ukschedcoop together. + +## Development Workflow + +`kraft` rewrites the source code of microlibraries **in-place** to implement isolation primitives. If you change the isolation profile of the image (by editing `kraft.yaml`), make sure to thoroughly cleanup your setup. + +Here is a script that does it: + +``` +#!/bin/bash + +git checkout . +make properclean +# git checkout . && git clean -xdf in all repositories would be fine too +rm -rf ~/.kraftcache ~/.unikraft/libs ~/.unikraft/unikraft +kraft list update +kraft list pull flexos-microbenchmarks@staging iperf@staging newlib@staging \ + tlsf@staging flexos-example@staging lwip@staging redis@staging \ + unikraft@staging pthread-embedded@staging nginx@staging +``` -[![](https://img.shields.io/badge/version-v0.4%20(Rhea)-%23EC591A)][unikraft-latest] [![](https://img.shields.io/static/v1?label=license&message=BSD-3&color=%23385177)][unikraft-license] +I recommend putting this in `/usr/local/bin/kraftcleanup`. -***Unleash the Power of Unikernels!*** +The usual workflow is then: -![](http://unikraft.org/assets/imgs/monkey-business.gif) +``` +$ kraftcleanup # clean +$ make prepare && kraft -v build --compartmentalize '-j' # compile +$ kraft run [...] # run +``` -How Unikraft works +If you didn't change `kraft.yaml`, a simple `make -j` should do the trick. -Unikraft is an automated system for building specialized POSIX-compliant OSes known as [unikernels][unikernel-wikipedia]; these images are tailored to the needs of specific applications. Unikraft is based around the concept of small, modular libraries, each providing a part of the functionality commonly found in an operating system (e.g., memory allocation, scheduling, filesystem support, network stack, etc.). 
+:warning: Once again, if you do not run `kraftcleanup`, running `kraft build --compartmentalize` **will not** rewrite your code again because the rewriting is done in-place! -Unikraft supports multiple target platforms (e.g., Xen, KVM, and Linux userspace) so that it is possible to build multiple images, one for each platform, for a single application *without* requiring the application developer to do any additional, platform-specific work. In all, Unikraft is able to build specialized OSes and unikernels targeted at specific applications without requiring the time-consuming, expert work that is required today to build such images. +:warning: `kraftcleanup` will **erase any modification done in your build repositories**! Make sure to have separate repositories that you use for the actual development otherwise you might **loose data**! -## Typical Use Cases +## Backend-specific instructions -Unikraft is a new system for ultra-light virtualization of your services in the cloud or at the edge, as well as extremely efficient software stacks to run bare metal on embedded devices. Smaller, quicker, and way more efficient than conventional systems: +### Intel MPK/PKU -⚑ **Cold boot virtual machines in a fraction of a second** - While Linux-based systems might take tens of seconds to boot, Unikraft will be up in a blink. +:warning: You can build on any machine, but **running requires a CPU that supports MPK**! -πŸ“ˆ **Deploy significantly more instances per physical machine** - Don’t waste CPU cycles on unneeded functionality – focus on your users' needs. - -πŸ“‰ **Drastic reductions in memory consumption** - With all your applications and data strongly separated into ultra light-weight virtual machines, scaling becomes a breeze. - -πŸ›‘οΈ **Ready for mission critical deployments** - Focus your trust on a minimal set of required components, significantly reduce your service's attack surface, and minimize certification costs. - -🏎 **Outstanding performance** - Specializing the OS to meet your application's needs is the key to achieving superior performance, making you ready to drive your infrastructure to the peak. - -## Supported Architectures and Platforms - -Unikraft supports the construction of multiple architectures, platforms, and images. The following tables give an overview of the current support. 
- -### πŸ’‘ Architecture Support - -| Architecture | Status | -|----------------------|------------------------------------------------| -| x86 | [`x86_64`][arch-x86_64] | -| Arm | [`armv7`][arch-arm], [`aarch64`][arch-arm64] | -| RISC-V | βš™οΈ [Issue #60][i60] | - -### πŸ’» Platform Support - -| Platform | `x86_64` | `arm32` | `arm64` | -|--------------------------------|:-------------------:|:-------------------:|:-------------------:| -| [Linux Userspace][plat-linuxu] | βœ… | βœ… | βœ… | -| [Linux KVM][plat-kvm] | βœ… | ❌ | βœ… | -| [Xen Hypervisor][plat-xen] | βœ… | βš™οΈ [Issue #34][i34] | βš™οΈ [Issue #62][i62] | -| [Solo5][plat-solo5] | βœ… | ❌ | βš™οΈ [Issue #63][i63] | -| VMWare | βš™οΈ [Issue #3][i3] | - | - | -| Hyper-V | βš™οΈ [Issue #61][i61] | - | - | - - -### ☁️ IaaS Providers - -| Cloud Provider | Images | -|-------------------------|:-------------------------------------------------| -| Amazon Web Services | [AMI][plat-aws], [Firecracker][plat-firecracker] | -| Google Compute Platform | [GCP Image][plat-gcp] | -| Digital Ocean | [Droplet][plat-digitalocean] | - -## Getting Started - -The fastest way to get started configuring, building and deploying Unikraft unikernels is to use our companion tool, [**kraft**][kraft]. - -With kraft installed, you can download Unikraft components, configure your unikernel to your needs, build it and run it -- there's no need to be an expert! - -## Contributing - -Contributions are welcome! Please see our [Contributing Guide][unikraft-contributing] for more details. A good starting point is the list of [open projects][github-projects]. If one of these interests you or you are interested in finding out more information, please drop us a line via the [mailing list][mailing-list] or directly at . - -## Further Resources -For more information about Unikraft, including user and developer guides, please refer to the [`docs/guides`][unikraft-guides] directory or vist the our [documentation][unikraft-docs]. - -## License - -Unikraft is licensed under a BSD-3-Clause. For more information, please refer to [`COPYING.md`][unikraft-license]. 
- - -[unikraft-website]: http://unikraft.org -[unikraft-docs]: http://docs.unikraft.org -[unikraft-guides]: https://github.com/unikraft/unikraft/tree/staging/docs/guides -[unikraft-contributing]: https://github.com/unikraft/unikraft/blob/staging/CONTRIBUTING.md -[unikraft-ci]: http://ci.unikraft.org -[unikraft-license]: https://github.com/unikraft/unikraft/blob/staging/COPYING.md -[unikraft-latest]: https://github.com/unikraft/unikraft/tree/RELEASE-0.4 -[unikraft-gettingstarted]: http://www.unikraft.org/getting-started -[kraft]: https://github.com/unikraft/kraft/ -[github-issues]: https://github.com/unikraft/unikraft/issues -[github-projects]: https://github.com/unikraft/unikraft/labels/kind/project -[dockerhub-kraft]: https://hub.docker.com/r/unikraft/kraft -[mailing-list]: https://lists.xenproject.org/cgi-bin/mailman/listinfo/minios-devel -[unikernel-wikipedia]: https://en.wikipedia.org/wiki/Unikernel -[arch-x86_64]: https://github.com/unikraft/unikraft/tree/staging/arch/x86/x86_64 -[arch-arm]: https://github.com/unikraft/unikraft/tree/staging/arch/arm/arm -[arch-arm64]: https://github.com/unikraft/unikraft/tree/staging/arch/arm/arm64 -[plat-linuxu]: https://github.com/unikraft/unikraft/tree/staging/plat/linuxu -[plat-kvm]: https://github.com/unikraft/unikraft/tree/staging/plat/kvm -[plat-xen]: https://github.com/unikraft/unikraft/tree/staging/plat/xen -[plat-solo5]: https://github.com/unikraft/plat-solo5 -[plat-raspi]: https://github.com/unikraft/plat-raspi -[plat-gcp]: https://github.com/unikraft/plat-gcp -[plat-aws]: https://github.com/unikraft/plat-aws -[plat-digitalocean]: https://github.com/unikraft/plat-digitalocean -[plat-firecracker]: https://github.com/unikraft/plat-firecracker -[i3]: https://github.com/unikraft/unikraft/issues/3 -[i34]: https://github.com/unikraft/unikraft/issues/34 -[i60]: https://github.com/unikraft/unikraft/issues/60 -[i61]: https://github.com/unikraft/unikraft/issues/61 -[i62]: https://github.com/unikraft/unikraft/issues/62 -[i63]: https://github.com/unikraft/unikraft/issues/63 +Generally, if you don't have a Xeon Bronze/Silver/Gold/Platinum, you don't have MPK. You can try running the image, if your CPU doesn't support MPK, FlexOS will abort with an appropriate error message. +### VM/EPT + +For the shared memory on KVM, we use a simple shared memory device in QEMU. To add this new device to QEMU, it is necessary to compile it from source code and add a patch. More information on how to build QEMU for Linux can be found [here](https://wiki.qemu.org/Hosts/Linux). +Steps for building QEMU for x86_64 architecture, with support for the shared memory device: + +``` +$ git clone https://github.com/qemu/qemu.git +$ cd qemu +$ git apply 0001-Myshmem.patch +$ ./configure --target-list=x86_64-softmmu +$ vim build/build.ninja # add -lrt to LINK_ARGS for the target qemu-system-x86_64 +$ make -j8 +$ build/qemu-system-x86_64 -device myshmem,file=/test,size=0x1000,paddr=0x10000000 # example run to check if the device works +``` + +The above built QEMU binary should be used to run all Unikraft images that use the VM/EPT backend in FlexOS. You can find 0001-Myshmem.patch in this repository under `flexos-support`. + +Additional remark regarding the `LINK_ARGS` edit. The relevant line is the one that begins with `LINK_ARGS = `, following the the line that begins with `build qemu-system-x86_64`. + +To build the simple [example application](https://github.com/project-flexos/app-flexos-example), it has to be cloned twice. 
The two applications, in the `apps/` folder, could have, for example, the names `app-flexos-example` and `rpc-flexos-example`. + +Currently, to build any VM/EPT image, the lwip library has to be included in the build (there are wrappers written for functions from lwip, which yield undefined symbols otherwise). TODO: add compiler guards to wrappers to mitigate this issue. To include the lwip library to the build, add `$(UK_LIBS)/lwip` to the `Makefile` and adding `lwip` to the `kraft.yaml`, putting it in `comp1` (make sure to do this in both folders - app and rpc). Also, all internal libraries that have wrapper functions defined (for example, `ukswrand`) also have to be included in the build, in `make menuconfig`. + +Steps to build the VM/EPT FlexOS (beginning from `~/.unikraft/apps`): +``` +$ cd app-flexos-example +$ make menuconfig # select VM/EPT in Library Configuration -> flexos -> FlexOS backend and also the KVM platform in Platform Configuration +$ make fetch +$ kraft -v build --compartmentalize +$ cd ../rpc-flexos-example +$ make fetch +$ make menuconfig # select VM/EPT in Library Configuration -> flexos -> FlexOS backend + # AND select 'Build a library compartment (not main app)' +$ kraft -v build --compartmentalize +``` + +To run the application, the RPC server has to be run first and then the application (first you run `rpc-flexos-example` and then `app-flexos-example`). +Here is an example command for running the built compartments: +``` +/qemu/build/qemu-system-x86_64 -enable-kvm -nographic -device isa-debug-exit -gdb tcp::1237 \ + -device myshmem,file=/rpc,paddr=0x800000000,size=0x100000 \ + -device myshmem,file=/heap,paddr=0x4000000000,size=0x8000000 \ + -device myshmem,file=/data_shared,paddr=0x105000,size= \ + -kernel $KERNEL \ + -m 2G \ +``` + +Replace the path to QEMU with the path on your system. To find out the size of the data_shared section, run the following command on the built compartments: +``` +$ readelf -SW ~/.unikraft/apps/app-flexos-example/build/app-flexos-example_kvm-x86_64.dbg +[...] +Section Headers: + [Nr] Name Type Address Off Size ES Flg Lk Inf Al + [ 0] NULL 0000000000000000 000000 000000 00 0 0 0 + [ 1] .multiboot_header PROGBITS 0000000000100000 001000 005000 00 WA 0 0 4096 + [ 2] .data_shared PROGBITS 0000000000105000 006000 003000 00 WA 0 0 32 + [ 3] .text PROGBITS 0000000000108000 009000 0e7604 00 WAX 0 0 1 +[...] +``` + +### Function Calls (Debugging Backend) + +The `fcalls` debugging backend will replace all gates with simple function calls. You can build it on any machine and run it on any machine. + +## Porting Tips & Tricks + +### Gate insertion with MPK (and other intra-AS isolation technologies) + +The first part of porting a library to run as isolated component is to insert gates. + +It can be hard to ensure that all function calls to the component's API are wrapped with gates if the component doesn't have much internal data; without internal data accesses, technologies such as MPK might not trigger a crash if a gate is missing. + +In order to ensure that *all* function calls to the components API trigger a crash if a gate is missing, we can leverage GCC's instrumentation `-finstrument-functions`. With this instrumentation, we can make sure that each function of the target library writes to a library local variable before executing. If a gate is missing, it will crash. 
+ +For this you simply have to add the compiler flag to the relevant library, e.g., for uktime: + +``` +diff --git a/lib/uktime/Makefile.uk b/lib/uktime/Makefile.uk +index 8e05ed7..ca1f085 100644 +--- a/lib/uktime/Makefile.uk ++++ b/lib/uktime/Makefile.uk +@@ -12,6 +12,7 @@ LIBUKTIME_SRCS-y += $(LIBUKTIME_BASE)/musl-imported/src/__tm_to_secs.c + LIBUKTIME_SRCS-y += $(LIBUKTIME_BASE)/musl-imported/src/__year_to_secs.c + LIBUKTIME_SRCS-y += $(LIBUKTIME_BASE)/time.c + LIBUKTIME_SRCS-y += $(LIBUKTIME_BASE)/timer.c ++LIBUKTIME_CFLAGS-y += -finstrument-functions -finstrument-functions-exclude-function-list=__cyg_profile_func_enter,__cyg_profile_func_exit + + UK_PROVIDED_SYSCALLS-$(CONFIG_LIBUKTIME) += nanosleep-2 + UK_PROVIDED_SYSCALLS-$(CONFIG_LIBUKTIME) += clock_gettime-2 +``` + +Then define the two instrumentation functions: + +``` +diff --git a/lib/uktime/musl-imported/include/time.h b/lib/uktime/musl-imported/include/time.h +index 7cfcdba..18125a0 100644 +--- a/lib/uktime/musl-imported/include/time.h ++++ b/lib/uktime/musl-imported/include/time.h +@@ -7,6 +7,11 @@ extern "C" { + + #include + ++void __cyg_profile_func_enter (void *this_fn, ++ void *call_site); ++void __cyg_profile_func_exit (void *this_fn, ++ void *call_site); ++ + #define __NEED_size_t + #define __NEED_time_t + #define __NEED_clock_t +diff --git a/lib/uktime/time.c b/lib/uktime/time.c +index 9290635..3917f39 100644 +--- a/lib/uktime/time.c ++++ b/lib/uktime/time.c +@@ -51,6 +51,19 @@ + #endif + #include + ++volatile int uktime_local; ++ ++void __cyg_profile_func_enter (void *this_fn, ++ void *call_site) ++{ ++ uktime_local = 0; ++} ++void __cyg_profile_func_exit (void *this_fn, ++ void *call_site) ++{ ++ uktime_local = 1; ++} ++ +``` + +Then compile as usual. The image will crash if gates are missing. diff --git a/README.unikraft.md b/README.unikraft.md new file mode 100644 index 0000000000..4031c5579c --- /dev/null +++ b/README.unikraft.md @@ -0,0 +1,116 @@ +# [![Unikraft](http://unikraft.org/assets/imgs/unikraft-logo-small.png)][unikraft-website] + +[![](https://img.shields.io/badge/version-v0.4%20(Rhea)-%23EC591A)][unikraft-latest] [![](https://img.shields.io/static/v1?label=license&message=BSD-3&color=%23385177)][unikraft-license] + +***Unleash the Power of Unikernels!*** + +![](http://unikraft.org/assets/imgs/monkey-business.gif) + +How Unikraft works + +Unikraft is an automated system for building specialized POSIX-compliant OSes known as [unikernels][unikernel-wikipedia]; these images are tailored to the needs of specific applications. Unikraft is based around the concept of small, modular libraries, each providing a part of the functionality commonly found in an operating system (e.g., memory allocation, scheduling, filesystem support, network stack, etc.). + +Unikraft supports multiple target platforms (e.g., Xen, KVM, and Linux userspace) so that it is possible to build multiple images, one for each platform, for a single application *without* requiring the application developer to do any additional, platform-specific work. In all, Unikraft is able to build specialized OSes and unikernels targeted at specific applications without requiring the time-consuming, expert work that is required today to build such images. + +## Typical Use Cases + +Unikraft is a new system for ultra-light virtualization of your services in the cloud or at the edge, as well as extremely efficient software stacks to run bare metal on embedded devices. 
Smaller, quicker, and way more efficient than conventional systems: + +⚑ **Cold boot virtual machines in a fraction of a second** + While Linux-based systems might take tens of seconds to boot, Unikraft will be up in a blink. + +πŸ“ˆ **Deploy significantly more instances per physical machine** + Don’t waste CPU cycles on unneeded functionality – focus on your users' needs. + +πŸ“‰ **Drastic reductions in memory consumption** + With all your applications and data strongly separated into ultra light-weight virtual machines, scaling becomes a breeze. + +πŸ›‘οΈ **Ready for mission critical deployments** + Focus your trust on a minimal set of required components, significantly reduce your service's attack surface, and minimize certification costs. + +🏎 **Outstanding performance** + Specializing the OS to meet your application's needs is the key to achieving superior performance, making you ready to drive your infrastructure to the peak. + +## Supported Architectures and Platforms + +Unikraft supports the construction of multiple architectures, platforms, and images. The following tables give an overview of the current support. + +### πŸ’‘ Architecture Support + +| Architecture | Status | +|----------------------|------------------------------------------------| +| x86 | [`x86_64`][arch-x86_64] | +| Arm | [`armv7`][arch-arm], [`aarch64`][arch-arm64] | +| RISC-V | βš™οΈ [Issue #60][i60] | + +### πŸ’» Platform Support + +| Platform | `x86_64` | `arm32` | `arm64` | +|--------------------------------|:-------------------:|:-------------------:|:-------------------:| +| [Linux Userspace][plat-linuxu] | βœ… | βœ… | βœ… | +| [Linux KVM][plat-kvm] | βœ… | ❌ | βœ… | +| [Xen Hypervisor][plat-xen] | βœ… | βš™οΈ [Issue #34][i34] | βš™οΈ [Issue #62][i62] | +| [Solo5][plat-solo5] | βœ… | ❌ | βš™οΈ [Issue #63][i63] | +| VMWare | βš™οΈ [Issue #3][i3] | - | - | +| Hyper-V | βš™οΈ [Issue #61][i61] | - | - | + + +### ☁️ IaaS Providers + +| Cloud Provider | Images | +|-------------------------|:-------------------------------------------------| +| Amazon Web Services | [AMI][plat-aws], [Firecracker][plat-firecracker] | +| Google Compute Platform | [GCP Image][plat-gcp] | +| Digital Ocean | [Droplet][plat-digitalocean] | + +## Getting Started + +The fastest way to get started configuring, building and deploying Unikraft unikernels is to use our companion tool, [**kraft**][kraft]. + +With kraft installed, you can download Unikraft components, configure your unikernel to your needs, build it and run it -- there's no need to be an expert! + +## Contributing + +Contributions are welcome! Please see our [Contributing Guide][unikraft-contributing] for more details. A good starting point is the list of [open projects][github-projects]. If one of these interests you or you are interested in finding out more information, please drop us a line via the [mailing list][mailing-list] or directly at . + +## Further Resources +For more information about Unikraft, including user and developer guides, please refer to the [`docs/guides`][unikraft-guides] directory or vist the our [documentation][unikraft-docs]. + +## License + +Unikraft is licensed under a BSD-3-Clause. For more information, please refer to [`COPYING.md`][unikraft-license]. 
+ + +[unikraft-website]: http://unikraft.org +[unikraft-docs]: http://docs.unikraft.org +[unikraft-guides]: https://github.com/unikraft/unikraft/tree/staging/docs/guides +[unikraft-contributing]: https://github.com/unikraft/unikraft/blob/staging/CONTRIBUTING.md +[unikraft-ci]: http://ci.unikraft.org +[unikraft-license]: https://github.com/unikraft/unikraft/blob/staging/COPYING.md +[unikraft-latest]: https://github.com/unikraft/unikraft/tree/RELEASE-0.4 +[unikraft-gettingstarted]: http://www.unikraft.org/getting-started +[kraft]: https://github.com/unikraft/kraft/ +[github-issues]: https://github.com/unikraft/unikraft/issues +[github-projects]: https://github.com/unikraft/unikraft/labels/kind/project +[dockerhub-kraft]: https://hub.docker.com/r/unikraft/kraft +[mailing-list]: https://lists.xenproject.org/cgi-bin/mailman/listinfo/minios-devel +[unikernel-wikipedia]: https://en.wikipedia.org/wiki/Unikernel +[arch-x86_64]: https://github.com/unikraft/unikraft/tree/staging/arch/x86/x86_64 +[arch-arm]: https://github.com/unikraft/unikraft/tree/staging/arch/arm/arm +[arch-arm64]: https://github.com/unikraft/unikraft/tree/staging/arch/arm/arm64 +[plat-linuxu]: https://github.com/unikraft/unikraft/tree/staging/plat/linuxu +[plat-kvm]: https://github.com/unikraft/unikraft/tree/staging/plat/kvm +[plat-xen]: https://github.com/unikraft/unikraft/tree/staging/plat/xen +[plat-solo5]: https://github.com/unikraft/plat-solo5 +[plat-raspi]: https://github.com/unikraft/plat-raspi +[plat-gcp]: https://github.com/unikraft/plat-gcp +[plat-aws]: https://github.com/unikraft/plat-aws +[plat-digitalocean]: https://github.com/unikraft/plat-digitalocean +[plat-firecracker]: https://github.com/unikraft/plat-firecracker +[i3]: https://github.com/unikraft/unikraft/issues/3 +[i34]: https://github.com/unikraft/unikraft/issues/34 +[i60]: https://github.com/unikraft/unikraft/issues/60 +[i61]: https://github.com/unikraft/unikraft/issues/61 +[i62]: https://github.com/unikraft/unikraft/issues/62 +[i63]: https://github.com/unikraft/unikraft/issues/63 + diff --git a/arch/x86/x86_64/include/uk/asm/mm.h b/arch/x86/x86_64/include/uk/asm/mm.h new file mode 100644 index 0000000000..f05c859262 --- /dev/null +++ b/arch/x86/x86_64/include/uk/asm/mm.h @@ -0,0 +1,194 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Authors: Stefan Teodorescu + * + * Copyright (c) 2021, University Politehnica of Bucharest. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Some of these macros here were inspired from Xen code. + * For example, from "xen/include/asm-x86/x86_64/page.h" file. + */ + +#ifndef __UKARCH_X86_64_MM__ +#define __UKARCH_X86_64_MM__ + +#include +#include +#include +#include "page.h" + +#define PAGETABLE_LEVELS 4 + +#define L1_PAGETABLE_SHIFT 12 +#define L2_PAGETABLE_SHIFT 21 +#define L3_PAGETABLE_SHIFT 30 +#define L4_PAGETABLE_SHIFT 39 + +#define L1_PAGETABLE_ENTRIES 512 +#define L2_PAGETABLE_ENTRIES 512 +#define L3_PAGETABLE_ENTRIES 512 +#define L4_PAGETABLE_ENTRIES 512 + +static unsigned long pagetable_entries[PAGETABLE_LEVELS] = { + L1_PAGETABLE_ENTRIES, + L2_PAGETABLE_ENTRIES, + L3_PAGETABLE_ENTRIES, + L4_PAGETABLE_ENTRIES, +}; + +static unsigned long pagetable_shifts[PAGETABLE_LEVELS] __used = { + L1_PAGETABLE_SHIFT, + L2_PAGETABLE_SHIFT, + L3_PAGETABLE_SHIFT, + L4_PAGETABLE_SHIFT, +}; + +#define L1_OFFSET(vaddr) \ + (((vaddr) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1)) +#define L2_OFFSET(vaddr) \ + (((vaddr) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1)) +#define L3_OFFSET(vaddr) \ + (((vaddr) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1)) +#define L4_OFFSET(vaddr) \ + (((vaddr) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1)) + +#define Lx_OFFSET(vaddr, lvl) \ + (((vaddr) >> pagetable_shifts[lvl - 1]) \ + & (pagetable_entries[lvl - 1] - 1)) + +#define L1_PROT (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_USER) +#define L1_PROT_RO (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER) +#define L2_PROT (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_USER) +#define L3_PROT (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_USER) +#define L4_PROT (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_USER) + +static unsigned long pagetable_protections[PAGETABLE_LEVELS] = { + L1_PROT, + L2_PROT, + L3_PROT, + L4_PROT, +}; + +/* This variable represents the offset between the virtual address of the page + * table memory area and the physical address of it. This offset changes at + * runtime between the booting phase and the running phase after that. + * + * While booting, the physical addresses and the virtual addresses are equal + * (either running with paging disabled or with a linear mapping), which means + * this variable has the value 0. + * + * After initializing the new set of page tables, these can be placed at any + * virtual address. The offset in this case is PAGETABLES_VIRT_OFFSET, defined + * in include/uk/mem_layout.h + * + * Functions of the page table API use this variable to be agnostic of whether + * they are used in the booting phase or afterwards. + * + * TODO: find if there is a better way to achieve this behavior + */ +extern unsigned long _virt_offset; + +/** + * Create a PTE (page table entry) that maps to a given physical address with + * given protections and for the given page table level. 
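+ *
+ * Illustrative example: a read/write, non-executable entry for a regular
+ * 4 KiB page (level 1) could be created with
+ * ukarch_pte_create(paddr, PAGE_PROT_READ | PAGE_PROT_WRITE, 1).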
+ * + * @param paddr: physical address where the PTE points to + * @param prot: protection flags (values defined in include/uk/plat/mm.h) + * (e.g. page readable, writeable, executable) + * @param level: the level of the page table where the PTE will be written to + * (if we create a PTE inside a level 2 page table, for example, it means + * that it points to a large page, and the large page flag is set + * accordingly) + * + * @return: PTE with flags set accordingly + */ +static inline unsigned long ukarch_pte_create(unsigned long paddr, + unsigned long prot, size_t level) +{ + unsigned long flags = 0; + + /* For level == 2 it is a large page and level == 3 huge page */ + if (level >= 2) + flags |= _PAGE_PSE; + + if (prot == PAGE_PROT_NONE) + flags |= _PAGE_ACCESSED | _PAGE_PROTNONE; + else + flags |= pagetable_protections[level - 1]; + + if (!(prot & PAGE_PROT_WRITE)) + flags &= ~_PAGE_RW; + + if (!(prot & PAGE_PROT_EXEC)) + flags |= _PAGE_NX; + +#if CONFIG_LIBFLEXOS_INTELPKU + /* set protection key */ + if ((prot & PAGE_PROT_PKEY0)) + flags |= _PAGE_PKEY0; + else + flags &= ~_PAGE_PKEY0; + if ((prot & PAGE_PROT_PKEY1)) + flags |= _PAGE_PKEY1; + else + flags &= ~_PAGE_PKEY1; + if ((prot & PAGE_PROT_PKEY2)) + flags |= _PAGE_PKEY2; + else + flags &= ~_PAGE_PKEY2; + if ((prot & PAGE_PROT_PKEY3)) + flags |= _PAGE_PKEY3; + else + flags &= ~_PAGE_PKEY3; +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ + + return paddr | flags; +} + +static inline int _ukarch_pte_write_raw(unsigned long pt, size_t offset, + unsigned long val, size_t level) +{ + UK_ASSERT(level >= 1 && level <= PAGETABLE_LEVELS); + UK_ASSERT(PAGE_ALIGNED(pt)); + UK_ASSERT(offset < pagetable_entries[level - 1]); + + *((unsigned long *) pt + offset) = val; + + return 0; +} + +static inline unsigned long ukarch_pte_read(unsigned long pt, size_t offset, + size_t level) +{ + UK_ASSERT(level >= 1 && level <= PAGETABLE_LEVELS); + UK_ASSERT(PAGE_ALIGNED(pt)); + UK_ASSERT(offset < pagetable_entries[level - 1]); + + return *((unsigned long *) pt + offset); +} + +#endif /* __UKARCH_X86_64_MM__ */ diff --git a/arch/x86/x86_64/include/uk/asm/mm_native.h b/arch/x86/x86_64/include/uk/asm/mm_native.h new file mode 100644 index 0000000000..e9400110f8 --- /dev/null +++ b/arch/x86/x86_64/include/uk/asm/mm_native.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Authors: Stefan Teodorescu + * + * Copyright (c) 2021, University Politehnica of Bucharest. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Some of these macros here were inspired from Xen code. + * For example, from "xen/include/asm-x86/x86_64/page.h" file. + */ + +#ifndef __UKARCH_X86_64_MM_NATIVE__ +#define __UKARCH_X86_64_MM_NATIVE__ + +#include "mm.h" +#include +#include +#include + +#include + +#define pt_pte_to_virt(pte) (PTE_REMOVE_FLAGS(pte) + _virt_offset) +#define pt_virt_to_mfn(vaddr) ((vaddr - _virt_offset) >> PAGE_SHIFT) +#define pfn_to_mfn(pfn) (pfn) + +#define pte_to_pfn(pte) (PTE_REMOVE_FLAGS(pte) >> PAGE_SHIFT) +#define pfn_to_mframe(pfn) (pfn << PAGE_SHIFT) +#define mframe_to_pframe(mframe) (mframe) + +static inline unsigned long ukarch_read_pt_base(void) +{ + unsigned long cr3; + + __asm__ __volatile__("movq %%cr3, %0" : "=r"(cr3)::); + + /* + * For consistency with Xen implementation, which returns a virtual + * address, this should return the same. + */ + return pt_pte_to_virt(cr3); +} + +static inline void ukarch_write_pt_base(unsigned long cr3) +{ + __asm__ __volatile__("movq %0, %%cr3" :: "r"(cr3) : ); +} + +static inline int ukarch_flush_tlb_entry(unsigned long vaddr) +{ + __asm__ __volatile__("invlpg (%0)" ::"r" (vaddr) : "memory"); + + return 0; +} + +static inline int ukarch_pte_write(unsigned long pt, size_t offset, + unsigned long val, size_t level) +{ + return _ukarch_pte_write_raw(pt, offset, val, level); +} + +#endif /* __UKARCH_X86_64_MM_NATIVE__ */ diff --git a/arch/x86/x86_64/include/uk/asm/mm_pv.h b/arch/x86/x86_64/include/uk/asm/mm_pv.h new file mode 100644 index 0000000000..2b735a2c32 --- /dev/null +++ b/arch/x86/x86_64/include/uk/asm/mm_pv.h @@ -0,0 +1,145 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Authors: Stefan Teodorescu + * + * Copyright (c) 2021, University Politehnica of Bucharest. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Some of these macros here were inspired from Xen code. + * For example, from "xen/include/asm-x86/x86_64/page.h" file. + */ + +#ifndef __UKARCH_X86_64_MM_PV__ +#define __UKARCH_X86_64_MM_PV__ + +#include "mm.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern unsigned long *phys_to_machine_mapping; + +#define pte_to_pfn(pte) (mfn_to_pfn(pte_to_mfn(pte))) +#define pfn_to_mframe(pfn) (pfn_to_mfn(pfn) << PAGE_SHIFT) +#define mframe_to_pframe(mframe) (mfn_to_pfn(mframe >> PAGE_SHIFT) << PAGE_SHIFT) +#define pt_pte_to_virt(pte) ((pte_to_pfn(pte) << PAGE_SHIFT) + _virt_offset) +#define pt_virt_to_mfn(vaddr) (pfn_to_mfn((vaddr - _virt_offset) >> PAGE_SHIFT)) + +static inline unsigned long ukarch_read_pt_base(void) +{ + return HYPERVISOR_start_info->pt_base; +} + +static inline void ukarch_write_pt_base(unsigned long cr3) +{ + mmuext_op_t uops[1]; + int rc; + + uops[0].cmd = MMUEXT_UNPIN_TABLE; + uops[0].arg1.mfn = pfn_to_mfn(ukarch_read_pt_base() >> PAGE_SHIFT); + rc = HYPERVISOR_mmuext_op(uops, 1, NULL, DOMID_SELF); + if (rc < 0) { + uk_pr_err("Could not unpin old PT base:" + "mmuext_op failed with rc=%d\n", rc); + return; + } + + uops[0].cmd = MMUEXT_PIN_L4_TABLE; + uops[0].arg1.mfn = pfn_to_mfn(cr3 >> PAGE_SHIFT); + rc = HYPERVISOR_mmuext_op(uops, 1, NULL, DOMID_SELF); + if (rc < 0) { + uk_pr_err("Could not pin new PT base:" + "mmuext_op failed with rc=%d\n", rc); + return; + } + + uops[0].cmd = MMUEXT_NEW_BASEPTR; + uops[0].arg1.mfn = pfn_to_mfn(cr3 >> PAGE_SHIFT); + rc = HYPERVISOR_mmuext_op(uops, 1, NULL, DOMID_SELF); + if (rc < 0) { + uk_pr_err("Could not set new PT base:" + "mmuext_op failed with rc=%d\n", rc); + return; + } + + HYPERVISOR_start_info->pt_base = PAGETABLES_AREA_START; + // pt_base = PAGETABLES_AREA_START; +} + +static inline int ukarch_flush_tlb_entry(unsigned long vaddr) +{ + /* + * XXX(optimization): use HYPERVISOR_update_va_mapping for L1 to update + * and flush at the same time. 
+ */ + mmuext_op_t uops[1]; + int rc; + + uops[0].cmd = MMUEXT_INVLPG_ALL; + uops[0].arg1.linear_addr = vaddr; + rc = HYPERVISOR_mmuext_op(uops, 1, NULL, DOMID_SELF); + if (rc < 0) { + uk_pr_err("Could not flush TLB entry for 0x%016lx:" + "mmuext_op failed with rc=%d\n", vaddr, rc); + return rc; + } + + return 0; +} + +static inline int ukarch_pte_write(unsigned long pt_vaddr, size_t offset, + unsigned long val, size_t level) +{ + mmu_update_t mmu_updates[1]; + int rc; + + UK_ASSERT(level >= 1 && level <= PAGETABLE_LEVELS); + UK_ASSERT(PAGE_ALIGNED(pt_vaddr)); + UK_ASSERT(offset < pagetable_entries[level - 1]); + + mmu_updates[0].ptr = (pt_virt_to_mfn(pt_vaddr) << PAGE_SHIFT) + + sizeof(unsigned long) * offset; + mmu_updates[0].val = val; + rc = HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF); + if (rc < 0) { + uk_pr_err("Could not write PTE: mmu_update failed with rc=%d\n", + rc); + return rc; + } + + return 0; +} + +#endif /* __UKARCH_X86_64_MM_PV__ */ diff --git a/arch/x86/x86_64/include/uk/asm/page.h b/arch/x86/x86_64/include/uk/asm/page.h new file mode 100644 index 0000000000..6080c598ac --- /dev/null +++ b/arch/x86/x86_64/include/uk/asm/page.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Authors: Stefan Teodorescu + * + * Copyright (c) 2020, University Politehnica of Bucharest. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * THIS HEADER MAY NOT BE EXTRACTED OR MODIFIED IN ANY WAY. + * + * Some of these macros here were inspired from Xen code. + * For example, from "xen/include/asm-x86/x86_64/page.h" file. 
+ */ +#include + +/* TODO: fix duplicate definitions of these macros */ +#define PAGE_SIZE 0x1000UL +#define PAGE_SHIFT 12 +#define PAGE_MASK (~(PAGE_SIZE - 1)) + +#define PAGE_LARGE_SIZE 0x200000UL +#define PAGE_LARGE_SHIFT 21 +#define PAGE_LARGE_MASK (~(PAGE_LARGE_SIZE - 1)) + +#define PADDR_BITS 44 +#define PADDR_MASK ((1UL << PADDR_BITS) - 1) + +#define _PAGE_PRESENT 0x001UL +#define _PAGE_RW 0x002UL +#define _PAGE_USER 0x004UL +#define _PAGE_PWT 0x008UL +#define _PAGE_PCD 0x010UL +#define _PAGE_ACCESSED 0x020UL +#define _PAGE_DIRTY 0x040UL +#define _PAGE_PAT 0x080UL +#define _PAGE_PSE 0x080UL +#define _PAGE_GLOBAL 0x100UL +#define _PAGE_NX (1UL << 63) +#define _PAGE_PROTNONE (1UL << 58) /* one of the user available bits */ + +#if CONFIG_LIBFLEXOS_INTELPKU +#define _PAGE_PKEY0 (1UL << 59) +#define _PAGE_PKEY1 (1UL << 60) +#define _PAGE_PKEY2 (1UL << 61) +#define _PAGE_PKEY3 (1UL << 62) +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ + +/* + * If the user maps the page with PROT_NONE, the _PAGE_PRESENT bit is not set, + * but PAGE_PRESENT must return true, so no other page is mapped on top. + */ +#define PAGE_PRESENT(vaddr) ((vaddr) & (_PAGE_PRESENT | _PAGE_PROTNONE)) +#define PAGE_LARGE(vaddr) ((vaddr) & _PAGE_PSE) +#define PAGE_HUGE(vaddr) ((vaddr) & _PAGE_PSE) + +#define L1_PROT (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_USER) +#define L1_PROT_RO (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER) +#define L2_PROT (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_USER) +#define L3_PROT (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_USER) +#define L4_PROT (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_USER) + +/* round down to nearest page address */ +#define PAGE_ALIGN_DOWN(vaddr) ALIGN_DOWN(vaddr, PAGE_SIZE) +#define PAGE_LARGE_ALIGN_DOWN(vaddr) ALIGN_DOWN(vaddr, PAGE_LARGE_SIZE) + +/* round up to nearest page address */ +#define PAGE_ALIGN_UP(vaddr) ALIGN_UP(vaddr, PAGE_SIZE) +#define PAGE_LARGE_ALIGN_UP(vaddr) ALIGN_UP(vaddr, PAGE_LARGE_SIZE) + +#define PAGE_ALIGNED(vaddr) (!((vaddr) & (PAGE_SIZE - 1))) +#define PAGE_LARGE_ALIGNED(vaddr) (!((vaddr) & (PAGE_LARGE_SIZE - 1))) + +#define PTE_REMOVE_FLAGS(pte) (((pte) & PADDR_MASK) & PAGE_MASK) + +/* Definitions for the API */ +#define PAGE_PROT_NONE 0x0 +#define PAGE_PROT_READ 0x1 +#define PAGE_PROT_WRITE 0x2 +#define PAGE_PROT_EXEC 0x4 + +#if CONFIG_LIBFLEXOS_INTELPKU +/* Memory Protection Keys, 4 bits, up to 16 keys */ +#define PAGE_PROT_PKEY0 0x10 /* pkey bit 0 */ +#define PAGE_PROT_PKEY1 0x20 /* pkey bit 1 */ +#define PAGE_PROT_PKEY2 0x40 /* pkey bit 2 */ +#define PAGE_PROT_PKEY3 0x80 /* pkey bit 3 */ +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ + +#define PAGE_FLAG_LARGE 0x1 + +#define PAGE_PADDR_ANY ((unsigned long) -1) + +#define PAGE_INVALID ((unsigned long) -1) +#define PAGE_NOT_MAPPED 0 diff --git a/docker/flexos.dockerfile b/docker/flexos.dockerfile new file mode 100644 index 0000000000..66d04b6455 --- /dev/null +++ b/docker/flexos.dockerfile @@ -0,0 +1,87 @@ +# You can easily build it with the following command: +# $ MY_KEY=$(cat ~/.ssh/id_rsa) +# $ docker build --build-arg SSH_KEY="$MY_KEY" -f flexos.dockerfile --tag flexos-dev . 
+# +# IMPORTANT NOTE: YOUR SSH KEY MUST NOT HAVE A PASSWORD +# if so, remove it with ssh-keygen -p +# +# Once built, remove all intermediate containers with: +# $ docker rmi -f $(docker images -q --filter label=stage=intermediate) +# +# and run with: +# $ docker run -ti -v $(dirname $SSH_AUTH_SOCK):$(dirname $SSH_AUTH_SOCK) -e SSH_AUTH_SOCK=$SSH_AUTH_SOCK flexos-dev bash + +# Choose and name our temporary image. +FROM debian:10 as intermediate +# Add metadata identifying these images as our build containers (this will be useful later!) +LABEL stage=intermediate + +# Take an SSH key as a build argument. +ARG SSH_KEY + +COPY kraftrc.default /root/.kraftrc + +RUN apt update +RUN apt install -y python3-pip git + +# 1. Create the SSH directory. +# 2. Populate the private key file. +# 3. Set the required permissions. +# 4. Add github to our list of known hosts for ssh. +RUN mkdir -p /root/.ssh/ && \ + echo "$SSH_KEY" > /root/.ssh/id_rsa && \ + chmod -R 600 /root/.ssh/ && \ + ssh-keyscan -t rsa github.com >> ~/.ssh/known_hosts + +WORKDIR /root + +RUN cat /root/.ssh/id_rsa +RUN git clone git@github.com:ukflexos/kraft.git + +WORKDIR /root/kraft + +RUN pip3 install -e . + +RUN kraft list update +RUN kraft -v list pull flexos-microbenchmarks@staging iperf@staging \ + newlib@staging tlsf@staging flexos-example@staging \ + lwip@staging redis@staging unikraft@staging \ + pthread-embedded@staging nginx@staging + +# Choose the base image for our final image +FROM debian:10 + +COPY kraftcleanup /usr/local/bin/kraftcleanup +COPY kraftrc.default /root/.kraftrc + +# Copy across the files from our `intermediate` container +COPY --from=intermediate /root/kraft /root/kraft +COPY --from=intermediate /root/.unikraft /root/.unikraft + +RUN echo "deb-src http://deb.debian.org/debian buster main contrib non-free" >> /etc/apt/sources.list +RUN echo "deb-src http://security.debian.org/ buster/updates main contrib non-free" >> /etc/apt/sources.list +RUN echo "deb-src http://deb.debian.org/debian/ buster-updates main contrib non-free" >> /etc/apt/sources.list +RUN apt update +RUN apt build-dep -y coccinelle +RUN apt install -y python3-pip git flex bison wget unzip + +WORKDIR /root/kraft + +RUN pip3 install -e . + +WORKDIR /root + +RUN git clone https://github.com/coccinelle/coccinelle + +WORKDIR /root/coccinelle + +RUN ./autogen +RUN ./configure +RUN make +RUN make install + +# fix a bug in Coccinelle +RUN mkdir /usr/local/bin/lib +RUN ln -s /usr/local/lib/coccinelle /usr/local/bin/lib/coccinelle + +WORKDIR /root/.unikraft diff --git a/docker/kraftcleanup b/docker/kraftcleanup new file mode 100755 index 0000000000..0f7b23b4d3 --- /dev/null +++ b/docker/kraftcleanup @@ -0,0 +1,9 @@ +#!/bin/bash + +make properclean +# git checkout . 
&& git clean -xdf in all repositories would be fine too +rm -rf /root/.kraftcache /root/.unikraft/libs /root/.unikraft/unikraft +kraft list update +kraft list pull flexos-microbenchmarks@staging iperf@staging newlib@staging \ + tlsf@staging flexos-example@staging lwip@staging redis@staging \ + unikraft@staging pthread-embedded@staging nginx@staging diff --git a/docker/kraftrc.default b/docker/kraftrc.default new file mode 100644 index 0000000000..eccb3c6da8 --- /dev/null +++ b/docker/kraftrc.default @@ -0,0 +1,28 @@ +[fetch] +prioritise_origin = true +mirrors = [ + "https://releases.unikraft.org/mirrors", +] + +[configure] +platform = "kvm" +architecture = "x86_64" + +[list] +origins = [ + "git@github.com:ukflexos/lib-newlib.git", + "git@github.com:ukflexos/lib-tlsf.git", + "git@github.com:ukflexos/app-flexos-example.git", + "git@github.com:ukflexos/app-flexos-microbenchmarks.git", + "git@github.com:ukflexos/lib-flexos-example.git", + "git@github.com:ukflexos/lib-flexos-microbenchmarks.git", + "git@github.com:ukflexos/lib-lwip.git", + "git@github.com:ukflexos/lib-nginx.git", + "git@github.com:ukflexos/app-nginx.git", + "git@github.com:ukflexos/lib-redis.git", + "git@github.com:ukflexos/app-redis.git", + "git@github.com:ukflexos/lib-iperf.git", + "git@github.com:ukflexos/app-iperf.git", + "git@github.com:ukflexos/unikraft.git", + "git@github.com:ukflexos/lib-pthread-embedded.git" +] diff --git a/flexos-support/0001-Myshmem.patch b/flexos-support/0001-Myshmem.patch new file mode 100644 index 0000000000..d773278953 --- /dev/null +++ b/flexos-support/0001-Myshmem.patch @@ -0,0 +1,146 @@ +From 3d52c49ba35dc9c673b8cf43b8623a6874a1ff43 Mon Sep 17 00:00:00 2001 +From: Stefan Teodorescu +Date: Wed, 3 Mar 2021 09:48:43 +0200 +Subject: [PATCH] Myshmem + +--- + hw/misc/meson.build | 1 + + hw/misc/myshmem.c | 88 +++++++++++++++++++++++++++++++++++++++++++++ + softmmu/runstate.c | 1 + + softmmu/vl.c | 1 + + 4 files changed, 91 insertions(+) + create mode 100644 hw/misc/myshmem.c + +diff --git a/hw/misc/meson.build b/hw/misc/meson.build +index 6292839..34dcaa1 100644 +--- a/hw/misc/meson.build ++++ b/hw/misc/meson.build +@@ -39,6 +39,7 @@ softmmu_ss.add(when: 'CONFIG_PUV3', if_true: files('puv3_pm.c')) + subdir('macio') + + softmmu_ss.add(when: 'CONFIG_IVSHMEM_DEVICE', if_true: files('ivshmem.c')) ++softmmu_ss.add(files('myshmem.c')) + + softmmu_ss.add(when: 'CONFIG_ALLWINNER_H3', if_true: files('allwinner-h3-ccu.c')) + specific_ss.add(when: 'CONFIG_ALLWINNER_H3', if_true: files('allwinner-cpucfg.c')) +diff --git a/hw/misc/myshmem.c b/hw/misc/myshmem.c +new file mode 100644 +index 0000000..b990205 +--- /dev/null ++++ b/hw/misc/myshmem.c +@@ -0,0 +1,88 @@ ++#include "qemu/osdep.h" ++#include "qemu/units.h" ++#include "qapi/error.h" ++#include "qemu/cutils.h" ++#include "hw/qdev-properties.h" ++#include "hw/qdev-properties-system.h" ++#include "sysemu/kvm.h" ++#include "migration/blocker.h" ++#include "migration/vmstate.h" ++#include "qemu/error-report.h" ++#include "qemu/event_notifier.h" ++#include "qemu/module.h" ++#include "qom/object_interfaces.h" ++#include "chardev/char-fe.h" ++#include "sysemu/hostmem.h" ++#include "sysemu/qtest.h" ++#include "qapi/visitor.h" ++#include "exec/address-spaces.h" ++ ++#include ++#include /* For mode constants */ ++#include /* For O_* constants */ ++ ++#include "qom/object.h" ++ ++#define TYPE_MY_SHMEM "myshmem" ++ ++struct MyShmem { ++ DeviceState parent_obj; ++ ++ char *file; ++ uint64_t size; ++ uint64_t paddr; ++ MemoryRegion region; ++}; ++ ++struct MyShmemClass { 
++ ++}; ++ ++typedef struct MyShmem MyShmem; ++typedef struct MyShmemClass MyShmemClass; ++ ++OBJECT_DEFINE_TYPE(MyShmem, ++ my_shmem, ++ MY_SHMEM, ++ DEVICE); ++ ++static Property myshmem_properties[] = { ++ DEFINE_PROP_STRING("file", MyShmem, file), ++ DEFINE_PROP_UINT64("size", MyShmem, size, 0), ++ DEFINE_PROP_UINT64("paddr", MyShmem, paddr, 0), ++ DEFINE_PROP_END_OF_LIST(), ++}; ++ ++static void my_shmem_realize(DeviceState *dev, Error **errp) ++{ ++ unsigned long shm_size; ++ int shm_fd; ++ void *shared_memory; ++ ++ MyShmem *myshmem = (MyShmem *) dev; ++ shm_size = myshmem->size; ++ shm_fd = shm_open(myshmem->file, O_CREAT | O_RDWR, 0666); ++ if (ftruncate(shm_fd, shm_size)) ++ printf("ftruncate failed\n"); ++ shared_memory = mmap(NULL, shm_size, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0); ++ ++ memory_region_init_ram_ptr(&myshmem->region, NULL, "vram", shm_size, shared_memory); ++ memory_region_add_subregion(get_system_memory(), myshmem->paddr, &myshmem->region); ++} ++ ++static void my_shmem_class_init(ObjectClass *oc, void *data) ++{ ++ DeviceClass *dc = DEVICE_CLASS(oc); ++ ++ device_class_set_props(dc, myshmem_properties); ++ dc->realize = my_shmem_realize; ++} ++ ++static void my_shmem_init(Object *obj) ++{ ++} ++ ++static void my_shmem_finalize(Object *obj) ++{ ++ printf("my shmem finalize called\n"); ++} +diff --git a/softmmu/runstate.c b/softmmu/runstate.c +index 2874417..3ab41a0 100644 +--- a/softmmu/runstate.c ++++ b/softmmu/runstate.c +@@ -713,6 +713,7 @@ static bool main_loop_should_exit(void) + return false; + } + ++extern void *shared_memory; + void qemu_main_loop(void) + { + #ifdef CONFIG_PROFILER +diff --git a/softmmu/vl.c b/softmmu/vl.c +index b219ce1..862aa9c 100644 +--- a/softmmu/vl.c ++++ b/softmmu/vl.c +@@ -3448,6 +3448,7 @@ void qemu_init(int argc, char **argv, char **envp) + case QEMU_OPTION_nouserconfig: + /* Nothing to be parsed here. Especially, do not error out below. */ + break; ++ + default: + if (os_parse_cmd_args(popt->index, optarg)) { + error_report("Option not supported in this build"); +-- +2.30.2 + diff --git a/include/uk/mem_layout.h b/include/uk/mem_layout.h new file mode 100644 index 0000000000..a42bdebc1a --- /dev/null +++ b/include/uk/mem_layout.h @@ -0,0 +1,154 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Authors: Stefan Teodorescu + * + * Copyright (c) 2021, University Politehnica of Bucharest. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __UK_MEM_LAYOUT__
+#define __UK_MEM_LAYOUT__
+
+#include
+#include
+
+/* These regions exist only for KVM and are mapped 1:1 */
+#ifdef CONFIG_PLAT_KVM
+/*
+ * The VGA buffer is always mapped at this physical address (0xb8000)
+ * (https://wiki.osdev.org/Printing_To_Screen) and we map it at the same
+ * virtual address for convenience.
+ */
+#define VGABUFFER_AREA_START 0xb8000
+#define VGABUFFER_AREA_END 0xc0000
+#define VGABUFFER_AREA_SIZE (VGABUFFER_AREA_END - VGABUFFER_AREA_START)
+
+/*
+ * This area is reserved by QEMU for the multiboot info struct
+ * (https://github.com/qemu/qemu/blob/master/hw/i386/multiboot.c#L44)
+ * See more about this struct in plat/kvm/include/kvm-x86/multiboot.h
+ */
+#define MBINFO_AREA_START 0x9000
+#define MBINFO_AREA_END 0xa000
+#define MBINFO_AREA_SIZE (MBINFO_AREA_END - MBINFO_AREA_START)
+#endif /* CONFIG_PLAT_KVM */
+
+#ifdef CONFIG_PARAVIRT
+#define SHAREDINFO_PAGE 0x1000
+#endif /* CONFIG_PARAVIRT */
+
+/*
+ * This is the area where the kernel binary is mapped, starting from 1MB in the
+ * virtual space.
+ * Here are the regions: Code + Data + BSS + Rodata etc.
+ *
+ * TODO: This has to be broken down further into the composing regions:
+ * Code - R-X
+ * Data - RW-
+ * Rodata - R--
+ * etc.
+ */
+#define KERNEL_AREA_START (1UL << 20) /* 1MB */
+#define KERNEL_AREA_END PAGE_LARGE_ALIGN_UP(__END)
+#define KERNEL_AREA_SIZE (KERNEL_AREA_END - KERNEL_AREA_START)
+
+/*
+ * The virtual memory area for storing the page tables. The size is hardcoded
+ * for now to 16MB. TODO: figure out a way to dynamically alloc this.
+ */
+#define BOOKKEEP_AREA_START (1UL << 32) /* 4GB */
+#define BOOKKEEP_AREA_END (BOOKKEEP_AREA_START + 0x1000000) /* 16MB */
+#define BOOKKEEP_AREA_SIZE (BOOKKEEP_AREA_END - BOOKKEEP_AREA_START)
+
+/*
+ * TODO: this number depends on how much memory we have.
+ * On Xen it can't be too high because not much memory is initially mapped.
+ */
+#define PAGETABLES_AREA_START (BOOKKEEP_AREA_START + 0x200000) /* 2MB */
+#define PAGETABLES_AREA_END BOOKKEEP_AREA_END
+#define PAGETABLES_AREA_SIZE (PAGETABLES_AREA_END - PAGETABLES_AREA_START)
+
+/*
+ * The virtual memory area where all stacks will be allocated from. It is now
+ * hardcoded for 256 stacks. These are not actually mapped from the beginning,
+ * they are allocated as they are needed. The number of available stacks
+ * directly gives the number of threads that can be created (one stack per
+ * thread). TODO: check if this number needs to be increased or dynamically
+ * calculated.
+ */
+#define STACK_COUNT 256
+#define STACK_AREA_END (1UL << 47) /* 128TB */
+#define STACK_AREA_START (STACK_AREA_END - STACK_COUNT * __STACK_SIZE)
+#define STACK_AREA_SIZE (STACK_COUNT * __STACK_SIZE)
+
+/*
+ * This is a general use area that is reserved to be used when creating
+ * mappings with the internal API, for example by drivers that need to create
+ * mappings with page granularity for IO, or for any other pages needed by the
+ * kernel.
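+ *
+ * For orientation, the areas defined in this header lay out roughly as
+ * follows (a sketch assuming CONFIG_LIBPOSIX_MMAP is enabled; the addresses
+ * follow directly from the macros defined above and below):
+ *
+ *	0x0000000000100000  KERNEL_AREA_START    (1MB)
+ *	0x0000000100000000  BOOKKEEP_AREA_START  (4GB, 16MB long)
+ *	0x0000000200000000  MAPPINGS_AREA_START  (8GB)
+ *	0x0000000400000000  MAPPINGS_AREA_END / HEAP_AREA_START  (16GB)
+ *	0x0000200000000000  HEAP_AREA_END / MMAP_AREA_START      (32TB)
+ *	0x0000800000000000  STACK_AREA_END       (128TB, stack area just below)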
+ */ +#define MAPPINGS_AREA_START (1UL << 33) /* 8GB */ +#define MAPPINGS_AREA_END (1UL << 34) /* 16GB */ +#define MAPPINGS_AREA_SIZE (MAPPINGS_AREA_END - MAPPINGS_AREA_START) + +/* + * Next are the heap and the mmap areas. + * + * The heap memory is the one managed by the memory allocator(s). + * + * If the POSIX mmap library is included, the chunk of virtual memory + * address space is divided between the heap and the mmap area, with 32TB for + * the heap and the rest ~96TB for mmap. When an mmap() call is done, the + * returned address is in this area (calls with MAP_FIXED outside of this area + * will fail). + * + * If POSIX mmap is not included, this chunk between the general use mappings + * area and the stack is reserved for the heap. + * + * Immediately after these areas is the heap, at the end of the virtual + * address space. + */ +#define HEAP_AREA_START MAPPINGS_AREA_END +#ifdef CONFIG_LIBPOSIX_MMAP +#define HEAP_AREA_END (1UL << 45) /* 32TB */ +#define HEAP_AREA_SIZE (HEAP_AREA_END - HEAP_AREA_START) + +#define MMAP_AREA_START HEAP_AREA_END +#define MMAP_AREA_END STACK_AREA_START +#define MMAP_AREA_SIZE (MMAP_AREA_END - MMAP_AREA_START) + +#else /* CONFIG_LIBPOSIX_MMAP */ +/* When we don't use mmap, heap is the rest of the memory */ +#define HEAP_AREA_END STACK_AREA_START +#define HEAP_AREA_SIZE (HEAP_AREA_END - HEAP_AREA_START) + +#define MMAP_AREA_START 0x0 +#define MMAP_AREA_END 0x0 +#define MMAP_AREA_SIZE 0x0 +#endif /* CONFIG_LIBPOSIX_MMAP */ + +#endif /* __UK_MEM_LAYOUT__ */ + diff --git a/include/uk/plat/bootstrap.h b/include/uk/plat/bootstrap.h index 6af18ca0e9..cd07e9d0ed 100644 --- a/include/uk/plat/bootstrap.h +++ b/include/uk/plat/bootstrap.h @@ -50,7 +50,8 @@ extern "C" { * @param argc Number of arguments * @param args Array to '\0'-terminated arguments */ -void ukplat_entry(int argc, char *argv[]) __noreturn; +void ukplat_entry(int argc, char *argv[]) __noreturn +__attribute__((section(".text_comp_exclusive"))); /** * Called by platform library during initialization diff --git a/include/uk/plat/memory.h b/include/uk/plat/memory.h index ac3a1896f6..8471a9280e 100644 --- a/include/uk/plat/memory.h +++ b/include/uk/plat/memory.h @@ -139,8 +139,10 @@ static inline int ukplat_memregion_find_next(int i, int sflags, * A return value < 0 means that there is no initrd module, * `mrd` may be filled out with undefined values. */ -#define ukplat_memregion_find_initrd0(mrd) \ - ukplat_memregion_find_next(-1, UKPLAT_MEMRF_INITRD, (mrd)) +static inline int ukplat_memregion_find_initrd0(struct ukplat_memregion_desc *mrd) +{ + return ukplat_memregion_find_next(-1, UKPLAT_MEMRF_INITRD, (mrd)); +} /** * Sets the platform memory allocator and triggers the platform memory mappings diff --git a/include/uk/plat/mm.h b/include/uk/plat/mm.h new file mode 100644 index 0000000000..6abd8982e3 --- /dev/null +++ b/include/uk/plat/mm.h @@ -0,0 +1,353 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Authors: Stefan Teodorescu + * + * Copyright (c) 2021, University Politehnica of Bucharest. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __UKPLAT_MM__ +#define __UKPLAT_MM__ + +#include +#include + +#ifndef CONFIG_PT_API +#error Using this header requires enabling the virtual memory management API +#endif /* CONFIG_PT_API */ + +#define MEM_REGIONS_NUMBER 4 + +struct phys_mem_region +{ + unsigned long start_addr; + size_t length; + + unsigned long bitmap_start_addr; + size_t bitmap_length; + + // TODO + size_t last_offset; +}; + +extern size_t _phys_mem_region_list_size; +extern struct phys_mem_region _phys_mem_region_list[MEM_REGIONS_NUMBER]; + +#ifdef CONFIG_PARAVIRT +#include +#else +#include +#endif /* CONFIG_PARAVIRT */ + +/** + * Get a free frame in the physical memory where a new mapping can be created. + * + * @param flags: specify any criteria that the frame has to meet (e.g. a 2MB + * frame for a large page). These are constructed by or'ing PAGE_FLAG_* flags. + * + * @return: physical address of an unused frame or PAGE_INVALID on failure. + */ +static inline unsigned long uk_get_next_free_frame(unsigned long flags) +{ + size_t i; + unsigned long offset; + unsigned long pfn; + unsigned long frame_size; + + unsigned long phys_bitmap_start_addr; + size_t phys_bitmap_length; + + unsigned long phys_mem_start_addr; + size_t phys_mem_length; + + size_t last_offset; + +#ifdef CONFIG_PARAVIRT + /* + * Large/Huge pages are not supported in PV guests on Xen. 
+ * https://wiki.xenproject.org/wiki/Huge_Page_Support + */ + if (flags & PAGE_FLAG_LARGE) { + uk_pr_err("Large pages are not supported on PV guest\n"); + return PAGE_INVALID; + } +#endif /* CONFIG_PARAVIRT */ + + if (flags & PAGE_FLAG_LARGE) + frame_size = PAGE_LARGE_SIZE >> PAGE_SHIFT; + else + frame_size = 1; + + for (i = 0; i < _phys_mem_region_list_size; i++) { + phys_mem_start_addr = + _phys_mem_region_list[i].start_addr; + phys_mem_length = + _phys_mem_region_list[i].length; + phys_bitmap_start_addr = + _phys_mem_region_list[i].bitmap_start_addr; + phys_bitmap_length = + _phys_mem_region_list[i].bitmap_length; + last_offset = + _phys_mem_region_list[i].last_offset; + + if (phys_bitmap_length - last_offset <= 1) { + last_offset = 0; + _phys_mem_region_list[i].last_offset = 0; + } + + offset = uk_bitmap_find_next_zero_area( + (unsigned long *) phys_bitmap_start_addr, + phys_bitmap_length, + last_offset /* start */, + frame_size /* nr */, + frame_size - 1 /* align_mask */); + + if (offset * PAGE_SIZE > phys_mem_length) + continue; + + uk_bitmap_set((unsigned long *) phys_bitmap_start_addr, offset, + frame_size); + _phys_mem_region_list[i].last_offset = offset + frame_size - 1; + + pfn = (phys_mem_start_addr >> PAGE_SHIFT) + offset; + + return pfn_to_mframe(pfn); + } + + uk_pr_err("Out of physical memory\n"); + return PAGE_INVALID; +} + +static inline int uk_frame_reserve(unsigned long paddr, unsigned long frame_size, + int val) +{ + size_t i; + unsigned long offset; + + unsigned long bitmap_start_addr; + + unsigned long mem_start_addr; + unsigned long mem_length; + + /* TODO: add huge pages */ + if (frame_size != PAGE_SIZE && frame_size != PAGE_LARGE_SIZE) + return -1; + + if (paddr & (frame_size - 1)) + return -1; + + frame_size >>= PAGE_SHIFT; + + for (i = 0; i < _phys_mem_region_list_size; i++) { + mem_start_addr = + _phys_mem_region_list[i].start_addr; + mem_length = + _phys_mem_region_list[i].length; + bitmap_start_addr = + _phys_mem_region_list[i].bitmap_start_addr; + + if (!IN_RANGE(paddr, mem_start_addr, mem_length)) + continue; + + offset = (paddr - mem_start_addr) >> PAGE_SHIFT; + if (val) { + uk_bitmap_set((unsigned long *) bitmap_start_addr, + offset, frame_size); + } else { + uk_bitmap_clear((unsigned long *) bitmap_start_addr, + offset, frame_size); + } + return 0; + } + + return -1; +} + +/** + * Create a mapping from a virtual address to a physical address, with given + * protections and flags. + * + * @param vaddr: the virtual address of the page that is to be mapped. + * @param paddr: the physical address of the frame to which the virtual page + * is mapped to. This parameter can be equal to PAGE_PADDR_ANY when the caller + * is not interested in the physical address where the mapping is created. + * @param prot: protection permissions of the page (obtained by or'ing + * PAGE_PROT_* flags). + * @param flags: flags of the page (obtained by or'ing PAGE_FLAG_* flags). + * + * @return: 0 on success and -1 on failure. The uk_page_map call can fail if: + * - the given physical or virtual addresses are not aligned to page size; + * - any page in the region is already mapped to another frame; + * - if PAGE_PADDR_ANY flag is selected and there are no more available + * free frames in the physical memory; + * - (on Xen PV) if flags contains PAGE_FLAG_LARGE - large pages are not + * supported on PV guests; + * - (on Xen PV) the hypervisor rejected the mapping. + * + * In case of failure, the mapping is not created. 
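+ *
+ * A minimal usage sketch (illustrative only; vaddr stands for a free,
+ * page-aligned virtual address picked by the caller, e.g. from the mappings
+ * area defined in uk/mem_layout.h):
+ *
+ *	if (uk_page_map(vaddr, PAGE_PADDR_ANY,
+ *			PAGE_PROT_READ | PAGE_PROT_WRITE, 0) < 0)
+ *		uk_pr_err("could not map page\n");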
+ */
+int uk_page_map(unsigned long vaddr, unsigned long paddr, unsigned long prot,
+		unsigned long flags);
+
+/**
+ * Create a mapping from a region starting at a virtual address to a physical
+ * address, with given protections and flags.
+ *
+ * @param vaddr: the virtual address of the first page of the region that is
+ * to be mapped.
+ * @param paddr: the physical address of the starting frame of the region to
+ * which the virtual region is mapped. This parameter can be equal to
+ * PAGE_PADDR_ANY when the caller is not interested in the physical address
+ * where the mappings are created.
+ * @param pages: the number of pages in the region.
+ * @param prot: protection permissions of the pages (obtained by or'ing
+ * PAGE_PROT_* flags).
+ * @param flags: flags of the pages (obtained by or'ing PAGE_FLAG_* flags).
+ *
+ * @return: 0 on success and -1 on failure. The uk_map_region call can fail if:
+ * - the given physical or virtual addresses are not aligned to page size;
+ * - any page in the region is already mapped to another frame;
+ * - if PAGE_PADDR_ANY flag is selected and there are no more available
+ *   free frames in the physical memory;
+ * - (on Xen PV) if flags contains PAGE_FLAG_LARGE - large pages are not
+ *   supported on PV guests;
+ * - (on Xen PV) the hypervisor rejected any of the mappings.
+ *
+ * In case of failure, no new mapping is created.
+ */
+int uk_map_region(unsigned long vaddr, unsigned long paddr,
+		unsigned long pages, unsigned long prot, unsigned long flags);
+
+/**
+ * Frees a mapping for a page.
+ *
+ * @param vaddr: the virtual address of the page that is to be unmapped.
+ *
+ * @return: 0 in case of success and -1 on failure. The call fails if:
+ * - the given page is not mapped to any frame;
+ * - the virtual address given is not aligned to page (simple/large/huge) size;
+ * - (on Xen PV) the hypervisor rejected the unmapping.
+ */
+int uk_page_unmap(unsigned long vaddr);
+
+/**
+ * Sets new protections for a given page.
+ *
+ * @param vaddr: the virtual address of the page whose protections are updated.
+ * @param new_prot: new protections that will be set to the page (obtained by
+ * or'ing PAGE_PROT_* flags).
+ *
+ * @return: 0 in case of success and -1 on failure. The call fails if:
+ * - the given page is not mapped to any frame;
+ * - the virtual address given is not aligned to page (simple/large/huge) size;
+ * - (on Xen PV) the hypervisor rejected the permission update.
+ */
+int uk_page_set_prot(unsigned long vaddr, unsigned long new_prot);
+
+/**
+ * Return the page table entry corresponding to a given virtual address.
+ * @param vaddr: the virtual address, aligned to the corresponding page size
+ * (simple, large or huge).
+ * @return: page table entry (PTE) obtained by doing a page table walk.
+ */
+unsigned long uk_virt_to_pte(unsigned long vaddr);
+
+/**
+ * Initialize internal page table bookkeeping for using the PT API when
+ * attaching to an existing page table.
+ * @param pt_area_start: the virtual address of the area for page tables and
+ * internal bookkeeping.
+ * @param paddr_start: the physical address of the beginning of the area that
+ * should be managed by the API.
+ * @param len: the length of the (physical) memory area that should be managed.
+ */
+void uk_pt_init(unsigned long pt_area_start, unsigned long paddr_start,
+		size_t len);
+
+/**
+ * TODO: params
+ */
+int uk_pt_add_mem(unsigned long paddr_start, unsigned long len);
+
+/**
+ * Build the page table structure from scratch.
+ * @param paddr_start: the first address in the usable physical memory.
+ * @param len: the length (in bytes) of the physical memory that will be + * managed by the API. + * TODO params + * + * This function builds a structure of page tables (by calling _pt_create), + * initializes the page table API (by calling uk_pt_init), maps the kernel in + * the virtual address space (with _mmap_kernel), switches to the new address + * space and sets the _virt_offset variable. + */ +void uk_pt_build(unsigned long paddr_start, unsigned long len, + unsigned long kernel_start_vaddr, + unsigned long kernel_start_paddr, + unsigned long kernel_area_size); + +/** + * Allocate a new stack and return address to its lower address. + * + * @return: the lower address of the stack. If the returned address is `addr`, + * then the allocated stack region is [`addr`, `addr + __STACK_SIZE`]. The + * maximum number of stacks that can be allocated denotes the maximum number + * of threads that can co-exist. More details about the number of stacks in + * include/uk/mem_layout.h. Returns NULL in case of failure. + */ +void *uk_stack_alloc(); + +/** + * Frees a stack previously allocated with uk_stack_alloc(). + * + * @param vaddr: the virtual address of the beginning of the stack (i.e. the + * address returned by uk_stack_alloc()). + * + * @return: 0 in case of success and -1 on failure. The call can fail if: + * - the given address is not a stack address previously returned by + * uk_stack_alloc (which is between STACK_AREA_BEGIN and STACK_AREA_END); + * - the given address is not page aligned; + * - (on Xen) the hypervisor rejected the unmapping. + */ +int uk_stack_free(void *vaddr); + +/** + * Create virtual mappings for a new heap of a given length at a given virtual + * address. + * + * @param vaddr: the virtual address of the beginning of the area where the + * heap will be mapped. + * @param len: the length (in bytes) of the heap. + * + * @return: 0 in case of success and -1 on failure. The call can fail if: + * - the given interval [vaddr, vaddr + len] is not contained in the interval + * [HEAP_AREA_BEGIN, HEAP_AREA_END]; + * - uk_mmap_region fails. + */ +int uk_heap_map(unsigned long vaddr, unsigned long len); + +#endif /* __UKPLAT_MM__ */ + diff --git a/include/uk/sections.h b/include/uk/sections.h new file mode 100644 index 0000000000..0dbbfe5b40 --- /dev/null +++ b/include/uk/sections.h @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Authors: Wei Chen + * + * Copyright (c) 2018, Arm Ltd. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __PLAT_CMN_SECTIONS_H__ +#define __PLAT_CMN_SECTIONS_H__ + +#ifndef __ASSEMBLY__ +/* + * Following global variables are defined in image link scripts, and some + * variables are optional and may be unavailable on some architectures + * or configurations. + */ + +/* _dtb: start of device tree */ +extern char _dtb[]; + +/* [_text, _etext]: contains .text.* sections */ +extern char _text[], _etext[]; + +/* [__eh_frame_start, __eh_frame_end]: contains .eh_frame section */ +extern char __eh_frame_start[], __eh_frame_end[]; + +/* [__eh_frame_hdr_start, __eh_frame_hdr_end]: contains .eh_frame_hdr section */ +extern char __eh_frame_hdr_start[], __eh_frame_hdr_end[]; + +/* [_rodata, _erodata]: contains .rodata.* sections */ +extern char _rodata[], _erodata[]; + +/* [_data, _edata]: contains .data.* sections */ +extern char _data[], _edata[]; + +/* [_ctors, _ectors]: contains constructor tables (read-only) */ +extern char _ctors[], _ectors[]; + +/* [_tls_start, _tls_end]: contains .tdata.* and .tbss.* sections */ +extern char _tls_start[], _tls_end[]; +/* _etdata: denotes end of .tdata (and start of .tbss */ +extern char _etdata[]; + +/* [_sshared, _eshared] contains shared data across all + * compartments (key 15) */ +extern char _sshared[], _eshared[]; + +/* __bss_start: start of BSS sections */ +extern char __bss_start[]; + +/* __bss_start: start of the interrupt stack section */ +extern char _intrstack_start[]; + +/* _end: end of kernel image */ +extern char _end[]; + +#define __uk_image_symbol(addr) ((unsigned long)(addr)) + +#define __DTB __uk_image_symbol(_dtb) +#define __TEXT __uk_image_symbol(_text) +#define __ETEXT __uk_image_symbol(_etext) +#define __EH_FRAME_START __uk_image_symbol(__eh_frame_start) +#define __EH_FRAME_END __uk_image_symbol(__eh_frame_end) +#define __EH_FRAME_HDR_START __uk_image_symbol(__eh_frame_hdr_start) +#define __EH_FRAME_HDR_END __uk_image_symbol(__eh_frame_hdr_end) +#define __RODATA __uk_image_symbol(_rodata) +#define __ERODATA __uk_image_symbol(_erodata) +#define __DATA __uk_image_symbol(_data) +#define __EDATA __uk_image_symbol(_edata) +#define __CTORS __uk_image_symbol(_ctors) +#define __ECTORS __uk_image_symbol(_ectors) +#define __SHARED_START __uk_image_symbol(_sshared) +#define __SHARED_END __uk_image_symbol(_eshared) +#define __BSS_START __uk_image_symbol(__bss_start) +#define __INTRSTACK_START __uk_image_symbol(_intrstack_start) +#define __END __uk_image_symbol(_end) + +#endif /*__ASSEMBLY__*/ + +/* + * Because the section is 4KB alignment, and we will assign different + * attributes for different sections. We roundup image size to 2MB to + * avoid making holes in L3 table + * + * L2 table + * |-----------| L3 table + * | 2MB |===>|-----------| + * |-----------| | 4KB | entry#0 + * |-----------| + * | ... | + * | | + * |-----------| + * | 4KB | entry# for last page of real image + * |-----------| + * | 4KB | entry# for round up memory + * |-----------| + * | ... 
| + * |-----------| + * | 4KB | entry#511 + * |-----------| + * If we don't roundup the image size to 2MB, some memory that is not + * occupied by image but shared the same 2MB block with image tail will + * not be mapped in page table. + */ +#define IMAGE_ROUNDUP_SHIFT 21 +#define IMAGE_ROUNDUP_SIZE (0x1 << (IMAGE_ROUNDUP_SHIFT)) + +#endif /* __PLAT_CMN_SECTIONS_H__ */ diff --git a/lib/Makefile.uk b/lib/Makefile.uk index 6b653fdd5e..bc9c16b8ff 100644 --- a/lib/Makefile.uk +++ b/lib/Makefile.uk @@ -4,6 +4,7 @@ # ################################################################################ +$(eval $(call _import_lib,$(CONFIG_UK_BASE)/lib/flexos-core)) $(eval $(call _import_lib,$(CONFIG_UK_BASE)/lib/ukboot)) $(eval $(call _import_lib,$(CONFIG_UK_BASE)/lib/ukswrand)) $(eval $(call _import_lib,$(CONFIG_UK_BASE)/lib/posix-user)) @@ -22,6 +23,7 @@ $(eval $(call _import_lib,$(CONFIG_UK_BASE)/lib/fdt)) $(eval $(call _import_lib,$(CONFIG_UK_BASE)/lib/syscall_shim)) $(eval $(call _import_lib,$(CONFIG_UK_BASE)/lib/vfscore)) $(eval $(call _import_lib,$(CONFIG_UK_BASE)/lib/ramfs)) +$(eval $(call _import_lib,$(CONFIG_UK_BASE)/lib/cpio)) $(eval $(call _import_lib,$(CONFIG_UK_BASE)/lib/devfs)) $(eval $(call _import_lib,$(CONFIG_UK_BASE)/lib/9pfs)) $(eval $(call _import_lib,$(CONFIG_UK_BASE)/lib/uklock)) @@ -39,3 +41,6 @@ $(eval $(call _import_lib,$(CONFIG_UK_BASE)/lib/ukblkdev)) $(eval $(call _import_lib,$(CONFIG_UK_BASE)/lib/posix-process)) $(eval $(call _import_lib,$(CONFIG_UK_BASE)/lib/uksp)) $(eval $(call _import_lib,$(CONFIG_UK_BASE)/lib/uksignal)) +$(eval $(call _import_lib,$(CONFIG_UK_BASE)/lib/posix-mmap)) +$(eval $(call _import_lib,$(CONFIG_UK_BASE)/lib/ubsan)) +$(eval $(call _import_lib,$(CONFIG_UK_BASE)/lib/kasan)) diff --git a/lib/cpio/Makefile.uk b/lib/cpio/Makefile.uk new file mode 100644 index 0000000000..c3411bfc46 --- /dev/null +++ b/lib/cpio/Makefile.uk @@ -0,0 +1,7 @@ +$(eval $(call addlib_s,libcpio,$(CONFIG_LIBCPIO))) + +# LIBCPIO_CFLAGS-$(call gcc_version_ge,8,0) += -Wno-cast-function-type + +CINCLUDES-$(CONFIG_LIBCPIO) += -I$(LIBCPIO_BASE)/include +CXXINCLUDES-$(CONFIG_LIBCPIO) += -I$(LIBCPIO_BASE)/include +LIBCPIO_SRCS-y += $(LIBCPIO_BASE)/cpio.c diff --git a/lib/cpio/cpio.c b/lib/cpio/cpio.c new file mode 100644 index 0000000000..5236dea541 --- /dev/null +++ b/lib/cpio/cpio.c @@ -0,0 +1,240 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Authors: Robert Hrusecky + * Omar Jamil + * Sachin Beldona + * + * Copyright (c) 2017, NEC Europe Ltd., NEC Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * THIS HEADER MAY NOT BE EXTRACTED OR MODIFIED IN ANY WAY. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#define CPIO_MAGIC_NEWC "070701" +#define CPIO_MAGIC_CRC "070702" +#define FILE_TYPE_MASK 0170000 +#define DIRECTORY_BITS 040000 +#define FILE_BITS 0100000 + +#define ALIGN_4(ptr) ((void *)ALIGN_UP((uintptr_t)(ptr), 4)) + +#define IS_FILE_OF_TYPE(mode, bits) (((mode) & (FILE_TYPE_MASK)) == (bits)) +#define IS_FILE(mode) IS_FILE_OF_TYPE((mode), (FILE_BITS)) +#define IS_DIR(mode) IS_FILE_OF_TYPE((mode), (DIRECTORY_BITS)) + +#define GET_MODE(hdr) ((mode_t)strhex8_to_u32((hdr)->mode)) + +struct cpio_header { + char magic[6]; + char inode_num[8]; + char mode[8]; + char uid[8]; + char gid[8]; + char nlink[8]; + char mtime[8]; + char filesize[8]; + char major[8]; + char minor[8]; + char ref_major[8]; + char ref_minor[8]; + char namesize[8]; + char chksum[8]; +}; + +static bool valid_magic(struct cpio_header *header) +{ + return memcmp(header->magic, CPIO_MAGIC_NEWC, 6) == 0 + || memcmp(header->magic, CPIO_MAGIC_CRC, 6) == 0; +} + +/* Function to convert len digits of hexadecimal string loc + * to an integer. + * Returns the converted unsigned integer value on success. + * Returns 0 on error. + */ +static unsigned int snhex_to_int(size_t len, char *loc) +{ + int val = 0; + size_t i; + + for (i = 0; i < len; i++) { + val *= 16; + if (*(loc + i) >= '0' && *(loc + i) <= '9') + val += (*(loc + i) - '0'); + else if (*(loc + i) >= 'A' && *(loc + i) <= 'F') + val += (*(loc + i) - 'A') + 10; + else if (*(loc + i) >= 'a' && *(loc + i) <= 'f') + val += (*(loc + i) - 'a') + 10; + else + return 0; + } + return val; +} + +static uint32_t strhex8_to_u32(char *loc) +{ + return snhex_to_int(8, loc); +} + +static inline char *filename(struct cpio_header *header) +{ + return (char *)header + sizeof(struct cpio_header); +} + +static char *absolute_path(char *path_to_prepend, char *path) +{ + int dir_slash_included = + *(path_to_prepend + strlen(path_to_prepend) - 1) == '/' ? 
1 : 2; + char *abs_path = (char *)malloc(strlen(path) + strlen(path_to_prepend) + + dir_slash_included); + if (abs_path == NULL) + return NULL; + memcpy(abs_path, path_to_prepend, strlen(path_to_prepend)); + if (dir_slash_included == 2) + *(abs_path + strlen(path_to_prepend)) = '/'; + memcpy(abs_path + strlen(path_to_prepend) + dir_slash_included - 1, + path, strlen(path)); + *(abs_path + strlen(path) + strlen(path_to_prepend) + dir_slash_included + - 1) = '\0'; + return abs_path; +} + +static enum cpio_error read_section(struct cpio_header **header_ptr, + char *mount_loc, uintptr_t last) +{ + if (strcmp(filename(*header_ptr), "TRAILER!!!") == 0) { + *header_ptr = NULL; + return CPIO_SUCCESS; + } + + if (!valid_magic(*header_ptr)) { + *header_ptr = NULL; + return -CPIO_INVALID_HEADER; + } + + if (mount_loc == NULL) { + *header_ptr = NULL; + return -CPIO_NO_MOUNT_LOCATION; + } + + struct cpio_header *header = *header_ptr; + char *path_from_root = absolute_path(mount_loc, filename(header)); + + if (path_from_root == NULL) { + *header_ptr = NULL; + return -CPIO_NOMEM; + } + mode_t header_mode = GET_MODE(header); + uint32_t header_filesize = strhex8_to_u32(header->filesize); + uint32_t header_namesize = strhex8_to_u32(header->namesize); + + if ((uintptr_t)header + sizeof(struct cpio_header) > last) { + *header_ptr = NULL; + return -CPIO_MALFORMED_FILE; + } + if (IS_FILE(header_mode) && header_filesize != 0) { + //flexos_gate(ukdebug, uk_pr_debug, "Creating file %s...\n", path_from_root); + int fd = open(path_from_root, O_CREAT | O_RDWR); + + if (fd < 0) { + *header_ptr = NULL; + return -CPIO_FILE_CREATE_FAILED; + } + //flexos_gate(ukdebug, uk_pr_debug, "File %s created\n", path_from_root); + char *data_location = (char *)ALIGN_4( + (char *)(header) + sizeof(struct cpio_header) + + header_namesize); + + if ((uintptr_t)data_location + header_filesize > last) { + *header_ptr = NULL; + return -CPIO_MALFORMED_FILE; + } + uint32_t bytes_to_write = header_filesize; + int bytes_written = 0; + + while (bytes_to_write > 0) { + if ((bytes_written = + write(fd, data_location + bytes_written, + bytes_to_write)) + < 0) { + *header_ptr = NULL; + return -CPIO_FILE_WRITE_FAILED; + } + bytes_to_write -= bytes_written; + } + //if (chmod(path_from_root, header_mode & 0777) < 0) + //flexos_gate(ukdebug, uk_pr_info, "chmod on file %s failed\n", path_from_root); + if (close(fd) < 0) { + *header_ptr = NULL; + return -CPIO_FILE_CLOSE_FAILED; + } + } else if (IS_DIR(header_mode)) { + if (strcmp(".", filename(header)) != 0 + && mkdir(path_from_root, header_mode & 0777) < 0) { + *header_ptr = NULL; + return -CPIO_MKDIR_FAILED; + } + } + free(path_from_root); + struct cpio_header *next_header = (struct cpio_header *)ALIGN_4( + (char *)header + sizeof(struct cpio_header) + header_namesize); + + next_header = (struct cpio_header *)ALIGN_4((char *)next_header + + header_filesize); + *header_ptr = next_header; + return CPIO_SUCCESS; +} + +enum cpio_error cpio_extract(char *mount_loc, void *memory_region, size_t len) +{ + enum cpio_error error = CPIO_SUCCESS; + struct cpio_header *header = (struct cpio_header *)(memory_region); + struct cpio_header **header_ptr = &header; + uintptr_t end = (uintptr_t)header; + + if (mount_loc == NULL) + return -CPIO_NO_MOUNT_LOCATION; + + while (error == CPIO_SUCCESS && header != NULL) { + error = read_section(header_ptr, mount_loc, end + len); + header = *header_ptr; + } + return error; +} diff --git a/lib/cpio/exportsyms.uk b/lib/cpio/exportsyms.uk new file mode 100644 index 
0000000000..090dd8da35 --- /dev/null +++ b/lib/cpio/exportsyms.uk @@ -0,0 +1 @@ +cpio_extract diff --git a/lib/cpio/include/uk/cpio.h b/lib/cpio/include/uk/cpio.h new file mode 100644 index 0000000000..86bc002276 --- /dev/null +++ b/lib/cpio/include/uk/cpio.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Authors: Robert Hrusecky + * Omar Jamil + * Sachin Beldona + * + * Copyright (c) 2017, NEC Europe Ltd., NEC Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * THIS HEADER MAY NOT BE EXTRACTED OR MODIFIED IN ANY WAY. 
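+ *
+ * Typical usage sketch (illustrative only; it assumes an initrd memory
+ * region has been located beforehand, e.g. with
+ * ukplat_memregion_find_initrd0() from uk/plat/memory.h, and that the
+ * destination filesystem is already mounted at "/"):
+ *
+ *	struct ukplat_memregion_desc initrd;
+ *
+ *	if (ukplat_memregion_find_initrd0(&initrd) >= 0)
+ *		cpio_extract("/", initrd.base, initrd.len);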
+ */ + +#ifndef __CPIO_H__ +#define __CPIO_H__ +#include + +enum cpio_error { + CPIO_SUCCESS = 0, + CPIO_INVALID_HEADER, + CPIO_FILE_CREATE_FAILED, + CPIO_FILE_WRITE_FAILED, + CPIO_FILE_CHMOD_FAILED, + CPIO_FILE_CLOSE_FAILED, + CPIO_MKDIR_FAILED, + CPIO_MOUNT_FAILED, + CPIO_NO_MEMREGION, + CPIO_MALFORMED_FILE, + CPIO_NOMEM, + CPIO_NO_MOUNT_LOCATION +}; + +enum cpio_error cpio_extract(char *loc, void *mem, size_t len); +#endif /*__CPIO_H__*/ diff --git a/lib/devfs/device.c b/lib/devfs/device.c index d67aee4a30..7ce71a1c8c 100644 --- a/lib/devfs/device.c +++ b/lib/devfs/device.c @@ -56,8 +56,9 @@ #include #include +#include -static struct uk_mutex devfs_lock = UK_MUTEX_INITIALIZER(devfs_lock); +static struct uk_mutex devfs_lock __attribute__((flexos_whitelist)) = UK_MUTEX_INITIALIZER(devfs_lock); /* list head of the devices */ static struct device *device_list; diff --git a/lib/flexos-core/Config.uk b/lib/flexos-core/Config.uk new file mode 100644 index 0000000000..215f5741ee --- /dev/null +++ b/lib/flexos-core/Config.uk @@ -0,0 +1,80 @@ +menuconfig LIBFLEXOS + bool "flexos - FlexOS core libraries" + select UKPLAT_MEMRNAME + default y + +if LIBFLEXOS +choice + prompt "FlexOS backend (no nested isolation for now)" + default LIBFLEXOS_NONE + +config LIBFLEXOS_INTELPKU + bool "Enable Intel MPK/PKU support in FlexOS" + select HAVE_X86PKU + select DYNAMIC_PT + select PT_API + +config LIBFLEXOS_VMEPT + bool "Enable VM/EPT support in FlexOS" + select DYNAMIC_PT + select PT_API + +if LIBFLEXOS_VMEPT +config LIBFLEXOS_VMEPT_LIBRARY + bool "Build a library compartment (not main app)" +endif # LIBFLEXOS_VMEPT + +config LIBFLEXOS_NONE + bool "Function call gate instanciations (debugging)" +endchoice + +if LIBFLEXOS_INTELPKU +choice + prompt "System-wide gate instrumentations" + default LIBFLEXOS_GATE_INTELPKU_NO_INSTRUMENT + +config LIBFLEXOS_GATE_INTELPKU_DBG + bool "Enable gate sanity checks" + +config LIBFLEXOS_GATE_INTELPKU_COUNT_EXECUTIONS + bool "Count gate executions" + help + Count gate executions and show functions that triggered the switch. + +config LIBFLEXOS_GATE_INTELPKU_NO_INSTRUMENT + bool "Do not instrument the gate" +endchoice + +choice + prompt "PKU Gate type" + default LIBFLEXOS_GATE_INTELPKU_PRIVATE_STACKS + help + Set the default gate type. + +config LIBFLEXOS_GATE_INTELPKU_PRIVATE_STACKS + bool "Never share the stack" + help + Each thread has one stack per compartment. + +config LIBFLEXOS_GATE_INTELPKU_SHARED_STACKS + bool "Always share the stack" + help + Always share the stack. Only the heap is isolated! +endchoice + +if LIBFLEXOS_GATE_INTELPKU_PRIVATE_STACKS +# note: DSS and shared stacks doesn't make sense +# note: for now we only support DSS with PKU +config LIBFLEXOS_ENABLE_DSS + bool "Enable Data Shadow Stack" +endif # LIBFLEXOS_GATE_INTELPKU_PRIVATE_STACKS + +endif # LIBFLEXOS_GATE_INTELPKU + +config LIBFLEXOS_COMP_HEAP_SIZE + int "Size of per-compartment heaps" + default "10000" + +config LIBFLEXOS_DEBUG + bool "Enable various debugging tools" +endif # LIBFLEXOS diff --git a/lib/flexos-core/Makefile.uk b/lib/flexos-core/Makefile.uk new file mode 100644 index 0000000000..ca935ed300 --- /dev/null +++ b/lib/flexos-core/Makefile.uk @@ -0,0 +1,48 @@ +# libflexos Makefile.uk +# +# Copyright (c) 2020-2021 Hugo Lefeuvre +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. 
Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +################################################################################ +# Library registration +################################################################################ +$(eval $(call addlib_s,libflexos,$(CONFIG_LIBFLEXOS))) + +################################################################################ +# Library includes +################################################################################ +CINCLUDES-$(CONFIG_LIBFLEXOS) += -I$(LIBFLEXOS_BASE)/include/ +CXXINCLUDES-$(CONFIG_LIBFLEXOS) += -I$(LIBFLEXOS_BASE)/include/ + +################################################################################ +# Glue code +################################################################################ +LIBFLEXOS_SRCS-$(CONFIG_LIBFLEXOS_INTELPKU) += $(LIBFLEXOS_BASE)/intelpku.c +LIBFLEXOS_SRCS-$(CONFIG_LIBFLEXOS_VMEPT) += $(LIBFLEXOS_BASE)/vmept.c +# LIBFLEXOS_SRCS-$(CONFIG_LIBFLEXOS_VMEPT) += $(LIBFLEXOS_BASE)/wrappers.c + +LIBFLEXOS_CFLAGS-y += -fno-sanitize=kernel-address diff --git a/lib/flexos-core/include/flexos/impl/bitfield.h b/lib/flexos-core/include/flexos/impl/bitfield.h new file mode 100644 index 0000000000..74a69eff42 --- /dev/null +++ b/lib/flexos-core/include/flexos/impl/bitfield.h @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Copyright (c) 2021, Sebastian Rauch + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef FLEXOS_VMEPT_BITFIELD_H +#define FLEXOS_VMEPT_BITFIELD_H + +#include + +/* a bitfield containing 256 bits */ +struct flexos_vmept_bitfield_256 { + uint64_t eightbytes[4]; +}; + +/* returns the smalles integer i such that eightbytes & (1 << i) == 0 + * or -1 if eightbytes is 0 + * TODO: maybe use specific instructions to do this faster? */ +static inline int _flexos_vmept_find_first_zero_bit_64(uint64_t eightbytes) { + /* x & (x - 1) clears the rightmost set bit + * thus x - (x & (x - 1)) only keeps the righmost set bit (if any) */ + uint64_t x = ~eightbytes; + x = x - (x & (x - 1)); + + switch (x) { + case (1ULL << 0): return 0; + case (1ULL << 1): return 1; + case (1ULL << 2): return 2; + case (1ULL << 3): return 3; + case (1ULL << 4): return 4; + case (1ULL << 5): return 5; + case (1ULL << 6): return 6; + case (1ULL << 7): return 7; + case (1ULL << 8): return 8; + case (1ULL << 9): return 9; + case (1ULL << 10): return 10; + case (1ULL << 11): return 11; + case (1ULL << 12): return 12; + case (1ULL << 13): return 13; + case (1ULL << 14): return 14; + case (1ULL << 15): return 15; + case (1ULL << 16): return 16; + case (1ULL << 17): return 17; + case (1ULL << 18): return 18; + case (1ULL << 19): return 19; + case (1ULL << 20): return 20; + case (1ULL << 21): return 21; + case (1ULL << 22): return 22; + case (1ULL << 23): return 23; + case (1ULL << 24): return 24; + case (1ULL << 25): return 25; + case (1ULL << 26): return 26; + case (1ULL << 27): return 27; + case (1ULL << 28): return 28; + case (1ULL << 29): return 29; + case (1ULL << 30): return 30; + case (1ULL << 31): return 31; + case (1ULL << 32): return 32; + case (1ULL << 33): return 33; + case (1ULL << 34): return 34; + case (1ULL << 35): return 35; + case (1ULL << 36): return 36; + case (1ULL << 37): return 37; + case (1ULL << 38): return 38; + case (1ULL << 39): return 39; + case (1ULL << 40): return 40; + case (1ULL << 41): return 41; + case (1ULL << 42): return 42; + case (1ULL << 43): return 43; + case (1ULL << 44): return 44; + case (1ULL << 45): return 45; + case (1ULL << 46): return 46; + case (1ULL << 47): return 47; + case (1ULL << 48): return 48; + case (1ULL << 49): return 49; + case (1ULL << 50): return 50; + case (1ULL << 51): return 51; + case (1ULL << 52): return 52; + case (1ULL << 53): return 53; + case (1ULL << 54): return 54; + case (1ULL << 55): return 55; + case (1ULL << 56): return 56; + case (1ULL << 57): return 57; + case (1ULL << 58): return 58; + case (1ULL << 59): return 59; + case (1ULL << 60): return 60; + case (1ULL << 61): return 61; + case (1ULL << 62): return 62; + case (1ULL << 63): return 63; + default: return -1; + } +} + +static inline int flexos_vmept_has_zero_bit_256(const struct 
flexos_vmept_bitfield_256 *bf_256) { + return (~bf_256->eightbytes[0]) || (~bf_256->eightbytes[1]) + || (~bf_256->eightbytes[2]) || (~bf_256->eightbytes[3]); +} + +static inline int flexos_vmept_find_first_zero_bit_256(const struct flexos_vmept_bitfield_256 *bf_256) { + if (~bf_256->eightbytes[0]) + return _flexos_vmept_find_first_zero_bit_64(bf_256->eightbytes[0]); + if (~bf_256->eightbytes[1]) + return 64 + _flexos_vmept_find_first_zero_bit_64(bf_256->eightbytes[1]); + if (~bf_256->eightbytes[2]) + return 128 + _flexos_vmept_find_first_zero_bit_64(bf_256->eightbytes[2]); + if (~bf_256->eightbytes[3]) + return 192 + _flexos_vmept_find_first_zero_bit_64(bf_256->eightbytes[3]); + return -1; +} + +static inline void flexos_vmept_set_bit_256(struct flexos_vmept_bitfield_256 *bf_256, uint8_t i) { + bf_256->eightbytes[i / 64] |= 1ULL << (i % 64); +} + +static inline void flexos_vmept_clear_bit_256(struct flexos_vmept_bitfield_256 *bf_256, uint8_t i) { + bf_256->eightbytes[i / 64] &= ~(1ULL << (i % 64)); +} + +static inline void flexos_vmept_init_bitfield_256(struct flexos_vmept_bitfield_256 *bf_256) { + bf_256->eightbytes[0] = 0ULL; + bf_256->eightbytes[1] = 0ULL; + bf_256->eightbytes[2] = 0ULL; + bf_256->eightbytes[3] = 0ULL; +} + +#endif /* FLEXOS_VMEPT_BITFIELD_H */ diff --git a/lib/flexos-core/include/flexos/impl/intelpku-impl.h b/lib/flexos-core/include/flexos/impl/intelpku-impl.h new file mode 100644 index 0000000000..3e39314db0 --- /dev/null +++ b/lib/flexos-core/include/flexos/impl/intelpku-impl.h @@ -0,0 +1,1808 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Copyright (c) 2020-2021, Hugo Lefeuvre + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
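+ *
+ * A short note on the PKRU() helper defined below (a sketch of the intended
+ * semantics, not normative): PKRU(k) starts from 0x3fffffff, which sets the
+ * access-disable/write-disable bits for keys 0-14 while leaving key 15
+ * (the key used for data shared across compartments, cf. the
+ * _sshared/_eshared section in uk/sections.h) accessible, and then clears
+ * the two bits of key k. For example:
+ *
+ *	PKRU(0) == 0x3fffffff & ~0x1 & ~0x2 == 0x3ffffffc
+ *	PKRU(1) == 0x3fffffff & ~0x4 & ~0x8 == 0x3ffffff3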
+ */ + +#ifndef FLEXOS_INTELPKU_IMPL_H +#define FLEXOS_INTELPKU_IMPL_H + +#include +#include +#include /* STACK_SIZE */ +#include /* ALIGN_UP */ +#include +#include /* UK_CRASH */ +#include /* memcpy */ +#include /* round_pgup() */ + +#define PKRU(key1) (0x3fffffff & ~(1UL << ( key1 * 2)) \ + & ~(1UL << ((key1 * 2) + 1))) + +static inline +int uk_thread_get_tid(void) +{ + unsigned long sp = ukarch_read_sp(); + return *((int *) round_pgup((unsigned long) ((sp & STACK_MASK_TOP) + 1))); +} + +/* ========================================================================== + * Implementation of PKU gate instrumentation + * ========================================================================== */ + +/* Enable/Disable gate instrumentations. + * - CONFIG_LIBFLEXOS_COUNT_GATE_EXECUTIONS: count number of gates + * executed. + * - CONFIG_LIBFLEXOS_GATE_INTELPKU_DBG: gate sanity checks. */ +#if CONFIG_LIBFLEXOS_COUNT_GATE_EXECUTIONS && CONFIG_LIBFLEXOS_GATE_INTELPKU_DBG +#error "The debug gate is incompatible with the gate execution counter!" +#elif CONFIG_LIBFLEXOS_COUNT_GATE_EXECUTIONS +extern volatile unsigned long flexos_intelpku_in_gate_counter; +extern volatile unsigned long flexos_intelpku_out_gate_counter; +#define _flexos_intelpku_gate_inst_in(f, t, fname) \ + __flexos_intelpku_gate_counter_in(f, t, fname) +#define _flexos_intelpku_gate_inst_out(f, t) \ + __flexos_intelpku_gate_counter_out(f, t) +#elif CONFIG_LIBFLEXOS_GATE_INTELPKU_DBG +#define _flexos_intelpku_gate_inst_in(f, t, fname) \ + __flexos_intelpku_gate_dbg_in(f, t) +#define _flexos_intelpku_gate_inst_out(f, t) +#else /* No instrumentation */ +#define _flexos_intelpku_gate_inst_in(f, t, fname) +#define _flexos_intelpku_gate_inst_out(f, t) +#endif + +#if CONFIG_LIBFLEXOS_COUNT_GATE_EXECUTIONS +#define flexos_intelpku_print_in_counter() \ +do { \ + printf("Number of 'in' gates executed: %lu\n", \ + flexos_intelpku_in_gate_counter); \ +} while (0) + +#define flexos_intelpku_print_out_counter() \ +do { \ + printf("Number of 'out' gates executed: %lu\n", \ + flexos_intelpku_out_gate_counter); \ +} while (0) + +#define flexos_intelpku_reset_gate_counters() \ +do { \ + flexos_intelpku_out_gate_counter = 0; \ + flexos_intelpku_in_gate_counter = 0; \ +} while (0) + +#define __flexos_intelpku_gate_counter_out(k_from, k_to) \ +do { \ + flexos_intelpku_out_gate_counter++; \ +} while (0) + +#define __flexos_intelpku_gate_counter_in(k_from, k_to, fname) \ +do { \ + /* only temporary to enable access to printf */ \ + wrpkru(0x3ffffff0); \ + printf("switch triggered by %s (%d -> %d)\n", fname, \ + k_from, k_to); \ + \ + flexos_intelpku_in_gate_counter++; \ +} while (0) +#endif /* CONFIG_LIBFLEXOS_COUNT_GATE_EXECUTIONS */ + +/* debug gate: this has a cost, don't use it for benchmarks */ +#define __flexos_intelpku_gate_dbg_in(key_from, key_to) \ +do { \ + /* sanity check: did we enter this code from an */ \ + /* unexpected domain? 
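+	   (for reference, PKRU(k) is 0x3fffffff with the two bits of	\
+	   pkey k cleared, e.g. PKRU(1) == 0x3ffffff3)			\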
*/ \ + uint32_t pkru = rdpkru(); \ + \ + if (pkru != PKRU(key_from)) { \ + /* at this point we detected a fatal bug, so just */ \ + /* take full permissions and crash */ \ + wrpkru(0x0); \ + UK_CRASH("ERROR IN GATE got %#010x, " \ + "expected %#010x", pkru, PKRU(key_from)); \ + } \ +} while (0) + + +/* ========================================================================== + * Implementation of PKU gates + * ========================================================================== */ + +#if CONFIG_LIBFLEXOS_GATE_INTELPKU_SHARED_STACKS + +#define __flexos_intelpku_gate_swpkru(key_from, key_to) \ +do { \ + /* switch thread permissions, this includes anti-ROP checks */ \ + asm volatile ( "1:\n\t" /* define local label */ \ + "movq %0, %%rax\n\t" \ + "xor %%rcx, %%rcx\n\t" \ + "xor %%rdx, %%rdx\n\t" \ + "wrpkru\n\t" \ + "lfence\n\t" /* TODO necessary? */ \ + "cmpq %0, %%rax\n\t" \ + "jne 1b\n\t" /* ROP detected, re-do it */ \ + :: "i"(PKRU(key_to)) : "rax", "rcx", "rdx"); \ +} while (0) + +#define _flexos_intelpku_gate(N, key_from, key_to, fname, ...) \ +do { \ + __flexos_intelpku_gate_swpkru(key_from, key_to); \ + fname(__VA_ARGS__); \ + __flexos_intelpku_gate_swpkru(key_to, key_from); \ +} while (0) + +#define _flexos_intelpku_gate_r(N, key_from, key_to, retval, fname, ...)\ +do { \ + __flexos_intelpku_gate_swpkru(key_from, key_to); \ + retval = fname(__VA_ARGS__); \ + __flexos_intelpku_gate_swpkru(key_to, key_from); \ +} while (0) + +#else + +/* We can clobber %r11 here */ +#define __ASM_BACKUP_TSB(tsb_comp) \ + /* load tid into %r12 */ \ + "movq %%r15,%%r11\n\t" \ + /* %r12 = tid * sizeof(struct uk_thread_status_block) */ \ + "shl $0x4,%%r11\n\t" \ + /* %r12 = &tsb_compN[tid * sizeof(struct uk_thread_status_block)] */\ + "addq $" STRINGIFY(tsb_comp) ",%%r11\n\t" \ + /* push tsb_compN[tid * sizeof(struct uk_thread_status_block)].sp */\ + "push (%%r11)\n\t" \ + /* push tsb_compN[tid * sizeof(struct uk_thread_status_block)].bp */\ + "addq $0x8,%%r11\n\t" \ + "push (%%r11)\n\t" + +#define __ASM_UPDATE_TSB_TMP(tsb_comp) \ + /* load tid into %r12 */ \ + "movq %%r15,%%r11\n\t" \ + /* %r12 = tid * sizeof(struct uk_thread_status_block) */ \ + "shl $0x4,%%r11\n\t" \ + /* %r12 = &tsb_compN[tid * sizeof(struct uk_thread_status_block)] */\ + "addq $" STRINGIFY(tsb_comp) ",%%r11\n\t" \ + /* %rcx = &tsb_compN[tid * sizeof(struct uk_thread_status_block)] */\ + "lea (%%r11),%%rcx\n\t" \ + /* tsb_compN[tid * sizeof(struct uk_thread_status_block)].sp = %rsp */\ + "movq %%rsp,(%%r11)\n\t" \ + /* tsb_compN[tid * sizeof(struct uk_thread_status_block)].bp = %rbp */\ + "movq %%rbp,0x8(%%rcx)\n\t" + +/* Do not clobber anything here */ +#define __ASM_SWITCH_STACK(tsb_comp) \ + /* load tid into %rsp */ \ + "movq %%r15,%%rsp\n\t" \ + /* %rsp = tid * sizeof(struct uk_thread_status_block) */ \ + "shl $0x4,%%rsp\n\t" \ + /* %rsp = &tsb_compN[tid * sizeof(struct uk_thread_status_block)] */\ + "addq $" STRINGIFY(tsb_comp) ",%%rsp\n\t" \ + /* %rbp = &tsb_compN[tid * sizeof(struct uk_thread_status_block)] */\ + "movq %%rsp,%%rbp\n\t" \ + /* %rsp = tsb_compN[tid * sizeof(struct uk_thread_status_block)].sp */\ + "movq (%%rsp),%%rsp\n\t" \ + /* %rbp = tsb_compN[tid * sizeof(struct uk_thread_status_block)].bp */\ + "addq $0x8,%%rbp\n\t" \ + "movq (%%rbp),%%rbp\n\t" + +#define __ASM_ALIGN_AND_CALL(fname) \ + /* align at 16 bytes */ \ + "andq $-16, %%rsp\n\t" \ + /* call the actual function */ \ + "call " #fname "\n\t" \ + +/* We can clobber %r12 here */ +#define __ASM_RESTORE_TSB(tsb_comp) \ + /* load tid into %r12 */ \ + 
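+	/* (the "shl $0x4" below relies on				*/ \
+	/* sizeof(struct uk_thread_status_block) being 16: two uint64_t) */ \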
"movq %%r15,%%r12\n\t" \ + /* %r12 = tid * sizeof(struct uk_thread_status_block) */ \ + "shl $0x4,%%r12\n\t" \ + /* %r12 = &tsb_compN[tid * sizeof(struct uk_thread_status_block)] */\ + "addq $" STRINGIFY(tsb_comp) ",%%r12\n\t" \ + /* pop to tsb_compN[tid * sizeof(struct uk_thread_status_block)].bp */\ + "addq $0x8,%%r12\n\t" \ + "pop (%%r12)\n\t" \ + /* pop to tsb_compN[tid * sizeof(struct uk_thread_status_block)].sp */\ + "subq $0x8,%%r12\n\t" \ + "pop (%%r12)\n\t" + +/* TODO FLEXOS deduplicate this code */ + +#define __flexos_intelpku_gate0(key_from, key_to, fname) \ +do { \ + /* we have to call this with a valid/accessible stack, */ \ + /* so do it before switching thread permissions. Note */ \ + /* that the stack won't be accessible after switching */ \ + /* permissions, so we HAVE to store this in a register. */ \ + register uint32_t tid asm("r15") = uk_thread_get_tid(); \ + \ + asm volatile ( \ + /* save remaining parameter registers */ \ + /* TODO do we actually HAVE to do this from the */ \ + /* perspective of the C calling convention? */ \ + "push %%rsi\n\t" \ + "push %%rdi\n\t" \ + "push %%r8\n\t" \ + "push %%r9\n\t" \ + /* save caller-saved registers (r10-11). */ \ + "push %%r10\n\t" \ + "push %%r11\n\t" \ + /* protecting registers? save callee-saved registers */ \ + /* and zero them out (r12-15). */ \ + /* TODO */ \ + "push %%r12\n\t" \ + /* backup source domain's stack/frame pointers */ \ + __ASM_BACKUP_TSB(tsb_comp ## key_from) \ + __ASM_UPDATE_TSB_TMP(tsb_comp ## key_from) \ + /* put parameters in registers */ \ + /* nothing to do: no parameters */ \ + /* switch thread permissions */ \ + "1:\n\t" /* define local label */ \ + "movq %3, %%rax\n\t" \ + "xor %%rcx, %%rcx\n\t" \ + "xor %%rdx, %%rdx\n\t" \ + "wrpkru\n\t" \ + "lfence\n\t" /* TODO necessary? */ \ + "cmpq %3, %%rax\n\t" \ + "jne 1b\n\t" /* ROP detected, re-do it */ \ + /* put parameters in final registers */ \ + /* nothing to do: no parameters */ \ + /* we're ready, switch stack */ \ + __ASM_SWITCH_STACK(tsb_comp ## key_to) \ + __ASM_ALIGN_AND_CALL(fname) \ + /* backup return value in rsi */ \ + /* nothing to do: no return value */ \ + /* switch back thread permissions */ \ + "2:\n\t" /* define local label */ \ + "movq %2, %%rax\n\t" \ + "xor %%rcx, %%rcx\n\t" \ + "xor %%rdx, %%rdx\n\t" \ + "wrpkru\n\t" \ + "lfence\n\t" /* TODO necessary? */ \ + "cmpq %2, %%rax\n\t" \ + "jne 2b\n\t" /* ROP detected, re-do it */ \ + /* switch back the stack */ \ + __ASM_SWITCH_STACK(tsb_comp ## key_from) \ + __ASM_RESTORE_TSB(tsb_comp ## key_from) \ + /* protecting registers? restore callee-saved registers */ \ + /* TODO */ \ + "pop %%r12\n\t" \ + /* restore caller-saved registers */ \ + "pop %%r11\n\t" \ + "pop %%r10\n\t" \ + /* restore parameter registers */ \ + "pop %%r9\n\t" \ + /* save return value from rsi (into r11) */ \ + /* nothing to do: no return value */ \ + "pop %%r8\n\t" \ + "pop %%rdi\n\t" \ + "pop %%rsi\n\t" \ + \ + : /* output */ \ + "=m" (tsb_comp ## key_from), \ + "=m" (tsb_comp ## key_to) \ + : /* input */ \ + "i"(PKRU(key_from)), \ + "i"(PKRU(key_to)), \ + "r"(tid), \ + "i"(fname) \ + : /* clobbers */ \ + "rax", "rcx", "rdx", \ + "memory" /* TODO should we clobber memory? */ \ + ); \ +} while (0) + +#define __flexos_intelpku_gate0_r(key_from, key_to, retval, fname) \ +do { \ + /* we have to call this with a valid/accessible stack, */ \ + /* so do it before switching thread permissions. Note */ \ + /* that the stack won't be accessible after switching */ \ + /* permissions, so we HAVE to store this in a register. 
*/ \ + register uint32_t tid asm("r15") = uk_thread_get_tid(); \ + register uint64_t _ret asm("r11"); \ + \ + asm volatile ( \ + /* save remaining parameter registers */ \ + /* TODO do we actually HAVE to do this from the */ \ + /* perspective of the C calling convention? */ \ + "push %%rsi\n\t" \ + "push %%rdi\n\t" \ + "push %%r8\n\t" \ + "push %%r9\n\t" \ + /* save caller-saved registers (r10-11). */ \ + "push %%r10\n\t" \ + "push %%r11\n\t" \ + /* protecting registers? save callee-saved registers */ \ + /* and zero them out (r12-15). */ \ + /* TODO */ \ + "push %%r12\n\t" \ + /* backup source domain's stack/frame pointers */ \ + __ASM_BACKUP_TSB(tsb_comp ## key_from) \ + __ASM_UPDATE_TSB_TMP(tsb_comp ## key_from) \ + /* put parameters in registers */ \ + /* nothing to do: no parameters */ \ + /* switch thread permissions */ \ + "1:\n\t" /* define local label */ \ + "movq %4, %%rax\n\t" \ + "xor %%rcx, %%rcx\n\t" \ + "xor %%rdx, %%rdx\n\t" \ + "wrpkru\n\t" \ + "lfence\n\t" /* TODO necessary? */ \ + "cmpq %4, %%rax\n\t" \ + "jne 1b\n\t" /* ROP detected, re-do it */ \ + /* put parameters in final registers */ \ + /* nothing to do: no parameters */ \ + /* we're ready, switch stack */ \ + __ASM_SWITCH_STACK(tsb_comp ## key_to) \ + __ASM_ALIGN_AND_CALL(fname) \ + /* backup return value in rsi */ \ + "movq %%rax, %%rsi\n\t" \ + /* switch back thread permissions */ \ + "2:\n\t" /* define local label */ \ + "movq %3, %%rax\n\t" \ + "xor %%rcx, %%rcx\n\t" \ + "xor %%rdx, %%rdx\n\t" \ + "wrpkru\n\t" \ + "lfence\n\t" /* TODO necessary? */ \ + "cmpq %3, %%rax\n\t" \ + "jne 2b\n\t" /* ROP detected, re-do it */ \ + /* switch back the stack */ \ + __ASM_SWITCH_STACK(tsb_comp ## key_from) \ + __ASM_RESTORE_TSB(tsb_comp ## key_from) \ + /* protecting registers? restore callee-saved registers */ \ + /* TODO */ \ + "pop %%r12\n\t" \ + /* restore caller-saved registers */ \ + "pop %%r11\n\t" \ + "pop %%r10\n\t" \ + /* restore parameter registers */ \ + "pop %%r9\n\t" \ + /* save return value from rsi (into r11) */ \ + "movq %%rsi, %%r11\n\t" \ + "pop %%r8\n\t" \ + "pop %%rdi\n\t" \ + "pop %%rsi\n\t" \ + \ + : /* output */ \ + "=r" (_ret), /* always %%r11 */ \ + "=m" (tsb_comp ## key_from), \ + "=m" (tsb_comp ## key_to) \ + : /* input */ \ + "i"(PKRU(key_from)), \ + "i"(PKRU(key_to)), \ + "r"(tid), \ + "i"(fname) \ + : /* clobbers */ \ + "rax", "rcx", "rdx", \ + "memory" /* TODO should we clobber memory? */ \ + ); \ + \ + /* this will be optimized by the compiler */ \ + if (sizeof(retval) == 1) /* 8 bit */ \ + asm volatile ( \ + "mov %%r11b, %0\n\t" \ + : /* output */ \ + "=m" (retval) \ + : /* input */ \ + "r"(_ret) \ + ); \ + if (sizeof(retval) == 2) /* 16 bit */ \ + asm volatile ( \ + "mov %%r11w, %0\n\t" \ + : /* output */ \ + "=m" (retval) \ + : /* input */ \ + "r"(_ret) \ + ); \ + else if (sizeof(retval) == 4) /* 32 bit */ \ + asm volatile ( \ + "mov %%r11d, %0\n\t" \ + : /* output */ \ + "=m" (retval) \ + : /* input */ \ + "r"(_ret) \ + ); \ + else if (sizeof(retval) == 8) /* 64 bit */ \ + asm volatile ( \ + "mov %%r11, %0\n\t" \ + : /* output */ \ + "=m" (retval) \ + : /* input */ \ + "r"(_ret) \ + ); \ +} while (0) + +/* This is living hell. */ + +#define __flexos_intelpku_gate1(key_from, key_to, fname, arg1) \ +do { \ + /* we have to call this with a valid/accessible stack, */ \ + /* so do it before switching thread permissions. Note */ \ + /* that the stack won't be accessible after switching */ \ + /* permissions, so we HAVE to store this in a register. 
*/ \ + register uint32_t tid asm("r15") = uk_thread_get_tid(); \ + \ + asm volatile ( \ + /* save remaining parameter registers */ \ + /* TODO do we actually HAVE to do this from the */ \ + /* perspective of the C calling convention? */ \ + "push %%rsi\n\t" \ + "push %%rdi\n\t" \ + "push %%r8\n\t" \ + "push %%r9\n\t" \ + /* save caller-saved registers (r10-11). */ \ + "push %%r10\n\t" \ + "push %%r11\n\t" \ + /* protecting registers? save callee-saved registers */ \ + /* and zero them out (r12-15). */ \ + /* TODO */ \ + "push %%r12\n\t" \ + /* backup source domain's stack/frame pointers */ \ + __ASM_BACKUP_TSB(tsb_comp ## key_from) \ + __ASM_UPDATE_TSB_TMP(tsb_comp ## key_from) \ + /* put parameters in registers */ \ + /* note: arg 1 via input constraints */ \ + /* switch thread permissions */ \ + "1:\n\t" /* define local label */ \ + "movq %3, %%rax\n\t" \ + "xor %%rcx, %%rcx\n\t" \ + "xor %%rdx, %%rdx\n\t" \ + "wrpkru\n\t" \ + "lfence\n\t" /* TODO necessary? */ \ + "cmpq %3, %%rax\n\t" \ + "jne 1b\n\t" /* ROP detected, re-do it */ \ + /* put parameters in final registers */ \ + /* nothing to do: 1 already final */ \ + /* we're ready, switch stack */ \ + __ASM_SWITCH_STACK(tsb_comp ## key_to) \ + __ASM_ALIGN_AND_CALL(fname) \ + /* backup return value in rsi */ \ + /* nothing to do: no return value */ \ + /* switch back thread permissions */ \ + "2:\n\t" /* define local label */ \ + "movq %2, %%rax\n\t" \ + "xor %%rcx, %%rcx\n\t" \ + "xor %%rdx, %%rdx\n\t" \ + "wrpkru\n\t" \ + "lfence\n\t" /* TODO necessary? */ \ + "cmpq %2, %%rax\n\t" \ + "jne 2b\n\t" /* ROP detected, re-do it */ \ + /* switch back the stack */ \ + __ASM_SWITCH_STACK(tsb_comp ## key_from) \ + __ASM_RESTORE_TSB(tsb_comp ## key_from) \ + /* protecting registers? restore callee-saved registers */ \ + /* TODO */ \ + "pop %%r12\n\t" \ + /* restore caller-saved registers */ \ + "pop %%r11\n\t" \ + "pop %%r10\n\t" \ + /* restore parameter registers */ \ + "pop %%r9\n\t" \ + /* save return value from rsi (into r11) */ \ + /* nothing to do: no return value */ \ + "pop %%r8\n\t" \ + "pop %%rdi\n\t" \ + "pop %%rsi\n\t" \ + \ + : /* output */ \ + "=m" (tsb_comp ## key_from), \ + "=m" (tsb_comp ## key_to) \ + : /* input */ \ + "i"(PKRU(key_from)), \ + "i"(PKRU(key_to)), \ + "D"((uint64_t)((arg1))), /* ask for %rdi */ \ + "r"(tid), \ + "i"(fname) \ + : /* clobbers */ \ + "rax", "rcx", "rdx", \ + "memory" /* TODO should we clobber memory? */ \ + ); \ +} while (0) + +#define __flexos_intelpku_gate1_r(key_from, key_to, retval, fname, arg1)\ +do { \ + /* we have to call this with a valid/accessible stack, */ \ + /* so do it before switching thread permissions. Note */ \ + /* that the stack won't be accessible after switching */ \ + /* permissions, so we HAVE to store this in a register. */ \ + register uint32_t tid asm("r15") = uk_thread_get_tid(); \ + register uint64_t _ret asm("r11"); \ + \ + asm volatile ( \ + /* save remaining parameter registers */ \ + /* TODO do we actually HAVE to do this from the */ \ + /* perspective of the C calling convention? */ \ + "push %%rsi\n\t" \ + "push %%rdi\n\t" \ + "push %%r8\n\t" \ + "push %%r9\n\t" \ + /* save caller-saved registers (r10-11). */ \ + "push %%r10\n\t" \ + "push %%r11\n\t" \ + /* protecting registers? save callee-saved registers */ \ + /* and zero them out (r12-15). 
*/ \ + /* TODO */ \ + "push %%r12\n\t" \ + /* backup source domain's stack/frame pointers */ \ + __ASM_BACKUP_TSB(tsb_comp ## key_from) \ + __ASM_UPDATE_TSB_TMP(tsb_comp ## key_from) \ + /* put parameters in registers */ \ + /* note: arg 1 via input constraints */ \ + /* switch thread permissions */ \ + "1:\n\t" /* define local label */ \ + "movq %4, %%rax\n\t" \ + "xor %%rcx, %%rcx\n\t" \ + "xor %%rdx, %%rdx\n\t" \ + "wrpkru\n\t" \ + "lfence\n\t" /* TODO necessary? */ \ + "cmpq %4, %%rax\n\t" \ + "jne 1b\n\t" /* ROP detected, re-do it */ \ + /* put parameters in final registers */ \ + /* nothing to do: 1 already final */ \ + /* we're ready, switch stack */ \ + __ASM_SWITCH_STACK(tsb_comp ## key_to) \ + __ASM_ALIGN_AND_CALL(fname) \ + /* backup return value in rsi */ \ + "movq %%rax, %%rsi\n\t" \ + /* switch back thread permissions */ \ + "2:\n\t" /* define local label */ \ + "movq %3, %%rax\n\t" \ + "xor %%rcx, %%rcx\n\t" \ + "xor %%rdx, %%rdx\n\t" \ + "wrpkru\n\t" \ + "lfence\n\t" /* TODO necessary? */ \ + "cmpq %3, %%rax\n\t" \ + "jne 2b\n\t" /* ROP detected, re-do it */ \ + /* switch back the stack */ \ + __ASM_SWITCH_STACK(tsb_comp ## key_from) \ + __ASM_RESTORE_TSB(tsb_comp ## key_from) \ + /* protecting registers? restore callee-saved registers */ \ + /* TODO */ \ + "pop %%r12\n\t" \ + /* restore caller-saved registers */ \ + "pop %%r11\n\t" \ + "pop %%r10\n\t" \ + /* restore parameter registers */ \ + "pop %%r9\n\t" \ + /* save return value from rsi (into r11) */ \ + "movq %%rsi, %%r11\n\t" \ + "pop %%r8\n\t" \ + "pop %%rdi\n\t" \ + "pop %%rsi\n\t" \ + \ + : /* output */ \ + "=r" (_ret), /* always %%r11 */ \ + "=m" (tsb_comp ## key_from), \ + "=m" (tsb_comp ## key_to) \ + : /* input */ \ + "i"(PKRU(key_from)), \ + "i"(PKRU(key_to)), \ + "D"((uint64_t)((arg1))), \ + "r"(tid), \ + "i"(fname) \ + : /* clobbers */ \ + "rax", "rcx", "rdx", \ + "memory" /* TODO should we clobber memory? */ \ + ); \ + \ + /* this will be optimized by the compiler */ \ + if (sizeof(retval) == 1) /* 8 bit */ \ + asm volatile ( \ + "mov %%r11b, %0\n\t" \ + : /* output */ \ + "=m" (retval) \ + : /* input */ \ + "r"(_ret) \ + ); \ + if (sizeof(retval) == 2) /* 16 bit */ \ + asm volatile ( \ + "mov %%r11w, %0\n\t" \ + : /* output */ \ + "=m" (retval) \ + : /* input */ \ + "r"(_ret) \ + ); \ + else if (sizeof(retval) == 4) /* 32 bit */ \ + asm volatile ( \ + "mov %%r11d, %0\n\t" \ + : /* output */ \ + "=m" (retval) \ + : /* input */ \ + "r"(_ret) \ + ); \ + else if (sizeof(retval) == 8) /* 64 bit */ \ + asm volatile ( \ + "mov %%r11, %0\n\t" \ + : /* output */ \ + "=m" (retval) \ + : /* input */ \ + "r"(_ret) \ + ); \ +} while (0) + +#define __flexos_intelpku_gate2(key_from, key_to, fname, arg1, arg2) \ +do { \ + /* we have to call this with a valid/accessible stack, */ \ + /* so do it before switching thread permissions. Note */ \ + /* that the stack won't be accessible after switching */ \ + /* permissions, so we HAVE to store this in a register. */ \ + register uint32_t tid asm("r15") = uk_thread_get_tid(); \ + \ + asm volatile ( \ + /* save remaining parameter registers */ \ + /* TODO do we actually HAVE to do this from the */ \ + /* perspective of the C calling convention? */ \ + "push %%rsi\n\t" \ + "push %%rdi\n\t" \ + "push %%r8\n\t" \ + "push %%r9\n\t" \ + /* save caller-saved registers (r10-11). */ \ + "push %%r10\n\t" \ + "push %%r11\n\t" \ + /* protecting registers? save callee-saved registers */ \ + /* and zero them out (r12-15). 
*/ \ + /* TODO */ \ + "push %%r12\n\t" \ + /* backup source domain's stack/frame pointers */ \ + __ASM_BACKUP_TSB(tsb_comp ## key_from) \ + __ASM_UPDATE_TSB_TMP(tsb_comp ## key_from) \ + /* put parameters in registers */ \ + /* note: args 1 & 2 via input constraints */ \ + /* switch thread permissions */ \ + "1:\n\t" /* define local label */ \ + "movq %3, %%rax\n\t" \ + "xor %%rcx, %%rcx\n\t" \ + "xor %%rdx, %%rdx\n\t" \ + "wrpkru\n\t" \ + "lfence\n\t" /* TODO necessary? */ \ + "cmpq %3, %%rax\n\t" \ + "jne 1b\n\t" /* ROP detected, re-do it */ \ + /* put parameters in final registers */ \ + /* nothing to do: 1 & 2 already final */ \ + /* we're ready, switch stack */ \ + __ASM_SWITCH_STACK(tsb_comp ## key_to) \ + __ASM_ALIGN_AND_CALL(fname) \ + /* backup return value in rsi */ \ + /* nothing to do: no return value */ \ + /* switch back thread permissions */ \ + "2:\n\t" /* define local label */ \ + "movq %2, %%rax\n\t" \ + "xor %%rcx, %%rcx\n\t" \ + "xor %%rdx, %%rdx\n\t" \ + "wrpkru\n\t" \ + "lfence\n\t" /* TODO necessary? */ \ + "cmpq %2, %%rax\n\t" \ + "jne 2b\n\t" /* ROP detected, re-do it */ \ + /* switch back the stack */ \ + __ASM_SWITCH_STACK(tsb_comp ## key_from) \ + __ASM_RESTORE_TSB(tsb_comp ## key_from) \ + /* protecting registers? restore callee-saved registers */ \ + /* TODO */ \ + "pop %%r12\n\t" \ + /* restore caller-saved registers */ \ + "pop %%r11\n\t" \ + "pop %%r10\n\t" \ + /* restore parameter registers */ \ + "pop %%r9\n\t" \ + /* save return value from rsi (into r11) */ \ + /* nothing to do: no return value */ \ + "pop %%r8\n\t" \ + "pop %%rdi\n\t" \ + "pop %%rsi\n\t" \ + \ + : /* output */ \ + "=m" (tsb_comp ## key_from), \ + "=m" (tsb_comp ## key_to) \ + : /* input */ \ + "i"(PKRU(key_from)), \ + "i"(PKRU(key_to)), \ + "D"((uint64_t)((arg1))), \ + "S"((uint64_t)((arg2))), \ + "r"(tid), \ + "i"(fname) \ + : /* clobbers */ \ + "rax", "rcx", "rdx", \ + "memory" /* TODO should we clobber memory? */ \ + ); \ +} while (0) + +#define __flexos_intelpku_gate2_r(key_from, key_to, retval, fname, \ + arg1, arg2) \ +do { \ + /* we have to call this with a valid/accessible stack, */ \ + /* so do it before switching thread permissions. Note */ \ + /* that the stack won't be accessible after switching */ \ + /* permissions, so we HAVE to store this in a register. */ \ + register uint32_t tid asm("r15") = uk_thread_get_tid(); \ + register uint64_t _ret asm("r11"); \ + \ + asm volatile ( \ + /* save remaining parameter registers */ \ + /* TODO do we actually HAVE to do this from the */ \ + /* perspective of the C calling convention? */ \ + "push %%rsi\n\t" \ + "push %%rdi\n\t" \ + "push %%r8\n\t" \ + "push %%r9\n\t" \ + /* save caller-saved registers (r10-11). */ \ + "push %%r10\n\t" \ + "push %%r11\n\t" \ + /* protecting registers? save callee-saved registers */ \ + /* and zero them out (r12-15). */ \ + /* TODO */ \ + "push %%r12\n\t" \ + /* backup source domain's stack/frame pointers */ \ + __ASM_BACKUP_TSB(tsb_comp ## key_from) \ + __ASM_UPDATE_TSB_TMP(tsb_comp ## key_from) \ + /* put parameters in registers */ \ + /* note: args 1 & 2 via input constraints */ \ + /* switch thread permissions */ \ + "1:\n\t" /* define local label */ \ + "movq %4, %%rax\n\t" \ + "xor %%rcx, %%rcx\n\t" \ + "xor %%rdx, %%rdx\n\t" \ + "wrpkru\n\t" \ + "lfence\n\t" /* TODO necessary? 
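+	   (presumably for the same reason as the Spectre 1.1 note	\
+	   next to wrpkru() in intelpku.h)				\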
*/ \ + "cmpq %4, %%rax\n\t" \ + "jne 1b\n\t" /* ROP detected, re-do it */ \ + /* put parameters in final registers */ \ + /* nothing to do: 1 & 2 already final */ \ + /* we're ready, switch stack */ \ + __ASM_SWITCH_STACK(tsb_comp ## key_to) \ + __ASM_ALIGN_AND_CALL(fname) \ + /* backup return value in rsi */ \ + "movq %%rax, %%rsi\n\t" \ + /* switch back thread permissions */ \ + "2:\n\t" /* define local label */ \ + "movq %3, %%rax\n\t" \ + "xor %%rcx, %%rcx\n\t" \ + "xor %%rdx, %%rdx\n\t" \ + "wrpkru\n\t" \ + "lfence\n\t" /* TODO necessary? */ \ + "cmpq %3, %%rax\n\t" \ + "jne 2b\n\t" /* ROP detected, re-do it */ \ + /* switch back the stack */ \ + __ASM_SWITCH_STACK(tsb_comp ## key_from) \ + __ASM_RESTORE_TSB(tsb_comp ## key_from) \ + /* protecting registers? restore callee-saved registers */ \ + /* TODO */ \ + "pop %%r12\n\t" \ + /* restore caller-saved registers */ \ + "pop %%r11\n\t" \ + "pop %%r10\n\t" \ + /* restore parameter registers */ \ + "pop %%r9\n\t" \ + /* save return value from rsi (into r11) */ \ + "movq %%rsi, %%r11\n\t" \ + "pop %%r8\n\t" \ + "pop %%rdi\n\t" \ + "pop %%rsi\n\t" \ + \ + : /* output */ \ + "=r" (_ret), /* always %%r11 */ \ + "=m" (tsb_comp ## key_from), \ + "=m" (tsb_comp ## key_to) \ + : /* input */ \ + "i"(PKRU(key_from)), \ + "i"(PKRU(key_to)), \ + "D"((uint64_t)((arg1))), \ + "S"((uint64_t)((arg2))), \ + "r"(tid), \ + "i"(fname) \ + : /* clobbers */ \ + "rax", "rcx", "rdx", \ + "memory" /* TODO should we clobber memory? */ \ + ); \ + \ + /* this will be optimized by the compiler */ \ + if (sizeof(retval) == 1) /* 8 bit */ \ + asm volatile ( \ + "mov %%r11b, %0\n\t" \ + : /* output */ \ + "=m" (retval) \ + : /* input */ \ + "r"(_ret) \ + ); \ + if (sizeof(retval) == 2) /* 16 bit */ \ + asm volatile ( \ + "mov %%r11w, %0\n\t" \ + : /* output */ \ + "=m" (retval) \ + : /* input */ \ + "r"(_ret) \ + ); \ + else if (sizeof(retval) == 4) /* 32 bit */ \ + asm volatile ( \ + "mov %%r11d, %0\n\t" \ + : /* output */ \ + "=m" (retval) \ + : /* input */ \ + "r"(_ret) \ + ); \ + else if (sizeof(retval) == 8) /* 64 bit */ \ + asm volatile ( \ + "mov %%r11, %0\n\t" \ + : /* output */ \ + "=m" (retval) \ + : /* input */ \ + "r"(_ret) \ + ); \ +} while (0) + +#define __flexos_intelpku_gate3(key_from, key_to, fname, arg1, arg2, \ + arg3) \ +do { \ + /* we have to call this with a valid/accessible stack, */ \ + /* so do it before switching thread permissions. Note */ \ + /* that the stack won't be accessible after switching */ \ + /* permissions, so we HAVE to store this in a register. */ \ + register uint32_t tid asm("r15") = uk_thread_get_tid(); \ + \ + /* No inline asm input constraints for r12, so do */ \ + /* it this way */ \ + register uint64_t _arg3 asm("r12") = (uint64_t) arg3; \ + \ + asm volatile ( \ + /* save remaining parameter registers */ \ + /* TODO do we actually HAVE to do this from the */ \ + /* perspective of the C calling convention? */ \ + "push %%rsi\n\t" \ + "push %%rdi\n\t" \ + "push %%r8\n\t" \ + "push %%r9\n\t" \ + /* save caller-saved registers (r10-11). */ \ + "push %%r10\n\t" \ + "push %%r11\n\t" \ + /* protecting registers? save callee-saved registers */ \ + /* and zero them out (r12-15). 
*/ \ + /* TODO */ \ + "push %%r12\n\t" \ + /* backup source domain's stack/frame pointers */ \ + __ASM_BACKUP_TSB(tsb_comp ## key_from) \ + __ASM_UPDATE_TSB_TMP(tsb_comp ## key_from) \ + /* put parameters in registers */ \ + /* note: arg 1 & 2 via input constraints */ \ + /* note: arg 3 in r12 for now */ \ + /* switch thread permissions */ \ + "1:\n\t" /* define local label */ \ + "movq %3, %%rax\n\t" \ + "xor %%rcx, %%rcx\n\t" \ + "xor %%rdx, %%rdx\n\t" \ + "wrpkru\n\t" \ + "lfence\n\t" /* TODO necessary? */ \ + "cmpq %3, %%rax\n\t" \ + "jne 1b\n\t" /* ROP detected, re-do it */ \ + /* put parameters in final registers */ \ + "movq %%r12, %%rdx\n\t" \ + /* we're ready, switch stack */ \ + __ASM_SWITCH_STACK(tsb_comp ## key_to) \ + __ASM_ALIGN_AND_CALL(fname) \ + /* backup return value in rsi */ \ + /* nothing to do: no return value */ \ + /* switch back thread permissions */ \ + "2:\n\t" /* define local label */ \ + "movq %2, %%rax\n\t" \ + "xor %%rcx, %%rcx\n\t" \ + "xor %%rdx, %%rdx\n\t" \ + "wrpkru\n\t" \ + "lfence\n\t" /* TODO necessary? */ \ + "cmpq %2, %%rax\n\t" \ + "jne 2b\n\t" /* ROP detected, re-do it */ \ + /* switch back the stack */ \ + __ASM_SWITCH_STACK(tsb_comp ## key_from) \ + __ASM_RESTORE_TSB(tsb_comp ## key_from) \ + /* protecting registers? restore callee-saved registers */ \ + /* TODO */ \ + "pop %%r12\n\t" \ + /* restore caller-saved registers */ \ + "pop %%r11\n\t" \ + "pop %%r10\n\t" \ + /* restore parameter registers */ \ + "pop %%r9\n\t" \ + /* save return value from rsi (into r11) */ \ + /* nothing to do: no return value */ \ + "pop %%r8\n\t" \ + "pop %%rdi\n\t" \ + "pop %%rsi\n\t" \ + \ + : /* output */ \ + "=m" (tsb_comp ## key_from), \ + "=m" (tsb_comp ## key_to) \ + : /* input */ \ + "i"(PKRU(key_from)), \ + "i"(PKRU(key_to)), \ + "D"((uint64_t)((arg1))), \ + "S"((uint64_t)((arg2))), \ + "r"(_arg3), \ + "r"(tid), \ + "i"(fname) \ + : /* clobbers */ \ + "rax", "rcx", "rdx", \ + "memory" /* TODO should we clobber memory? */ \ + ); \ +} while (0) + +#define __flexos_intelpku_gate3_r(key_from, key_to, retval, fname, \ + arg1, arg2, arg3) \ +do { \ + /* we have to call this with a valid/accessible stack, */ \ + /* so do it before switching thread permissions. Note */ \ + /* that the stack won't be accessible after switching */ \ + /* permissions, so we HAVE to store this in a register. */ \ + register uint32_t tid asm("r15") = uk_thread_get_tid(); \ + register uint64_t _ret asm("r11"); \ + \ + /* No inline asm input constraints for r12, so do */ \ + /* it this way */ \ + register uint64_t _arg3 asm("r12") = (uint64_t) arg3; \ + \ + asm volatile ( \ + /* save remaining parameter registers */ \ + /* TODO do we actually HAVE to do this from the */ \ + /* perspective of the C calling convention? */ \ + "push %%rsi\n\t" \ + "push %%rdi\n\t" \ + "push %%r8\n\t" \ + "push %%r9\n\t" \ + /* save caller-saved registers (r10-11). */ \ + "push %%r10\n\t" \ + "push %%r11\n\t" \ + /* protecting registers? save callee-saved registers */ \ + /* and zero them out (r12-15). */ \ + /* TODO */ \ + "push %%r12\n\t" \ + /* backup source domain's stack/frame pointers */ \ + __ASM_BACKUP_TSB(tsb_comp ## key_from) \ + __ASM_UPDATE_TSB_TMP(tsb_comp ## key_from) \ + /* put parameters in registers */ \ + /* note: arg 1 & 2 via input constraints */ \ + /* note: arg 3 in r12 for now */ \ + /* switch thread permissions */ \ + "1:\n\t" /* define local label */ \ + "movq %4, %%rax\n\t" \ + "xor %%rcx, %%rcx\n\t" \ + "xor %%rdx, %%rdx\n\t" \ + "wrpkru\n\t" \ + "lfence\n\t" /* TODO necessary? 
*/ \ + "cmpq %4, %%rax\n\t" \ + "jne 1b\n\t" /* ROP detected, re-do it */ \ + /* put parameters in final registers */ \ + "movq %%r12, %%rdx\n\t" \ + /* we're ready, switch stack */ \ + __ASM_SWITCH_STACK(tsb_comp ## key_to) \ + __ASM_ALIGN_AND_CALL(fname) \ + /* backup return value in rsi */ \ + "movq %%rax, %%rsi\n\t" \ + /* switch back thread permissions */ \ + "2:\n\t" /* define local label */ \ + "movq %3, %%rax\n\t" \ + "xor %%rcx, %%rcx\n\t" \ + "xor %%rdx, %%rdx\n\t" \ + "wrpkru\n\t" \ + "lfence\n\t" /* TODO necessary? */ \ + "cmpq %3, %%rax\n\t" \ + "jne 2b\n\t" /* ROP detected, re-do it */ \ + /* switch back the stack */ \ + __ASM_SWITCH_STACK(tsb_comp ## key_from) \ + __ASM_RESTORE_TSB(tsb_comp ## key_from) \ + /* protecting registers? restore callee-saved registers */ \ + /* TODO */ \ + "pop %%r12\n\t" \ + /* restore caller-saved registers */ \ + "pop %%r11\n\t" \ + "pop %%r10\n\t" \ + /* restore parameter registers */ \ + "pop %%r9\n\t" \ + /* save return value from rsi (into r11) */ \ + "movq %%rsi, %%r11\n\t" \ + "pop %%r8\n\t" \ + "pop %%rdi\n\t" \ + "pop %%rsi\n\t" \ + \ + : /* output */ \ + "=r" (_ret), /* always %%r11 */ \ + "=m" (tsb_comp ## key_from), \ + "=m" (tsb_comp ## key_to) \ + : /* input */ \ + "i"(PKRU(key_from)), \ + "i"(PKRU(key_to)), \ + "D"((uint64_t)((arg1))), \ + "S"((uint64_t)((arg2))), \ + "r"(_arg3), \ + "r"(tid), \ + "i"(fname) \ + : /* clobbers */ \ + "rax", "rcx", "rdx", \ + "memory" /* TODO should we clobber memory? */ \ + ); \ + \ + /* this will be optimized by the compiler */ \ + if (sizeof(retval) == 1) /* 8 bit */ \ + asm volatile ( \ + "mov %%r11b, %0\n\t" \ + : /* output */ \ + "=m" (retval) \ + : /* input */ \ + "r"(_ret) \ + ); \ + if (sizeof(retval) == 2) /* 16 bit */ \ + asm volatile ( \ + "mov %%r11w, %0\n\t" \ + : /* output */ \ + "=m" (retval) \ + : /* input */ \ + "r"(_ret) \ + ); \ + else if (sizeof(retval) == 4) /* 32 bit */ \ + asm volatile ( \ + "mov %%r11d, %0\n\t" \ + : /* output */ \ + "=m" (retval) \ + : /* input */ \ + "r"(_ret) \ + ); \ + else if (sizeof(retval) == 8) /* 64 bit */ \ + asm volatile ( \ + "mov %%r11, %0\n\t" \ + : /* output */ \ + "=m" (retval) \ + : /* input */ \ + "r"(_ret) \ + ); \ +} while (0) + +#define __flexos_intelpku_gate4(key_from, key_to, fname, arg1, arg2, \ + arg3, arg4) \ +do { \ + /* we have to call this with a valid/accessible stack, */ \ + /* so do it before switching thread permissions. Note */ \ + /* that the stack won't be accessible after switching */ \ + /* permissions, so we HAVE to store this in a register. */ \ + register uint32_t tid asm("r15") = uk_thread_get_tid(); \ + \ + /* No inline asm input constraints for r12-13, so do */ \ + /* it this way */ \ + register uint64_t _arg3 asm("r12") = (uint64_t) arg3; \ + register uint64_t _arg4 asm("r13") = (uint64_t) arg4; \ + \ + asm volatile ( \ + /* save remaining parameter registers */ \ + /* TODO do we actually HAVE to do this from the */ \ + /* perspective of the C calling convention? */ \ + "push %%rsi\n\t" \ + "push %%rdi\n\t" \ + "push %%r8\n\t" \ + "push %%r9\n\t" \ + /* save caller-saved registers (r10-11). */ \ + "push %%r10\n\t" \ + "push %%r11\n\t" \ + /* protecting registers? save callee-saved registers */ \ + /* and zero them out (r12-15). 
*/ \ + /* TODO */ \ + "push %%r12\n\t" \ + "push %%r13\n\t" \ + /* backup source domain's stack/frame pointers */ \ + __ASM_BACKUP_TSB(tsb_comp ## key_from) \ + __ASM_UPDATE_TSB_TMP(tsb_comp ## key_from) \ + /* put parameters in registers */ \ + /* note: arg 1 & 2 via input constraints */ \ + /* note: arg 3 & 4 in r12-13 for now */ \ + /* switch thread permissions */ \ + "1:\n\t" /* define local label */ \ + "movq %3, %%rax\n\t" \ + "xor %%rcx, %%rcx\n\t" \ + "xor %%rdx, %%rdx\n\t" \ + "wrpkru\n\t" \ + "lfence\n\t" /* TODO necessary? */ \ + "cmpq %3, %%rax\n\t" \ + "jne 1b\n\t" /* ROP detected, re-do it */ \ + /* put parameters in final registers */ \ + "movq %%r12, %%rdx\n\t" \ + "movq %%r13, %%rcx\n\t" \ + /* we're ready, switch stack */ \ + __ASM_SWITCH_STACK(tsb_comp ## key_to) \ + __ASM_ALIGN_AND_CALL(fname) \ + /* backup return value in rsi */ \ + /* nothing to do: no return value */ \ + /* switch back thread permissions */ \ + "2:\n\t" /* define local label */ \ + "movq %2, %%rax\n\t" \ + "xor %%rcx, %%rcx\n\t" \ + "xor %%rdx, %%rdx\n\t" \ + "wrpkru\n\t" \ + "lfence\n\t" /* TODO necessary? */ \ + "cmpq %2, %%rax\n\t" \ + "jne 2b\n\t" /* ROP detected, re-do it */ \ + /* switch back the stack */ \ + __ASM_SWITCH_STACK(tsb_comp ## key_from) \ + __ASM_RESTORE_TSB(tsb_comp ## key_from) \ + /* protecting registers? restore callee-saved registers */ \ + /* TODO */ \ + "pop %%r13\n\t" \ + "pop %%r12\n\t" \ + /* restore caller-saved registers */ \ + "pop %%r11\n\t" \ + "pop %%r10\n\t" \ + /* restore parameter registers */ \ + "pop %%r9\n\t" \ + /* save return value from rsi (into r11) */ \ + /* nothing to do: no return value */ \ + "pop %%r8\n\t" \ + "pop %%rdi\n\t" \ + "pop %%rsi\n\t" \ + \ + : /* output */ \ + "=m" (tsb_comp ## key_from), \ + "=m" (tsb_comp ## key_to) \ + : /* input */ \ + "i"(PKRU(key_from)), \ + "i"(PKRU(key_to)), \ + "D"((uint64_t)((arg1))), \ + "S"((uint64_t)((arg2))), \ + "r"(_arg3), \ + "r"(_arg4), \ + "r"(tid), \ + "i"(fname) \ + : /* clobbers */ \ + "rax", "rcx", "rdx", \ + "memory" /* TODO should we clobber memory? */ \ + ); \ +} while (0) + +#define __flexos_intelpku_gate4_r(key_from, key_to, retval, fname, \ + arg1, arg2, arg3, arg4) \ +do { \ + /* we have to call this with a valid/accessible stack, */ \ + /* so do it before switching thread permissions. Note */ \ + /* that the stack won't be accessible after switching */ \ + /* permissions, so we HAVE to store this in a register. */ \ + register uint32_t tid asm("r15") = uk_thread_get_tid(); \ + register uint64_t _ret asm("r11"); \ + \ + /* No inline asm input constraints for r12-13, so do */ \ + /* it this way */ \ + register uint64_t _arg3 asm("r12") = (uint64_t) arg3; \ + register uint64_t _arg4 asm("r13") = (uint64_t) arg4; \ + \ + asm volatile ( \ + /* save remaining parameter registers */ \ + /* TODO do we actually HAVE to do this from the */ \ + /* perspective of the C calling convention? */ \ + "push %%rsi\n\t" \ + "push %%rdi\n\t" \ + "push %%r8\n\t" \ + "push %%r9\n\t" \ + /* save caller-saved registers (r10-11). */ \ + "push %%r10\n\t" \ + "push %%r11\n\t" \ + /* protecting registers? save callee-saved registers */ \ + /* and zero them out (r12-15). 
*/ \ + /* TODO */ \ + "push %%r12\n\t" \ + "push %%r13\n\t" \ + /* backup source domain's stack/frame pointers */ \ + __ASM_BACKUP_TSB(tsb_comp ## key_from) \ + __ASM_UPDATE_TSB_TMP(tsb_comp ## key_from) \ + /* put parameters in registers */ \ + /* note: arg 1 & 2 via input constraints */ \ + /* note: arg 3 & 4 in r12-13 for now */ \ + /* switch thread permissions */ \ + "1:\n\t" /* define local label */ \ + "movq %4, %%rax\n\t" \ + "xor %%rcx, %%rcx\n\t" \ + "xor %%rdx, %%rdx\n\t" \ + "wrpkru\n\t" \ + "lfence\n\t" /* TODO necessary? */ \ + "cmpq %4, %%rax\n\t" \ + "jne 1b\n\t" /* ROP detected, re-do it */ \ + /* put parameters in final registers */ \ + "movq %%r12, %%rdx\n\t" \ + "movq %%r13, %%rcx\n\t" \ + /* we're ready, switch stack */ \ + __ASM_SWITCH_STACK(tsb_comp ## key_to) \ + __ASM_ALIGN_AND_CALL(fname) \ + /* backup return value in rsi */ \ + "movq %%rax, %%rsi\n\t" \ + /* switch back thread permissions */ \ + "2:\n\t" /* define local label */ \ + "movq %3, %%rax\n\t" \ + "xor %%rcx, %%rcx\n\t" \ + "xor %%rdx, %%rdx\n\t" \ + "wrpkru\n\t" \ + "lfence\n\t" /* TODO necessary? */ \ + "cmpq %3, %%rax\n\t" \ + "jne 2b\n\t" /* ROP detected, re-do it */ \ + /* switch back the stack */ \ + __ASM_SWITCH_STACK(tsb_comp ## key_from) \ + __ASM_RESTORE_TSB(tsb_comp ## key_from) \ + /* protecting registers? restore callee-saved registers */ \ + /* TODO */ \ + "pop %%r13\n\t" \ + "pop %%r12\n\t" \ + /* restore caller-saved registers */ \ + "pop %%r11\n\t" \ + "pop %%r10\n\t" \ + /* restore parameter registers */ \ + "pop %%r9\n\t" \ + /* save return value from rsi (into r11) */ \ + "movq %%rsi, %%r11\n\t" \ + "pop %%r8\n\t" \ + "pop %%rdi\n\t" \ + "pop %%rsi\n\t" \ + \ + : /* output */ \ + "=r" (_ret), /* always %%r11 */ \ + "=m" (tsb_comp ## key_from), \ + "=m" (tsb_comp ## key_to) \ + : /* input */ \ + "i"(PKRU(key_from)), \ + "i"(PKRU(key_to)), \ + "D"((uint64_t)((arg1))), \ + "S"((uint64_t)((arg2))), \ + "r"(_arg3), \ + "r"(_arg4), \ + "r"(tid), \ + "i"(fname) \ + : /* clobbers */ \ + "rax", "rcx", "rdx", \ + "memory" /* TODO should we clobber memory? */ \ + ); \ + \ + /* this will be optimized by the compiler */ \ + if (sizeof(retval) == 1) /* 8 bit */ \ + asm volatile ( \ + "mov %%r11b, %0\n\t" \ + : /* output */ \ + "=m" (retval) \ + : /* input */ \ + "r"(_ret) \ + ); \ + if (sizeof(retval) == 2) /* 16 bit */ \ + asm volatile ( \ + "mov %%r11w, %0\n\t" \ + : /* output */ \ + "=m" (retval) \ + : /* input */ \ + "r"(_ret) \ + ); \ + else if (sizeof(retval) == 4) /* 32 bit */ \ + asm volatile ( \ + "mov %%r11d, %0\n\t" \ + : /* output */ \ + "=m" (retval) \ + : /* input */ \ + "r"(_ret) \ + ); \ + else if (sizeof(retval) == 8) /* 64 bit */ \ + asm volatile ( \ + "mov %%r11, %0\n\t" \ + : /* output */ \ + "=m" (retval) \ + : /* input */ \ + "r"(_ret) \ + ); \ +} while (0) + +#define __flexos_intelpku_gate5(key_from, key_to, fname, arg1, arg2, \ + arg3, arg4, \ + arg5) \ +do { \ + /* we have to call this with a valid/accessible stack, */ \ + /* so do it before switching thread permissions. Note */ \ + /* that the stack won't be accessible after switching */ \ + /* permissions, so we HAVE to store this in a register. 
*/ \ + register uint32_t tid asm("r15") = uk_thread_get_tid(); \ + \ + /* No inline asm input constraints for r8,12-13, so do */ \ + /* it this way */ \ + register uint64_t _arg3 asm("r12") = (uint64_t) arg3; \ + register uint64_t _arg4 asm("r13") = (uint64_t) arg4; \ + register uint64_t _arg5 asm("r8") = (uint64_t) arg5; \ + \ + asm volatile ( \ + /* save remaining parameter registers */ \ + /* TODO do we actually HAVE to do this from the */ \ + /* perspective of the C calling convention? */ \ + "push %%rsi\n\t" \ + "push %%rdi\n\t" \ + "push %%r8\n\t" \ + "push %%r9\n\t" \ + /* save caller-saved registers (r10-11). */ \ + "push %%r10\n\t" \ + "push %%r11\n\t" \ + /* protecting registers? save callee-saved registers */ \ + /* and zero them out (r12-15). */ \ + /* TODO */ \ + "push %%r12\n\t" \ + "push %%r13\n\t" \ + /* backup source domain's stack/frame pointers */ \ + __ASM_BACKUP_TSB(tsb_comp ## key_from) \ + __ASM_UPDATE_TSB_TMP(tsb_comp ## key_from) \ + /* put parameters in registers */ \ + /* note: arg 1 & 2 via input constraints */ \ + /* note: arg 3 & 4 in r12-13 for now */ \ + /* note: arg 5 via asm("r8") */ \ + /* switch thread permissions */ \ + "1:\n\t" /* define local label */ \ + "movq %3, %%rax\n\t" \ + "xor %%rcx, %%rcx\n\t" \ + "xor %%rdx, %%rdx\n\t" \ + "wrpkru\n\t" \ + "lfence\n\t" /* TODO necessary? */ \ + "cmpq %3, %%rax\n\t" \ + "jne 1b\n\t" /* ROP detected, re-do it */ \ + /* put parameters in final registers */ \ + "movq %%r12, %%rdx\n\t" \ + "movq %%r13, %%rcx\n\t" \ + /* we're ready, switch stack */ \ + __ASM_SWITCH_STACK(tsb_comp ## key_to) \ + __ASM_ALIGN_AND_CALL(fname) \ + /* backup return value in rsi */ \ + /* nothing to do: no return value */ \ + /* switch back thread permissions */ \ + "2:\n\t" /* define local label */ \ + "movq %2, %%rax\n\t" \ + "xor %%rcx, %%rcx\n\t" \ + "xor %%rdx, %%rdx\n\t" \ + "wrpkru\n\t" \ + "lfence\n\t" /* TODO necessary? */ \ + "cmpq %2, %%rax\n\t" \ + "jne 2b\n\t" /* ROP detected, re-do it */ \ + /* switch back the stack */ \ + __ASM_SWITCH_STACK(tsb_comp ## key_from) \ + __ASM_RESTORE_TSB(tsb_comp ## key_from) \ + /* protecting registers? restore callee-saved registers */ \ + /* TODO */ \ + "pop %%r13\n\t" \ + "pop %%r12\n\t" \ + /* restore caller-saved registers */ \ + "pop %%r11\n\t" \ + "pop %%r10\n\t" \ + /* restore parameter registers */ \ + "pop %%r9\n\t" \ + /* save return value from rsi (into r11) */ \ + /* nothing to do: no return value */ \ + "pop %%r8\n\t" \ + "pop %%rdi\n\t" \ + "pop %%rsi\n\t" \ + \ + : /* output */ \ + "=m" (tsb_comp ## key_from), \ + "=m" (tsb_comp ## key_to) \ + : /* input */ \ + "i"(PKRU(key_from)), \ + "i"(PKRU(key_to)), \ + "D"((uint64_t)((arg1))), \ + "S"((uint64_t)((arg2))), \ + "r"(_arg3), \ + "r"(_arg4), \ + "r"(_arg5), \ + "r"(tid), \ + "i"(fname) \ + : /* clobbers */ \ + "rax", "rcx", "rdx", \ + "memory" /* TODO should we clobber memory? */ \ + ); \ +} while (0) + +#define __flexos_intelpku_gate5_r(key_from, key_to, retval, fname, \ + arg1, arg2, arg3, arg4, arg5) \ +do { \ + /* we have to call this with a valid/accessible stack, */ \ + /* so do it before switching thread permissions. Note */ \ + /* that the stack won't be accessible after switching */ \ + /* permissions, so we HAVE to store this in a register. 
*/ \ + register uint32_t tid asm("r15") = uk_thread_get_tid(); \ + register uint64_t _ret asm("r11"); \ + \ + /* No inline asm input constraints for r8,12-13, so do */ \ + /* it this way */ \ + register uint64_t _arg3 asm("r12") = (uint64_t) arg3; \ + register uint64_t _arg4 asm("r13") = (uint64_t) arg4; \ + register uint64_t _arg5 asm("r8") = (uint64_t) arg5; \ + \ + asm volatile ( \ + /* save remaining parameter registers */ \ + /* TODO do we actually HAVE to do this from the */ \ + /* perspective of the C calling convention? */ \ + "push %%rsi\n\t" \ + "push %%rdi\n\t" \ + "push %%r8\n\t" \ + "push %%r9\n\t" \ + /* save caller-saved registers (r10-11). */ \ + "push %%r10\n\t" \ + "push %%r11\n\t" \ + /* protecting registers? save callee-saved registers */ \ + /* and zero them out (r12-15). */ \ + /* TODO */ \ + "push %%r12\n\t" \ + "push %%r13\n\t" \ + /* backup source domain's stack/frame pointers */ \ + __ASM_BACKUP_TSB(tsb_comp ## key_from) \ + __ASM_UPDATE_TSB_TMP(tsb_comp ## key_from) \ + /* put parameters in registers */ \ + /* note: arg 1 & 2 via input constraints */ \ + /* note: arg 3 & 4 in r12-13 for now */ \ + /* note: arg 5 via asm("r8") */ \ + /* switch thread permissions */ \ + "1:\n\t" /* define local label */ \ + "movq %4, %%rax\n\t" \ + "xor %%rcx, %%rcx\n\t" \ + "xor %%rdx, %%rdx\n\t" \ + "wrpkru\n\t" \ + "lfence\n\t" /* TODO necessary? */ \ + "cmpq %4, %%rax\n\t" \ + "jne 1b\n\t" /* ROP detected, re-do it */ \ + /* put parameters in final registers */ \ + "movq %%r12, %%rdx\n\t" \ + "movq %%r13, %%rcx\n\t" \ + /* we're ready, switch stack */ \ + __ASM_SWITCH_STACK(tsb_comp ## key_to) \ + __ASM_ALIGN_AND_CALL(fname) \ + /* backup return value in rsi */ \ + "movq %%rax, %%rsi\n\t" \ + /* switch back thread permissions */ \ + "2:\n\t" /* define local label */ \ + "movq %3, %%rax\n\t" \ + "xor %%rcx, %%rcx\n\t" \ + "xor %%rdx, %%rdx\n\t" \ + "wrpkru\n\t" \ + "lfence\n\t" /* TODO necessary? */ \ + "cmpq %3, %%rax\n\t" \ + "jne 2b\n\t" /* ROP detected, re-do it */ \ + /* switch back the stack */ \ + __ASM_SWITCH_STACK(tsb_comp ## key_from) \ + __ASM_RESTORE_TSB(tsb_comp ## key_from) \ + /* protecting registers? restore callee-saved registers */ \ + /* TODO */ \ + "pop %%r13\n\t" \ + "pop %%r12\n\t" \ + /* restore caller-saved registers */ \ + "pop %%r11\n\t" \ + "pop %%r10\n\t" \ + /* restore parameter registers */ \ + "pop %%r9\n\t" \ + /* save return value from rsi (into r11) */ \ + "movq %%rsi, %%r11\n\t" \ + "pop %%r8\n\t" \ + "pop %%rdi\n\t" \ + "pop %%rsi\n\t" \ + \ + : /* output */ \ + "=r" (_ret), /* always %%r11 */ \ + "=m" (tsb_comp ## key_from), \ + "=m" (tsb_comp ## key_to) \ + : /* input */ \ + "i"(PKRU(key_from)), \ + "i"(PKRU(key_to)), \ + "D"((uint64_t)((arg1))), \ + "S"((uint64_t)((arg2))), \ + "r"(_arg3), \ + "r"(_arg4), \ + "r"(_arg5), \ + "r"(tid), \ + "i"(fname) \ + : /* clobbers */ \ + "rax", "rcx", "rdx", \ + "memory" /* TODO should we clobber memory? 
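+	   (keeping it is the conservative choice: the callee may read	\
+	   or write shared memory behind the compiler's back)		\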
*/ \ + ); \ + \ + /* this will be optimized by the compiler */ \ + if (sizeof(retval) == 1) /* 8 bit */ \ + asm volatile ( \ + "mov %%r11b, %0\n\t" \ + : /* output */ \ + "=m" (retval) \ + : /* input */ \ + "r"(_ret) \ + ); \ + if (sizeof(retval) == 2) /* 16 bit */ \ + asm volatile ( \ + "mov %%r11w, %0\n\t" \ + : /* output */ \ + "=m" (retval) \ + : /* input */ \ + "r"(_ret) \ + ); \ + else if (sizeof(retval) == 4) /* 32 bit */ \ + asm volatile ( \ + "mov %%r11d, %0\n\t" \ + : /* output */ \ + "=m" (retval) \ + : /* input */ \ + "r"(_ret) \ + ); \ + else if (sizeof(retval) == 8) /* 64 bit */ \ + asm volatile ( \ + "mov %%r11, %0\n\t" \ + : /* output */ \ + "=m" (retval) \ + : /* input */ \ + "r"(_ret) \ + ); \ +} while (0) + +#define __flexos_intelpku_gate6(key_from, key_to, fname, arg1, arg2, \ + arg3, arg4, \ + arg5, arg6) \ +do { \ + /* we have to call this with a valid/accessible stack, */ \ + /* so do it before switching thread permissions. Note */ \ + /* that the stack won't be accessible after switching */ \ + /* permissions, so we HAVE to store this in a register. */ \ + register uint32_t tid asm("r15") = uk_thread_get_tid(); \ + \ + /* No inline asm input constraints for r8-9,12-13, so do */ \ + /* it this way */ \ + register uint64_t _arg3 asm("r12") = (uint64_t) arg3; \ + register uint64_t _arg4 asm("r13") = (uint64_t) arg4; \ + register uint64_t _arg5 asm("r8") = (uint64_t) arg5; \ + register uint64_t _arg6 asm("r9") = (uint64_t) arg6; \ + \ + asm volatile ( \ + /* save remaining parameter registers */ \ + /* TODO do we actually HAVE to do this from the */ \ + /* perspective of the C calling convention? */ \ + "push %%rsi\n\t" \ + "push %%rdi\n\t" \ + "push %%r8\n\t" \ + "push %%r9\n\t" \ + /* save caller-saved registers (r10-11). */ \ + "push %%r10\n\t" \ + "push %%r11\n\t" \ + /* protecting registers? save callee-saved registers */ \ + /* and zero them out (r12-15). */ \ + /* TODO */ \ + "push %%r12\n\t" \ + "push %%r13\n\t" \ + /* backup source domain's stack/frame pointers */ \ + __ASM_BACKUP_TSB(tsb_comp ## key_from) \ + __ASM_UPDATE_TSB_TMP(tsb_comp ## key_from) \ + /* put parameters in registers */ \ + /* note: arg 1 & 2 via input constraints */ \ + /* note: arg 3 & 4 in r12-13 for now */ \ + /* note: arg 5 & 6 via asm("r8/9") */ \ + /* switch thread permissions */ \ + "1:\n\t" /* define local label */ \ + "movq %3, %%rax\n\t" \ + "xor %%rcx, %%rcx\n\t" \ + "xor %%rdx, %%rdx\n\t" \ + "wrpkru\n\t" \ + "lfence\n\t" /* TODO necessary? */ \ + "cmpq %3, %%rax\n\t" \ + "jne 1b\n\t" /* ROP detected, re-do it */ \ + /* put parameters in final registers */ \ + "movq %%r12, %%rdx\n\t" \ + "movq %%r13, %%rcx\n\t" \ + /* we're ready, switch stack */ \ + __ASM_SWITCH_STACK(tsb_comp ## key_to) \ + __ASM_ALIGN_AND_CALL(fname) \ + /* backup return value in rsi */ \ + /* nothing to do: no return value */ \ + /* switch back thread permissions */ \ + "2:\n\t" /* define local label */ \ + "movq %2, %%rax\n\t" \ + "xor %%rcx, %%rcx\n\t" \ + "xor %%rdx, %%rdx\n\t" \ + "wrpkru\n\t" \ + "lfence\n\t" /* TODO necessary? */ \ + "cmpq %2, %%rax\n\t" \ + "jne 2b\n\t" /* ROP detected, re-do it */ \ + /* switch back the stack */ \ + __ASM_SWITCH_STACK(tsb_comp ## key_from) \ + __ASM_RESTORE_TSB(tsb_comp ## key_from) \ + /* protecting registers? 
restore callee-saved registers */ \ + /* TODO */ \ + "pop %%r13\n\t" \ + "pop %%r12\n\t" \ + /* restore caller-saved registers */ \ + "pop %%r11\n\t" \ + "pop %%r10\n\t" \ + /* restore parameter registers */ \ + "pop %%r9\n\t" \ + /* save return value from rsi (into r11) */ \ + /* nothing to do: no return value */ \ + "pop %%r8\n\t" \ + "pop %%rdi\n\t" \ + "pop %%rsi\n\t" \ + \ + : /* output */ \ + "=m" (tsb_comp ## key_from), \ + "=m" (tsb_comp ## key_to) \ + : /* input */ \ + "i"(PKRU(key_from)), \ + "i"(PKRU(key_to)), \ + "D"((uint64_t)((arg1))), \ + "S"((uint64_t)((arg2))), \ + "r"(_arg3), \ + "r"(_arg4), \ + "r"(_arg5), \ + "r"(_arg6), \ + "r"(tid), \ + "i"(fname) \ + : /* clobbers */ \ + "rax", "rcx", "rdx", \ + "memory" /* TODO should we clobber memory? */ \ + ); \ +} while (0) + +#define __flexos_intelpku_gate6_r(key_from, key_to, retval, fname, \ + arg1, arg2, arg3, arg4, arg5, arg6) \ +do { \ + /* we have to call this with a valid/accessible stack, */ \ + /* so do it before switching thread permissions. Note */ \ + /* that the stack won't be accessible after switching */ \ + /* permissions, so we HAVE to store this in a register. */ \ + register uint32_t tid asm("r15") = uk_thread_get_tid(); \ + register uint64_t _ret asm("r11"); \ + \ + /* No inline asm input constraints for r8-9,12-13, so do */ \ + /* it this way */ \ + register uint64_t _arg3 asm("r12") = (uint64_t) arg3; \ + register uint64_t _arg4 asm("r13") = (uint64_t) arg4; \ + register uint64_t _arg5 asm("r8") = (uint64_t) arg5; \ + register uint64_t _arg6 asm("r9") = (uint64_t) arg6; \ + \ + asm volatile ( \ + /* save remaining parameter registers */ \ + /* TODO do we actually HAVE to do this from the */ \ + /* perspective of the C calling convention? */ \ + "push %%rsi\n\t" \ + "push %%rdi\n\t" \ + "push %%r8\n\t" \ + "push %%r9\n\t" \ + /* save caller-saved registers (r10-11). */ \ + "push %%r10\n\t" \ + "push %%r11\n\t" \ + /* protecting registers? save callee-saved registers */ \ + /* and zero them out (r12-15). */ \ + /* TODO */ \ + "push %%r12\n\t" \ + "push %%r13\n\t" \ + /* backup source domain's stack/frame pointers */ \ + __ASM_BACKUP_TSB(tsb_comp ## key_from) \ + __ASM_UPDATE_TSB_TMP(tsb_comp ## key_from) \ + /* put parameters in registers */ \ + /* note: arg 1 & 2 via input constraints */ \ + /* note: arg 3 & 4 in r12-13 for now */ \ + /* note: arg 5 & 6 via asm("r8/9") */ \ + /* switch thread permissions */ \ + "1:\n\t" /* define local label */ \ + "movq %4, %%rax\n\t" \ + "xor %%rcx, %%rcx\n\t" \ + "xor %%rdx, %%rdx\n\t" \ + "wrpkru\n\t" \ + "lfence\n\t" /* TODO necessary? */ \ + "cmpq %4, %%rax\n\t" \ + "jne 1b\n\t" /* ROP detected, re-do it */ \ + /* put parameters in final registers */ \ + "movq %%r12, %%rdx\n\t" \ + "movq %%r13, %%rcx\n\t" \ + /* we're ready, switch stack */ \ + __ASM_SWITCH_STACK(tsb_comp ## key_to) \ + __ASM_ALIGN_AND_CALL(fname) \ + /* backup return value in rsi */ \ + "movq %%rax, %%rsi\n\t" \ + /* switch back thread permissions */ \ + "2:\n\t" /* define local label */ \ + "movq %3, %%rax\n\t" \ + "xor %%rcx, %%rcx\n\t" \ + "xor %%rdx, %%rdx\n\t" \ + "wrpkru\n\t" \ + "lfence\n\t" /* TODO necessary? */ \ + "cmpq %3, %%rax\n\t" \ + "jne 2b\n\t" /* ROP detected, re-do it */ \ + /* switch back the stack */ \ + __ASM_SWITCH_STACK(tsb_comp ## key_from) \ + __ASM_RESTORE_TSB(tsb_comp ## key_from) \ + /* protecting registers? 
restore callee-saved registers */ \ + /* TODO */ \ + "pop %%r13\n\t" \ + "pop %%r12\n\t" \ + /* restore caller-saved registers */ \ + "pop %%r11\n\t" \ + "pop %%r10\n\t" \ + /* restore parameter registers */ \ + "pop %%r9\n\t" \ + /* save return value from rsi (into r11) */ \ + "movq %%rsi, %%r11\n\t" \ + "pop %%r8\n\t" \ + "pop %%rdi\n\t" \ + "pop %%rsi\n\t" \ + \ + : /* output */ \ + "=r" (_ret), /* always %%r11 */ \ + "=m" (tsb_comp ## key_from), \ + "=m" (tsb_comp ## key_to) \ + : /* input */ \ + "i"(PKRU(key_from)), \ + "i"(PKRU(key_to)), \ + "D"((uint64_t)((arg1))), \ + "S"((uint64_t)((arg2))), \ + "r"(_arg3), \ + "r"(_arg4), \ + "r"(_arg5), \ + "r"(_arg6), \ + "r"(tid), \ + "i"(fname) \ + : /* clobbers */ \ + "rax", "rcx", "rdx", \ + "memory" /* TODO should we clobber memory? */ \ + ); \ + \ + /* this will be optimized by the compiler */ \ + if (sizeof(retval) == 1) /* 8 bit */ \ + asm volatile ( \ + "mov %%r11b, %0\n\t" \ + : /* output */ \ + "=m" (retval) \ + : /* input */ \ + "r"(_ret) \ + ); \ + if (sizeof(retval) == 2) /* 16 bit */ \ + asm volatile ( \ + "mov %%r11w, %0\n\t" \ + : /* output */ \ + "=m" (retval) \ + : /* input */ \ + "r"(_ret) \ + ); \ + else if (sizeof(retval) == 4) /* 32 bit */ \ + asm volatile ( \ + "mov %%r11d, %0\n\t" \ + : /* output */ \ + "=m" (retval) \ + : /* input */ \ + "r"(_ret) \ + ); \ + else if (sizeof(retval) == 8) /* 64 bit */ \ + asm volatile ( \ + "mov %%r11, %0\n\t" \ + : /* output */ \ + "=m" (retval) \ + : /* input */ \ + "r"(_ret) \ + ); \ +} while (0) + +/* Multiplex depending on the number of arguments. + * TODO support more than 6 parameters (passed on the stack then...) + */ + +#define _flexos_intelpku_gate(N, key_from, key_to, fname, ...) \ +do { \ + UK_CTASSERT(N <= 6); \ + __flexos_intelpku_gate ## N (key_from, key_to, fname __VA_OPT__(,) __VA_ARGS__); \ +} while (0) + +#define _flexos_intelpku_gate_r(N, key_from, key_to, retval, fname, ...)\ +do { \ + UK_CTASSERT(N <= 6); \ + __flexos_intelpku_gate ## N ## _r (key_from, key_to, retval, fname __VA_OPT__(,) __VA_ARGS__); \ +} while (0) + +#endif +#endif /* FLEXOS_INTELPKU_IMPL_H */ diff --git a/lib/flexos-core/include/flexos/impl/intelpku.h b/lib/flexos-core/include/flexos/impl/intelpku.h new file mode 100644 index 0000000000..7dedee0c94 --- /dev/null +++ b/lib/flexos-core/include/flexos/impl/intelpku.h @@ -0,0 +1,182 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Copyright (c) 2020-2021, Pierre Olivier + * Hugo Lefeuvre + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef FLEXOS_INTELPKU_H +#define FLEXOS_INTELPKU_H + +#include +#include +#include + +struct uk_alloc; + +/* Shared allocator */ +extern struct uk_alloc *flexos_shared_alloc; + +/* flexos_comp0_alloc is just an alias for the standard, default allocator: + * offers memory in domain zero. + */ +#define flexos_comp0_alloc _uk_alloc_head + +/* The toolchain will insert allocator declarations here, e.g.: + * + * extern struct uk_alloc *flexos_comp1_alloc; + * + * for compartment 1. + */ +/* __FLEXOS MARKER__: insert compartment allocator decls here. */ + +typedef enum { + PKU_RW, + PKU_RO, + PKU_NONE +} flexos_intelpku_perm; + +/* Set the key associated with passed set of pages to key */ +int flexos_intelpku_mem_set_key(void *page_boundary, uint64_t num_pages, uint8_t key); + +/* Get the key associated with passed page */ +int flexos_intelpku_mem_get_key(void *page_boundary); + +/* Set permission for a given key and update PKRU accordingly */ +int flexos_intelpku_set_perm(uint8_t key, flexos_intelpku_perm perm); + +/* Low level C wrapper for RDPKRU: return the current protection key or + * -ENOSPC if the CPU does not support PKU */ +__attribute__((always_inline)) static inline uint32_t rdpkru(void) +{ + uint32_t res; + asm volatile ( "xor %%ecx, %%ecx;" + "rdpkru;" + "movl %%eax, %0" : "=r"(res) :: "rax", "rdx", "ecx"); + + return res; +} + +/* Regarding the lfence here, see Spectre 1.1 paper, 'Speculative Buffer + * Overflows: Attacks and Defenses' */ +__attribute__((always_inline)) static inline void wrpkru(uint32_t val) +{ + /* FIXME FLEXOS: should we clobber "memory" here? */ + asm volatile ( "mov %0, %%eax;" + "xor %%ecx, %%ecx;" + "xor %%edx, %%edx;" + "wrpkru;" + "lfence" + :: "r"(val) : "eax", "ecx", "edx"); +} + +/* The following Coccinelle rule is very useful to find gate calls + * with more than 6 arguments. + * + * @@ + * expression list[n > 9] EL; + * type T; + * @@ + * + // invalid + * flexos_intelpku_gate(EL) + */ + +/* Nasty argument counting trick from + * https://stackoverflow.com/questions/4421681 */ +#define ELEVENTH_ARGUMENT(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, ...) a11 +#define COUNT_ARGUMENTS(...) ELEVENTH_ARGUMENT(dummy, ## __VA_ARGS__, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + +/* flexos_intelpku_gate(1, 0, printf, "hello\n") + * -> execute printf("hello\n") is protection domain 0 */ +/* FIXME FLEXOS HUGE HACK! Disable gates for interrupt handlers. This + * is a huge security hole. DO NOT DO THAT IN PRACTICE. We only do it + * for SOSP because we don't have time for a proper fix and it doesn't + * impact performance. + */ +#define _eflexos_intelpku_gate(N, key_from, key_to, fname, ...) \ + _flexos_intelpku_gate(N, key_from, key_to, fname, ## __VA_ARGS__) +#define flexos_intelpku_gate(key_from, key_to, fname, ...) 
\ +do { \ + if (ukarch_read_sp() >= __INTRSTACK_START && \ + ukarch_read_sp() <= __END) { \ + fname(__VA_ARGS__); \ + } else { \ + _flexos_intelpku_gate_inst_in(key_from, key_to, #fname);\ + _eflexos_intelpku_gate(COUNT_ARGUMENTS(__VA_ARGS__), \ + key_from, key_to, fname, ## __VA_ARGS__); \ + _flexos_intelpku_gate_inst_out(key_from, key_to); \ + } \ +} while (0) + +#define _eflexos_intelpku_gate_r(N, key_from, key_to, retval, fname, ...)\ + _flexos_intelpku_gate_r(N, key_from, key_to, retval, fname, ## __VA_ARGS__) +/* second level of indirection is required to expand N */ +#define flexos_intelpku_gate_r(key_from, key_to, retval, fname, ...) \ +do { \ + if (ukarch_read_sp() >= __INTRSTACK_START && \ + ukarch_read_sp() <= __END) { \ + retval = fname(__VA_ARGS__); \ + } else { \ + _flexos_intelpku_gate_inst_in(key_from, key_to, #fname);\ + _eflexos_intelpku_gate_r(COUNT_ARGUMENTS(__VA_ARGS__), \ + key_from, key_to, retval, fname, ## __VA_ARGS__);\ + _flexos_intelpku_gate_inst_out(key_from, key_to); \ + } \ +} while (0) + +/* TODO FLEXOS this does no harm, but it's code duplication. + * Seems necessary to compile nginx. + */ +#ifndef flexos_nop_gate +#define flexos_nop_gate(key_from, key_to, func, ...) func(__VA_ARGS__) +#define flexos_nop_gate_r(key_from, key_to, ret, func, ...) ret = func(__VA_ARGS__) +#endif + +/* TODO FLEXOS: sharing or not the stack should be decided on a + * per-compartment basis, so this config option is certainly not the + * right way to do it. */ +#if !CONFIG_LIBFLEXOS_GATE_INTELPKU_SHARED_STACKS +struct uk_thread_status_block { + uint64_t sp; + uint64_t bp; +}; + +extern struct uk_thread_status_block tsb_comp0[32]; +/* __FLEXOS MARKER__: insert tsb extern decls here. */ +#endif + +/* Sanitize options a little bit more */ + +#if CONFIG_LIBFLEXOS_GATE_INTELPKU_SHARED_STACKS && CONFIG_LIBFLEXOS_GATE_INTELPKU_PRIVATE_STACKS +#error "Shared stacks and private stacks options are incompatible!" +#endif /* CONFIG_LIBFLEXOS_GATE_INTELPKU_SHARED_STACKS && CONFIG_LIBFLEXOS_ENABLE_DSS */ + +#if CONFIG_LIBFLEXOS_GATE_INTELPKU_SHARED_STACKS && CONFIG_LIBFLEXOS_ENABLE_DSS +#error "Shared stacks and DSS options are incompatible!" +#endif /* CONFIG_LIBFLEXOS_GATE_INTELPKU_SHARED_STACKS && CONFIG_LIBFLEXOS_ENABLE_DSS */ + +#endif /* FLEXOS_INTELPKU_H */ diff --git a/lib/flexos-core/include/flexos/impl/main_annotation.h b/lib/flexos-core/include/flexos/impl/main_annotation.h new file mode 100644 index 0000000000..c7178dee28 --- /dev/null +++ b/lib/flexos-core/include/flexos/impl/main_annotation.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Copyright (c) 2021, Sebastian Rauch + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
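The flexos_intelpku_gate()/flexos_intelpku_gate_r() wrappers above obtain the argument count N with the ELEVENTH_ARGUMENT trick. Reduced to a standalone demo (names invented here; like the original it depends on the GNU ", ## __VA_ARGS__" comma-deletion extension to make the zero-argument case return 0):

#include <stdio.h>

#define ELEVENTH(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, ...) a11
#define COUNT(...) ELEVENTH(dummy, ## __VA_ARGS__, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)

int main(void)
{
	printf("%d\n", COUNT());		/* 0 */
	printf("%d\n", COUNT(a));		/* 1 */
	printf("%d\n", COUNT(a, b, c));		/* 3 */
	return 0;
}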
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MAIN_ANNOTATION_H +#define MAIN_ANNOTATION_H + +/* make sure all necessary macros are defined */ +#ifndef FLEXOS_VMEPT_COMP_ID // this compartment +#error "FLEXOS_VMEPT_COMP_ID must be defined" +#endif +#ifndef FLEXOS_VMEPT_COMP_COUNT // total number of compartments +#error "FLEXOS_VMEPT_COMP_COUNT must be defined" +#endif +#ifndef FLEXOS_VMEPT_APPCOMP // compartment containing the app +#error "FLEXOS_VMEPT_APPCOMP must be defined" +#endif + +#if (FLEXOS_VMEPT_APPCOMP) == (FLEXOS_VMEPT_COMP_ID) +#define FLEXOS_VMEPT_MAIN_ANNOTATION __attribute__ ((section (".text_comp_exclusive"))) +#else +#define FLEXOS_VMEPT_MAIN_ANNOTATION __attribute__ ((section ("/DISCARD/"))) +#endif + +int main(int argc, char *argv[]) FLEXOS_VMEPT_MAIN_ANNOTATION; + +#endif /* MAIN_ANNOTATION_H */ diff --git a/lib/flexos-core/include/flexos/impl/typecheck.h b/lib/flexos-core/include/flexos/impl/typecheck.h new file mode 100644 index 0000000000..7e126eaa4e --- /dev/null +++ b/lib/flexos-core/include/flexos/impl/typecheck.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Copyright (c) 2021, Sebastian Rauch + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
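main_annotation.h above steers main() into .text_comp_exclusive only in the compartment that hosts the application and discards it everywhere else. A reduced sketch of the same section-placement idea follows; all names are placeholders, and the non-app branch simply falls back to the default .text instead of /DISCARD/ so the file links on its own.

/* placement.c -- e.g.:
 *   gcc -c -DCOMP_ID=1 -DAPP_COMP=1 placement.c && objdump -t placement.o
 * The symbol lands in .text.app only when COMP_ID == APP_COMP, mirroring
 * how the header above annotates the declaration of main(). */
#if COMP_ID == APP_COMP
#define APP_ANNOTATION __attribute__((section(".text.app")))
#else
#define APP_ANNOTATION	/* default .text in this sketch */
#endif

/* attribute on the declaration carries over to the definition (GCC/clang) */
int in_app_compartment(void) APP_ANNOTATION;

int in_app_compartment(void)
{
	return COMP_ID;
}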
+ */ + +#ifndef FLEXOS_TYPECHECK_H +#define FLEXOS_TYPECHECK_H + +/* integer types at most 8 bytes long (excluding pointers) */ +#define FLEXOS_TYPECLASS_INTEGER 1 + +/* floating point types at most 8 bytes long */ +#define FLEXOS_TYPECLASS_SSE 2 + +/* floating point types longer than 8 bytes */ +#define FLEXOS_TYPECLASS_SSE_EX 3 + +/* can be a pointer or a composite/aggregate type */ +#define FLEXOS_TYPECLASS_UNKNOWN 4 + +#define _flexos_typeclass_of(x) _Generic((x), \ + _Bool : FLEXOS_TYPECLASS_INTEGER, \ + signed char : FLEXOS_TYPECLASS_INTEGER, \ + unsigned char : FLEXOS_TYPECLASS_INTEGER, \ + short : FLEXOS_TYPECLASS_INTEGER, \ + unsigned short : FLEXOS_TYPECLASS_INTEGER, \ + int : FLEXOS_TYPECLASS_INTEGER, \ + unsigned int : FLEXOS_TYPECLASS_INTEGER, \ + long : FLEXOS_TYPECLASS_INTEGER, \ + unsigned long : FLEXOS_TYPECLASS_INTEGER, \ + long long : FLEXOS_TYPECLASS_INTEGER, \ + unsigned long long : FLEXOS_TYPECLASS_INTEGER, \ + float : FLEXOS_TYPECLASS_SSE, \ + double : FLEXOS_TYPECLASS_SSE, \ + long double : FLEXOS_TYPECLASS_SSE_EX, \ + default : FLEXOS_TYPECLASS_UNKNOWN) + +/* TYPECLASS_UNKNOWN of size sizeof(void*) is assumed + * to be a pointer. Note that this might be incorrect + * since any struct of that size also is classified as such. + * When used only via flexos_heuristic_typeclass_of, no such + * miscalssification should happen because structs cause errors in + * _force_array_decay_or_error. */ +#define _felxos_classify(tc, s) \ +(((tc) == FLEXOS_TYPECLASS_INTEGER) || ((tc) == FLEXOS_TYPECLASS_SSE) ||\ +((tc) == FLEXOS_TYPECLASS_SSE_EX)) ? (tc) : ((s) == sizeof(void*) ? \ +FLEXOS_TYPECLASS_INTEGER : FLEXOS_TYPECLASS_UNKNOWN) + +/* forces arrays to decay to pointers, + * doesn't change basic types (char, int, float, pointers, ...) + * and results in an error for structs */ +#define _force_array_decay_or_error(x) ((x) + 0) + +/* Note: if this causes an error, this might indicate that some + * unsupported type (e.g. a struct) was passed as argument to a gate */ +#define flexos_heuristic_typeclass_of(x) \ +_felxos_classify(_flexos_typeclass_of(_force_array_decay_or_error(x)), \ +sizeof(_force_array_decay_or_error(x))) + +#define flexos_is_heuristic_typeclass_integer(x) \ +(flexos_heuristic_typeclass_of(x) == FLEXOS_TYPECLASS_INTEGER) + +#endif diff --git a/lib/flexos-core/include/flexos/impl/vmept.h b/lib/flexos-core/include/flexos/impl/vmept.h new file mode 100644 index 0000000000..3afb0e8e3a --- /dev/null +++ b/lib/flexos-core/include/flexos/impl/vmept.h @@ -0,0 +1,705 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Copyright (c) 2021, Hugo Lefeuvre + * Stefan Teodorescu + * Sebastian Rauch + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
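typecheck.h above classifies gate arguments with C11 _Generic and forces array-to-pointer decay with "(x) + 0" so that aggregates fail to compile. A small standalone probe of that behaviour (macro names are shortened here, only the printing scaffolding is added):

#include <stdio.h>

#define TC_INTEGER	1
#define TC_SSE		2
#define TC_UNKNOWN	4

#define typeclass_of(x) _Generic((x),		\
	int: TC_INTEGER, long: TC_INTEGER,	\
	unsigned long: TC_INTEGER,		\
	float: TC_SSE, double: TC_SSE,		\
	default: TC_UNKNOWN)

/* arrays decay to pointers, structs fail to compile -- same trick as
 * _force_array_decay_or_error() above */
#define decay(x) ((x) + 0)

int main(void)
{
	char buf[16];
	char *p = buf;

	printf("%d\n", typeclass_of(decay(42)));	/* 1: integer          */
	printf("%d\n", typeclass_of(decay(3.5)));	/* 2: sse              */
	printf("%d\n", typeclass_of(decay(buf)));	/* 4: decayed to char* */
	printf("%d\n", typeclass_of(decay(p)));		/* 4: pointer          */
	return 0;
}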
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef FLEXOS_VMEPT_H +#define FLEXOS_VMEPT_H + +#include "typecheck.h" + +#include +#include +#include +#include +#include + +#include + +/* make sure all necessary macros are defined */ +#ifndef FLEXOS_VMEPT_COMP_ID // this compartment +#error "FLEXOS_VMEPT_COMP_ID must be defined" +#endif +#ifndef FLEXOS_VMEPT_COMP_COUNT // total number of compartments +#error "FLEXOS_VMEPT_COMP_COUNT must be defined" +#endif +#ifndef FLEXOS_VMEPT_APPCOMP // compartment containing the app +#error "FLEXOS_VMEPT_APPCOMP must be defined" +#endif + +extern volatile uint8_t flexos_vmept_comp_id; + + +/* to enable/disable debug prints */ +#define FLEXOS_VMEPT_DEBUG 0 +#define FLEXOS_VMEPT_DEBUG_PRINT_ADDR 0 + +#if FLEXOS_VMEPT_DEBUG + #include + #define FLEXOS_VMEPT_DEBUG_PRINT(x) printf x +#else + #define FLEXOS_VMEPT_DEBUG_PRINT(x) +#endif + + +struct uk_alloc; + +/* Shared allocator */ +extern struct uk_alloc *flexos_shared_alloc; +/* flexos_comp0_alloc is just an alias for the standard, default allocator: + * offers memory in domain zero. 
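FLEXOS_VMEPT_DEBUG_PRINT(x) above expands to "printf x", which is why every call site in this library passes the whole printf argument list inside an extra pair of parentheses. A minimal stand-alone version of the same idiom (debug_print is an invented name):

#include <stdio.h>

#define DEBUG 1

#if DEBUG
#define debug_print(x)	printf x	/* x is "(fmt, args...)" incl. parens */
#else
#define debug_print(x)			/* compiles away entirely */
#endif

int main(void)
{
	int comp_id = 3;

	/* note the double parentheses, as in FLEXOS_VMEPT_DEBUG_PRINT((...)) */
	debug_print(("compartment %d up\n", comp_id));
	return 0;
}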
+ */ +#define flexos_comp0_alloc _uk_alloc_head + +#define FLEXOS_VMEPT_MAX_THREADS 256 +#define FLEXOS_VMEPT_MAX_COMPS 16 + +/* we could use FLEXOS_VMEPT_COMP_COUNT instead of FLEXOS_VMEPT_MAX_COMPS, but this produces variable alignment */ +#define FLEXOS_VMEPT_THREAD_MAP_SIZE (FLEXOS_VMEPT_MAX_COMPS * FLEXOS_VMEPT_MAX_THREADS) + +struct flexos_vmept_thread_map { + struct uk_thread *threads[FLEXOS_VMEPT_THREAD_MAP_SIZE]; +}; + +static inline void __attribute__((always_inline)) flexos_vmept_thread_map_init(struct flexos_vmept_thread_map *map) +{ + for (size_t i = 0; i < FLEXOS_VMEPT_THREAD_MAP_SIZE; ++i) { + map->threads[i] = NULL; + } +} + +static inline struct __attribute__((always_inline)) uk_thread *flexos_vmept_thread_map_lookup( +const struct flexos_vmept_thread_map *map, uint8_t comp_id, uint8_t local_tid) +{ + return map->threads[comp_id * FLEXOS_VMEPT_MAX_THREADS + local_tid]; +} + +static inline void __attribute__((always_inline)) flexos_vmept_thread_map_put( + struct flexos_vmept_thread_map *map, uint8_t comp_id, uint8_t local_tid, struct uk_thread *thread_ptr) +{ + map->threads[comp_id * FLEXOS_VMEPT_MAX_THREADS + local_tid] = thread_ptr; +} + +/* master rpc states are integers and have two parts: + * the lower 8 bits encode the state constant (see below) + * the higher 24 bits encode a value that has a state-dependent meaning */ +#define FLEXOS_VMEPT_MASTER_RPC_STATE_IDLE 0 +#define FLEXOS_VMEPT_MASTER_RPC_STATE_CALLED 1 +#define FLEXOS_VMEPT_MASTER_RPC_STATE_RETURNED 2 + +#define FLEXOS_VMEPT_MASTER_RPC_STATE_CONSTANT_MASK 0x000000ff +#define FLEXOS_VMEPT_MASTER_RPC_VALUE_PART_MASK 0xffffff00 + +#define FLEXOS_VMEPT_BUILD_MASTER_RPC_STATE(state_constant, value) \ +(((value) << 8) | ((state_constant) & FLEXOS_VMEPT_MASTER_RPC_STATE_CONSTANT_MASK)) + +#define FLEXOS_VMEPT_MASTER_RPC_STATE_EXTRACT_VALUE(state) \ +((state) >> 8) + +/* a return code other than 0 signals an error */ +#define FLEXOS_VMEPT_BUILD_MASTER_RPC_RETURN_STATE(ret_code) \ +FLEXOS_VMEPT_BUILD_MASTER_RPC_STATE(FLEXOS_VMEPT_MASTER_RPC_STATE_RETURNED, ret_code) + +#define FLEXOS_VMEPT_MASTER_RPC_ACTION_CREATE 1 +#define FLEXOS_VMEPT_MASTER_RPC_ACTION_DESTROY 2 + +/* (maximum) size for flexos_vmept_master_rpc_ctrl and flexos_vmept_rpc_ctrl */ +#define FLEXOS_VMEPT_RPC_CTRL_SIZE 256 + +// TODO: ensure maximum size of 256 bytes +struct flexos_vmept_master_rpc_ctrl { + /* to make sure access is atomic always align at 8 byte boundary */ + int lock __attribute__ ((aligned (8))); + int initialized; + int state; + uint8_t action; + uint8_t from; + uint8_t to; + int local_tid; // tid of the normal thread created +}; + +static inline void __attribute__((always_inline)) flexos_vmept_init_master_rpc_ctrl(struct flexos_vmept_master_rpc_ctrl *ctrl) +{ + ctrl->lock = 0; + ctrl->state = FLEXOS_VMEPT_MASTER_RPC_STATE_IDLE; + ctrl->initialized = 1; + FLEXOS_VMEPT_DEBUG_PRINT(("Initialized master ctrl at %p.\n", ctrl)); +} +/* rpc states are integers and have two parts: + * the lower 8 bits encode the state constant (see below) + * the higher 24 bits encode a value that has a state-dependent meaning */ + +#define FLEXOS_VMEPT_RPC_STATE_IDLE 0 +#define FLEXOS_VMEPT_RPC_STATE_FROZEN 1 +#define FLEXOS_VMEPT_RPC_STATE_CALLED 2 +#define FLEXOS_VMEPT_RPC_STATE_RETURNED 3 + +#define FLEXOS_VMEPT_RPC_STATE_CONSTANT_MASK 0x000000ff +#define FLEXOS_VMEPT_RPC_VALUE_PART_MASK 0xffffff00 + +#define FLEXOS_VMEPT_BUILD_RPC_STATE(state_constant, value) \ +(((value) << 8) | ((state_constant) & FLEXOS_VMEPT_RPC_STATE_CONSTANT_MASK)) + +#define 
FLEXOS_VMEPT_RPC_STATE_EXTRACT_VALUE(state) \ +((state) >> 8) + +#define FLEXOS_VMEPT_FINFO_ARGC_MASK 0x00ff +#define FLEXOS_VMEPT_FINFO_RET_MASK 0xff00 + +#define FLEXOS_VMEPT_BUILD_FINFO(argc, returns_val) \ +((argc & FLEXOS_VMEPT_FINFO_ARGC_MASK) | ((returns_val << 8 ) & FLEXOS_VMEPT_FINFO_RET_MASK)) + +#define FLEXOS_VMEPT_FINFO_EXTRACT_ARGC(finfo) \ +((finfo) & FLEXOS_VMEPT_FINFO_ARGC_MASK) + +#define FLEXOS_VMEPT_FINFO_EXTRACT_RET(finfo) \ +(((finfo) & FLEXOS_VMEPT_FINFO_RET_MASK) >> 8) + +// TODO: esure maximum size of 256 bytes +struct flexos_vmept_rpc_ctrl { + uint64_t extended_state __attribute__ ((aligned (8))); + int recursion; + void *f_ptr; + uint64_t f_info; + uint64_t parameters[6]; + uint64_t ret; +}; + +/* All shared memory areas below are hardcoded for now to these values. For the + * memory sharing mechanism itself, we use our own shared memory device in QEMU + * which receives addresses and sizes as parameters for multiple memory areas. + * + * TODO: These addresses and sizes should be defined by the toolchain (both + * here and as parameters when running QEMU). + */ + +/* TODO: adapt for up to 256 compartments (?) + * currently it supports a maximum of 16 */ +#define FLEXOS_VMEPT_RPC_PAGES_ADDR 0x800000000 +#define FLEXOS_VMEPT_RPC_PAGES_SIZE ((size_t) 16 * 256 * 256) // FIXME: use correct size +//#define FLEXOS_VMEPT_RPC_PAGES_SIZE ((size_t) 16 * 256 * 256 + 16 * 256) + +/* This memory area is used for the shared heap. */ +#define FLEXOS_VMEPT_SHARED_MEM_ADDR 0x4000000000 +#define FLEXOS_VMEPT_SHARED_MEM_SIZE ((size_t) 128 * 1024 * 1024) + +/* The shared_data section between all Unikraft binaries. The loader places + * this section directly in the shared memory, so all compartments access the + * same thing. + */ +#define FLEXOS_VMEPT_SHARED_DATA_ADDR __SHARED_START +#define FLEXOS_VMEPT_SHARED_DATA_SIZE ((size_t) __SHARED_END - __SHARED_START) + +/* the maximum number of parameters of a vmept gate + * this is because of the calling convention + * don't simply change this number */ +#define FLEXOS_VMEPT_MAX_PARAMS 6 + +extern unsigned long shmem_rpc_page; + +#define FLEXOS_VMEPT_MASTER_RPC_CTRL_BLOCK_START ((uint8_t *) shmem_rpc_page) +#define FLEXOS_VMEPT_MASTER_RPC_CTRL_BLOCK_SIZE ((FLEXOS_VMEPT_RPC_CTRL_SIZE) * (FLEXOS_VMEPT_MAX_COMPS)) + +#define FLEXOS_VMEPT_RPC_CTRL_BLOCK_START (((uint8_t *) shmem_rpc_page) + FLEXOS_VMEPT_MASTER_RPC_CTRL_BLOCK_SIZE) +#define FLEXOS_VMEPT_RPC_CTRL_BLOCK_SIZE ((FLEXOS_VMEPT_MAX_COMPS) * (FLEXOS_VMEPT_MAX_THREADS) * (FLEXOS_VMEPT_RPC_CTRL_SIZE)) + +#define flexos_vmept_master_rpc_ctrl(comp_id) \ +(volatile struct flexos_vmept_master_rpc_ctrl*) ((FLEXOS_VMEPT_MASTER_RPC_CTRL_BLOCK_START) + (comp_id) * (FLEXOS_VMEPT_RPC_CTRL_SIZE)) + +/* from the compartment ID of the normal thread an its local tid get the rpc_ctrl struct to listen to */ +#define flexos_vmept_rpc_ctrl(comp_id, local_tid) \ +(volatile struct flexos_vmept_rpc_ctrl*) (FLEXOS_VMEPT_RPC_CTRL_BLOCK_START + (comp_id) * FLEXOS_VMEPT_MAX_THREADS * FLEXOS_VMEPT_RPC_CTRL_SIZE + FLEXOS_VMEPT_RPC_CTRL_SIZE * local_tid) + +/* unique per thread and compartment */ +#define flexos_vmept_build_lock_value(local_tid) \ +((1 << 16) | (flexos_vmept_comp_id << 8) | ((local_tid) & 0xff)) + +#define flexos_vmept_master_rpc_call_create(key_from, key_to, local_tid) \ +flexos_vmept_master_rpc_call((key_from), (key_to), (local_tid), FLEXOS_VMEPT_MASTER_RPC_ACTION_CREATE) + +#define flexos_vmept_master_rpc_call_destroy(key_from, key_to, local_tid) \ +flexos_vmept_master_rpc_call((key_from), 
(key_to), (local_tid), FLEXOS_VMEPT_MASTER_RPC_ACTION_DESTROY) + +static inline __attribute__((always_inline)) void flexos_vmept_init_master_rpc_ctrls() +{ + for (size_t i = 0; i < FLEXOS_VMEPT_MASTER_RPC_CTRL_BLOCK_SIZE; ++i) { + ((uint8_t *) FLEXOS_VMEPT_MASTER_RPC_CTRL_BLOCK_START)[i] = 0; + } + FLEXOS_VMEPT_DEBUG_PRINT(("Zero-initialized master rpc control data structures.\n")); +} + +int flexos_vmept_master_rpc_call(uint8_t key_from, uint8_t key_to, uint8_t local_tid, uint8_t action); + +int flexos_vmept_master_rpc_call_main(uint8_t key_from, uint8_t key_to, uint8_t local_tid, uint8_t action); + +void flexos_vmept_wait_for_rpc(); + +void flexos_vmept_master_rpc_loop(); +void flexos_vmept_rpc_loop(); + +struct uk_thread; + + +static inline __attribute__((always_inline)) int flexos_vmept_extract_state(uint64_t extended_state) +{ + return (int) extended_state; +} + +static inline __attribute__((always_inline)) uint8_t flexos_vmept_extract_key_from(uint64_t extended_state) +{ + return (uint8_t) ((extended_state >> 32) & 0xff); +} + +static inline __attribute__((always_inline)) uint8_t flexos_vmept_extract_key_to(uint64_t extended_state) +{ + return (uint8_t) ((extended_state >> 40) & 0xff); +} + +/* to facilitate debugging */ +static inline __attribute__((always_inline)) void flexos_vmept_ctrl_set_extended_state( + volatile struct flexos_vmept_rpc_ctrl *ctrl, int state, uint8_t key_from, uint8_t key_to) +{ + uint64_t ext_state = (((uint64_t) key_from) << 32) | (((uint64_t) key_to) << 40) | ((uint32_t) state); + FLEXOS_VMEPT_DEBUG_PRINT(("(ctrl %p) comp %d setting extended_state to %016lx.\n", ctrl, flexos_vmept_comp_id, ext_state)); + ctrl->extended_state = ext_state; +} + +static inline __attribute__((always_inline)) void flexos_vmept_ctrl_set_state( + volatile struct flexos_vmept_rpc_ctrl *ctrl, int state) +{ + FLEXOS_VMEPT_DEBUG_PRINT(("(ctrl %p) comp %d setting state to %d.\n", ctrl, flexos_vmept_comp_id, state)); + uint8_t from = flexos_vmept_extract_key_from(ctrl->extended_state); + uint8_t to = flexos_vmept_extract_key_to(ctrl->extended_state); + flexos_vmept_ctrl_set_extended_state(ctrl, state, from, to); +} + +static inline __attribute__((always_inline)) void flexos_vmept_ctrl_inc_recursion( + volatile struct flexos_vmept_rpc_ctrl *ctrl) +{ + FLEXOS_VMEPT_DEBUG_PRINT(("(ctrl %p) comp %d inc recursion from %d to %d.\n", ctrl, flexos_vmept_comp_id, + ctrl->recursion, ctrl->recursion + 1)); + ctrl->recursion++; +} + +static inline __attribute__((always_inline)) void flexos_vmept_ctrl_dec_recursion( + volatile struct flexos_vmept_rpc_ctrl *ctrl) +{ + FLEXOS_VMEPT_DEBUG_PRINT(("(ctrl %p) comp %d dec recursion from %d to %d.\n", ctrl, flexos_vmept_comp_id, + ctrl->recursion, ctrl->recursion - 1)); + ctrl->recursion--; +} + +static inline __attribute__((always_inline)) void flexos_vmept_ctrl_set_func( + volatile struct flexos_vmept_rpc_ctrl *ctrl, void *fptr, uint8_t argc, uint8_t returns_val) +{ + FLEXOS_VMEPT_DEBUG_PRINT(("(ctrl %p) comp %d setting f_ptr to %p and f_info to %016lx.\n", ctrl, flexos_vmept_comp_id, + fptr, FLEXOS_VMEPT_BUILD_FINFO(argc, returns_val))); + ctrl->f_ptr = fptr; + ctrl->f_info = FLEXOS_VMEPT_BUILD_FINFO(argc, returns_val); +} + +static inline __attribute__((always_inline)) void flexos_vmept_ctrl_set_ret( + volatile struct flexos_vmept_rpc_ctrl *ctrl, uint64_t ret) +{ + FLEXOS_VMEPT_DEBUG_PRINT(("(ctrl %p) comp %d setting ret to %016lx.\n", ctrl, flexos_vmept_comp_id, ret)); + ctrl->ret = ret; +} + +static inline __attribute__((always_inline)) void 
flexos_vmept_ctrl_set_args1( + volatile struct flexos_vmept_rpc_ctrl *ctrl, uint64_t arg1) +{ + FLEXOS_VMEPT_DEBUG_PRINT(("(ctrl %p) comp %d setting %d args (%016lx).\n", + ctrl, flexos_vmept_comp_id, 1, arg1)); + ctrl->parameters[0] = arg1; +} + +static inline __attribute__((always_inline)) void flexos_vmept_ctrl_set_args2( + volatile struct flexos_vmept_rpc_ctrl *ctrl, uint64_t arg1, uint64_t arg2) +{ + FLEXOS_VMEPT_DEBUG_PRINT(("(ctrl %p) comp %d setting %d args (%016lx, %016lx).\n", + ctrl, flexos_vmept_comp_id, 2, arg1, arg2)); + ctrl->parameters[0] = arg1; + ctrl->parameters[1] = arg2; +} + +static inline __attribute__((always_inline)) void flexos_vmept_ctrl_set_args3( + volatile struct flexos_vmept_rpc_ctrl *ctrl, + uint64_t arg1, uint64_t arg2, uint64_t arg3) +{ + FLEXOS_VMEPT_DEBUG_PRINT(("(ctrl %p) comp %d setting %d args (%016lx, %016lx, %016lx).\n", + ctrl, flexos_vmept_comp_id, 3, arg1, arg2, arg3)); + ctrl->parameters[0] = arg1; + ctrl->parameters[1] = arg2; + ctrl->parameters[2] = arg3; +} + +static inline __attribute__((always_inline)) void flexos_vmept_ctrl_set_args4( + volatile struct flexos_vmept_rpc_ctrl *ctrl, + uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4) +{ + FLEXOS_VMEPT_DEBUG_PRINT(("(ctrl %p) comp %d setting %d args (%016lx, %016lx, %016lx, %016lx).\n", + ctrl, flexos_vmept_comp_id, 4, arg1, arg2, arg3, arg4)); + ctrl->parameters[0] = arg1; + ctrl->parameters[1] = arg2; + ctrl->parameters[2] = arg3; + ctrl->parameters[3] = arg4; +} + +static inline __attribute__((always_inline)) void flexos_vmept_ctrl_set_args5( + volatile struct flexos_vmept_rpc_ctrl *ctrl, + uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5) +{ + FLEXOS_VMEPT_DEBUG_PRINT(("(ctrl %p) comp %d setting %d args (%016lx, %016lx, %016lx, %016lx, %016lx).\n", + ctrl, flexos_vmept_comp_id, 5, arg1, arg2, arg3, arg4, arg5)); + ctrl->parameters[0] = arg1; + ctrl->parameters[1] = arg2; + ctrl->parameters[2] = arg3; + ctrl->parameters[3] = arg4; + ctrl->parameters[4] = arg5; +} + +static inline __attribute__((always_inline)) void flexos_vmept_ctrl_set_args6( + volatile struct flexos_vmept_rpc_ctrl *ctrl, + uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6) +{ + FLEXOS_VMEPT_DEBUG_PRINT(("(ctrl %p) comp %d setting %d args (%016lx, %016lx, %016lx, %016lx, %016lx, %016lx).\n", + ctrl, flexos_vmept_comp_id, 6, arg1, arg2, arg3, arg4, arg5, arg6)); + ctrl->parameters[0] = arg1; + ctrl->parameters[1] = arg2; + ctrl->parameters[2] = arg3; + ctrl->parameters[3] = arg4; + ctrl->parameters[4] = arg5; + ctrl->parameters[5] = arg6; +} + +static inline __attribute__((always_inline)) void flexos_vmept_init_rpc_ctrl(struct flexos_vmept_rpc_ctrl *ctrl) +{ + ctrl->recursion = 0; + // key_from and _key_to are set to 0 + ctrl->extended_state = FLEXOS_VMEPT_RPC_STATE_IDLE; +} + +#define flexos_vmept_gate0(key_from, key_to, fptr) \ +do { \ + volatile struct flexos_vmept_rpc_ctrl *_gate_internal_ctrl = uk_thread_current()->ctrl; \ + UK_ASSERT(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_FROZEN); \ + flexos_vmept_ctrl_set_func(_gate_internal_ctrl, (void *) fptr, 0, 0); \ + flexos_vmept_ctrl_inc_recursion(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_extended_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_CALLED, key_from, key_to); \ + flexos_vmept_wait_for_rpc(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_FROZEN); \ + 
flexos_vmept_ctrl_dec_recursion(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_IDLE); \ +} while (0) + +#define flexos_vmept_gate0_r(key_from, key_to, retval, fptr) \ +do { \ + volatile struct flexos_vmept_rpc_ctrl *_gate_internal_ctrl = uk_thread_current()->ctrl; \ + UK_ASSERT(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_FROZEN); \ + flexos_vmept_ctrl_set_func(_gate_internal_ctrl, (void *) fptr, 0, 1); \ + flexos_vmept_ctrl_inc_recursion(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_CALLED); \ + flexos_vmept_ctrl_set_extended_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_CALLED, key_from, key_to); \ + flexos_vmept_wait_for_rpc(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_FROZEN); \ + (retval) = _gate_internal_ctrl->ret; \ + flexos_vmept_ctrl_dec_recursion(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_IDLE); \ +} while (0) + +#define flexos_vmept_gate1(key_from, key_to, fptr, arg1) \ +do { \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg1)); \ + volatile struct flexos_vmept_rpc_ctrl *_gate_internal_ctrl = uk_thread_current()->ctrl; \ + UK_ASSERT(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_FROZEN); \ + flexos_vmept_ctrl_set_func(_gate_internal_ctrl, (void *) fptr, 1, 0); \ + flexos_vmept_ctrl_set_args1(_gate_internal_ctrl, (uint64_t) arg1); \ + flexos_vmept_ctrl_inc_recursion(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_extended_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_CALLED, key_from, key_to); \ + flexos_vmept_wait_for_rpc(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_FROZEN); \ + flexos_vmept_ctrl_dec_recursion(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_IDLE); \ +} while (0) + +#define flexos_vmept_gate1_r(key_from, key_to, retval, fptr, arg1) \ +do { \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg1)); \ + volatile struct flexos_vmept_rpc_ctrl *_gate_internal_ctrl = uk_thread_current()->ctrl; \ + UK_ASSERT(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_FROZEN); \ + flexos_vmept_ctrl_set_func(_gate_internal_ctrl, (void *) fptr, 1, 1); \ + flexos_vmept_ctrl_set_args1(_gate_internal_ctrl, (uint64_t) arg1); \ + flexos_vmept_ctrl_inc_recursion(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_extended_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_CALLED, key_from, key_to); \ + flexos_vmept_wait_for_rpc(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_FROZEN); \ + (retval) = _gate_internal_ctrl->ret; \ + flexos_vmept_ctrl_dec_recursion(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_IDLE); \ +} while (0) + +#define flexos_vmept_gate2(key_from, key_to, fptr, arg1, arg2) \ +do { \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg1)); \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg2)); \ + volatile struct flexos_vmept_rpc_ctrl *_gate_internal_ctrl = uk_thread_current()->ctrl; \ + UK_ASSERT(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_FROZEN); \ + flexos_vmept_ctrl_set_func(_gate_internal_ctrl, (void *) fptr, 2, 0); \ + 
flexos_vmept_ctrl_set_args2(_gate_internal_ctrl, (uint64_t) arg1, (uint64_t) arg2); \ + flexos_vmept_ctrl_inc_recursion(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_extended_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_CALLED, key_from, key_to); \ + flexos_vmept_wait_for_rpc(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_FROZEN); \ + flexos_vmept_ctrl_dec_recursion(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_IDLE); \ +} while (0) + +#define flexos_vmept_gate2_r(key_from, key_to, retval, fptr, arg1, arg2) \ +do { \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg1)); \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg2)); \ + volatile struct flexos_vmept_rpc_ctrl *_gate_internal_ctrl = uk_thread_current()->ctrl; \ + UK_ASSERT(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_FROZEN); \ + flexos_vmept_ctrl_set_func(_gate_internal_ctrl, (void *) fptr, 2, 1); \ + flexos_vmept_ctrl_set_args2(_gate_internal_ctrl, (uint64_t) arg1, (uint64_t) arg2); \ + flexos_vmept_ctrl_inc_recursion(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_extended_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_CALLED, key_from, key_to); \ + flexos_vmept_wait_for_rpc(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_FROZEN); \ + (retval) = _gate_internal_ctrl->ret; \ + flexos_vmept_ctrl_dec_recursion(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_IDLE); \ +} while (0) + +#define flexos_vmept_gate3(key_from, key_to, fptr, arg1, arg2, arg3) \ +do { \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg1)); \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg2)); \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg3)); \ + volatile struct flexos_vmept_rpc_ctrl *_gate_internal_ctrl = uk_thread_current()->ctrl; \ + UK_ASSERT(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_FROZEN); \ + flexos_vmept_ctrl_set_func(_gate_internal_ctrl, (void *) fptr, 3, 0); \ + flexos_vmept_ctrl_set_args3(_gate_internal_ctrl, (uint64_t) arg1, (uint64_t) arg2, \ + (uint64_t) arg3); \ + flexos_vmept_ctrl_inc_recursion(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_extended_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_CALLED, key_from, key_to); \ + flexos_vmept_wait_for_rpc(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_FROZEN); \ + flexos_vmept_ctrl_dec_recursion(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_IDLE); \ +} while (0) + +#define flexos_vmept_gate3_r(key_from, key_to, retval, fptr, arg1, arg2, arg3) \ +do { \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg1)); \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg2)); \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg3)); \ + volatile struct flexos_vmept_rpc_ctrl *_gate_internal_ctrl = uk_thread_current()->ctrl; \ + UK_ASSERT(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_FROZEN); \ + flexos_vmept_ctrl_set_func(_gate_internal_ctrl, (void *) fptr, 3, 1); \ + flexos_vmept_ctrl_set_args3(_gate_internal_ctrl, (uint64_t) arg1, (uint64_t) arg2, \ + (uint64_t) arg3); \ + flexos_vmept_ctrl_inc_recursion(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_extended_state(_gate_internal_ctrl, 
FLEXOS_VMEPT_RPC_STATE_CALLED, key_from, key_to); \ + flexos_vmept_wait_for_rpc(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_FROZEN); \ + (retval) = _gate_internal_ctrl->ret; \ + flexos_vmept_ctrl_dec_recursion(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_IDLE); \ +} while (0) + +#define flexos_vmept_gate4(key_from, key_to, fptr, arg1, arg2, arg3, arg4) \ +do { \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg1)); \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg2)); \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg3)); \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg4)); \ + volatile struct flexos_vmept_rpc_ctrl *_gate_internal_ctrl = uk_thread_current()->ctrl; \ + UK_ASSERT(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_FROZEN); \ + flexos_vmept_ctrl_set_func(_gate_internal_ctrl, (void *) fptr, 4, 0); \ + flexos_vmept_ctrl_set_args4(_gate_internal_ctrl, (uint64_t) arg1, (uint64_t) arg2, \ + (uint64_t) arg3, (uint64_t) arg4); \ + flexos_vmept_ctrl_inc_recursion(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_extended_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_CALLED, key_from, key_to); \ + flexos_vmept_wait_for_rpc(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_FROZEN); \ + flexos_vmept_ctrl_dec_recursion(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_IDLE); \ +} while (0) + +#define flexos_vmept_gate4_r(key_from, key_to, retval, fptr, arg1, arg2, arg3, arg4) \ +do { \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg1)); \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg2)); \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg3)); \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg4)); \ + volatile struct flexos_vmept_rpc_ctrl *_gate_internal_ctrl = uk_thread_current()->ctrl; \ + UK_ASSERT(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_FROZEN); \ + flexos_vmept_ctrl_set_func(_gate_internal_ctrl, (void *) fptr, 4, 1); \ + flexos_vmept_ctrl_set_args4(_gate_internal_ctrl, (uint64_t) arg1, (uint64_t) arg2, \ + (uint64_t) arg3, (uint64_t) arg4); \ + flexos_vmept_ctrl_inc_recursion(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_extended_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_CALLED, key_from, key_to); \ + flexos_vmept_wait_for_rpc(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_FROZEN); \ + (retval) = _gate_internal_ctrl->ret; \ + flexos_vmept_ctrl_dec_recursion(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_IDLE); \ +} while (0) + +#define flexos_vmept_gate5(key_from, key_to, fptr, arg1, arg2, arg3, arg4, arg5) \ +do { \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg1)); \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg2)); \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg3)); \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg4)); \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg5)); \ + volatile struct flexos_vmept_rpc_ctrl *_gate_internal_ctrl = uk_thread_current()->ctrl; \ + UK_ASSERT(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_FROZEN); \ + flexos_vmept_ctrl_set_func(_gate_internal_ctrl, (void *) fptr, 5, 0); \ + 
flexos_vmept_ctrl_set_args5(_gate_internal_ctrl, (uint64_t) arg1, (uint64_t) arg2, \ + (uint64_t) arg3, (uint64_t) arg4, (uint64_t) arg5); \ + flexos_vmept_ctrl_inc_recursion(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_extended_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_CALLED, key_from, key_to); \ + flexos_vmept_wait_for_rpc(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_FROZEN); \ + flexos_vmept_ctrl_dec_recursion(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_IDLE); \ +} while (0) + +#define flexos_vmept_gate5_r(key_from, key_to, retval, fptr, arg1, arg2, arg3, arg4, \ + arg5) \ +do { \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg1)); \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg2)); \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg3)); \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg4)); \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg5)); \ + volatile struct flexos_vmept_rpc_ctrl *_gate_internal_ctrl = uk_thread_current()->ctrl; \ + UK_ASSERT(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_FROZEN); \ + flexos_vmept_ctrl_set_func(_gate_internal_ctrl, (void *) fptr, 5, 1); \ + flexos_vmept_ctrl_set_args5(_gate_internal_ctrl, (uint64_t) arg1, (uint64_t) arg2, \ + (uint64_t) arg3, (uint64_t) arg4, (uint64_t) arg5); \ + flexos_vmept_ctrl_inc_recursion(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_extended_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_CALLED, key_from, key_to); \ + flexos_vmept_wait_for_rpc(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_FROZEN); \ + (retval) = _gate_internal_ctrl->ret; \ + flexos_vmept_ctrl_dec_recursion(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_IDLE); \ +} while (0) + +#define flexos_vmept_gate6(key_from, key_to, fptr, arg1, arg2, arg3, arg4, arg5, arg6) \ +do { \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg1)); \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg2)); \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg3)); \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg4)); \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg5)); \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg6)); \ + volatile struct flexos_vmept_rpc_ctrl *_gate_internal_ctrl = uk_thread_current()->ctrl; \ + UK_ASSERT(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_FROZEN); \ + flexos_vmept_ctrl_set_func(_gate_internal_ctrl, (void *) fptr, 6, 0); \ + flexos_vmept_ctrl_set_args6(_gate_internal_ctrl, (uint64_t) arg1, (uint64_t) arg2, \ + (uint64_t) arg3, (uint64_t) arg4, (uint64_t) arg5, (uint64_t) arg6); \ + flexos_vmept_ctrl_inc_recursion(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_extended_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_CALLED, key_from, key_to); \ + flexos_vmept_wait_for_rpc(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_FROZEN); \ + flexos_vmept_ctrl_dec_recursion(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_IDLE); \ +} while (0) + +#define flexos_vmept_gate6_r(key_from, key_to, retval, fptr, arg1, arg2, arg3, arg4, \ + arg5, arg6) \ +do { \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg1)); \ + 
UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg2)); \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg3)); \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg4)); \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg5)); \ + UK_CTASSERT(flexos_is_heuristic_typeclass_integer(arg6)); \ + volatile struct flexos_vmept_rpc_ctrl *_gate_internal_ctrl = uk_thread_current()->ctrl; \ + UK_ASSERT(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_FROZEN); \ + flexos_vmept_ctrl_set_func(_gate_internal_ctrl, (void *) fptr, 6, 1); \ + flexos_vmept_ctrl_set_args6(_gate_internal_ctrl, (uint64_t) arg1, (uint64_t) arg2, \ + (uint64_t) arg3, (uint64_t) arg4, (uint64_t) arg5, (uint64_t) arg6); \ + flexos_vmept_ctrl_inc_recursion(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_extended_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_CALLED, key_from, key_to); \ + flexos_vmept_wait_for_rpc(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_FROZEN); \ + (retval) = _gate_internal_ctrl->ret; \ + flexos_vmept_ctrl_dec_recursion(_gate_internal_ctrl); \ + flexos_vmept_ctrl_set_state(_gate_internal_ctrl, FLEXOS_VMEPT_RPC_STATE_IDLE); \ +} while (0) + +/* A variation of the argument counting trick to choose the right gate based on the + * number of arguments given. Don't use more than 6 arguments! */ +#define CHOOSE_GATE(dummy, g6, g5, g4, g3, g2, g1, g0, ...) g0 + +/* flexos_vmept_gate(1, 0, printf, "hello\n") + * -> execute printf("hello\n") is protection domain (VM) 0 */ +#define flexos_vmept_gate(key_from, key_to, fname, ...) \ +CHOOSE_GATE(dummy, ## __VA_ARGS__, \ + flexos_vmept_gate6(key_from, key_to, &(fname), __VA_ARGS__), \ + flexos_vmept_gate5(key_from, key_to, &(fname), __VA_ARGS__), \ + flexos_vmept_gate4(key_from, key_to, &(fname), __VA_ARGS__), \ + flexos_vmept_gate3(key_from, key_to, &(fname), __VA_ARGS__), \ + flexos_vmept_gate2(key_from, key_to, &(fname), __VA_ARGS__), \ + flexos_vmept_gate1(key_from, key_to, &(fname), __VA_ARGS__), \ + flexos_vmept_gate0(key_from, key_to, &(fname)) \ +) + +#define flexos_vmept_gate_r(key_from, key_to, retval, fname, ...) \ +CHOOSE_GATE(dummy, ## __VA_ARGS__, \ + flexos_vmept_gate6_r(key_from, key_to, retval, &(fname), __VA_ARGS__), \ + flexos_vmept_gate5_r(key_from, key_to, retval, &(fname), __VA_ARGS__), \ + flexos_vmept_gate4_r(key_from, key_to, retval, &(fname), __VA_ARGS__), \ + flexos_vmept_gate3_r(key_from, key_to, retval, &(fname), __VA_ARGS__), \ + flexos_vmept_gate2_r(key_from, key_to, retval, &(fname), __VA_ARGS__), \ + flexos_vmept_gate1_r(key_from, key_to, retval, &(fname), __VA_ARGS__), \ + flexos_vmept_gate0_r(key_from, key_to, retval, &(fname)) \ +) + +#endif /* FLEXOS_VMEPT_H */ diff --git a/lib/flexos-core/include/flexos/isolation.h b/lib/flexos-core/include/flexos/isolation.h new file mode 100644 index 0000000000..f25567cecf --- /dev/null +++ b/lib/flexos-core/include/flexos/isolation.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Copyright (c) 2021, Hugo Lefeuvre + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef FLEXOS_H +#define FLEXOS_H + +#include + +/* Enable/Disable Intel MPK/PKU support */ +#if CONFIG_LIBFLEXOS_INTELPKU +#include +#include +#else +/* If we build with these gates without CONFIG_LIBFLEXOS_INTELPKU + * then there is a configuration mistake */ +#define flexos_intelpku_gate(...) UK_CTASSERT(0) +#define flexos_intelpku_gate_r(...) UK_CTASSERT(0) +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ + +/* Enable/Disable VM/EPT support */ +#if CONFIG_LIBFLEXOS_VMEPT +#include +#else +/* If we build with these gates without CONFIG_LIBFLEXOS_VMEPT + * then there is a configuration mistake */ +#define flexos_vmept_gate(...) UK_CTASSERT(0) +#define flexos_vmept_gate_r(...) UK_CTASSERT(0) +#endif /* CONFIG_LIBFLEXOS_VMEPT */ + +/* Build with function call instanciation (debugging) */ +#if (!CONFIG_LIBFLEXOS_INTELPKU && !CONFIG_LIBFLEXOS_VMEPT) +#include +#define flexos_shared_alloc _uk_alloc_head +#define flexos_comp0_alloc _uk_alloc_head +#endif /* (!CONFIG_LIBFLEXOS_INTELPKU && !CONFIG_LIBFLEXOS_VMEPT) */ + +/* Do not build with gate placeholders. These should be replaced by the + * toolchain before build. Encountering them at build time is almost + * certainly a bug. + */ +#define flexos_gate_r(...) UK_CTASSERT(0) +#define flexos_gate(...) UK_CTASSERT(0) +#define flexos_malloc_whitelist(...) UK_CTASSERT(0) +#define flexos_calloc_whitelist(...) UK_CTASSERT(0) +#define flexos_palloc_whitelist(...) UK_CTASSERT(0) +#define flexos_free_whitelist(...) UK_CTASSERT(0) + +/* NOP gate, this is just a function call. This gate is inserted whenever + * a cross-microlibrary call is realized within a compartment. + */ +#define flexos_nop_gate(key_from, key_to, func, ...) func(__VA_ARGS__) +#define flexos_nop_gate_r(key_from, key_to, ret, func, ...) ret = func(__VA_ARGS__) + +#include + +#endif /* FLEXOS_H */ diff --git a/lib/flexos-core/include/flexos/literals.h b/lib/flexos-core/include/flexos/literals.h new file mode 100644 index 0000000000..98c9230316 --- /dev/null +++ b/lib/flexos-core/include/flexos/literals.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Copyright (c) 2021, Hugo Lefeuvre + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef FLEXOS_LITERALS_H +#define FLEXOS_LITERALS_H + +/* Je ne sais pourquoi \ Mon esprit amer + * D'une aile inquiΓ¨te et folle vole sur la mer. + * + * Beautiful. We have a problem with string literals. The compiler always stores + * them in the library's .rodata section, meaning that it lands into .data_comp1 + * if the lib is isolated. Meaning that this call + * + * flexos_intelpku_gate(1, 0, uk_pr_info, "Hello!"); + * + * would crash even though gates are properly inserted. In a perfect world we'd + * have a compiler pass that detects this and puts the literal in a shared + * section. But "La vie est bien sΓ©vΓ¨re" and we don't have time for this. + * + * De la douceur, de la douceur, de la douceur ! + * + * Here is a macro that you can use to force the compiler to share the string + * literal, e.g.: + * + * flexos_intelpku_gate(1, 0, uk_pr_info, FLEXOS_SHARED_LITERAL("Hello!")); + */ +#define FLEXOS_SHARED_LITERAL(str) ( \ + { \ + static char __attribute__((section(".data_shared"))) \ + __str[] = str; __str; \ + } \ +) + +#endif /* FLEXOS_LITERALS_H */ diff --git a/lib/flexos-core/intelpku.c b/lib/flexos-core/intelpku.c new file mode 100644 index 0000000000..1716b006de --- /dev/null +++ b/lib/flexos-core/intelpku.c @@ -0,0 +1,186 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Copyright (c) 2020-2021, Pierre Olivier + * Hugo Lefeuvre + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
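FLEXOS_SHARED_LITERAL() above uses a GNU statement expression to materialise a string literal as a static array in a chosen section and then yield its address. The same shape, stripped down to a compilable demo; the .data_demo section name is a placeholder (inspect the result with objdump -t):

#include <stdio.h>

/* same structure as FLEXOS_SHARED_LITERAL(), illustrative section name */
#define IN_DEMO_SECTION(str) ( \
	{ \
		static char __attribute__((section(".data_demo"))) \
			__str[] = str; __str; \
	} \
)

int main(void)
{
	/* the literal now lives in .data_demo instead of .rodata */
	const char *msg = IN_DEMO_SECTION("Hello!");

	puts(msg);
	return 0;
}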
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +/* NOTE: no need to check that PKE has been enabled in CR4, this is guaranteed + * by the platform code as long as CONFIG_HAVE_X86PKU is set. */ +UK_CTASSERT(CONFIG_HAVE_X86PKU); + +#define PKEY_MASK (~(PAGE_PROT_PKEY0 | PAGE_PROT_PKEY1 | PAGE_PROT_PKEY2 | \ + PAGE_PROT_PKEY3)) + +#define CLEAR_PKEY(prot) (prot & PKEY_MASK) +#define INSTALL_PKEY(prot, pkey) (prot | pkey) + +#define __PTE_PKEY_MASK (~(_PAGE_PKEY0 | _PAGE_PKEY1 | _PAGE_PKEY2 | _PAGE_PKEY3)) +#define GET_PKEY_FROM_PTE(pte) ((pte & ~__PTE_PKEY_MASK) >> 59) + +#if CONFIG_LIBFLEXOS_GATE_INTELPKU_PRIVATE_STACKS +/* Mark it "used" as it might potentially only be used in inline assembly */ +struct uk_thread_status_block tsb_comp0[32] __attribute__((used)); +/* The toolchain will insert TSB declarations here, e.g.: + * + * struct uk_thread_status_block tsb_comp1[32] __section(".data_comp1"); + * + * for compartment 1. + */ +/* __FLEXOS MARKER__: insert tsb decls here. */ +#endif /* CONFIG_LIBFLEXOS_GATE_INTELPKU_PRIVATE_STACKS */ + +#if CONFIG_LIBFLEXOS_INTELPKU_COUNT_GATE_EXECUTIONS +volatile unsigned long flexos_intelpku_in_gate_counter __section(".data_shared") = 0; +volatile unsigned long flexos_intelpku_out_gate_counter __section(".data_shared") = 0; +#endif /* CONFIG_LIBFLEXOS_COUNT_GATE_EXECUTIONS */ + +/* Set RO permission for key 'key' in variable val intented to be used as a + * PKRU value */ +static int pkru_set_ro(uint8_t key, uint32_t *val) +{ + if(key > 15) + return -EINVAL; + + *val &= ~(1UL << (key * 2)); + *val |= 1UL << ((key*2) + 1); + + return 0; +} + +/* Set all (RW) permissions for key 'key' in variable val intented to be used + * as a PKRU value */ +static int pkru_set_rw(uint8_t key, uint32_t *val) +{ + if(key > 15) + return -EINVAL; + + *val &= ~(1UL << (key*2)); + *val &= ~(1UL << ((key*2) + 1)); + return 0; +} + +/* Set no access permission for key 'key' in variable val intented to be used + * as a PKRU value. 
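PKRU dedicates two bits to each of the 16 protection keys: bit 2*key disables access and bit 2*key+1 disables writes, which is exactly what the pkru_set_* helpers in this file toggle. A small host-side sketch of that arithmetic (no RDPKRU/WRPKRU is executed, only the register value is computed):

#include <stdio.h>
#include <stdint.h>

/* bit 2*key   = AD (access disable), bit 2*key+1 = WD (write disable) */
static uint32_t pkru_ro(uint32_t pkru, uint8_t key)
{
	pkru &= ~(1UL << (key * 2));		/* allow access...    */
	pkru |=  (1UL << (key * 2 + 1));	/* ...but deny writes */
	return pkru;
}

static uint32_t pkru_none(uint32_t pkru, uint8_t key)
{
	pkru |= (3UL << (key * 2));		/* AD and WD both set */
	return pkru;
}

int main(void)
{
	uint32_t pkru = 0;			/* RW for every key */

	pkru = pkru_ro(pkru, 1);
	pkru = pkru_none(pkru, 2);
	printf("PKRU = 0x%08x\n", (unsigned int)pkru);	/* 0x00000038 */
	return 0;
}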
Remember that PKU does not fault on instruction fetch */ +static int pkru_set_no_access(uint8_t key, uint32_t *val) +{ + if(key > 15) + return -EINVAL; + + *val |= 1UL << (key * 2); + *val |= 1UL << ((key * 2) + 1); + return 0; +} + +/* Set the key associated with passed set of pages to key */ +int flexos_intelpku_mem_set_key(void *paddr, uint64_t npages, uint8_t key) +{ + unsigned long prot, pte, pkey = 0; + int err = 0; + + /* convert key to be stored in pkey (usually the same as + * pkey = key << 4) */ + if (key & 0x01) + pkey |= PAGE_PROT_PKEY0; + if (key & 0x02) + pkey |= PAGE_PROT_PKEY1; + if (key & 0x04) + pkey |= PAGE_PROT_PKEY2; + if (key & 0x08) + pkey |= PAGE_PROT_PKEY3; + + /* treat each page separately; we don't want to loose W/X permissions */ + for (uint64_t i = 0; i < npages; i++) { + pte = uk_virt_to_pte((unsigned long) paddr + i * PAGE_SIZE); + if (pte == PAGE_NOT_MAPPED) { + uk_pr_info("error: page not mapped (%p)\n", paddr); + err = -1; + break; + } + + /* retrieve current page protections */ + prot = pte |= (PAGE_PROT_WRITE & PAGE_PROT_EXEC); + /* clear current pkey */ + prot = CLEAR_PKEY(prot); + /* install new pkey */ + prot = INSTALL_PKEY(prot, pkey); + /* set new page protections */ + err = uk_page_set_prot((unsigned long) paddr + i * PAGE_SIZE, prot); + + if (err) { + uk_pr_info("error: unable to set protections\n"); + break; + } + } + + return err; +} + +/* Get the key associated with passed page */ +int flexos_intelpku_mem_get_key(void *paddr) +{ + unsigned long pte; + + pte = uk_virt_to_pte((unsigned long) paddr); + if (pte == PAGE_NOT_MAPPED) { + return -1; + } + + return GET_PKEY_FROM_PTE(pte); +} + +/* Set the permission of the calling thread to 'perm' for the key 'key', update + * the PKRU in the process. */ +int flexos_intelpku_set_perm(uint8_t key, flexos_intelpku_perm perm) +{ + uint32_t pkru; + + pkru = rdpkru(); + + switch(perm) { + case PKU_RW: + pkru_set_rw(key, &pkru); + break; + + case PKU_RO: + pkru_set_ro(key, &pkru); + break; + + case PKU_NONE: + pkru_set_no_access(key, &pkru); + break; + + default: + return -EINVAL; + } + + wrpkru(pkru); + return 0; +} diff --git a/lib/flexos-core/vmept.c b/lib/flexos-core/vmept.c new file mode 100644 index 0000000000..7df5b89668 --- /dev/null +++ b/lib/flexos-core/vmept.c @@ -0,0 +1,339 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Copyright (c) 2021, Hugo Lefeuvre + * Stefan Teodorescu + * Sebastian Rauch + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
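flexos_intelpku_mem_get_key() above recovers the key from the x86-64 PTE protection-key field, bits 59..62 (hence the >> 59 in GET_PKEY_FROM_PTE()). A standalone round trip of that encoding; the sample PTE value below is made up:

#include <stdio.h>
#include <stdint.h>

#define PTE_PKEY_SHIFT	59
#define PTE_PKEY_MASK	(0xfULL << PTE_PKEY_SHIFT)

static uint64_t pte_install_pkey(uint64_t pte, uint8_t key)
{
	return (pte & ~PTE_PKEY_MASK) |
	       ((uint64_t)(key & 0xf) << PTE_PKEY_SHIFT);
}

static uint8_t pte_get_pkey(uint64_t pte)
{
	return (pte & PTE_PKEY_MASK) >> PTE_PKEY_SHIFT;
}

int main(void)
{
	uint64_t pte = 0x00000000001ff067ULL;	/* made-up PTE value */

	pte = pte_install_pkey(pte, 5);
	printf("pte = 0x%016llx, pkey = %u\n",
	       (unsigned long long)pte, pte_get_pkey(pte));	/* pkey = 5 */
	return 0;
}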
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include + +#include + +/* The RPC shared pages behave like a stack: when a new RPC is made, + * we push a new page and pop when it returns, similar to how the usual + * function call stack works. + * + * TODO: revisit this model when implementing multithreading. Most probably + * we'll need one RPC stack per thread. + */ +unsigned long shmem_rpc_page = FLEXOS_VMEPT_RPC_PAGES_ADDR; + +volatile uint8_t flexos_vmept_comp_id = FLEXOS_VMEPT_COMP_ID; + +// only for testing and debugging +extern int ping1(int arg1, int arg2, int arg3, int arg4, int arg5, int arg6); +extern int ping2(int arg1, int arg2, int arg3, int arg4, int arg5, int arg6); +extern int ping3(int arg1, int arg2, int arg3, int arg4, int arg5, int arg6); +extern int ping4(int arg1, int arg2, int arg3, int arg4, int arg5, int arg6); +extern int ping5(int arg1, int arg2, int arg3, int arg4, int arg5, int arg6); +extern int ping6(int arg1, int arg2, int arg3, int arg4, int arg5, int arg6); +extern void reset_runs(void); + +extern void pong1(int arg1, int arg2, int arg3, int arg4, int arg5, int arg6); +extern void pong2(int arg1, int arg2, int arg3, int arg4, int arg5, int arg6); +extern void pong3(int arg1, int arg2, int arg3, int arg4, int arg5, int arg6); +extern void pong4(int arg1, int arg2, int arg3, int arg4, int arg5, int arg6); +extern void pong5(int arg1, int arg2, int arg3, int arg4, int arg5, int arg6); +extern void pong6(int arg1, int arg2, int arg3, int arg4, int arg5, int arg6); + +/* just for debugging, to make sure function pointers are the same + * across compartments */ +void _flexos_vmept_dbg_print_address_info() { +#if FLEXOS_VMEPT_DEBUG_PRINT_ADDR + static int first = 1; + if (!first) + return; + + first = 0; + printf("printing address info for compartment %d\n", flexos_vmept_comp_id); + printf("&ping1: %p\n", (void*) &ping1); + printf("&ping2: %p\n", (void*) &ping2); + printf("&ping3: %p\n", (void*) &ping3); + printf("&ping4: %p\n", (void*) &ping4); + printf("&ping5: %p\n", (void*) &ping5); + printf("&ping6: %p\n", (void*) &ping6); + + printf("&pong1: %p\n", (void*) &pong1); + printf("&pong2: %p\n", (void*) &pong2); + printf("&pong3: %p\n", (void*) &pong3); + printf("&pong4: %p\n", (void*) &pong4); + printf("&pong5: %p\n", (void*) &pong5); + printf("&pong6: %p\n", (void*) &pong6); + + printf("&reset_runs: %p\n", (void*) &reset_runs); +#endif /* FLEXOS_VMEPT_DEBUG_PRINT_ADDR */ +} + +static inline __attribute__((always_inline)) flexos_vmept_master_rpc_lock(struct flexos_vmept_master_rpc_ctrl *master_ctrl, int lock_value) +{ + FLEXOS_VMEPT_DEBUG_PRINT(("(master_ctrl: %p) Lock value: %08x, desired: %08x\n", master_ctrl, master_ctrl->lock, lock_value)); + int expected = 0; + while (!__atomic_compare_exchange_n(&master_ctrl->lock, &expected, lock_value, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) { + expected = 0; + uk_sched_yield(); + } + FLEXOS_VMEPT_DEBUG_PRINT(("Acquired lock for master rpc ctrl %p.\n", master_ctrl)); +} + + +static 
inline __attribute__((always_inline)) flexos_vmept_master_rpc_unlock(struct flexos_vmept_master_rpc_ctrl *master_ctrl) +{ + master_ctrl->lock = 0; + FLEXOS_VMEPT_DEBUG_PRINT(("Released lock for master rpc ctrl %p.\n", master_ctrl)); +} + +/* The retun value of this funtion indicates whether there was a return or not: + * 0 means no return value, 1 means there was a return vale. + * If there is a return value, it is written to out_ret. */ +static int flexos_vmept_eval_func(struct flexos_vmept_rpc_ctrl *ctrl, uint64_t *out_ret) +{ + uint64_t finfo = ctrl->f_info; + uint8_t argc = FLEXOS_VMEPT_FINFO_EXTRACT_ARGC(finfo); + UK_ASSERT(argc <= FLEXOS_VMEPT_MAX_PARAMS); + + /* + uint64_t args[FLEXOS_VMEPT_MAX_PARAMS]; + for (size_t i = 0; i < FLEXOS_VMEPT_MAX_PARAMS; ++i) { + args[i] = ctrl->parameters[i]; + } */ + + uint8_t key_to = flexos_vmept_extract_key_to(ctrl->extended_state); + FLEXOS_VMEPT_DEBUG_PRINT(("Executing function at %p in compartment %ld, finfo=%016lx.\n", + ctrl->f_ptr, (int) key_to, finfo)); + + // rax is unused untill the call, so we use it to store the pointer + register uint64_t ret asm("rax") = (uint64_t) ctrl->f_ptr; + + asm volatile ( + "cmp $0, %[argc] \n" + "jz 1f \n" + "movq 0(%[args]), %%rdi \n" + "cmp $1, %[argc] \n" + "jz 1f \n" + "movq 8(%[args]), %%rsi \n" + "cmp $2, %[argc] \n" + "jz 1f \n" + "movq 16(%[args]), %%rdx \n" + "cmp $3, %[argc] \n" + "jz 1f \n" + "movq 24(%[args]), %%rcx \n" + "cmp $3, %[argc] \n" + "jz 1f \n" + "movq 32(%[args]), %%r8 \n" + "cmp $5, %[argc] \n" + "jz 1f \n" + "movq 40(%[args]), %%r9 \n" + "1: \n" + "call *%[ret] \n" + "movq %%rax, %[ret] \n" + : /* output constraints */ + [ret] "+&r" (ret) + : /* input constraints */ + [args] "r" (ctrl->parameters), + [argc] "r" (argc) + : /* clobbers */ + "rdi", "rsi", "rdx", "rcx", "r8", "r9", "r10", "r11", "memory" + ); + + // copy so that a function call for debugging can safely overwrite rax (which holds ret) + uint64_t ret_copy = ret; + if (FLEXOS_VMEPT_FINFO_EXTRACT_RET(finfo)) { + FLEXOS_VMEPT_DEBUG_PRINT(("return value after call: %016lx\n", ret_copy)); + *out_ret = ret_copy; + return 1; + } + return 0; +} + + +/* wait for the RPC call to finish */ +void flexos_vmept_wait_for_rpc(volatile struct flexos_vmept_rpc_ctrl *ctrl) +{ + uint64_t ext_state; + int state_const; + uint8_t key_from; + uint8_t key_to; + int has_ret; + uint64_t retval; + FLEXOS_VMEPT_DEBUG_PRINT(("Comp %d waiting for call to finish.\n", flexos_vmept_comp_id)); + while (1) { + ext_state = ctrl->extended_state; + state_const = flexos_vmept_extract_state(ext_state) & FLEXOS_VMEPT_RPC_STATE_CONSTANT_MASK; + key_from = flexos_vmept_extract_key_from(ext_state); + key_to = flexos_vmept_extract_key_to(ext_state); + if (state_const == FLEXOS_VMEPT_RPC_STATE_CALLED && key_to == flexos_vmept_comp_id) { + // handle nested rpc call + FLEXOS_VMEPT_DEBUG_PRINT(("Handling nested call.\n")); + has_ret = flexos_vmept_eval_func(ctrl, &retval); + flexos_vmept_ctrl_set_state(ctrl, FLEXOS_VMEPT_RPC_STATE_FROZEN); + if (has_ret) + flexos_vmept_ctrl_set_ret(ctrl, retval); + flexos_vmept_ctrl_set_extended_state(ctrl, FLEXOS_VMEPT_RPC_STATE_RETURNED, flexos_vmept_comp_id, key_from); + } else if (state_const == FLEXOS_VMEPT_RPC_STATE_RETURNED && key_to == flexos_vmept_comp_id) { + // return from rpc call + FLEXOS_VMEPT_DEBUG_PRINT(("Comp %d finished call.\n", flexos_vmept_comp_id)); + return; + } else { + uk_sched_yield(); + } + } +} + +void flexos_vmept_rpc_loop() +{ + _flexos_vmept_dbg_print_address_info(); // to make sure addresses match + + 
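+	/* Each compartment runs this loop in a dedicated RPC-handler thread.
+	 * It polls the ctrl structure shared with the calling compartment:
+	 * when the state switches to FLEXOS_VMEPT_RPC_STATE_CALLED and the
+	 * destination key matches this compartment, the requested function
+	 * is executed via flexos_vmept_eval_func() and the state is set back
+	 * to FLEXOS_VMEPT_RPC_STATE_RETURNED; otherwise the thread yields. */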
volatile struct flexos_vmept_rpc_ctrl *ctrl = NULL; + /* make sure the ctrl field in the thread was set */ // TODO: is this nesessary? + while ((ctrl = (volatile struct flexos_vmept_rpc_ctrl *) uk_thread_current()->ctrl) == NULL) { + uk_sched_yield(); + } + + FLEXOS_VMEPT_DEBUG_PRINT(("Starting RPC server, observing ctrl %p\n", ctrl)); + + uint64_t ext_state; + int state_const; + uint8_t key_from; + uint8_t key_to; + int has_ret; + uint64_t retval; + while(1) { + ext_state = ctrl->extended_state; + state_const = flexos_vmept_extract_state(ext_state) & FLEXOS_VMEPT_RPC_STATE_CONSTANT_MASK; + key_from = flexos_vmept_extract_key_from(ext_state); + key_to = flexos_vmept_extract_key_to(ext_state); + if (state_const == FLEXOS_VMEPT_RPC_STATE_CALLED && key_to == flexos_vmept_comp_id) { + // handle rpc call to this compartment + FLEXOS_VMEPT_DEBUG_PRINT(("Comp %d handling call from %d.\n", key_to, key_from)); + has_ret = flexos_vmept_eval_func(ctrl, &retval); + flexos_vmept_ctrl_set_state(ctrl, FLEXOS_VMEPT_RPC_STATE_FROZEN); + if (has_ret) + flexos_vmept_ctrl_set_ret(ctrl, retval); + flexos_vmept_ctrl_set_extended_state(ctrl, FLEXOS_VMEPT_RPC_STATE_RETURNED, flexos_vmept_comp_id, key_from); + FLEXOS_VMEPT_DEBUG_PRINT(("Comp %d returning from call made from %d.\n", key_to, key_from)); + } else if (state_const == FLEXOS_VMEPT_RPC_STATE_RETURNED && key_to == flexos_vmept_comp_id) { + // returns should never arrive here + printf("Unexpected return in rpc loop. This is a bug!\n"); + } else { + uk_sched_yield(); + } + } +} + +void flexos_vmept_master_rpc_loop() +{ + static struct flexos_vmept_thread_map thread_map; + flexos_vmept_thread_map_init(&thread_map); + volatile struct flexos_vmept_master_rpc_ctrl *ctrl = flexos_vmept_master_rpc_ctrl(flexos_vmept_comp_id); + flexos_vmept_init_master_rpc_ctrl(ctrl); + + FLEXOS_VMEPT_DEBUG_PRINT(("Starting master rpc loop. 
Observing master_rpc_ctrl at %p\n", ctrl)); + while (1) { + if (ctrl->state == FLEXOS_VMEPT_MASTER_RPC_STATE_CALLED && ctrl->to == flexos_vmept_comp_id) { + FLEXOS_VMEPT_DEBUG_PRINT(("Received master rpc call at %p.\n", ctrl)); + + int tid = ctrl->local_tid; + UK_ASSERT(tid >= 0 && tid < FLEXOS_VMEPT_MAX_THREADS); + uint8_t calling_comp = ctrl->from; + struct uk_thread *thread = NULL; + struct uk_sched *sched = uk_thread_current()->sched; + // there are only two actions: create or destroy a thread + switch (ctrl->action) { + case FLEXOS_VMEPT_MASTER_RPC_ACTION_CREATE: + // TODO: error handling + FLEXOS_VMEPT_DEBUG_PRINT(("Handling create.\n")); + thread = uk_sched_thread_create_rpc_only(sched, NULL, NULL, flexos_vmept_rpc_loop, NULL, ctrl->from, tid, &thread_map); + ctrl->state = FLEXOS_VMEPT_MASTER_RPC_STATE_RETURNED; + FLEXOS_VMEPT_DEBUG_PRINT(("Created thread with tid %d (ptr: %p) to handle RPC calls from thread with tid %d in compartment %d.\n", thread->tid, thread, tid, calling_comp)); + FLEXOS_VMEPT_DEBUG_PRINT(("Mapping is set up to track (comp=%d, local_tid=%d) -> %p.\n", calling_comp, tid, flexos_vmept_thread_map_lookup(&thread_map, calling_comp, tid))); + break; + case FLEXOS_VMEPT_MASTER_RPC_ACTION_DESTROY: + FLEXOS_VMEPT_DEBUG_PRINT(("Handling destroy.\n")); + thread = flexos_vmept_thread_map_lookup(&thread_map, calling_comp, (uint8_t) tid); + UK_ASSERT(thread); + // TODO: error handling + FLEXOS_VMEPT_DEBUG_PRINT(("Destroying thread with tid %d (ptr: %p) handling RPC calls from thread with tid %d in compartment %d.\n", thread->tid, thread, tid, calling_comp)); + uk_sched_thread_destroy_rpc_only(sched, thread, calling_comp, (uint8_t) tid, &thread_map); + FLEXOS_VMEPT_DEBUG_PRINT(("Mapping is set up to track (comp=%d, local_tid=%d) -> %p.\n", calling_comp, tid, flexos_vmept_thread_map_lookup(&thread_map, calling_comp, tid))); + ctrl->state = FLEXOS_VMEPT_MASTER_RPC_STATE_RETURNED; + break; + default: + printf("Bad action. This is a bug!\n"); + } + // TODO: error handling ? + ctrl->state = FLEXOS_VMEPT_BUILD_MASTER_RPC_RETURN_STATE(0); + } else { + uk_sched_yield(); + } + } +} + +int flexos_vmept_master_rpc_call(uint8_t key_from, uint8_t key_to, uint8_t local_tid, uint8_t action) +{ + volatile struct flexos_vmept_master_rpc_ctrl *master_ctrl = flexos_vmept_master_rpc_ctrl(key_to); + FLEXOS_VMEPT_DEBUG_PRINT(("Making master rpc call from comp %d to comp %d (master_ctrl at %p) with local_tid=%d, action=%d.\n", key_from, key_to, master_ctrl, local_tid, action)); + FLEXOS_VMEPT_DEBUG_PRINT(("Before init lock.\n")); + while (! 
master_ctrl->initialized) { + uk_sched_yield(); + } + FLEXOS_VMEPT_DEBUG_PRINT(("Past init lock.\n")); + + int lock_value = flexos_vmept_build_lock_value(local_tid); + flexos_vmept_master_rpc_lock(master_ctrl, lock_value); + + master_ctrl->from = key_from; + master_ctrl->to = key_to; + master_ctrl->local_tid = local_tid; + master_ctrl->action = action; + + // important: state should always be the last field that is set + master_ctrl->state = FLEXOS_VMEPT_MASTER_RPC_STATE_CALLED; + + // wait for call to return + while ((master_ctrl->state & FLEXOS_VMEPT_MASTER_RPC_STATE_CONSTANT_MASK) != FLEXOS_VMEPT_MASTER_RPC_STATE_RETURNED) { + uk_sched_yield(); + } + + // TODO: error handling + int ret = FLEXOS_VMEPT_MASTER_RPC_STATE_EXTRACT_VALUE(master_ctrl->state); + master_ctrl->state = FLEXOS_VMEPT_MASTER_RPC_STATE_IDLE; + flexos_vmept_master_rpc_unlock(master_ctrl); + return ret; +} + + +void flexos_vmept_create_rpc_loop_thread() +{ + struct uk_thread *thread = uk_thread_current(); + uk_sched_thread_create_rpc_only(thread->sched, NULL, NULL, &flexos_vmept_master_rpc_loop, NULL, + flexos_vmept_comp_id, 0, NULL); +} + +uk_lib_initcall(flexos_vmept_create_rpc_loop_thread); diff --git a/lib/kasan/Config.uk b/lib/kasan/Config.uk new file mode 100644 index 0000000000..02dd9ad93f --- /dev/null +++ b/lib/kasan/Config.uk @@ -0,0 +1,14 @@ +menuconfig LIBKASAN + bool "kasan: Kernel Address Sanitizer(KVM x86 only)" + default n + help + Experimental. Available only under KVM x86_64. + +if LIBKASAN + +config LIBKASAN_GLOBAL + bool "Global Kernel Address Sanitizer" + default n + help + Enable Kernel Address Sanitizer globally. +endif diff --git a/lib/kasan/Makefile.uk b/lib/kasan/Makefile.uk new file mode 100644 index 0000000000..570064e851 --- /dev/null +++ b/lib/kasan/Makefile.uk @@ -0,0 +1,8 @@ +$(eval $(call addlib_s,libkasan,$(CONFIG_LIBKASAN))) + +CINCLUDES-$(CONFIG_LIBKASAN) += -I$(LIBKASAN_BASE)/include +CXXINCLUDES-$(CONFIG_LIBKASAN) += -I$(LIBKASAN_BASE)/include + +COMPFLAGS-$(CONFIG_LIBKASAN_GLOBAL) += -fsanitize=kernel-address + +LIBKASAN_SRCS-y += $(LIBKASAN_BASE)/kasan.c diff --git a/lib/kasan/exportsyms.uk b/lib/kasan/exportsyms.uk new file mode 100644 index 0000000000..5ddeef6198 --- /dev/null +++ b/lib/kasan/exportsyms.uk @@ -0,0 +1,39 @@ +init_kasan +kasan_mark_valid +kasan_mark_invalid +kasan_mark +access_within_shadow_byte +shadow_1byte_isvalid +shadow_2byte_isvalid +shadow_4byte_isvalid +shadow_8byte_isvalid +shadow_Nbyte_isvalid +__asan_load1_noabort +__asan_load2_noabort +__asan_load4_noabort +__asan_load8_noabort +__asan_load16_noabort +__asan_store1_noabort +__asan_store2_noabort +__asan_store4_noabort +__asan_store8_noabort +__asan_store16_noabort +__asan_report_load1_noabort +__asan_report_load2_noabort +__asan_report_load4_noabort +__asan_report_load8_noabort +__asan_report_load16_noabort +__asan_report_store1_noabort +__asan_report_store2_noabort +__asan_report_store4_noabort +__asan_report_store8_noabort +__asan_report_store16_noabort +__asan_loadN_noabort +__asan_storeN_noabort +__asan_report_load_n_noabort +__asan_report_store_n_noabort +__asan_handle_no_return +__asan_register_globals +__asan_unregister_globals +__asan_alloca_poison +__asan_allocas_unpoison diff --git a/lib/kasan/include/uk/kasan.h b/lib/kasan/include/uk/kasan.h new file mode 100644 index 0000000000..3ace68156e --- /dev/null +++ b/lib/kasan/include/uk/kasan.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Authors: Badoiu Vlad-Andrei + * + * Copyright (c) 2021, University Politehnica of 
Bucharest. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SYS_KASAN_H_ +#define _SYS_KASAN_H_ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define KASAN_CODE_STACK_LEFT 0xF1 +#define KASAN_CODE_STACK_MID 0xF2 +#define KASAN_CODE_STACK_RIGHT 0xF3 + +/* Our own redzone codes */ +#define KASAN_CODE_GLOBAL_OVERFLOW 0xFA +#define KASAN_CODE_KMEM_FREED 0xFB +#define KASAN_CODE_POOL_OVERFLOW 0xFC +#define KASAN_CODE_POOL_FREED 0xFD +#define KASAN_CODE_KMALLOC_OVERFLOW 0xFE +#define KASAN_CODE_KMALLOC_FREED 0xFF + +/* Redzone sizes for instrumented allocators */ +#define KASAN_KMALLOC_REDZONE_SIZE 8 + +/* Shadow mem size */ +#define KASAN_MD_SHADOW_SIZE (1 << 24) /* 16 MB */ + +/* Initialize KASAN subsystem. */ +void init_kasan(void); + +/* Mark bytes as valid (in the shadow memory) */ +void kasan_mark_valid(const void *addr, size_t size); + +/* Mark bytes as invalid (in the shadow memory) */ +void kasan_mark_invalid(const void *addr, size_t size, uint8_t code); + +/* Mark first 'size' bytes as valid (in the shadow memory), and the remaining + * (size_with_redzone - size) bytes as invalid with given code. + */ +void kasan_mark(const void *addr, size_t size, size_t size_with_redzone, + uint8_t code); + +#ifdef __cplusplus +} +#endif + +#endif /* !_SYS_KASAN_H_ */ diff --git a/lib/kasan/kasan.c b/lib/kasan/kasan.c new file mode 100644 index 0000000000..68456f57f0 --- /dev/null +++ b/lib/kasan/kasan.c @@ -0,0 +1,333 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Authors: Badoiu Vlad-Andrei + * + * Copyright (c) 2021, University Politehnica of Bucharest. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#include "kasan_internal.h" +#include + +static int kasan_ready __attribute__((flexos_whitelist)); + +static const char *code_name(uint8_t code) +{ + switch (code) { + case KASAN_CODE_STACK_LEFT: + case KASAN_CODE_STACK_MID: + case KASAN_CODE_STACK_RIGHT: + return "stack buffer-overflow"; + case KASAN_CODE_GLOBAL_OVERFLOW: + return "global buffer-overflow"; + case KASAN_CODE_KMEM_FREED: + return "kmem use-after-free"; + case KASAN_CODE_POOL_OVERFLOW: + return "pool buffer-overflow"; + case KASAN_CODE_POOL_FREED: + return "pool use-after-free"; + case KASAN_CODE_KMALLOC_OVERFLOW: + return "buffer-overflow"; + case KASAN_CODE_KMALLOC_FREED: + return "use-after-free"; + case 1 ... 
7: + return "partial redzone"; + default: + return "unknown redzone"; + } +} + +/* Check whether all bytes from range [addr, addr + size) are mapped to + * a single shadow byte + */ +static inline bool access_within_shadow_byte(uintptr_t addr, + size_t size) { + return (addr >> KASAN_SHADOW_SCALE_SHIFT) == + ((addr + size - 1) >> KASAN_SHADOW_SCALE_SHIFT); +} + +static inline bool shadow_1byte_isvalid(uintptr_t addr, + uint8_t *code) { + int8_t shadow_val = (int8_t)*kasan_md_addr_to_shad(addr); + int8_t last = addr & KASAN_SHADOW_MASK; + + if (likely(shadow_val == 0 || last < shadow_val)) + return true; + *code = shadow_val; + return false; +} + +static inline bool shadow_2byte_isvalid(uintptr_t addr, + uint8_t *code) { + if (!access_within_shadow_byte(addr, 2)) + return shadow_1byte_isvalid(addr, code) && + shadow_1byte_isvalid(addr + 1, code); + + int8_t shadow_val = *kasan_md_addr_to_shad(addr); + int8_t last = (addr + 1) & KASAN_SHADOW_MASK; + + if (likely(shadow_val == 0 || last < shadow_val)) + return true; + *code = shadow_val; + return false; +} + +static inline bool shadow_4byte_isvalid(uintptr_t addr, + uint8_t *code) { + if (!access_within_shadow_byte(addr, 4)) + return shadow_2byte_isvalid(addr, code) && + shadow_2byte_isvalid(addr + 2, code); + + int8_t shadow_val = *kasan_md_addr_to_shad(addr); + int8_t last = (addr + 3) & KASAN_SHADOW_MASK; + + if (likely(shadow_val == 0 || last < shadow_val)) + return true; + *code = shadow_val; + return false; +} + +static inline bool shadow_8byte_isvalid(uintptr_t addr, + uint8_t *code) +{ + if (!access_within_shadow_byte(addr, 8)) + return shadow_4byte_isvalid(addr, code) && + shadow_4byte_isvalid(addr + 4, code); + + int8_t shadow_val = *kasan_md_addr_to_shad(addr); + int8_t last = (addr + 7) & KASAN_SHADOW_MASK; + + if (likely(shadow_val == 0 || last < shadow_val)) + return true; + *code = shadow_val; + return false; +} + +static inline bool + shadow_Nbyte_isvalid(uintptr_t addr, size_t size, uint8_t *code) +{ + for (size_t i = 0; i < size; i++) + if (unlikely(!shadow_1byte_isvalid(addr + i, code))) + return false; + return true; +} + +static inline void shadow_check(uintptr_t addr, size_t size, + bool read) +{ + if (unlikely(!kasan_ready)) + return; + if (unlikely(!kasan_md_addr_supported(addr))) + return; + + uint8_t code = 0; + bool valid = true; + + if (__builtin_constant_p(size)) { + switch (size) { + case 1: + valid = shadow_1byte_isvalid(addr, &code); + break; + case 2: + valid = shadow_2byte_isvalid(addr, &code); + break; + case 4: + valid = shadow_4byte_isvalid(addr, &code); + break; + case 8: + valid = shadow_8byte_isvalid(addr, &code); + break; + } + } else { + valid = shadow_Nbyte_isvalid(addr, size, &code); + } + + if (unlikely(!valid)) { + } +} + +/* + * Memory is divided into 8-byte blocks aligned to 8-byte boundary. Each block + * has corresponding descriptor byte in the shadow memory. You can mark each + * block as valid (0x00) or invalid (0xF1 - 0xFF). Blocks can be partially valid + * (0x01 - 0x07) - i.e. prefix is valid, suffix is invalid. Other variants are + * NOT POSSIBLE! Thus `addr` and `total` must be block aligned. + */ +void kasan_mark(const void *addr, size_t valid, size_t total, uint8_t code) +{ + UK_ASSERT(is_aligned(addr, KASAN_SHADOW_SCALE_SIZE)); + UK_ASSERT(is_aligned(total, KASAN_SHADOW_SCALE_SIZE)); + UK_ASSERT(valid <= total); + + int8_t *shadow = kasan_md_addr_to_shad((uintptr_t)addr); + int8_t *end = shadow + total / KASAN_SHADOW_SCALE_SIZE; + + /* Valid bytes. 
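+	 * Each shadow byte covers KASAN_SHADOW_SCALE_SIZE (8) bytes of real
+	 * memory, so the first valid / 8 shadow bytes are simply cleared to 0.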
*/ + size_t len = valid / KASAN_SHADOW_SCALE_SIZE; + + __builtin_memset(shadow, 0, len); + shadow += len; + + /* At most one partially valid byte. */ + if (valid & KASAN_SHADOW_MASK) + *shadow++ = valid & KASAN_SHADOW_MASK; + + /* Invalid bytes. */ + if (shadow < end) + __builtin_memset(shadow, code, end - shadow); +} + +void kasan_mark_valid(const void *addr, size_t size) +{ + kasan_mark(addr, size, size, 0); +} + +void kasan_mark_invalid(const void *addr, size_t size, uint8_t code) +{ + kasan_mark(addr, 0, size, code); +} + +void init_kasan(void) +{ + /* Set entire shadow memory to zero */ + kasan_mark_valid((const void *)KASAN_MD_SANITIZED_START, + KASAN_MD_SANITIZED_SIZE); + + /* KASAN is ready to check for errors! */ + kasan_ready = 1; +} + +#define DEFINE_ASAN_LOAD_STORE(size) \ +void __asan_load##size##_noabort(uintptr_t addr) \ +{ \ + shadow_check(addr, size, true); \ +} \ +void __asan_store##size##_noabort(uintptr_t addr) \ +{ \ + shadow_check(addr, size, false); \ +} + + +#define DEFINE_ASAN_LOAD_STORE_CLANG(size) \ +void __asan_report_load##size##_noabort(uintptr_t addr) \ +{ \ + shadow_check(addr, size, true); \ +} \ +void __asan_report_store##size##_noabort(uintptr_t addr) \ +{ \ + shadow_check(addr, size, false); \ +} + + + +DEFINE_ASAN_LOAD_STORE(1); +DEFINE_ASAN_LOAD_STORE(2); +DEFINE_ASAN_LOAD_STORE(4); +DEFINE_ASAN_LOAD_STORE(8); +DEFINE_ASAN_LOAD_STORE(16); + +DEFINE_ASAN_LOAD_STORE_CLANG(1); +DEFINE_ASAN_LOAD_STORE_CLANG(2); +DEFINE_ASAN_LOAD_STORE_CLANG(4); +DEFINE_ASAN_LOAD_STORE_CLANG(8); +DEFINE_ASAN_LOAD_STORE_CLANG(16); + +// for GCC +void __asan_loadN_noabort(uintptr_t addr, size_t size) +{ + shadow_check(addr, size, true); +} + +void __asan_storeN_noabort(uintptr_t addr, size_t size) +{ + shadow_check(addr, size, false); +} + +// for clang +void __asan_report_load_n_noabort(uintptr_t addr, size_t size) +{ + shadow_check(addr, size, true); +} +void __asan_report_store_n_noabort(uintptr_t addr, size_t size) +{ + shadow_check(addr, size, false); +} + +/* TODO: Called at the end of every function marked as "noreturn". + * Performs cleanup of the current stack's shadow memory to prevent false + * positives. + */ +void __asan_handle_no_return(void) +{ +} + +void __asan_register_globals(struct __asan_global *globals, uintptr_t n) +{ + + for (size_t i = 0; i < n; i++) + kasan_mark((void *)globals[i].beg, globals[i].size, + globals[i].size_with_redzone, + KASAN_CODE_GLOBAL_OVERFLOW); +} + + +void __asan_unregister_globals(uintptr_t globals __unused, uintptr_t n __unused) +{ +} + +void __asan_alloca_poison(uintptr_t addr, uintptr_t size) +{ + void *left_redzone = (int8_t *)addr - KASAN_ALLOCA_REDZONE_SIZE; + size_t size_with_mid_redzone = roundup(size, KASAN_ALLOCA_REDZONE_SIZE); + void *right_redzone = (int8_t *)addr + size_with_mid_redzone; + + kasan_mark_invalid(left_redzone, KASAN_ALLOCA_REDZONE_SIZE, + KASAN_CODE_STACK_LEFT); + kasan_mark((void *)addr, size, size_with_mid_redzone, + KASAN_CODE_STACK_MID); + kasan_mark_invalid(right_redzone, KASAN_ALLOCA_REDZONE_SIZE, + KASAN_CODE_STACK_RIGHT); +} + +void __asan_allocas_unpoison(uintptr_t begin, uintptr_t size) +{ + kasan_mark_valid((void *)begin, size); +} + +#ifdef __cplusplus +} +#endif diff --git a/lib/kasan/kasan_internal.h b/lib/kasan/kasan_internal.h new file mode 100644 index 0000000000..ab072c68b7 --- /dev/null +++ b/lib/kasan/kasan_internal.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Authors: Badoiu Vlad-Andrei + * + * Copyright (c) 2021, University Politehnica of Bucharest. 
All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SYS_KASAN_INTERNAL_H_ +#define _SYS_KASAN_INTERNAL_H_ + +#include +#include +#include +#include +#include + +/* Part of internal compiler interface */ +#define KASAN_SHADOW_SCALE_SHIFT 3 +#define KASAN_ALLOCA_REDZONE_SIZE 32 + +#define KASAN_SHADOW_SCALE_SIZE (1 << KASAN_SHADOW_SCALE_SHIFT) +#define KASAN_SHADOW_MASK (KASAN_SHADOW_SCALE_SIZE - 1) + +#define roundup(x, y) ((((x) + ((y)-1)) / (y)) * (y)) + +#define is_aligned(addr, size) \ + ({ \ + intptr_t _addr = (intptr_t)(addr); \ + intptr_t _size = (intptr_t)(size); \ + !(_addr & (_size - 1)); \ + }) + +#define KASAN_SHADOW_SCALE_SHIFT 3 + +extern void *md_base; +#define __THE_END ((unsigned long)(md_base)) + +#define SUPERPAGESIZE (1 << 22) /* 4 MB */ + +/* We reserve a portion from the start of the mem for the shadow memory */ +#define KASAN_MD_SHADOW_START ALIGN_UP((uintptr_t) __THE_END, __PAGE_SIZE) + +#define KASAN_MD_SHADOW_END (KASAN_MD_SHADOW_START + KASAN_MD_SHADOW_SIZE) + +/* Sanitized memory (accesses within this range are checked) */ +#define KASAN_MD_SANITIZED_START ALIGN_UP(KASAN_MD_SHADOW_START \ + + KASAN_MD_SHADOW_SIZE, __PAGE_SIZE) + +#define KASAN_MD_SANITIZED_SIZE \ + (KASAN_MD_SHADOW_SIZE << KASAN_SHADOW_SCALE_SHIFT) +#define KASAN_MD_SANITIZED_END \ + (KASAN_MD_SANITIZED_START + KASAN_MD_SANITIZED_SIZE) + +#define KASAN_MD_OFFSET \ + (KASAN_MD_SHADOW_START - \ + (KASAN_MD_SANITIZED_START >> KASAN_SHADOW_SCALE_SHIFT)) + +struct __asan_global_source_location { + const char *filename; + int line_no; + int column_no; +}; + +struct __asan_global { + uintptr_t beg; /* The address of the global */ + uintptr_t size; /* The original size of the global */ + uintptr_t size_with_redzone; /* The size with the redzone */ + const char *name; /* Name as a C string */ + const char *module_name; /* Module name as a C string */ + /* Does the global have dynamic initializer */ + uintptr_t has_dynamic_init; /* Location of a global */ + struct __asan_global_source_location *location; + uintptr_t 
odr_indicator; /* The address of the ODR indicator symbol */ +}; + +static inline int8_t *kasan_md_addr_to_shad(uintptr_t addr) +{ + return (int8_t *)(KASAN_MD_OFFSET + (addr >> KASAN_SHADOW_SCALE_SHIFT)); +} + +bool kasan_md_addr_supported(uintptr_t addr) +{ + return addr >= KASAN_MD_SANITIZED_START + && addr < KASAN_MD_SANITIZED_END; +} + +#endif /* !_SYS_KASAN_INTERNAL_H_ */ diff --git a/lib/posix-mmap/Config.uk b/lib/posix-mmap/Config.uk new file mode 100644 index 0000000000..a7ceb044f6 --- /dev/null +++ b/lib/posix-mmap/Config.uk @@ -0,0 +1,4 @@ +config LIBPOSIX_MMAP + bool "POSIX mmap functions" + default n + select PT_API diff --git a/lib/posix-mmap/Makefile.uk b/lib/posix-mmap/Makefile.uk new file mode 100644 index 0000000000..d7193f3921 --- /dev/null +++ b/lib/posix-mmap/Makefile.uk @@ -0,0 +1,8 @@ +$(eval $(call addlib_s,libposix_mmap,$(CONFIG_LIBPOSIX_MMAP))) + +CINCLUDES-$(CONFIG_LIBPOSIX_MMAP) += -I$(LIBPOSIX_MMAP_BASE)/include +CXXINCLUDES-$(CONFIG_LIBPOSIX_MMAP) += -I$(LIBPOSIX_MMAP_BASE)/include + +LIBPOSIX_MMAP_SRCS-y += $(LIBPOSIX_MMAP_BASE)/mm.c +LIBPOSIX_MMAP_CINCLUDES-$(CONFIG_PLAT_XEN) += $(LIBXENPLAT_CINCLUDES-y) +UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_MMAP) += mmap-6 munmap-2 mprotect-3 diff --git a/lib/posix-mmap/exportsyms.uk b/lib/posix-mmap/exportsyms.uk new file mode 100644 index 0000000000..c2ea889429 --- /dev/null +++ b/lib/posix-mmap/exportsyms.uk @@ -0,0 +1,9 @@ +mmap +uk_syscall_e_mmap +uk_syscall_r_mmap +munmap +uk_syscall_e_munmap +uk_syscall_r_munmap +mprotect +uk_syscall_e_mprotect +uk_syscall_r_mprotect diff --git a/lib/posix-mmap/include/sys/mman.h b/lib/posix-mmap/include/sys/mman.h new file mode 100644 index 0000000000..0c4755cbd0 --- /dev/null +++ b/lib/posix-mmap/include/sys/mman.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Authors: Stefan Teodorescu + * + * Copyright (c) 2021, University Politehnica of Bucharest. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __POSIX_MMAP__ +#define __POSIX_MMAP__ + +#define MAP_FAILED ((void *) -1) + +#define PROT_NONE 0x0 +#define PROT_READ 0x1 +#define PROT_WRITE 0x2 +#define PROT_EXEC 0x4 + +#define MAP_SHARED 0x1 +#define MAP_PRIVATE 0x2 + +#define MAP_FIXED 0x10 +#define MAP_ANONYMOUS 0x20 +#define MAP_ANON MAP_ANONYMOUS + +void *mmap(void *addr, size_t length, int prot, int flags, + int fd, off_t offset); + +int munmap(void *addr, size_t length); + +int mprotect(void *addr, size_t len, int prot); + +int msync(void *addr, size_t length, int flags); + +#endif /* __POSIX_MMAP__ */ diff --git a/lib/posix-mmap/mm.c b/lib/posix-mmap/mm.c new file mode 100644 index 0000000000..b4da96e9cd --- /dev/null +++ b/lib/posix-mmap/mm.c @@ -0,0 +1,234 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Authors: Stefan Teodorescu + * + * Copyright (c) 2021, University Politehnica of Bucharest. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +/* + * XXX + * This does for now a linear search, starting from |start|, looking for a + * memory area with |length| bytes (aligned to page size) + */ +static unsigned long get_free_virtual_area(unsigned long start, size_t length) +{ + unsigned long page; + + if (!PAGE_ALIGNED(length)) + return -1; + + while (start <= MMAP_AREA_END - length) { + for (page = start; page < start + length; page += PAGE_SIZE) { + if (PAGE_PRESENT(uk_virt_to_pte(page))) + break; + } + + if (page == start + length) + return start; + + start = page + PAGE_SIZE; + } + + return -1; +} + +static int libc_to_internal_prot(int prot) +{ + int page_prot = PAGE_PROT_NONE; + + if (prot & PROT_READ) + page_prot |= PAGE_PROT_READ; + if (prot & PROT_WRITE) + page_prot |= PAGE_PROT_WRITE; + if (prot & PROT_EXEC) + page_prot |= PAGE_PROT_EXEC; + + return page_prot; +} +UK_SYSCALL_DEFINE(void *, mmap, void *, addr, size_t, length, int, prot, + int, flags, int, fd, off_t, offset) +{ + unsigned long page_addr = (unsigned long) addr; + unsigned long area_to_map, page_prot, page; + size_t i; + + if (flags & MAP_ANONYMOUS) { + if (fd != -1 || offset) { + errno = EINVAL; + return MAP_FAILED; + } + } else { + /* TODO: We don't currently support mapping files */ + errno = ENOTSUP; + return MAP_FAILED; + } + + + /* At least one of MAP_SHARED or MAP_PRIVATE has to be specified */ + if (!(flags & MAP_SHARED) && !(flags & MAP_PRIVATE)) { + errno = EINVAL; + return MAP_FAILED; + } + + if (!length) { + errno = EINVAL; + return MAP_FAILED; + } + + length = PAGE_ALIGN_UP(length); + if (!length) { + errno = ENOMEM; + return MAP_FAILED; + } + + if (flags & MAP_FIXED) { + /* Discard any overlapping mappings */ + // TODO: bug when unmapping memory from static pagetable + /* + if (munmap(addr, length)) { + errno = EINVAL; + return MAP_FAILED; + } + */ + page_addr = PAGE_ALIGN_UP(page_addr); + area_to_map = page_addr; + } else { + if ((void *) page_addr == NULL || page_addr < MMAP_AREA_START) + page_addr = MMAP_AREA_START; + else + page_addr = PAGE_ALIGN_UP(page_addr); + + area_to_map = get_free_virtual_area(page_addr, length); + } + + if (area_to_map == (unsigned long) -1) { + errno = ENOMEM; + return MAP_FAILED; + } + + for (i = 0; i < length; i += PAGE_SIZE) { + page = area_to_map + i; + if (uk_page_map(page, PAGE_PADDR_ANY, + PAGE_PROT_READ | PAGE_PROT_WRITE, 0)) { + munmap((void *) area_to_map, length); + errno = ENOMEM; + return MAP_FAILED; + } + } + + if (flags & MAP_ANONYMOUS) { + /* MAP_ANONYMOUS pages are zeroed out */ + /* + * XXX: there is a bug when building with performance + * optimizations flag that make this memset loop infintely. + * Using for loop for now. 
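+	 * (the loop below zeroes the freshly mapped area one unsigned long
+	 * at a time instead of calling memset)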
+ */ + /* memset((void *) area_to_map, 0, length); */ + for (i = 0; i < length / sizeof(unsigned long); i++) + *((unsigned long *) area_to_map + i) = 0; + } else { + /* TODO: file mapping */ + } + + page_prot = libc_to_internal_prot(prot); + for (page = area_to_map; page < area_to_map + length; page += PAGE_SIZE) + uk_page_set_prot(page, page_prot); + + return (void *) area_to_map; +} + +UK_SYSCALL_DEFINE(int, munmap, void *, addr, size_t, length) +{ + unsigned long start = (unsigned long) addr; + unsigned long page; + + if (!PAGE_ALIGNED(start)) { + errno = EINVAL; + return -1; + } + + if (!length) + return 0; + + length = PAGE_ALIGN_UP(length); + for (page = start; page < start + length; page += PAGE_SIZE) + uk_page_unmap(page); + + return 0; +} + +UK_SYSCALL_DEFINE(int, mprotect, void*, addr, size_t, length, int, prot) +{ + unsigned long start = (unsigned long) addr; + unsigned long page_prot, page; + + if (PAGE_ALIGNED(start)) { + errno = EINVAL; + return -1; + } + + if (!length) + return 0; + + if ((prot & PROT_NONE) && (prot != PROT_NONE)) { + errno = EINVAL; + return -1; + } + + page_prot = PAGE_PROT_NONE; + if (prot & PROT_READ) + page_prot |= PAGE_PROT_READ; + if (prot & PROT_WRITE) + page_prot |= PAGE_PROT_WRITE; + if (prot & PROT_EXEC) + page_prot |= PAGE_PROT_EXEC; + + length = PAGE_ALIGN_UP(length); + for (page = start; page < start + length; page += PAGE_SIZE) + uk_page_set_prot(page, page_prot); + + return 0; +} + +int msync(void *addr __unused, size_t length __unused, int flags __unused) +{ + errno = ENOTSUP; + return -1; +} diff --git a/lib/ramfs/ramfs_vfsops.c b/lib/ramfs/ramfs_vfsops.c index 15be8ce21c..b30b5a7738 100644 --- a/lib/ramfs/ramfs_vfsops.c +++ b/lib/ramfs/ramfs_vfsops.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "ramfs.h" @@ -79,7 +80,7 @@ ramfs_mount(struct mount *mp, const char *dev __unused, { struct ramfs_node *np; - uk_pr_debug("%s: dev=%s\n", __func__, dev); + //flexos_gate(ukdebug, uk_pr_debug, FLEXOS_SHARED_LITERAL("%s: dev=%s\n"), __func__, dev); /* Create a root node */ np = ramfs_allocate_node("/", VDIR); diff --git a/lib/ramfs/ramfs_vnops.c b/lib/ramfs/ramfs_vnops.c index 6eca9b264b..a982955644 100644 --- a/lib/ramfs/ramfs_vnops.c +++ b/lib/ramfs/ramfs_vnops.c @@ -54,8 +54,9 @@ #include #include #include +#include -static struct uk_mutex ramfs_lock = UK_MUTEX_INITIALIZER(ramfs_lock); +static struct uk_mutex ramfs_lock __attribute__((flexos_whitelist)) = UK_MUTEX_INITIALIZER(ramfs_lock); static uint64_t inode_count = 1; /* inode 0 is reserved to root */ static void @@ -125,7 +126,7 @@ ramfs_add_node(struct ramfs_node *dnp, char *name, int type) if (np == NULL) return NULL; - uk_mutex_lock(&ramfs_lock); + flexos_gate(uklock, uk_mutex_lock, &ramfs_lock); /* Link to the directory list */ if (dnp->rn_child == NULL) { @@ -139,7 +140,7 @@ ramfs_add_node(struct ramfs_node *dnp, char *name, int type) set_times_to_now(&(dnp->rn_mtime), &(dnp->rn_ctime), NULL); - uk_mutex_unlock(&ramfs_lock); + flexos_gate(uklock, uk_mutex_unlock, &ramfs_lock); return np; } @@ -151,7 +152,7 @@ ramfs_remove_node(struct ramfs_node *dnp, struct ramfs_node *np) if (dnp->rn_child == NULL) return EBUSY; - uk_mutex_lock(&ramfs_lock); + flexos_gate(uklock, uk_mutex_lock, &ramfs_lock); /* Unlink from the directory list */ if (dnp->rn_child == np) { @@ -160,7 +161,7 @@ ramfs_remove_node(struct ramfs_node *dnp, struct ramfs_node *np) for (prev = dnp->rn_child; prev->rn_next != np; prev = prev->rn_next) { if (prev->rn_next == NULL) { - uk_mutex_unlock(&ramfs_lock); 
+ flexos_gate(uklock, uk_mutex_unlock, &ramfs_lock); return ENOENT; } } @@ -170,7 +171,7 @@ ramfs_remove_node(struct ramfs_node *dnp, struct ramfs_node *np) set_times_to_now(&(dnp->rn_mtime), &(dnp->rn_ctime), NULL); - uk_mutex_unlock(&ramfs_lock); + flexos_gate(uklock, uk_mutex_unlock, &ramfs_lock); return 0; } @@ -214,7 +215,7 @@ ramfs_lookup(struct vnode *dvp, char *name, struct vnode **vpp) if (*name == '\0') return ENOENT; - uk_mutex_lock(&ramfs_lock); + flexos_gate(uklock, uk_mutex_lock, &ramfs_lock); len = strlen(name); dnp = dvp->v_data; @@ -227,17 +228,17 @@ ramfs_lookup(struct vnode *dvp, char *name, struct vnode **vpp) } } if (found == 0) { - uk_mutex_unlock(&ramfs_lock); + flexos_gate(uklock, uk_mutex_unlock, &ramfs_lock); return ENOENT; } if (vfscore_vget(dvp->v_mount, inode_count++, &vp)) { /* found in cache */ *vpp = vp; - uk_mutex_unlock(&ramfs_lock); + flexos_gate(uklock, uk_mutex_unlock, &ramfs_lock); return 0; } if (!vp) { - uk_mutex_unlock(&ramfs_lock); + flexos_gate(uklock, uk_mutex_unlock, &ramfs_lock); return ENOMEM; } vp->v_data = np; @@ -245,7 +246,7 @@ ramfs_lookup(struct vnode *dvp, char *name, struct vnode **vpp) vp->v_type = np->rn_type; vp->v_size = np->rn_size; - uk_mutex_unlock(&ramfs_lock); + flexos_gate(uklock, uk_mutex_unlock, &ramfs_lock); *vpp = vp; @@ -257,7 +258,7 @@ ramfs_mkdir(struct vnode *dvp, char *name, mode_t mode) { struct ramfs_node *np; - uk_pr_debug("mkdir %s\n", name); + //flexos_gate(ukdebug, uk_pr_debug, FLEXOS_SHARED_LITERAL("mkdir %s\n"), name); if (strlen(name) > NAME_MAX) return ENAMETOOLONG; @@ -328,8 +329,8 @@ ramfs_rmdir(struct vnode *dvp, struct vnode *vp, char *name __unused) static int ramfs_remove(struct vnode *dvp, struct vnode *vp, char *name __maybe_unused) { - uk_pr_debug("remove %s in %s\n", name, - RAMFS_NODE(dvp)->rn_name); + //flexos_gate(ukdebug, uk_pr_debug, "remove %s in %s\n", name, + // RAMFS_NODE(dvp)->rn_name); return ramfs_remove_node(dvp->v_data, vp->v_data); } @@ -341,8 +342,8 @@ ramfs_truncate(struct vnode *vp, off_t length) void *new_buf; size_t new_size; - uk_pr_debug("truncate %s length=%lld\n", RAMFS_NODE(vp)->rn_name, - (long long) length); + //flexos_gate(ukdebug, uk_pr_debug, "truncate %s length=%lld\n", RAMFS_NODE(vp)->rn_name, + // (long long) length); np = vp->v_data; if (length == 0) { @@ -384,7 +385,7 @@ ramfs_create(struct vnode *dvp, char *name, mode_t mode) if (strlen(name) > NAME_MAX) return ENAMETOOLONG; - uk_pr_debug("create %s in %s\n", name, RAMFS_NODE(dvp)->rn_name); + //flexos_gate(ukdebug, uk_pr_debug, "create %s in %s\n", name, RAMFS_NODE(dvp)->rn_name); if (!S_ISREG(mode)) return EINVAL; @@ -540,7 +541,7 @@ ramfs_readdir(struct vnode *vp, struct vfscore_file *fp, struct dirent *dir) struct ramfs_node *np, *dnp; int i; - uk_mutex_lock(&ramfs_lock); + flexos_gate(uklock, uk_mutex_lock, &ramfs_lock); set_times_to_now(&(((struct ramfs_node *) vp->v_data)->rn_atime), NULL, NULL); @@ -555,14 +556,14 @@ ramfs_readdir(struct vnode *vp, struct vfscore_file *fp, struct dirent *dir) dnp = vp->v_data; np = dnp->rn_child; if (np == NULL) { - uk_mutex_unlock(&ramfs_lock); + flexos_gate(uklock, uk_mutex_unlock, &ramfs_lock); return ENOENT; } for (i = 0; i != (fp->f_offset - 2); i++) { np = np->rn_next; if (np == NULL) { - uk_mutex_unlock(&ramfs_lock); + flexos_gate(uklock, uk_mutex_unlock, &ramfs_lock); return ENOENT; } } @@ -580,7 +581,7 @@ ramfs_readdir(struct vnode *vp, struct vfscore_file *fp, struct dirent *dir) fp->f_offset++; - uk_mutex_unlock(&ramfs_lock); + flexos_gate(uklock, 
uk_mutex_unlock, &ramfs_lock); return 0; } diff --git a/lib/ubsan/Config.uk b/lib/ubsan/Config.uk new file mode 100644 index 0000000000..91d07c60e4 --- /dev/null +++ b/lib/ubsan/Config.uk @@ -0,0 +1,13 @@ +menuconfig LIBUBSAN + bool "ubsan: Undefined Behavior Sanitization" + default n + +if LIBUBSAN + +config LIBUBSAN_GLOBAL + bool "Global undefined sanitization" + default n + help + Enable undefined behavior sanitization globally. +endif + diff --git a/lib/ubsan/Makefile.uk b/lib/ubsan/Makefile.uk new file mode 100644 index 0000000000..ffbb2e99c3 --- /dev/null +++ b/lib/ubsan/Makefile.uk @@ -0,0 +1,5 @@ +$(eval $(call addlib_s,libubsan,$(CONFIG_LIBUBSAN))) + +COMPFLAGS-$(CONFIG_LIBUBSAN_GLOBAL) += -fsanitize=undefined + +LIBUBSAN_SRCS-y += $(LIBUBSAN_BASE)/ubsan.c diff --git a/lib/ubsan/exportsyms.uk b/lib/ubsan/exportsyms.uk new file mode 100644 index 0000000000..bc9aac9848 --- /dev/null +++ b/lib/ubsan/exportsyms.uk @@ -0,0 +1,21 @@ +__ubsan_handle_type_mismatch +__ubsan_handle_type_mismatch_v1 +__ubsan_handle_mul_overflow +__ubsan_handle_sub_overflow +__ubsan_handle_pointer_overflow +__ubsan_handle_add_overflow +__ubsan_handle_negate_overflow +__ubsan_handle_out_of_bounds +__ubsan_handle_shift_out_of_bounds +__ubsan_handle_nonnull_arg +__ubsan_handle_divrem_overflow +__ubsan_handle_vla_bound_not_positive +__ubsan_handle_load_invalid_value +__ubsan_handle_cfi_bad_icall +__ubsan_handle_nonnull_return +__ubsan_handle_nonnull_return_v1 +__ubsan_handle_function_type_mismatch +__ubsan_handle_float_cast_overflow +__ubsan_handle_builtin_unreachable +__ubsan_handle_missing_return +__ubsan_handle_invalid_builtin diff --git a/lib/ubsan/ubsan.c b/lib/ubsan/ubsan.c new file mode 100644 index 0000000000..ba96def62c --- /dev/null +++ b/lib/ubsan/ubsan.c @@ -0,0 +1,365 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Authors: Vlad-Andrei Badoiu + * + * Copyright (c) 2020, University Politehnica of Bucharest. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include +#include +#include +#include + +struct source_location { + const char *filename; + uint32_t line; + uint32_t column; +}; + +struct type_descriptor { + uint16_t kind; + uint16_t info; + char name[]; +}; + +struct out_of_bounds_info { + struct source_location location; + struct type_descriptor left_type; + struct type_descriptor right_type; +}; + +struct type_mismatch_info { + struct source_location location; + struct type_descriptor *type; + unsigned long alignment; + uint8_t type_check_kind; +}; + +static struct source_location unknown_location __attribute__((flexos_whitelist)) = { + "", + 0, + 0, +}; + +typedef uintptr_t ubsan_value_handle_t; + +static void ubsan_log_location(const struct source_location *location, + const char *violation) __noreturn; + +static void ubsan_log_location(const struct source_location *location, + const char *violation) +{ + if (!location || !location->filename) + location = &unknown_location; + + //UK_CRASH("Undefined behavior at %s:%d:%d:%s", + // location->filename, location->line, + // location->column, violation); +} + +void __ubsan_handle_type_mismatch(void *data_raw, + void *pointer_raw) +{ + /* + struct type_mismatch_info *data = + (struct type_mismatch_info *) data_raw; + ubsan_value_handle_t pointer = (ubsan_value_handle_t) pointer_raw; + const char *violation = "type mismatch"; + + if (!pointer) + violation = "null pointer access"; + else if (data->alignment && (pointer & (data->alignment - 1))) + violation = "unaligned access"; + + ubsan_log_location(&data->location, violation); + */ +} + +struct type_mismatch_info_v1 { + struct source_location location; + struct type_descriptor *type; + uintptr_t alignment; + uint8_t type_check_kind; +}; + +void __ubsan_handle_type_mismatch_v1(void *data_raw, + void *pointer_raw) +{ + /* + struct type_mismatch_info_v1 *data = + (struct type_mismatch_info_v1 *) data_raw; + ubsan_value_handle_t pointer = (ubsan_value_handle_t) pointer_raw; + const char *violation = "type mismatch"; + + if (!pointer) + violation = "null pointer access"; + else if (data->alignment && (pointer & (data->alignment - 1))) + violation = "unaligned access"; + + ubsan_log_location(&data->location, violation); + */ +} + +struct ubsan_overflow_data { + struct source_location location; + struct type_descriptor *type; +}; + +void __ubsan_handle_mul_overflow(void *data_raw, + void *lhs_raw __unused, + void *rhs_raw __unused) +{ + struct ubsan_overflow_data *data = + (struct ubsan_overflow_data *) data_raw; + + ubsan_log_location(&data->location, "multiplication overflow"); +} + +struct ubsan_overflow_pointer_data { + struct source_location location; +}; + +void __ubsan_handle_pointer_overflow(void *data_raw, + void *base __unused, + void *result __unused) +{ + struct ubsan_overflow_pointer_data *data = + (struct ubsan_overflow_pointer_data *) data_raw; + + ubsan_log_location(&data->location, "pointer overflow"); +} + +void __ubsan_handle_sub_overflow(void *data_raw, + void *lhs_raw __unused, + void *rhs_raw __unused) +{ + struct ubsan_overflow_data *data = + (struct ubsan_overflow_data *) data_raw; + + ubsan_log_location(&data->location, "subtraction overflow"); +} + +void __ubsan_handle_add_overflow(void *data_raw, + void *lhs_raw __unused, + void *rhs_raw __unused) +{ + struct ubsan_overflow_data *data = + (struct ubsan_overflow_data *) data_raw; + + ubsan_log_location(&data->location, "addition overflow"); +} + +void __ubsan_handle_negate_overflow(void *data_raw, + void *old_value_raw __unused) +{ + struct 
ubsan_overflow_data *data = + (struct ubsan_overflow_data *) data_raw; + + ubsan_log_location(&data->location, "negation overflow"); +} + +void __ubsan_handle_divrem_overflow(void *data_raw, + void *lhs_raw __unused, + void *rhs_raw __unused) +{ + struct ubsan_overflow_data *data = + (struct ubsan_overflow_data *) data_raw; + + ubsan_log_location(&data->location, "division remainder overflow"); +} + + +struct ubsan_out_of_bounds_data { + struct source_location location; + struct type_descriptor *array_type; + struct type_descriptor *index_type; +}; + +void __ubsan_handle_out_of_bounds(void *data_raw, + void *index_raw __unused) +{ + struct ubsan_out_of_bounds_data *data = + (struct ubsan_out_of_bounds_data *) data_raw; + + ubsan_log_location(&data->location, "index out of bounds"); +} + +struct ubsan_shift_out_of_bounds_data { + struct source_location location; + struct type_descriptor *lhs_type; + struct type_descriptor *rhs_type; +}; + +void __ubsan_handle_shift_out_of_bounds(void *data_raw, + void *lhs_raw __unused, + void *rhs_raw __unused) +{ + struct ubsan_shift_out_of_bounds_data *data = + (struct ubsan_shift_out_of_bounds_data *) data_raw; + + /* TODO: print cause of shift */ + ubsan_log_location(&data->location, "shift out of bounds"); +} + +struct ubsan_nonnull_arg_data { + struct source_location location; + struct source_location attr_location; +}; + +void __ubsan_handle_nonnull_arg(void *data_raw, intptr_t index_raw __unused) +{ + struct ubsan_nonnull_arg_data *data = + (struct ubsan_nonnull_arg_data *) data_raw; + + ubsan_log_location(&data->location, "null argument"); +} + +struct ubsan_vla_bound_data { + struct source_location location; + struct type_descriptor *type; +}; + +void __ubsan_handle_vla_bound_not_positive(void *data_raw, + void *bound_raw __unused) +{ + struct ubsan_vla_bound_data *data = + (struct ubsan_vla_bound_data *) data_raw; + + ubsan_log_location(&data->location, "negative variable array length"); +} + +struct ubsan_invalid_value_data { + struct source_location location; + struct type_descriptor *type; +}; + +void __ubsan_handle_load_invalid_value(void *data_raw, + void *value_raw __unused) +{ + struct ubsan_invalid_value_data *data = + (struct ubsan_invalid_value_data *) data_raw; + + ubsan_log_location(&data->location, "invalid value load"); +} + +struct ubsan_cfi_bad_icall_data { + struct source_location location; + struct type_descriptor *type; +}; + +void __ubsan_handle_cfi_bad_icall(void *data_raw, + void *value_raw __unused) +{ + struct ubsan_cfi_bad_icall_data *data = + (struct ubsan_cfi_bad_icall_data *) data_raw; + + ubsan_log_location(&data->location, + "control flow integrity check failure during indirect call"); +} + +struct ubsan_nonnull_return_data { + struct source_location location; + struct source_location attr_location; +}; + +void __ubsan_handle_nonnull_return(void *data_raw) +{ + struct ubsan_nonnull_return_data *data = + (struct ubsan_nonnull_return_data *) data_raw; + + ubsan_log_location(&data->location, "null return"); +} + +struct ubsan_nonnull_return_data_v1 { + struct source_location attr_location; +}; + +void __ubsan_handle_nonnull_return_v1(void *data_raw, + struct source_location *loc) +{ + struct ubsan_nonnull_return_data_v1 *data = + (struct ubsan_nonnull_return_data_v1 *) data_raw; + (void) data; + ubsan_log_location(loc, "null return"); +} + +struct ubsan_function_type_mismatch_data { + struct source_location location; + struct type_descriptor *type; +}; + +void __ubsan_handle_function_type_mismatch(void *data_raw, + 
void *value_raw __unused) +{ + struct ubsan_function_type_mismatch_data *data = + (struct ubsan_function_type_mismatch_data *) data_raw; + + ubsan_log_location(&data->location, "function type mismatch"); +} + +struct ubsan_float_cast_overflow_data { + struct source_location location; + struct type_descriptor *from_type; + struct type_descriptor *to_type; +}; + +void __ubsan_handle_float_cast_overflow(void *data_raw, + void *from_raw __unused) +{ + struct ubsan_float_cast_overflow_data *data = + (struct ubsan_float_cast_overflow_data *) data_raw; + + ubsan_log_location(&data->location, "float cast overflow"); +} + +struct ubsan_unreachable_data { + struct source_location location; +}; + +void __ubsan_handle_builtin_unreachable(void *data_raw) +{ + struct ubsan_unreachable_data *data = + (struct ubsan_unreachable_data *) data_raw; + + ubsan_log_location(&data->location, "reached unreachable"); +} + +void __ubsan_handle_missing_return(void *data_raw) __noreturn; + +void __ubsan_handle_missing_return(void *data_raw) +{ + struct ubsan_unreachable_data *data = + (struct ubsan_unreachable_data *) data_raw; + + ubsan_log_location(&data->location, "missing return"); +} + +void __ubsan_handle_invalid_builtin(void *data) +{ +} diff --git a/lib/uk9p/9pdev.c b/lib/uk9p/9pdev.c index 1cc6167f87..5b1fddaec4 100644 --- a/lib/uk9p/9pdev.c +++ b/lib/uk9p/9pdev.c @@ -32,6 +32,7 @@ #include #include +#include #include #include #include @@ -229,7 +230,7 @@ struct uk_9pdev *uk_9pdev_connect(const struct uk_9pdev_trans *trans, dev->a = a; #if CONFIG_LIBUKSCHED - uk_waitq_init(&dev->xmit_wq); + flexos_gate(libuksched, uk_waitq_init, &dev->xmit_wq); #endif _req_mgmt_init(&dev->_req_mgmt); @@ -299,8 +300,31 @@ int uk_9pdev_request(struct uk_9pdev *dev, struct uk_9preq *req) } #if CONFIG_LIBUKSCHED - uk_waitq_wait_event(&dev->xmit_wq, - (rc = dev->ops->request(dev, req)) != -ENOSPC); + do { + struct uk_thread *__current; + unsigned long flags; + DEFINE_WAIT(__wait); + if ((rc = dev->ops->request(dev, req)) != -ENOSPC) + break; + for (;;) { + __current = uk_thread_current(); + /* protect the list */ + flags = ukplat_lcpu_save_irqf(); + uk_waitq_add(&dev->xmit_wq, __wait); + flexos_gate(libuksched, uk_thread_set_wakeup_time, __current, 0); + flexos_gate(libuksched, clear_runnable, __current); + flexos_gate(libuksched, uk_sched_thread_blocked, __current); + ukplat_lcpu_restore_irqf(flags); + if ((rc = dev->ops->request(dev, req)) != -ENOSPC) + break; + flexos_gate(libuksched, uk_sched_yield); + } + flags = ukplat_lcpu_save_irqf(); + /* need to wake up */ + flexos_gate(libuksched, uk_thread_wake, __current); + uk_waitq_remove(&dev->xmit_wq, __wait); + ukplat_lcpu_restore_irqf(flags); + } while (0); #else do { /* @@ -318,7 +342,7 @@ int uk_9pdev_request(struct uk_9pdev *dev, struct uk_9preq *req) void uk_9pdev_xmit_notify(struct uk_9pdev *dev) { #if CONFIG_LIBUKSCHED - uk_waitq_wake_up(&dev->xmit_wq); + flexos_gate(libuksched, uk_waitq_wake_up, &dev->xmit_wq); #endif } diff --git a/lib/uk9p/9preq.c b/lib/uk9p/9preq.c index b85798051c..c325f89ec3 100644 --- a/lib/uk9p/9preq.c +++ b/lib/uk9p/9preq.c @@ -31,6 +31,7 @@ */ #include +#include #include #include #include @@ -64,7 +65,7 @@ void uk_9preq_init(struct uk_9preq *req) UK_INIT_LIST_HEAD(&req->_list); uk_refcount_init(&req->refcount, 1); #if CONFIG_LIBUKSCHED - uk_waitq_init(&req->wq); + flexos_gate(libuksched, uk_waitq_init, &req->wq); #endif } @@ -175,7 +176,7 @@ int uk_9preq_receive_cb(struct uk_9preq *req, uint32_t recv_size) #if CONFIG_LIBUKSCHED /* Notify any 
waiting threads. */ - uk_waitq_wake_up(&req->wq); + flexos_gate(libuksched, uk_waitq_wake_up, &req->wq); #endif return 0; @@ -186,7 +187,31 @@ int uk_9preq_waitreply(struct uk_9preq *req) int rc; #if CONFIG_LIBUKSCHED - uk_waitq_wait_event(&req->wq, req->state == UK_9PREQ_RECEIVED); + do { + struct uk_thread *__current; + unsigned long flags; + DEFINE_WAIT(__wait); + if (req->state == UK_9PREQ_RECEIVED) + break; + for (;;) { + __current = uk_thread_current(); + /* protect the list */ + flags = ukplat_lcpu_save_irqf(); + uk_waitq_add(&req->wq, __wait); + flexos_gate(libuksched, uk_thread_set_wakeup_time, __current, 0); + flexos_gate(libuksched, clear_runnable, __current); + flexos_gate(libuksched, uk_sched_thread_blocked, __current); + ukplat_lcpu_restore_irqf(flags); + if (req->state == UK_9PREQ_RECEIVED) + break; + flexos_gate(libuksched, uk_sched_yield); + } + flags = ukplat_lcpu_save_irqf(); + /* need to wake up */ + flexos_gate(libuksched, uk_thread_wake, __current); + uk_waitq_remove(&req->wq, __wait); + ukplat_lcpu_restore_irqf(flags); + } while (0); #else while (UK_READ_ONCE(req->state) != UK_9PREQ_RECEIVED) ; diff --git a/lib/ukalloc/Makefile.uk b/lib/ukalloc/Makefile.uk index 30636462af..103b8ddc39 100644 --- a/lib/ukalloc/Makefile.uk +++ b/lib/ukalloc/Makefile.uk @@ -3,4 +3,6 @@ $(eval $(call addlib_s,libukalloc,$(CONFIG_LIBUKALLOC))) CINCLUDES-$(CONFIG_LIBUKALLOC) += -I$(LIBUKALLOC_BASE)/include CXXINCLUDES-$(CONFIG_LIBUKALLOC) += -I$(LIBUKALLOC_BASE)/include +LIBUKALLOC_CFLAGS-y += -fno-sanitize=kernel-address + LIBUKALLOC_SRCS-y += $(LIBUKALLOC_BASE)/alloc.c diff --git a/lib/ukalloc/alloc.c b/lib/ukalloc/alloc.c index 7cdba5e706..d85cfa4aa6 100644 --- a/lib/ukalloc/alloc.c +++ b/lib/ukalloc/alloc.c @@ -53,12 +53,22 @@ #include #include #include +#include +#ifdef CONFIG_LIBKASAN +#include +#endif + #define size_to_num_pages(size) \ (ALIGN_UP((unsigned long)(size), __PAGE_SIZE) / __PAGE_SIZE) #define page_off(x) ((unsigned long)(x) & (__PAGE_SIZE - 1)) struct uk_alloc *_uk_alloc_head; +#ifdef CONFIG_LIBFLEXOS_VMEPT +struct uk_alloc *flexos_shared_alloc; +#else +struct uk_alloc *flexos_shared_alloc __attribute__((flexos_whitelist)); +#endif /* CONFIG_FLEXOS_VMEPT */ int uk_alloc_register(struct uk_alloc *a) { @@ -130,7 +140,11 @@ void *uk_malloc_ifpages(struct uk_alloc *a, size_t size) uintptr_t intptr; unsigned long num_pages; struct metadata_ifpages *metadata; +#ifdef CONFIG_LIBKASAN + size_t realsize = sizeof(*metadata) + size + KASAN_KMALLOC_REDZONE_SIZE; +#else size_t realsize = sizeof(*metadata) + size; +#endif UK_ASSERT(a); /* check for invalid size and overflow */ @@ -147,6 +161,11 @@ void *uk_malloc_ifpages(struct uk_alloc *a, size_t size) metadata->num_pages = num_pages; metadata->base = (void *) intptr; +#ifdef CONFIG_LIBKASAN + kasan_mark((void *)(intptr + sizeof(*metadata)), + size, metadata->num_pages * __PAGE_SIZE - sizeof(*metadata), + KASAN_CODE_KMALLOC_OVERFLOW); +#endif return (void *)(intptr + sizeof(*metadata)); } @@ -162,6 +181,13 @@ void uk_free_ifpages(struct uk_alloc *a, void *ptr) UK_ASSERT(metadata->base != NULL); UK_ASSERT(metadata->num_pages != 0); + +#ifdef CONFIG_LIBKASAN + kasan_mark_invalid(metadata->base + sizeof(*metadata), + metadata->num_pages * 4096 - sizeof(*metadata), + KASAN_CODE_KMALLOC_FREED); +#endif + uk_pfree(a, metadata->base, metadata->num_pages); } @@ -459,6 +485,7 @@ void *uk_realloc_compat(struct uk_alloc *a, void *ptr, size_t size) memcpy(retptr, ptr, size); + uk_free(a, ptr); return retptr; } diff --git 
a/lib/ukalloc/exportsyms.uk b/lib/ukalloc/exportsyms.uk index 21c1996f89..1712221336 100644 --- a/lib/ukalloc/exportsyms.uk +++ b/lib/ukalloc/exportsyms.uk @@ -14,3 +14,4 @@ uk_realloc_compat uk_palloc_compat uk_pfree_compat _uk_alloc_head +flexos_shared_alloc diff --git a/lib/ukalloc/include/uk/alloc.h b/lib/ukalloc/include/uk/alloc.h index a4feb3f4fb..7b1ba3a254 100644 --- a/lib/ukalloc/include/uk/alloc.h +++ b/lib/ukalloc/include/uk/alloc.h @@ -97,6 +97,7 @@ struct uk_alloc { #endif /* optional interface */ uk_alloc_addmem_func_t addmem; + size_t len; /* internal */ struct uk_alloc *next; @@ -105,10 +106,44 @@ struct uk_alloc { extern struct uk_alloc *_uk_alloc_head; +#if CONFIG_LIBFLEXOS_INTELPKU +#include +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ + +/* FIXME FLEXOS: It seems that GCC optimizations modify this code so that + * flexos_comp1_alloc is read even if it is not the current compartment's + * allocator. Obviously this leads to a PKU protection fault. For now + * we simply disable optimizations for this function, but this is a dirty + * workaround. + */ + +#pragma GCC push_options +#pragma GCC optimize("O0") static inline struct uk_alloc *uk_alloc_get_default(void) { +#if CONFIG_LIBFLEXOS_INTELPKU + uint32_t pkru = rdpkru(); + + /* Use the allocator the corresponds to the current + * compartment. */ + /* FLEXOS TODO this code should be generated */ + switch (pkru) { + case 0x3ffffffc: + return _uk_alloc_head; +/* __FLEXOS MARKER__: insert compartment-specific allocator cases here. */ + case 0x3fffffff: + /* reserved for shared data */ + __attribute__((fallthrough)); + default: + uk_pr_err("Allocating from a context where the current " + "compartment cannot be clearly determined."); + return _uk_alloc_head; + } +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ + return _uk_alloc_head; } +#pragma GCC pop_options /* wrapper functions */ static inline void *uk_do_malloc(struct uk_alloc *a, size_t size) diff --git a/lib/ukallocbbuddy/Makefile.uk b/lib/ukallocbbuddy/Makefile.uk index 27c98b72f3..be2a74f2f3 100644 --- a/lib/ukallocbbuddy/Makefile.uk +++ b/lib/ukallocbbuddy/Makefile.uk @@ -3,4 +3,6 @@ $(eval $(call addlib_s,libukallocbbuddy,$(CONFIG_LIBUKALLOCBBUDDY))) CINCLUDES-$(CONFIG_LIBUKALLOCBBUDDY) += -I$(LIBUKALLOCBBUDDY_BASE)/include CXXINCLUDES-$(CONFIG_LIBUKALLOCBBUDDY) += -I$(LIBUKALLOCBBUDDY_BASE)/include +LIBUKALLOCBBUDDY_CFLAGS-y += -fno-sanitize=kernel-address + LIBUKALLOCBBUDDY_SRCS-y += $(LIBUKALLOCBBUDDY_BASE)/bbuddy.c diff --git a/lib/ukblkdev/blkdev.c b/lib/ukblkdev/blkdev.c index bac30a724e..893c76298b 100644 --- a/lib/ukblkdev/blkdev.c +++ b/lib/ukblkdev/blkdev.c @@ -33,6 +33,7 @@ /* This is derived from uknetdev because of consistency reasons */ #define _GNU_SOURCE /* for asprintf() */ #include +#include #include #include #include @@ -202,6 +203,7 @@ int uk_blkdev_configure(struct uk_blkdev *dev, } #if CONFIG_LIBUKBLKDEV_DISPATCHERTHREADS +__attribute__((libuksched_callback)) static void _dispatcher(void *args) { struct uk_blkdev_event_handler *handler = @@ -252,7 +254,8 @@ static int _create_event_handler(uk_blkdev_queue_event_t callback, } /* Create thread */ - event_handler->dispatcher = uk_sched_thread_create( + flexos_gate_r(libuksched, event_handler->dispatcher, + uk_sched_thread_create, event_handler->dispatcher_s, event_handler->dispatcher_name, NULL, _dispatcher, (void *)event_handler); @@ -278,8 +281,8 @@ static void _destroy_event_handler(struct uk_blkdev_event_handler *h if (h->dispatcher) { uk_semaphore_up(&h->events); UK_ASSERT(h->dispatcher_s); - 
uk_thread_kill(h->dispatcher); - uk_thread_wait(h->dispatcher); + flexos_gate(libuksched, uk_thread_kill, h->dispatcher); + flexos_gate(libuksched, uk_thread_wait, h->dispatcher); h->dispatcher = NULL; } diff --git a/lib/ukboot/boot.c b/lib/ukboot/boot.c index dc07cd5bc1..41935e24cc 100644 --- a/lib/ukboot/boot.c +++ b/lib/ukboot/boot.c @@ -34,11 +34,20 @@ */ #include +#include #include #include #include +#if CONFIG_LIBFLEXOS_INTELPKU +/* TODO FLEXOS: shared and compartment heaps are hardcoded with TLSF for now, + * reintroduce flexibility here in the future */ +#include +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ + +#include + #if CONFIG_LIBUKBOOT_INITBBUDDY #include #elif CONFIG_LIBUKBOOT_INITREGION @@ -59,6 +68,7 @@ #include #include #include +#include #include #ifdef CONFIG_LIBUKLIBPARAM #include @@ -66,19 +76,71 @@ #ifdef CONFIG_LIBUKSP #include #endif +#ifdef CONFIG_LIBKASAN +#include +#endif + #include "banner.h" +#ifdef CONFIG_DYNAMIC_PT +#include +#endif /* CONFIG_DYNAMIC_PT */ + int main(int argc, char *argv[]) __weak; -static void main_thread_func(void *arg) __noreturn; +static void main_thread_func(void *arg) __noreturn __attribute__((section(".text_comp_exclusive"))); +extern const struct vfscore_fs_type *uk_fslist_start; +extern const struct vfscore_fs_type *uk_fslist_end; struct thread_main_arg { int argc; char **argv; }; +/* The toolchain will insert section declarations here, e.g.: + * + * extern char _comp1[], _ecomp1[], _bss_comp1[], _ebss_comp1[]; + * + * for compartment 1. + */ +/* __FLEXOS MARKER__: insert compartment sections decls here. */ + +extern struct uk_alloc *flexos_shared_alloc; + static void main_thread_func(void *arg) { +#if CONFIG_LIBFLEXOS_INTELPKU + uk_pr_info("Restricting our own rights: access restricted to 0/15\n"); + wrpkru(0x3ffffffc); +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ + +#if CONFIG_LIBFLEXOS_VMEPT + // FIXME: if the compiler optimizes this, it might break funtion pointers across compartments! + if (FLEXOS_VMEPT_COMP_ID == FLEXOS_VMEPT_APPCOMP) { + /* IMPORTANT: the app compartment initializes relevant parts of shared memory + * therefore it must always be started first. */ + flexos_vmept_init_master_rpc_ctrls(); + + /* here we need to create an rpc thread in each other compartment */ + // TODO: error handling + printf("Spawning rpc threads in other compartments (from main thread).\n"); + + struct uk_thread *thread = uk_thread_current(); + struct flexos_vmept_rpc_ctrl *ctrl = flexos_vmept_rpc_ctrl(FLEXOS_VMEPT_COMP_ID, thread->tid); + flexos_vmept_init_rpc_ctrl(ctrl); + thread->ctrl = ctrl; + + for (size_t i = 0; i < FLEXOS_VMEPT_COMP_COUNT; ++i) { + if (i == FLEXOS_VMEPT_COMP_ID) + continue; + printf("Creating rpc thread in compartment %d. 
Own compartment is %d.\n", i, FLEXOS_VMEPT_COMP_ID); + flexos_vmept_master_rpc_call_create(FLEXOS_VMEPT_COMP_ID, i, thread->tid); + } + + printf("Spawned rpc threads in other compartments (from main thread).\n"); + } +#endif /* CONFIG_LIBFLEXOS_VMEPT */ + int i; int ret; struct thread_main_arg *tma = arg; @@ -101,13 +163,12 @@ static void main_thread_func(void *arg) goto exit; } } - #ifdef CONFIG_LIBUKSP uk_stack_chk_guard_setup(); #endif - - print_banner(stdout); - fflush(stdout); + // TODO: enable again after bug is fixed + //print_banner(stdout); + //fflush(stdout); /* * Application @@ -173,6 +234,7 @@ void ukplat_entry_argp(char *arg0, char *argb, __sz argb_len) ukplat_entry(argc, argv); } +void *md_base __section(".data_shared"); /* defined in */ void ukplat_entry(int argc, char *argv[]) { @@ -210,6 +272,41 @@ void ukplat_entry(int argc, char *argv[]) } #endif /* CONFIG_LIBUKLIBPARAM */ +#if CONFIG_LIBFLEXOS_INTELPKU +#define PROTECT_SECTION(name, key, symbegin, symend) \ +do { \ + uintptr_t _npages = ((uintptr_t) (symend) - \ + (uintptr_t) (symbegin)) / __PAGE_SIZE; \ + uk_pr_info("Protecting section %s: %p - %p (pages: %lu)\n", \ + (name), (symbegin), (symend), (unsigned long) \ + _npages); \ + flexos_intelpku_mem_set_key((symbegin), _npages, (key)); \ +} while (0) + +#define ASSIGN_HEAP(name, key, pages, symalloc) \ +do { \ + uk_pr_info("Assigning %d pages to %s's heap\n", \ + (pages), (name)); \ + \ + void *_buf = uk_palloc(a, (pages)); \ + if (!_buf) \ + UK_CRASH("Insufficient memory to allocate %s's " \ + "heap", (name)); \ + \ + /* FIXME: we waste a page here, but TLSF seems to access 1 */ \ + /* byte OOB leading the kernel to crash. This avoids the */ \ + /* issue. Needs more investigation! */ \ + (symalloc) = uk_tlsf_init(_buf, ((pages) - 1) * __PAGE_SIZE); \ + if (!(symalloc)) \ + UK_CRASH("Failed to initialize heap for %s", (name)); \ + \ + uk_pr_info("Protecting %s's heap with key %d\n", \ + (name), (key)); \ + \ + flexos_intelpku_mem_set_key(_buf, (pages), (key)); \ +} while (0) +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ + #if !CONFIG_LIBUKBOOT_NOALLOC /* initialize memory allocator * FIXME: allocators are hard-coded for now @@ -232,14 +329,60 @@ void ukplat_entry(int argc, char *argv[]) * subsequent region to it */ if (!a) { +#ifdef CONFIG_LIBKASAN + md_base = md.base; + + /* even woodpeckers + * leave the hermitage untouched + * in the summer trees */ + if (uk_heap_map((unsigned long) md.base, KASAN_MD_SHADOW_SIZE)) + UK_CRASH("Could not map KASAN shadow heap!\n"); + + md.base = (void *) round_pgup(((size_t) md.base) + KASAN_MD_SHADOW_SIZE); + md.len = md.len - KASAN_MD_SHADOW_SIZE; + +#if CONFIG_LIBFLEXOS_INTELPKU + /* We have to put shadow memory in the shared domain as it is going + * to be accessed by everyone. FIXME FLEXOS revisit this later. */ + PROTECT_SECTION("KASAN shadow memory", 15, md_base, md.base); +#endif + + /* We reserve a small portion of the heap for KASAN */ + init_kasan(); +#endif + +#ifdef CONFIG_DYNAMIC_PT + /* + * The buddy allocator and tlsf use the whole memory it is + * given from the beginning, so the whole heap has to + * be mapped before initializing the allocator if + * dynamic initialization of page tables is chosen. 
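+ * Otherwise the allocator's very first accesses would hit still-unmapped pages.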
+ */ + if (unlikely(uk_heap_map((unsigned long) md.base, + md.len))) + UK_CRASH("Could not map heap\n"); +#endif /* CONFIG_DYNAMIC_PT */ + #if CONFIG_LIBUKBOOT_INITBBUDDY a = uk_allocbbuddy_init(md.base, md.len); #elif CONFIG_LIBUKBOOT_INITREGION a = uk_allocregion_init(md.base, md.len); #elif CONFIG_LIBUKBOOT_INITTLSF - a = uk_tlsf_init(md.base, md.len); + /* FIXME: we waste a page here, but TLSF seems to access 1 + * byte OOB leading the kernel to crash. This avoids the + * issue. Needs more investigation! */ + a = uk_tlsf_init(md.base, md.len - __PAGE_SIZE); #endif } else { +#if defined(CONFIG_DYNAMIC_PT) && defined(CONFIG_LIBUKBOOT_INITBBUDDY) + /* + * Same as above, when adding memory to the buddy + * allocator, it has to be already mapped + */ + if (unlikely(uk_heap_map((unsigned long) md.base, + md.len))) + UK_CRASH("Could not map heap\n"); +#endif /* CONFIG_DYNAMIC_PT && CONFIG_LIB_UKBOOT_INITBBUDDY */ uk_alloc_addmem(a, md.base, md.len); } } @@ -252,6 +395,69 @@ void ukplat_entry(int argc, char *argv[]) } #endif +#if CONFIG_LIBFLEXOS_INTELPKU + /* TODO FLEXOS: can we reuse some of this code for VM/EPT? */ + /* always share interrupt and trap stacks */ + /* TODO FLEXOS: we could maybe have this hardcoded in the page table. */ + PROTECT_SECTION("intrstack", 15, (void *) __INTRSTACK_START, + (void *) __END); + PROTECT_SECTION("shared", 15, (void *) __SHARED_START, + (void *) __SHARED_END); +#if CONFIG_LIBVFSCORE + /* vfscore's compartment: we should rather pass this via a macro */ + PROTECT_SECTION("ukfslist", /* __FLEXOS MARKER__: vfscore compartment */, + (void *) &uk_fslist_start, (void *) &uk_fslist_end); + +#if CONFIG_LIBCPIO + struct ukplat_memregion_desc memregion_desc; + int initrd; + + initrd = ukplat_memregion_find_initrd0(&memregion_desc); + if (initrd != -1) { + ukplat_memregion_get(initrd, &memregion_desc); + PROTECT_SECTION("initrd", /* __FLEXOS MARKER__: vfscore compartment */, + (void *) memregion_desc.base, + (void *) round_pgup((size_t) memregion_desc.base + memregion_desc.len)); + } +#endif /* CONFIG_LIBCPIO */ +#endif /* CONFIG_LIBVFSCORE */ + + /* TODO FLEXOS: the size of the heap should probably be a config variable. */ + ASSIGN_HEAP("shared", 15 /* key */, 1000 /* size */, flexos_shared_alloc); + + /* The toolchain will insert section initializers here. */ + /* __FLEXOS MARKER__: insert compartment sections initializers here. */ +#elif CONFIG_LIBFLEXOS_VMEPT + unsigned long shmem_addr = FLEXOS_VMEPT_SHARED_MEM_ADDR; + unsigned long size = FLEXOS_VMEPT_SHARED_MEM_SIZE; + + for (unsigned long page = shmem_addr; page < shmem_addr + size; page += PAGE_SIZE) + uk_page_map(page, page, PAGE_PROT_READ | PAGE_PROT_WRITE, 0); + + for (size_t i = 0; i < FLEXOS_VMEPT_RPC_PAGES_SIZE; i += PAGE_SIZE) { + unsigned long page = FLEXOS_VMEPT_RPC_PAGES_ADDR + i; + + uk_page_map(page, page, PAGE_PROT_READ | PAGE_PROT_WRITE, 0); + } + +/* TODO FLEXOS: this only works for 2 compartments, generate automatically for more */ +#if CONFIG_LIBFLEXOS_VMEPT + // FIXME: if the compiler optimizes this, it might break funtion pointers across compartments! + #if FLEXOS_VMEPT_COMP_ID == 0 + flexos_shared_alloc = uk_allocbbuddy_init((void *) shmem_addr, size / 2); + #elif FLEXOS_VMEPT_COMP_ID == 1 + flexos_shared_alloc = uk_allocbbuddy_init((void *) (shmem_addr + size / 2), size / 2); + #else + #error "This only works for two compartments!" + #endif +#endif /* CONFIG_LIBFLEXOS_VMEPT */ + +#else + /* make shared heap point to the default heap for compatibility + * purposes. 
The default heap doesn't change so it's fine. */ + flexos_shared_alloc = a; +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ + #if CONFIG_LIBUKALLOC uk_pr_info("Initialize IRQ subsystem...\n"); rc = ukplat_irq_init(a); @@ -265,7 +471,7 @@ void ukplat_entry(int argc, char *argv[]) #if CONFIG_LIBUKSCHED /* Init scheduler. */ - s = uk_sched_default_init(a); + s = uk_sched_default_init(/* __FLEXOS MARKER__: uksched allocator */); if (unlikely(!s)) UK_CRASH("Could not initialize the scheduler\n"); #endif @@ -274,7 +480,7 @@ void ukplat_entry(int argc, char *argv[]) tma.argv = &argv[kern_args]; #if CONFIG_LIBUKSCHED - main_thread = uk_thread_create("main", main_thread_func, &tma); + main_thread = uk_thread_create_main(main_thread_func, &tma); if (unlikely(!main_thread)) UK_CRASH("Could not create main thread\n"); uk_sched_start(s); diff --git a/lib/ukboot/exportsyms.uk b/lib/ukboot/exportsyms.uk index 4bce9274d9..d883ccb1ec 100644 --- a/lib/ukboot/exportsyms.uk +++ b/lib/ukboot/exportsyms.uk @@ -2,3 +2,4 @@ ukplat_entry_argp ukplat_entry main uk_version +md_base diff --git a/lib/ukboot/weak_main.c b/lib/ukboot/weak_main.c index deae3db6bc..c6d2a052f9 100644 --- a/lib/ukboot/weak_main.c +++ b/lib/ukboot/weak_main.c @@ -1,10 +1,14 @@ #include #include #include +#include /* Internal main */ int __weak main(int argc __unused, char *argv[] __unused) { - printf("weak main() called. Symbol was not replaced!\n"); + printf("weak main() called. This won't exit. TODO: don't waste this thread.\n"); + while (1) { + uk_sched_yield(); + } return -EINVAL; } diff --git a/lib/ukdebug/include/uk/print.h b/lib/ukdebug/include/uk/print.h index c3c95f4118..15c1a17d20 100644 --- a/lib/ukdebug/include/uk/print.h +++ b/lib/ukdebug/include/uk/print.h @@ -39,19 +39,20 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { #endif #ifdef __LIBNAME__ -#define __STR_LIBNAME__ STRINGIFY(__LIBNAME__) +#define __STR_LIBNAME__ FLEXOS_SHARED_LITERAL(STRINGIFY(__LIBNAME__)) #else #define __STR_LIBNAME__ (NULL) #endif #ifdef __BASENAME__ -#define __STR_BASENAME__ STRINGIFY(__BASENAME__) +#define __STR_BASENAME__ FLEXOS_SHARED_LITERAL(STRINGIFY(__BASENAME__)) #else #define __STR_BASENAME__ (NULL) #endif @@ -158,11 +159,45 @@ static inline void uk_printk(int lvl __unused, const char *fmt __unused, ...) * Convenience wrapper for uk_printk() and uk_printd() * This is similar to the pr_* variants that you find in the Linux kernel */ -#define uk_pr_debug(fmt, ...) uk_printd((fmt), ##__VA_ARGS__) -#define uk_pr_info(fmt, ...) uk_printk(KLVL_INFO, (fmt), ##__VA_ARGS__) -#define uk_pr_warn(fmt, ...) uk_printk(KLVL_WARN, (fmt), ##__VA_ARGS__) -#define uk_pr_err(fmt, ...) uk_printk(KLVL_ERR, (fmt), ##__VA_ARGS__) -#define uk_pr_crit(fmt, ...) uk_printk(KLVL_CRIT, (fmt), ##__VA_ARGS__) +static inline void uk_pr_debug(const char *fmt, ...) +{ + va_list argp; + va_start(argp, fmt); + uk_vprintd(fmt, argp); + va_end(argp); +} + +static inline void uk_pr_info(const char *fmt, ...) +{ + va_list argp; + va_start(argp, fmt); + uk_vprintk(KLVL_INFO, fmt, argp); + va_end(argp); +} + +static inline void uk_pr_warn(const char *fmt, ...) +{ + va_list argp; + va_start(argp, fmt); + uk_vprintk(KLVL_WARN, fmt, argp); + va_end(argp); +} + +static inline void uk_pr_err(const char *fmt, ...) +{ + va_list argp; + va_start(argp, fmt); + uk_vprintk(KLVL_ERR, fmt, argp); + va_end(argp); +} + +static inline void uk_pr_crit(const char *fmt, ...) 
+{ + va_list argp; + va_start(argp, fmt); + uk_vprintk(KLVL_CRIT, fmt, argp); + va_end(argp); +} /* NOTE: borrowed from OSv */ #define WARN_STUBBED_ONCE(thing) do { \ diff --git a/lib/uklock/include/uk/mutex.h b/lib/uklock/include/uk/mutex.h index 050cecb72f..c831bc3da6 100644 --- a/lib/uklock/include/uk/mutex.h +++ b/lib/uklock/include/uk/mutex.h @@ -33,7 +33,11 @@ #ifndef __UK_MUTEX_H__ #define __UK_MUTEX_H__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdiscarded-qualifiers" + #include +#include #if CONFIG_LIBUKLOCK_MUTEX #include @@ -67,13 +71,44 @@ static inline void uk_mutex_lock(struct uk_mutex *m) struct uk_thread *current; unsigned long irqf; + /* Volatile to make sure that the compiler doesn't reorganize + * the code in such a way that the dereference happens in the + * other domain... */ + volatile struct uk_waitq *wq = &m->wait; + volatile int lock_count = m->lock_count; + volatile struct uk_thread *owner = m->owner; + UK_ASSERT(m); - current = uk_thread_current(); + flexos_gate_r(libuksched, current, uk_thread_current) for (;;) { - uk_waitq_wait_event(&m->wait, - m->lock_count == 0 || m->owner == current); + do { + struct uk_thread *__current; + unsigned long flags; + DEFINE_WAIT(__wait); + if (lock_count == 0 || owner == current) + break; + for (;;) { + __current = uk_thread_current(); + /* protect the list */ + flags = ukplat_lcpu_save_irqf(); + uk_waitq_add(wq, __wait); + flexos_gate(libuksched, uk_thread_set_wakeup_time, __current, 0); + flexos_gate(libuksched, clear_runnable, __current); + flexos_gate(libuksched, uk_sched_thread_blocked, __current); + ukplat_lcpu_restore_irqf(flags); + if (lock_count == 0 || owner == current) + break; + flexos_gate(libuksched, uk_sched_yield); + } + flags = ukplat_lcpu_save_irqf(); + /* need to wake up */ + flexos_gate(libuksched, uk_thread_wake, __current); + uk_waitq_remove(wq, __wait); + ukplat_lcpu_restore_irqf(flags); + } while (0); + irqf = ukplat_lcpu_save_irqf(); if (m->lock_count == 0 || m->owner == current) break; @@ -92,7 +127,7 @@ static inline int uk_mutex_trylock(struct uk_mutex *m) UK_ASSERT(m); - current = uk_thread_current(); + flexos_gate_r(libuksched, current, uk_thread_current) irqf = ukplat_lcpu_save_irqf(); if (m->lock_count == 0 || m->owner == current) { @@ -112,6 +147,8 @@ static inline int uk_mutex_is_locked(struct uk_mutex *m) static inline void uk_mutex_unlock(struct uk_mutex *m) { unsigned long irqf; + /* regarding volatile, see previous comment */ + volatile struct uk_waitq *wq = &m->wait; UK_ASSERT(m); @@ -119,7 +156,7 @@ static inline void uk_mutex_unlock(struct uk_mutex *m) UK_ASSERT(m->lock_count > 0); if (--m->lock_count == 0) { m->owner = NULL; - uk_waitq_wake_up(&m->wait); + flexos_gate(libuksched, uk_waitq_wake_up, wq); } ukplat_lcpu_restore_irqf(irqf); } @@ -130,4 +167,6 @@ static inline void uk_mutex_unlock(struct uk_mutex *m) #endif /* CONFIG_LIBUKLOCK_MUTEX */ +#pragma GCC diagnostic pop + #endif /* __UK_MUTEX_H__ */ diff --git a/lib/uklock/include/uk/semaphore.h b/lib/uklock/include/uk/semaphore.h index 2e82b10ff3..b0fe455fa8 100644 --- a/lib/uklock/include/uk/semaphore.h +++ b/lib/uklock/include/uk/semaphore.h @@ -25,7 +25,11 @@ #ifndef __UK_SEMAPHORE_H__ #define __UK_SEMAPHORE_H__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdiscarded-qualifiers" + #include +#include #if CONFIG_LIBUKLOCK_SEMAPHORE #include @@ -58,7 +62,32 @@ static inline void uk_semaphore_down(struct uk_semaphore *s) UK_ASSERT(s); for (;;) { - uk_waitq_wait_event(&s->wait, s->count > 0); + 
do { + struct uk_thread *__current; + unsigned long flags; + DEFINE_WAIT(__wait); + if (s->count > 0) + break; + for (;;) { + __current = uk_thread_current(); + /* protect the list */ + flags = ukplat_lcpu_save_irqf(); + uk_waitq_add(&s->wait, __wait); + flexos_gate(libuksched, uk_thread_set_wakeup_time, __current, 0); + flexos_gate(libuksched, clear_runnable, __current); + flexos_gate(libuksched, uk_sched_thread_blocked, __current); + ukplat_lcpu_restore_irqf(flags); + if (s->count > 0) + break; + flexos_gate(libuksched, uk_sched_yield); + } + flags = ukplat_lcpu_save_irqf(); + /* need to wake up */ + flexos_gate(libuksched, uk_thread_wake, __current); + uk_waitq_remove(&s->wait, __wait); + ukplat_lcpu_restore_irqf(flags); + } while (0); + irqf = ukplat_lcpu_save_irqf(); if (s->count > 0) break; @@ -104,13 +133,39 @@ static inline __nsec uk_semaphore_down_to(struct uk_semaphore *s, deadline = then + timeout; for (;;) { - uk_waitq_wait_event_deadline(&s->wait, s->count > 0, deadline); + do { + struct uk_thread *__current; + unsigned long flags; + DEFINE_WAIT(__wait); + if (s->count > 0) + break; + for (;;) { + __current = uk_thread_current(); + /* protect the list */ + flags = ukplat_lcpu_save_irqf(); + uk_waitq_add(&s->wait, __wait); + flexos_gate(libuksched, uk_thread_set_wakeup_time, __current, deadline); + flexos_gate(libuksched, clear_runnable, __current); + flexos_gate(libuksched, uk_sched_thread_blocked, __current); + ukplat_lcpu_restore_irqf(flags); + if (s->count > 0 || (deadline && ukplat_monotonic_clock() >= deadline)) + break; + flexos_gate(libuksched, uk_sched_yield); + } + flags = ukplat_lcpu_save_irqf(); + /* need to wake up */ + flexos_gate(libuksched, uk_thread_wake, __current); + uk_waitq_remove(&s->wait, __wait); + ukplat_lcpu_restore_irqf(flags); + } while (0); + irqf = ukplat_lcpu_save_irqf(); if (s->count > 0 || (deadline && ukplat_monotonic_clock() >= deadline)) break; ukplat_lcpu_restore_irqf(irqf); } + if (s->count > 0) { s->count--; #ifdef UK_SEMAPHORE_DEBUG @@ -140,7 +195,11 @@ static inline void uk_semaphore_up(struct uk_semaphore *s) uk_pr_debug("Increased semaphore %p to %ld\n", s, s->count); #endif - uk_waitq_wake_up(&s->wait); + /* Volatile to make sure that the compiler doesn't reorganize + * the code in such a way that the dereference happens in the + * other domain... */ + volatile struct uk_waitq *wq = &s->wait; + flexos_gate(libuksched, uk_waitq_wake_up, wq); ukplat_lcpu_restore_irqf(irqf); } @@ -150,4 +209,6 @@ static inline void uk_semaphore_up(struct uk_semaphore *s) #endif /* CONFIG_LIBUKLOCK_SEMAPHORE */ +#pragma GCC diagnostic pop + #endif /* __UK_SEMAPHORE_H__ */ diff --git a/lib/uklock/mutex.c b/lib/uklock/mutex.c index 5e5ec9a609..24e55061f3 100644 --- a/lib/uklock/mutex.c +++ b/lib/uklock/mutex.c @@ -1,8 +1,9 @@ #include +#include void uk_mutex_init(struct uk_mutex *m) { m->lock_count = 0; m->owner = NULL; - uk_waitq_init(&m->wait); + flexos_gate(libuksched, uk_waitq_init, &m->wait); } diff --git a/lib/uklock/semaphore.c b/lib/uklock/semaphore.c index 5b1823595f..ba87ae4e48 100644 --- a/lib/uklock/semaphore.c +++ b/lib/uklock/semaphore.c @@ -1,9 +1,14 @@ #include +#include void uk_semaphore_init(struct uk_semaphore *s, long count) { s->count = count; - uk_waitq_init(&s->wait); + /* Volatile to make sure that the compiler doesn't reorganize + * the code in such a way that the dereference happens in the + * other domain... 
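+ * The volatile local below makes &s->wait be computed in this compartment, before the gate call.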
*/ + volatile struct uk_waitq *wq = &s->wait; + flexos_gate(libuksched, uk_waitq_init, wq); #ifdef UK_SEMAPHORE_DEBUG uk_pr_debug("Initialized semaphore %p with %ld\n", diff --git a/lib/uknetdev/include/uk/netdev_core.h b/lib/uknetdev/include/uk/netdev_core.h index e8ebaae8b8..24454e1af5 100644 --- a/lib/uknetdev/include/uk/netdev_core.h +++ b/lib/uknetdev/include/uk/netdev_core.h @@ -426,8 +426,7 @@ struct uk_netdev_event_handler { struct uk_netdev_data { enum uk_netdev_state state; - struct uk_netdev_event_handler - rxq_handler[CONFIG_LIBUKNETDEV_MAXNBQUEUES]; + struct uk_netdev_event_handler *rxq_handler; const uint16_t id; /**< ID is assigned during registration */ const char *drv_name; @@ -471,7 +470,7 @@ struct uk_netdev { struct uk_netdev_einfo *_einfo; #if (CONFIG_UK_NETDEV_SCRATCH_SIZE > 0) - char scratch_pad[CONFIG_UK_NETDEV_SCRATCH_SIZE]; + char *scratch_pad; #endif /* CONFIG_UK_NETDEV_SCRATCH_SIZE */ }; diff --git a/lib/uknetdev/netdev.c b/lib/uknetdev/netdev.c index cb02b06a5a..33d436bde4 100644 --- a/lib/uknetdev/netdev.c +++ b/lib/uknetdev/netdev.c @@ -34,6 +34,7 @@ #define _GNU_SOURCE /* for asprintf() */ #include #include +#include #include #include #include @@ -105,6 +106,10 @@ static struct uk_netdev_data *_alloc_data(struct uk_alloc *a, data->drv_name = drv_name; data->state = UK_NETDEV_UNCONFIGURED; + data->rxq_handler = flexos_calloc_whitelist(sizeof(struct uk_netdev_event_handler), + CONFIG_LIBUKNETDEV_MAXNBQUEUES); + if (!data->rxq_handler) + return NULL; /* This is the only place where we set the device ID; * during the rest of the device's life time this ID is read-only @@ -331,6 +336,7 @@ int uk_netdev_configure(struct uk_netdev *dev, } #ifdef CONFIG_LIBUKNETDEV_DISPATCHERTHREADS +__attribute__((libc_callback)) static void _dispatcher(void *arg) { struct uk_netdev_event_handler *handler = @@ -346,6 +352,14 @@ static void _dispatcher(void *arg) handler->cookie); } } + +static void dispatcher(void *arg) +{ +#if CONFIG_LIBFLEXOS_INTELPKU + wrpkru(0x3ffffffc); +#endif + _dispatcher(arg); +} #endif static int _create_event_handler(uk_netdev_queue_event_t callback, @@ -385,9 +399,17 @@ static int _create_event_handler(uk_netdev_queue_event_t callback, h->dispatcher_name = NULL; } - h->dispatcher = uk_sched_thread_create(h->dispatcher_s, - h->dispatcher_name, NULL, - _dispatcher, h); + /* FLEXOS FIXME do we really need all these copies/volatile? 
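+ * They follow the same pattern as the volatile waitq copies in uklock, presumably to pin the values in this compartment before the gate call.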
*/ + volatile struct uk_sched *sched = h->dispatcher_s; + volatile const char *name = h->dispatcher_name; + volatile struct uk_netdev_event_handler *hcpy = h; + volatile struct uk_thread *disp; + + flexos_gate_r(libuksched, disp, uk_sched_thread_create, + sched, name, NULL, dispatcher, hcpy); + + h->dispatcher = disp; + if (!h->dispatcher) { if (h->dispatcher_name) free(h->dispatcher_name); @@ -408,8 +430,8 @@ static void _destroy_event_handler(struct uk_netdev_event_handler *h UK_ASSERT(h->dispatcher_s); if (h->dispatcher) { - uk_thread_kill(h->dispatcher); - uk_thread_wait(h->dispatcher); + flexos_gate(libuksched, uk_thread_kill, h->dispatcher); + flexos_gate(libuksched, uk_thread_wait, h->dispatcher); } h->dispatcher = NULL; diff --git a/lib/uksched/exportsyms.uk b/lib/uksched/exportsyms.uk index 5297affd18..00130be06e 100644 --- a/lib/uksched/exportsyms.uk +++ b/lib/uksched/exportsyms.uk @@ -6,7 +6,10 @@ uk_sched_create uk_sched_start uk_sched_idle_init uk_sched_thread_create +uk_sched_thread_create_rpc_only +uk_sched_thread_create_main uk_sched_thread_destroy +uk_sched_thread_destroy_rpc_only uk_sched_thread_kill uk_sched_thread_sleep uk_sched_thread_exit @@ -30,6 +33,9 @@ uk_thread_attr_set_prio uk_thread_attr_get_prio uk_thread_attr_set_timeslice uk_thread_attr_get_timeslice +uk_sched_thread_create_main +uk_thread_inherit_signal_mask # Newlib related -__getreent \ No newline at end of file +__getreent +wq_entries diff --git a/lib/uksched/include/uk/sched.h b/lib/uksched/include/uk/sched.h index 2e9368df50..7a2aec28a4 100644 --- a/lib/uksched/include/uk/sched.h +++ b/lib/uksched/include/uk/sched.h @@ -38,6 +38,8 @@ #include #include #include +#include +#include #include #ifdef __cplusplus @@ -56,6 +58,12 @@ int uk_sched_register(struct uk_sched *s); struct uk_sched *uk_sched_get_default(void); int uk_sched_set_default(struct uk_sched *s); +static inline __nsec _uk_sched_monotonic_clock_wrapper(void) +{ + __nsec time; + flexos_gate_r(libukplat, time, ukplat_monotonic_clock); + return time; +} typedef void (*uk_sched_yield_func_t) (struct uk_sched *s); @@ -140,11 +148,10 @@ static inline int uk_sched_thread_remove(struct uk_sched *s, return 0; } -static inline void uk_sched_thread_blocked(struct uk_sched *s, - struct uk_thread *t) +static inline void uk_sched_thread_blocked(struct uk_thread *t) { - UK_ASSERT(s); - s->thread_blocked(s, t); + UK_ASSERT(t); + (t->sched)->thread_blocked(t->sched, t); } static inline void uk_sched_thread_woken(struct uk_sched *s, @@ -250,8 +257,33 @@ static inline bool uk_sched_started(struct uk_sched *sched) struct uk_thread *uk_sched_thread_create(struct uk_sched *sched, const char *name, const uk_thread_attr_t *attr, void (*function)(void *), void *arg); + +#if CONFIG_LIBFLEXOS_VMEPT +/* This doesn't create other threads in other compartments. + * A theread created via this function can't initiate RPC calls, + * but it can make RPC calls in response to received RPC calls. 
*/ +struct uk_thread *uk_sched_thread_create_rpc_only(struct uk_sched *sched, + const char *name, const uk_thread_attr_t *attr, + void (*function)(void *), void *arg, + uint8_t normal_thread_comp_id, uint8_t normal_thread_tid, + volatile struct flexos_vmept_thread_map *thread_map); +#endif /* CONFIG_LIBFLEXOS_VMEPT */ + +struct uk_thread *uk_sched_thread_create_main(struct uk_sched *sched, + const uk_thread_attr_t *attr, + void (*function)(void *), void *arg); void uk_sched_thread_destroy(struct uk_sched *sched, struct uk_thread *thread); + +#if CONFIG_LIBFLEXOS_VMEPT +/* This doesn't destroy threads in other compartments. + * Only for use on rpc threads. */ +void uk_sched_thread_destroy_rpc_only(struct uk_sched *sched, + struct uk_thread *thread, + uint8_t normal_thread_comp_id, uint8_t normal_thread_tid, + volatile struct flexos_vmept_thread_map *thread_map); +#endif /* CONFIG_LIBFLEXOS_VMEPT */ + void uk_sched_thread_kill(struct uk_sched *sched, struct uk_thread *thread); diff --git a/lib/uksched/include/uk/thread.h b/lib/uksched/include/uk/thread.h index 5ec25a874a..1069d5e487 100644 --- a/lib/uksched/include/uk/thread.h +++ b/lib/uksched/include/uk/thread.h @@ -28,6 +28,7 @@ #ifndef __UK_THREAD_H__ #define __UK_THREAD_H__ +#include #include #include #ifdef CONFIG_LIBNEWLIBC @@ -42,6 +43,7 @@ #include #include #include +#include #include #ifdef __cplusplus @@ -49,6 +51,10 @@ extern "C" { #endif struct uk_sched; +struct uk_sched *uk_sched_get_default(void); +struct uk_thread *uk_sched_thread_create(struct uk_sched *sched, + const char *name, const uk_thread_attr_t *attr, + void (*function)(void *), void *arg); struct uk_thread { const char *name; @@ -62,23 +68,62 @@ struct uk_thread { struct uk_waitq waiting_threads; struct uk_sched *sched; void *prv; +#if CONFIG_LIBFLEXOS_INTELPKU + int tid; +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ +#if CONFIG_LIBFLEXOS_VMEPT + /* a tid in [0, 255] indicates normal thread + * a tid of -1 indicates rpc thread */ + int tid; + /* used to identify the rpc control data this rpc thread listens to */ + void* ctrl; +#endif /* CONFIG_LIBFLEXOS_VMEPT */ #ifdef CONFIG_LIBNEWLIBC - struct _reent reent; + struct _reent *reent; #endif #if CONFIG_LIBUKSIGNAL - struct uk_thread_sig signals_container; + struct uk_thread_sig *signals_container; #endif }; +#if CONFIG_LIBFLEXOS_INTELPKU +#include +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ + UK_TAILQ_HEAD(uk_thread_list, struct uk_thread); -#define uk_thread_create_attr(name, attr, function, data) \ - uk_sched_thread_create(uk_sched_get_default(), \ - name, attr, function, data) -#define uk_thread_create(name, function, data) \ - uk_thread_create_attr(name, NULL, function, data) -#define uk_thread_kill(thread) \ - uk_sched_thread_kill(thread->sched, thread) +#define uk_thread_create_attr_main(attr, function, data) \ + uk_sched_thread_create_main(uk_sched_get_default(), \ + attr, function, data) +#define uk_thread_create_main(function, data) \ + uk_thread_create_attr_main(NULL, function, data) + +static inline struct uk_thread *uk_thread_create_attr(const char *name, + const uk_thread_attr_t *attr,void (*function)(void *), void *arg) +{ + return uk_sched_thread_create(uk_sched_get_default(), + name, attr, function, arg); +} + +static inline struct uk_thread *uk_thread_create(const char *name, + void (*function)(void *), void *arg) +{ + return uk_thread_create_attr(name, NULL, function, arg); +} + +void uk_sched_thread_kill(struct uk_sched *sched, + struct uk_thread *thread); + +static inline void uk_thread_kill(struct uk_thread 
*thread) +{ + uk_sched_thread_kill(thread->sched, thread); +} + +static inline void uk_thread_set_wakeup_time(struct uk_thread *thread, __snsec time) +{ + thread->wakeup_time = time; +} + void uk_thread_exit(struct uk_thread *thread); int uk_thread_wait(struct uk_thread *thread); @@ -90,6 +135,18 @@ int uk_thread_get_prio(const struct uk_thread *thread, prio_t *prio); int uk_thread_set_timeslice(struct uk_thread *thread, int timeslice); int uk_thread_get_timeslice(const struct uk_thread *thread, int *timeslice); +static inline +void *uk_thread_get_private(struct uk_thread *thread) +{ + return thread->prv; +} + +static inline +void uk_thread_set_private(struct uk_thread *thread, void *prv) +{ + thread->prv = prv; +} + static inline struct uk_thread *uk_thread_current(void) { @@ -101,13 +158,25 @@ struct uk_thread *uk_thread_current(void) return *current; } +__attribute__((libuksignal_callback)) static inline +struct uk_thread_sig *uk_crr_thread_sig_container(void) +{ + return uk_thread_current()->signals_container; +} + +void uk_thread_inherit_signal_mask(struct uk_thread *thread); + #define RUNNABLE_FLAG 0x00000001 #define EXITED_FLAG 0x00000002 #define QUEUEABLE_FLAG 0x00000004 #define is_runnable(_thread) ((_thread)->flags & RUNNABLE_FLAG) #define set_runnable(_thread) ((_thread)->flags |= RUNNABLE_FLAG) -#define clear_runnable(_thread) ((_thread)->flags &= ~RUNNABLE_FLAG) + +static inline void clear_runnable(struct uk_thread *thread) +{ + thread->flags &= ~RUNNABLE_FLAG; +} #define is_exited(_thread) ((_thread)->flags & EXITED_FLAG) #define set_exited(_thread) ((_thread)->flags |= EXITED_FLAG) @@ -116,10 +185,21 @@ struct uk_thread *uk_thread_current(void) #define set_queueable(_thread) ((_thread)->flags |= QUEUEABLE_FLAG) #define clear_queueable(_thread) ((_thread)->flags &= ~QUEUEABLE_FLAG) +int uk_thread_init_idle(struct uk_thread *thread, + struct ukplat_ctx_callbacks *cbs, struct uk_alloc *allocator, + const char *name, void *stack + /* __FLEXOS MARKER__: uk_thread_init decl */, + void *tls, void (*function)(void *), void *arg); +int uk_thread_init_main(struct uk_thread *thread, + struct ukplat_ctx_callbacks *cbs, struct uk_alloc *allocator, + const char *name, void *stack + /* __FLEXOS MARKER__: uk_thread_init decl */, + void *tls, void (*function)(void *), void *arg); int uk_thread_init(struct uk_thread *thread, struct ukplat_ctx_callbacks *cbs, struct uk_alloc *allocator, - const char *name, void *stack, void *tls, - void (*function)(void *), void *arg); + const char *name, void *stack + /* __FLEXOS MARKER__: uk_thread_init decl */, + void *tls, void (*function)(void *), void *arg); void uk_thread_fini(struct uk_thread *thread, struct uk_alloc *allocator); void uk_thread_block_timeout(struct uk_thread *thread, __nsec nsec); diff --git a/lib/uksched/include/uk/wait.h b/lib/uksched/include/uk/wait.h index 94dae16f49..9707bdc61a 100644 --- a/lib/uksched/include/uk/wait.h +++ b/lib/uksched/include/uk/wait.h @@ -27,7 +27,6 @@ #define __UK_SCHED_WAIT_H__ #include -#include #include #include @@ -92,40 +91,9 @@ do { \ ukplat_lcpu_restore_irqf(flags); \ } while (0) -#define __wq_wait_event_deadline(wq, condition, deadline, deadline_condition) \ -do { \ - struct uk_thread *__current; \ - unsigned long flags; \ - DEFINE_WAIT(__wait); \ - if (condition) \ - break; \ - for (;;) { \ - __current = uk_thread_current(); \ - /* protect the list */ \ - flags = ukplat_lcpu_save_irqf(); \ - uk_waitq_add(wq, &__wait); \ - __current->wakeup_time = deadline; \ - clear_runnable(__current); \ - 
uk_sched_thread_blocked(__current->sched, __current); \ - ukplat_lcpu_restore_irqf(flags); \ - if ((condition) || (deadline_condition)) \ - break; \ - uk_sched_yield(); \ - } \ - flags = ukplat_lcpu_save_irqf(); \ - /* need to wake up */ \ - uk_thread_wake(__current); \ - uk_waitq_remove(wq, &__wait); \ - ukplat_lcpu_restore_irqf(flags); \ -} while (0) - -#define uk_waitq_wait_event(wq, condition) \ - __wq_wait_event_deadline(wq, (condition), 0, 0) - -#define uk_waitq_wait_event_deadline(wq, condition, deadline) \ - __wq_wait_event_deadline(wq, (condition), \ - (deadline), \ - (deadline) && ukplat_monotonic_clock() >= (deadline)) +#if CONFIG_LIBFLEXOS_INTELPKU +extern struct uk_waitq_entry wq_entries[32]; +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ static inline void uk_waitq_wake_up(struct uk_waitq *wq) diff --git a/lib/uksched/include/uk/wait_types.h b/lib/uksched/include/uk/wait_types.h index a7405439e9..187fd8a8b3 100644 --- a/lib/uksched/include/uk/wait_types.h +++ b/lib/uksched/include/uk/wait_types.h @@ -46,12 +46,25 @@ UK_STAILQ_HEAD(uk_waitq, struct uk_waitq_entry); #define DEFINE_WAIT_QUEUE(name) \ struct uk_waitq name = __WAIT_QUEUE_INITIALIZER(name) +#if CONFIG_LIBFLEXOS_INTELPKU +/* FIXME FLEXOS get rid of this hack... this should be easily feasible + * with whitelists. */ +extern struct uk_waitq_entry wq_entries[32]; + +#define DEFINE_WAIT(name) \ + struct uk_thread *__cur = uk_thread_current(); \ + struct uk_waitq_entry *name = &wq_entries[uk_thread_get_tid()]; \ + memset(name, 0, sizeof(*name)); \ + name->thread = uk_thread_current(); +#else #define DEFINE_WAIT(name) \ -struct uk_waitq_entry name = { \ - .waiting = 0, \ - .thread = uk_thread_current(), \ - .thread_list = { NULL } \ -} +struct uk_waitq_entry _name = { \ + .waiting = 0, \ + .thread = uk_thread_current(), \ + .thread_list = { NULL } \ +}; \ +struct uk_waitq_entry *name = &_name; +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ #ifdef __cplusplus } diff --git a/lib/uksched/sched.c b/lib/uksched/sched.c index 86e6f6ee4b..2ec4dcbd80 100644 --- a/lib/uksched/sched.c +++ b/lib/uksched/sched.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -46,6 +47,10 @@ struct uk_sched *uk_sched_head; +/* TODO FLEXOS: for now we share the TLS. This is not optimal + * from a security stand point, and should be revisited with a + * more thoughtful approach. */ + /* FIXME Support for external schedulers */ struct uk_sched *uk_sched_default_init(struct uk_alloc *a) { @@ -124,7 +129,7 @@ struct uk_sched *uk_sched_create(struct uk_alloc *a, size_t prv_size) sched = uk_malloc(a, sizeof(struct uk_sched) + prv_size); if (sched == NULL) { - uk_pr_warn("Failed to allocate scheduler\n"); + flexos_gate(libc, uk_pr_warn, "Failed to allocate scheduler\n"); return NULL; } @@ -147,11 +152,24 @@ static void *create_stack(struct uk_alloc *allocator) void *stack; if (uk_posix_memalign(allocator, &stack, + /* TODO FLEXOS for some reason with DSS the allocation always fails + * with the buddy allocator, commenting this should be fine though. 
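+ * For now the #if 0 below keeps the double-size DSS allocation disabled.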
*/ +#if 0 && CONFIG_LIBFLEXOS_ENABLE_DSS + /* if the DSS is enabled, allocate two times the size of the + * stack; the second half is then used as data shadow stack */ + STACK_SIZE, STACK_SIZE * 2) != 0) { +#else STACK_SIZE, STACK_SIZE) != 0) { - uk_pr_err("Failed to allocate thread stack: Not enough memory\n"); +#endif /* CONFIG_LIBFLEXOS_ENABLE_DSS */ + flexos_gate(libc, uk_pr_err, FLEXOS_SHARED_LITERAL( + "Failed to allocate thread stack: Not enough memory\n")); return NULL; } +#if CONFIG_LIBFLEXOS_GATE_INTELPKU_SHARED_STACKS + flexos_intelpku_mem_set_key(stack, STACK_SIZE / __PAGE_SIZE, 15); +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ + return stack; } @@ -161,13 +179,62 @@ static void *uk_thread_tls_create(struct uk_alloc *allocator) if (uk_posix_memalign(allocator, &tls, ukarch_tls_area_align(), ukarch_tls_area_size()) != 0) { - uk_pr_err("Failed to allocate thread TLS area\n"); + flexos_gate(libc, uk_pr_err, "Failed to allocate thread TLS area\n"); return NULL; } ukarch_tls_area_copy(tls); return tls; } +#if CONFIG_LIBFLEXOS_INTELPKU + +static inline void PROTECT_STACK(void *stack, int key) +{ + /* FIXME FLEXOS: hack to support boot time 0x0 domain */ + if (rdpkru() == 0x0) { + flexos_intelpku_mem_set_key(stack, STACK_SIZE / __PAGE_SIZE, key); + } else { + flexos_gate(libflexos-core, flexos_intelpku_mem_set_key, + stack, STACK_SIZE / __PAGE_SIZE, key); + } +} + +#if CONFIG_LIBFLEXOS_ENABLE_DSS + +/* DSSs are always shared */ +#define SHARE_DSS(stack_comp) \ + PROTECT_STACK((stack_comp) + STACK_SIZE, 15); + +#else /* CONFIG_LIBFLEXOS_ENABLE_DSS */ + +#define SHARE_DSS(stack_comp) + +#endif /* CONFIG_LIBFLEXOS_ENABLE_DSS */ + +#else /* CONFIG_LIBFLEXOS_INTELPKU */ + +#define SHARE_DSS(stack_comp) +#define PROTECT_STACK(stack_comp, key) + +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ + +#if CONFIG_LIBFLEXOS_GATE_INTELPKU_PRIVATE_STACKS +#define COMP0_PKUKEY 0 +#else +#define COMP0_PKUKEY 15 +#endif /* CONFIG_LIBFLEXOS_GATE_INTELPKU_PRIVATE_STACKS */ + +#define ALLOC_COMP_STACK(stack_comp, key) \ +do { \ + /* allocate stack for compartment 'key' */ \ + if ((stack_comp) == NULL) \ + (stack_comp) = create_stack(sched->allocator); \ + if ((stack_comp) == NULL) \ + goto err; \ + PROTECT_STACK((stack_comp), (key)); \ + SHARE_DSS((stack_comp)); \ +} while (0) + void uk_sched_idle_init(struct uk_sched *sched, void *stack, void (*function)(void *)) { @@ -177,27 +244,90 @@ void uk_sched_idle_init(struct uk_sched *sched, UK_ASSERT(sched != NULL); - if (stack == NULL) - stack = create_stack(sched->allocator); - UK_ASSERT(stack != NULL); - if (have_tls_area() && !(tls = uk_thread_tls_create(sched->allocator))) - goto out_crash; + ALLOC_COMP_STACK(stack, COMP0_PKUKEY); + + /* __FLEXOS MARKER__: insert stack allocations here. */ + + if (have_tls_area() && !(tls = uk_thread_tls_create(flexos_shared_alloc))) + goto err; idle = &sched->idle; - rc = uk_thread_init(idle, + /* same as main, we want to call the variant that doesn't execute gates */ + rc = uk_thread_init_main(idle, &sched->plat_ctx_cbs, sched->allocator, - "Idle", stack, tls, function, NULL); + "Idle", stack /* __FLEXOS MARKER__: uk_thread_init call */, + tls, function, NULL); + if (rc) - goto out_crash; + goto err; idle->sched = sched; return; -out_crash: +err: UK_CRASH("Failed to initialize `idle` thread\n"); } +/* This copy of uk_sched_thread_create is used only for the creation of the + * main thread. At that time we are still in the allmighty 0x0 domain, + * meaning that gate wrappers are going to screw everything up. 
+ * + * tl;dr this is uk_sched_thread_create without gates. + */ +struct uk_thread *uk_sched_thread_create_main(struct uk_sched *sched, + const uk_thread_attr_t *attr, + void (*function)(void *), void *arg) +{ + struct uk_thread *thread = NULL; + void *stack = NULL; + int rc; + void *tls = NULL; + + thread = uk_malloc(sched->allocator, sizeof(struct uk_thread)); + if (thread == NULL) { + uk_pr_err("Failed to allocate thread\n"); + goto err; + } + + ALLOC_COMP_STACK(stack, COMP0_PKUKEY); + + /* __FLEXOS MARKER__: insert stack allocations here. */ + + if (have_tls_area() && !(tls = uk_thread_tls_create(flexos_shared_alloc))) + goto err; + + rc = uk_thread_init_main(thread, + &sched->plat_ctx_cbs, sched->allocator, + "main", stack /* __FLEXOS MARKER__: uk_thread_init call */, + tls, function, arg); + if (rc) + goto err; + + rc = uk_sched_thread_add(sched, thread, attr); + if (rc) + goto err_add; + + return thread; + +err_add: + uk_thread_fini(thread, sched->allocator); +err: + if (tls) + uk_free(flexos_shared_alloc, tls); + if (stack) + uk_free(sched->allocator, stack); +#if CONFIG_LIBFLEXOS_INTELPKU + /* TODO FLEXOS free() per-compartment stacks */ + /* Clearly, not doing it now should not be much of an issue because + * this error case is unlikely to happen in our benchmarks... */ +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ + if (thread) + uk_free(sched->allocator, thread); + + return NULL; +} + struct uk_thread *uk_sched_thread_create(struct uk_sched *sched, const char *name, const uk_thread_attr_t *attr, void (*function)(void *), void *arg) @@ -209,25 +339,105 @@ struct uk_thread *uk_sched_thread_create(struct uk_sched *sched, thread = uk_malloc(sched->allocator, sizeof(struct uk_thread)); if (thread == NULL) { - uk_pr_err("Failed to allocate thread\n"); + flexos_gate(libc, uk_pr_err, "Failed to allocate thread\n"); goto err; } - /* We can't use lazy allocation here - * since the trap handler runs on the stack - */ - stack = create_stack(sched->allocator); - if (stack == NULL) + ALLOC_COMP_STACK(stack, COMP0_PKUKEY); + + /* __FLEXOS MARKER__: insert stack allocations here. */ + + if (have_tls_area() && !(tls = uk_thread_tls_create(flexos_shared_alloc))) goto err; - if (have_tls_area() && !(tls = uk_thread_tls_create(sched->allocator))) + + rc = uk_thread_init(thread, + &sched->plat_ctx_cbs, sched->allocator, + name, stack /* __FLEXOS MARKER__: uk_thread_init call */, + tls, function, arg); + if (rc) + goto err; + +#if CONFIG_LIBFLEXOS_VMEPT + /* here we need to create an rpc thread in each other compartment */ + // TODO: error handling + printf("Spawning rpc threads in other compartments.\n"); + volatile struct flexos_vmept_rpc_ctrl * ctrl = flexos_vmept_rpc_ctrl(flexos_vmept_comp_id, thread->tid); + flexos_vmept_init_rpc_ctrl(ctrl); + thread->ctrl = ctrl; + for (size_t i = 0; i < FLEXOS_VMEPT_COMP_COUNT; ++i) { + if (i == flexos_vmept_comp_id) + continue; + flexos_vmept_master_rpc_call_create(flexos_vmept_comp_id, i, thread->tid); + } + printf("Spawned rpc threads in other compartments.\n"); +#endif /* CONFIG_LIBFLEXOS_VMEPT */ + + rc = uk_sched_thread_add(sched, thread, attr); + if (rc) + goto err_add; + + return thread; + +err_add: + uk_thread_fini(thread, sched->allocator); +err: + if (tls) + uk_free(flexos_shared_alloc, tls); + if (stack) + uk_free(sched->allocator, stack); +#if CONFIG_LIBFLEXOS_INTELPKU + /* TODO FLEXOS free() per-compartment stacks */ + /* Clearly, not doing it now should not be much of an issue because + * this error case is unlikely to happen in our benchmarks... 
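+ * Until that is implemented, the per-compartment stacks simply leak on this error path.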
*/ +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ + if (thread) + uk_free(sched->allocator, thread); + + return NULL; +} + +#if CONFIG_LIBFLEXOS_VMEPT +struct uk_thread *uk_sched_thread_create_rpc_only(struct uk_sched *sched, + const char *name, const uk_thread_attr_t *attr, + void (*function)(void *), void *arg, + uint8_t normal_thread_comp_id, uint8_t normal_thread_tid, + volatile struct flexos_vmept_thread_map *thread_map) +{ + volatile struct uk_thread *thread = NULL; + void *stack = NULL; + int rc; + void *tls = NULL; + + thread = uk_malloc(sched->allocator, sizeof(struct uk_thread)); + if (thread == NULL) { + flexos_gate(libc, uk_pr_err, "Failed to allocate thread\n"); + goto err; + } + + ALLOC_COMP_STACK(stack, COMP0_PKUKEY); + + /* __FLEXOS MARKER__: insert stack allocations here. */ + + if (have_tls_area() && !(tls = uk_thread_tls_create(flexos_shared_alloc))) goto err; rc = uk_thread_init(thread, &sched->plat_ctx_cbs, sched->allocator, - name, stack, tls, function, arg); + name, stack /* __FLEXOS MARKER__: uk_thread_init call */, + tls, function, arg); if (rc) goto err; + // for rpc only threads set tid to -1 + // FIXME: maybe change with proper tid allocation? + thread->tid = -1; + /* thread_map = NULL is used when creating the thread for the master rpc loop + * we don't set ctrl or the mapping for that thread */ + if (thread_map) { + thread->ctrl = flexos_vmept_rpc_ctrl(normal_thread_comp_id, normal_thread_tid); + flexos_vmept_thread_map_put(thread_map, normal_thread_comp_id, + (uint8_t) normal_thread_tid, thread); + } rc = uk_sched_thread_add(sched, thread, attr); if (rc) goto err_add; @@ -238,14 +448,21 @@ struct uk_thread *uk_sched_thread_create(struct uk_sched *sched, uk_thread_fini(thread, sched->allocator); err: if (tls) - uk_free(sched->allocator, tls); + uk_free(flexos_shared_alloc, tls); if (stack) uk_free(sched->allocator, stack); +#if CONFIG_LIBFLEXOS_INTELPKU + /* TODO FLEXOS free() per-compartment stacks */ + /* Clearly, not doing it now should not be much of an issue because + * this error case is unlikely to happen in our benchmarks... 
*/ +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ if (thread) uk_free(sched->allocator, thread); return NULL; } +#endif /* CONFIG_LIBFLEXOS_VMEPT */ + void uk_sched_thread_destroy(struct uk_sched *sched, struct uk_thread *thread) { @@ -258,11 +475,49 @@ void uk_sched_thread_destroy(struct uk_sched *sched, struct uk_thread *thread) UK_TAILQ_REMOVE(&sched->exited_threads, thread, thread_list); uk_thread_fini(thread, sched->allocator); uk_free(sched->allocator, thread->stack); +#if CONFIG_LIBFLEXOS_INTELPKU + /* TODO FLEXOS free() per-compartment stacks */ +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ +#if CONFIG_LIBFLEXOS_VMEPT + /* here we need to detroy the associated rpc thread in each other compartment */ + // TODO: error handling + for (size_t i = 0; i < FLEXOS_VMEPT_COMP_COUNT; ++i) { + if (i == flexos_vmept_comp_id) + continue; + flexos_vmept_master_rpc_call_destroy(flexos_vmept_comp_id, i, thread->tid); + } +#endif /* CONFIG_LIBFLEXOS_VMEPT */ if (thread->tls) - uk_free(sched->allocator, thread->tls); + uk_free(flexos_shared_alloc, thread->tls); uk_free(sched->allocator, thread); } + +#if CONFIG_LIBFLEXOS_VMEPT +void uk_sched_thread_destroy_rpc_only(struct uk_sched *sched, struct uk_thread *thread, + uint8_t normal_thread_comp_id, uint8_t normal_thread_tid, + volatile struct flexos_vmept_thread_map *thread_map) +{ + UK_ASSERT(sched != NULL); + UK_ASSERT(thread != NULL); + UK_ASSERT(thread->stack != NULL); + UK_ASSERT(!have_tls_area() || thread->tls != NULL); + UK_ASSERT(is_exited(thread)); + + UK_TAILQ_REMOVE(&sched->exited_threads, thread, thread_list); + uk_thread_fini(thread, sched->allocator); + uk_free(sched->allocator, thread->stack); + + if (thread->tls) + uk_free(flexos_shared_alloc, thread->tls); + uk_free(sched->allocator, thread); + if (thread_map) { + flexos_vmept_thread_map_put(thread_map, normal_thread_comp_id, + (uint8_t) normal_thread_tid, NULL); + } +} +#endif /* CONFIG_LIBFLEXOS_VMEPT */ + void uk_sched_thread_kill(struct uk_sched *sched, struct uk_thread *thread) { uk_sched_thread_remove(sched, thread); diff --git a/lib/uksched/thread.c b/lib/uksched/thread.c index 609a4ce484..9fdacdfa48 100644 --- a/lib/uksched/thread.c +++ b/lib/uksched/thread.c @@ -29,6 +29,7 @@ * Thread definitions * Ported from Mini-OS */ +#include #include #include #include @@ -36,11 +37,27 @@ #include #include #include +#include #include #include #include #include +#if CONFIG_LIBUKSIGNAL +#include +#endif + +#if CONFIG_LIBFLEXOS_INTELPKU +#include +struct uk_waitq_entry wq_entries[32] __attribute__((flexos_whitelist)); + +static int uk_num_threads = 0; +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ + +#if CONFIG_LIBFLEXOS_VMEPT +static int uk_num_threads = 0; +#endif /* CONFIG_LIBFLEXOS_VMEPT */ + /* Pushes the specified value onto the stack of the specified thread */ static void stack_push(unsigned long *sp, unsigned long value) { @@ -74,6 +91,7 @@ static void reent_init(struct _reent *reent) #endif } +__attribute__((libc_callback)) struct _reent *__getreent(void) { struct _reent *_reent; @@ -82,16 +100,127 @@ struct _reent *__getreent(void) if (!s || !uk_sched_started(s)) _reent = _impure_ptr; else - _reent = &uk_thread_current()->reent; + _reent = uk_thread_current()->reent; return _reent; } #endif /* CONFIG_LIBNEWLIBC */ +#if CONFIG_LIBFLEXOS_GATE_INTELPKU_PRIVATE_STACKS +#define SET_TSB(sp_comp, key) \ +do { \ + tsb_comp ## key[thread->tid].sp = (sp_comp); \ + tsb_comp ## key[thread->tid].bp = (sp_comp); \ +} while (0) +#else /* CONFIG_LIBFLEXOS_GATE_INTELPKU_PRIVATE_STACKS */ +/* do nothing without PKU 
private stacks */ +#define SET_TSB(sp_comp, key) +#endif /* CONFIG_LIBFLEXOS_GATE_INTELPKU_PRIVATE_STACKS */ + +#if CONFIG_LIBFLEXOS_INTELPKU +#define SET_TID_PAGE(stack_comp) \ +do { \ + *((unsigned long *) round_pgup((unsigned long) \ + (stack_comp + 1))) = thread->tid; \ +} while (0) +#else +#define SET_TID_PAGE(sp) +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ + +#define SETUP_STACK(stack_comp, key, a1, a2, sp) \ +do { \ + if ((stack_comp)) { \ + /* Save pointer to the thread on the stack to get */ \ + /* current thread. */ \ + /* FIXME FLEXOS PKU in the future this page should be */\ + /* protected with the permissions of the scheduler */ \ + /* so that it can't be subverted by a malicious */ \ + /* compartment */ \ + *((unsigned long *) (stack_comp)) = \ + (unsigned long) thread; \ + SET_TID_PAGE(stack_comp); \ + init_sp(&sp, (stack_comp), a1, a2); \ + } \ + \ + SET_TSB(sp, key); \ +} while (0) + +/* This is a copy of uk_thread_init without manipulations of the PKRU, + * for the exact same reasons that we made a copy of uk_sched_thread_create. + */ +int uk_thread_init_main(struct uk_thread *thread, + struct ukplat_ctx_callbacks *cbs, struct uk_alloc *allocator, + const char *name, void *stack /* __FLEXOS MARKER__: uk_thread_init decl */, + void *tls, void (*function)(void *), void *arg) +{ + unsigned long sp; + + UK_ASSERT(thread != NULL); + UK_ASSERT(stack != NULL); + UK_ASSERT(!have_tls_area() || tls != NULL); + +#if CONFIG_LIBFLEXOS_INTELPKU + thread->tid = uk_num_threads++; +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ +#if CONFIG_LIBFLEXOS_VMEPT + // FIXME: do this properly, this is terrible + thread->tid = uk_num_threads++; + thread->ctrl = NULL; +#endif /* CONFIG_LIBFLEXOS_VMEPT */ + SETUP_STACK(stack, 0, function, arg, sp); + + /* The toolchain is going to insert a number of calls to + * SETUP_STACK depending on the number of compartments, e.g., + * SETUP_STACK(stack_comp1, 1, NULL, NULL); */ + /* __FLEXOS MARKER__: insert stack installations here. */ + + /* Call platform specific setup. 
*/ + thread->ctx = ukplat_thread_ctx_create(cbs, allocator, sp, + (uintptr_t)ukarch_tls_pointer(tls)); + if (thread->ctx == NULL) + return -1; + + thread->name = name; + thread->stack = stack; + + thread->tls = tls; + + /* Not runnable, not exited, not sleeping */ + thread->flags = 0; + thread->wakeup_time = 0LL; + thread->detached = false; + uk_waitq_init(&thread->waiting_threads); + thread->sched = NULL; + thread->prv = NULL; + + // FIXME + //thread->reent = flexos_malloc_whitelist(sizeof(struct _reent), libc); + thread->reent = malloc(sizeof(struct _reent)); + if (!thread->reent) { + flexos_gate(libukdebug, uk_pr_crit, FLEXOS_SHARED_LITERAL( + "Could not allocate reent!")); + return -1; + } + + +#ifdef CONFIG_LIBNEWLIBC + reent_init(thread->reent); +#endif +#if CONFIG_LIBUKSIGNAL + thread->signals_container = flexos_malloc_whitelist(sizeof(struct uk_thread_sig), libuksched); + uk_thread_sig_init(thread->signals_container); +#endif + + uk_pr_info("Thread \"%s\": pointer: %p, stack: %p - %p, tls: %p\n", + name, thread, stack, (void *) ((uintptr_t) stack + STACK_SIZE), tls); + + return 0; +} + int uk_thread_init(struct uk_thread *thread, struct ukplat_ctx_callbacks *cbs, struct uk_alloc *allocator, - const char *name, void *stack, void *tls, - void (*function)(void *), void *arg) + const char *name, void *stack /* __FLEXOS MARKER__: uk_thread_init decl */, + void *tls, void (*function)(void *), void *arg) { unsigned long sp; @@ -99,19 +228,41 @@ int uk_thread_init(struct uk_thread *thread, UK_ASSERT(stack != NULL); UK_ASSERT(!have_tls_area() || tls != NULL); - /* Save pointer to the thread on the stack to get current thread */ - *((unsigned long *) stack) = (unsigned long) thread; +#if CONFIG_LIBFLEXOS_INTELPKU + thread->tid = uk_num_threads++; + + /* FIXME FLEXOS these wrpkru()s are vulnerable to ROP, we have to + * use properly checked doors. */ + unsigned long pkru = rdpkru(); + wrpkru(0x0); +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ +#if CONFIG_LIBFLEXOS_VMEPT + // FIXME: do this properly, this is terrible + thread->tid = uk_num_threads++; + thread->ctrl = NULL; +#endif /* CONFIG_LIBFLEXOS_VMEPT */ - init_sp(&sp, stack, function, arg); + SETUP_STACK(stack, 0, function, arg, sp); + + /* The toolchain is going to insert a number of calls to + * SETUP_STACK depending on the number of compartments, e.g., + * SETUP_STACK(stack_comp1, 1, NULL, NULL); */ + /* __FLEXOS MARKER__: insert stack installations here. */ /* Call platform specific setup. */ thread->ctx = ukplat_thread_ctx_create(cbs, allocator, sp, (uintptr_t)ukarch_tls_pointer(tls)); + +#if CONFIG_LIBFLEXOS_INTELPKU + wrpkru(pkru); +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ + if (thread->ctx == NULL) return -1; thread->name = name; thread->stack = stack; + thread->tls = tls; /* Not runnable, not exited, not sleeping */ @@ -122,15 +273,34 @@ int uk_thread_init(struct uk_thread *thread, thread->sched = NULL; thread->prv = NULL; + // FIXME + //thread->reent = flexos_malloc_whitelist(sizeof(struct _reent), libc); + thread->reent = malloc(sizeof(struct _reent)); + if (!thread->reent) { + flexos_gate(libukdebug, uk_pr_crit, FLEXOS_SHARED_LITERAL( + "Could not allocate reent!")); + return -1; + } + #ifdef CONFIG_LIBNEWLIBC - reent_init(&thread->reent); + reent_init(thread->reent); #endif #if CONFIG_LIBUKSIGNAL - uk_thread_sig_init(&thread->signals_container); +#if CONFIG_LIBFLEXOS_INTELPKU + /* FIXME FLEXOS another hack... 
*/ + pkru = rdpkru(); + wrpkru(0x0); +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ + thread->signals_container = flexos_malloc_whitelist(sizeof(struct uk_thread_sig), libuksched); + uk_thread_sig_init(thread->signals_container); +#if CONFIG_LIBFLEXOS_INTELPKU + wrpkru(pkru); +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ #endif - uk_pr_info("Thread \"%s\": pointer: %p, stack: %p, tls: %p\n", - name, thread, thread->stack, thread->tls); + flexos_gate(libukdebug, uk_pr_info, FLEXOS_SHARED_LITERAL( + "Thread \"%s\": pointer: %p, stack: %p - %p, tls: %p\n"), + name, thread, stack, (void *) ((uintptr_t) stack + STACK_SIZE), tls); return 0; } @@ -139,11 +309,16 @@ void uk_thread_fini(struct uk_thread *thread, struct uk_alloc *allocator) { UK_ASSERT(thread != NULL); #if CONFIG_LIBUKSIGNAL - uk_thread_sig_uninit(&thread->signals_container); + uk_thread_sig_uninit(thread->signals_container); #endif ukplat_thread_ctx_destroy(allocator, thread->ctx); } +void uk_thread_inherit_signal_mask(struct uk_thread *thread) +{ + thread->signals_container->mask = uk_thread_current()->signals_container->mask; +} + static void uk_thread_block_until(struct uk_thread *thread, __snsec until) { unsigned long flags; @@ -151,7 +326,7 @@ static void uk_thread_block_until(struct uk_thread *thread, __snsec until) flags = ukplat_lcpu_save_irqf(); thread->wakeup_time = until; clear_runnable(thread); - uk_sched_thread_blocked(thread->sched, thread); + uk_sched_thread_blocked(thread); ukplat_lcpu_restore_irqf(flags); } @@ -201,7 +376,31 @@ int uk_thread_wait(struct uk_thread *thread) if (thread->detached) return -EINVAL; - uk_waitq_wait_event(&thread->waiting_threads, is_exited(thread)); + do { + struct uk_thread *__current; + unsigned long flags; + DEFINE_WAIT(__wait); + if (is_exited(thread)) + break; + for (;;) { + __current = uk_thread_current(); + /* protect the list */ + flags = ukplat_lcpu_save_irqf(); + uk_waitq_add(&thread->waiting_threads, __wait); + __current->wakeup_time = 0; + clear_runnable(__current); + uk_sched_thread_blocked(__current); + ukplat_lcpu_restore_irqf(flags); + if (is_exited(thread)) + break; + uk_sched_yield(); + } + flags = ukplat_lcpu_save_irqf(); + /* need to wake up */ + uk_thread_wake(__current); + uk_waitq_remove(&thread->waiting_threads, __wait); + ukplat_lcpu_restore_irqf(flags); + } while (0); thread->detached = true; diff --git a/lib/ukschedcoop/schedcoop.c b/lib/ukschedcoop/schedcoop.c index 5e7dbe8963..fcc6d596bd 100644 --- a/lib/ukschedcoop/schedcoop.c +++ b/lib/ukschedcoop/schedcoop.c @@ -29,6 +29,7 @@ * The scheduler is non-preemptive (cooperative), and schedules according * to Round Robin algorithm. */ +#include #include #include #include @@ -47,7 +48,7 @@ static void print_runqueue(struct uk_sched *s) struct uk_thread *th; UK_TAILQ_FOREACH(th, &prv->thread_list, thread_list) { - uk_pr_debug(" Thread \"%s\", runnable=%d\n", + flexos_gate(libc, uk_pr_debug, " Thread \"%s\", runnable=%d\n", th->name, is_runnable(th)); } } @@ -76,7 +77,8 @@ static void schedcoop_schedule(struct uk_sched *s) * find the time when the next timeout expires, else use * 10 seconds. 
*/ - __snsec now = ukplat_monotonic_clock(); + __snsec now; + flexos_gate_r(libukplat, now, ukplat_monotonic_clock); __snsec min_wakeup_time = now + ukarch_time_sec_to_nsec(10); /* wake some sleeping threads */ @@ -114,7 +116,8 @@ static void schedcoop_schedule(struct uk_sched *s) /* block until the next timeout expires, or for 10 secs, * whichever comes first */ - ukplat_lcpu_halt_to(min_wakeup_time); + + flexos_gate(libukplat, ukplat_lcpu_halt_to, min_wakeup_time); /* handle pending events if any */ ukplat_lcpu_irqs_handle_pending(); @@ -175,7 +178,7 @@ static void schedcoop_thread_remove(struct uk_sched *s, struct uk_thread *t) /* Schedule only if current thread is exiting */ if (t == uk_thread_current()) { schedcoop_schedule(s); - uk_pr_warn("schedule() returned! Trying again\n"); + flexos_gate(libc, uk_pr_warn, "schedule() returned! Trying again\n"); } } @@ -205,7 +208,9 @@ static void schedcoop_thread_woken(struct uk_sched *s, struct uk_thread *t) } } -static void idle_thread_fn(void *unused __unused) +/* FIXME FLEXOS this is not really a libc callback */ +__attribute__((libc_callback)) +static void _idle_thread_fn(void *unused) { struct uk_thread *current = uk_thread_current(); struct uk_sched *s = current->sched; @@ -219,6 +224,14 @@ static void idle_thread_fn(void *unused __unused) } } +static void idle_thread_fn(void *unused) { +#if CONFIG_LIBFLEXOS_INTELPKU + /* FIXME FLEXOS: can we do this differently? */ + wrpkru(0x3ffffffc); +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ + _idle_thread_fn(unused); +} + static void schedcoop_yield(struct uk_sched *s) { schedcoop_schedule(s); @@ -226,6 +239,8 @@ static void schedcoop_yield(struct uk_sched *s) struct uk_sched *uk_schedcoop_init(struct uk_alloc *a) { + /* IMPORTANT NOTE in the case of PKU: this is running in protection domain + 0x0 since we didn't start the main thread yet! 
*/ struct schedcoop_private *prv = NULL; struct uk_sched *sched = NULL; diff --git a/lib/uksignal/include/uk/uk_signal.h b/lib/uksignal/include/uk/uk_signal.h index 901a56cc57..f19b7e0009 100644 --- a/lib/uksignal/include/uk/uk_signal.h +++ b/lib/uksignal/include/uk/uk_signal.h @@ -38,6 +38,8 @@ #define __UK_UK_SIGNAL_H__ #include +#include +#include #include #include @@ -45,7 +47,23 @@ extern "C" { #endif +#if CONFIG_LIBFLEXOS_INTELPKU +static struct uk_thread_sig *_uk_crr_thread_sig_container(void); +static struct uk_thread_sig * __attribute__((section(".text_shared"))) + uk_crr_thread_sig_container(void); + +static inline struct uk_thread_sig *_UK_TH_SIG_IMPL(void) +{ + if (rdpkru() == 0x0) + return _uk_crr_thread_sig_container(); + else + return uk_crr_thread_sig_container(); +} + +#define _UK_TH_SIG _UK_TH_SIG_IMPL() +#else #define _UK_TH_SIG uk_crr_thread_sig_container() +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ struct uk_thread; @@ -140,8 +158,6 @@ static inline void uk_remove_proc_signal(int sig) uk_sigdelset(&uk_proc_sig.pending, sig); } -/* maybe move to sched */ -struct uk_thread_sig *uk_crr_thread_sig_container(void); void uk_sig_init_siginfo(siginfo_t *siginfo, int sig); /* returns the uk_signal for sig if it is pending on thread */ diff --git a/lib/uksignal/uk_signal.c b/lib/uksignal/uk_signal.c index cfaea33e69..e16055648d 100644 --- a/lib/uksignal/uk_signal.c +++ b/lib/uksignal/uk_signal.c @@ -211,13 +211,13 @@ int uk_sig_thread_kill(struct uk_thread *tid, int sig) return -1; } - ptr = &tid->signals_container; + ptr = tid->signals_container; /* setup siginfo */ uk_sig_init_siginfo(&siginfo, sig); /* check if we are sending this to ourself */ - if (&tid->signals_container == _UK_TH_SIG) { + if (tid->signals_container == _UK_TH_SIG) { /* if it's not masked just run it */ if (!uk_sigismember(&ptr->mask, sig)) { /* remove the signal from pending */ @@ -326,11 +326,6 @@ int uk_thread_sigmask(int how, const sigset_t *set, sigset_t *oldset) return 0; } -struct uk_thread_sig *uk_crr_thread_sig_container(void) -{ - return &(uk_thread_current()->signals_container); -} - void uk_sig_init_siginfo(siginfo_t *siginfo, int sig) { siginfo->si_signo = sig; diff --git a/lib/uksp/Makefile.uk b/lib/uksp/Makefile.uk index 4e59123194..6c391c9dda 100644 --- a/lib/uksp/Makefile.uk +++ b/lib/uksp/Makefile.uk @@ -3,7 +3,3 @@ $(eval $(call addlib_s,libuksp,$(CONFIG_LIBUKSP))) CINCLUDES-y += -I$(LIBUKSP_BASE)/include LIBUKSP_SRCS-y += $(LIBUKSP_BASE)/ssp.c - -COMPFLAGS-$(CONFIG_STACKPROTECTOR_REGULAR) += -fstack-protector -mstack-protector-guard=global -COMPFLAGS-$(CONFIG_STACKPROTECTOR_STRONG) += -fstack-protector-strong -mstack-protector-guard=global -COMPFLAGS-$(CONFIG_STACKPROTECTOR_ALL) += -fstack-protector-all -mstack-protector-guard=global diff --git a/lib/uksp/ssp.c b/lib/uksp/ssp.c index 7cd9382389..7857a28004 100644 --- a/lib/uksp/ssp.c +++ b/lib/uksp/ssp.c @@ -30,6 +30,7 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ +#include #include #include #include @@ -37,9 +38,9 @@ #include #ifdef CONFIG_LIBUKSP_VALUE_USECONSTANT -const unsigned long __stack_chk_guard = CONFIG_LIBUKSP_VALUE_CONSTANT; +unsigned long __stack_chk_guard __attribute__((flexos_whitelist)) = CONFIG_LIBUKSP_VALUE_CONSTANT; #else -const unsigned long __stack_chk_guard = 0xFF0A0D00; /* terminator canary */ +unsigned long __stack_chk_guard __attribute__((flexos_whitelist)) = 0xFF0A0D00; /* terminator canary */ #endif __attribute__((noreturn)) diff --git a/lib/uktime/time.c b/lib/uktime/time.c index de42046970..1daf578b1c 100644 --- a/lib/uktime/time.c +++ b/lib/uktime/time.c @@ -33,6 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include #include #include #include @@ -50,6 +51,41 @@ #endif #include +/* FIXME FLEXOS: Coccinelle doesn't want to do a gate transformation at several + * places in this file because of the UK_SYSCALL_DEFINE()... this is not + * recognized as a function and Coccinelle aborts. Get rid of these of these + * manual wrappers at some point or change it to something cleaner. */ + +static inline __nsec ukplat_monotonic_clock_wrapper(void) +{ + __nsec now; + flexos_gate_r(libukplat, now, ukplat_monotonic_clock); + return now; +} + +static inline __nsec ukplat_wall_clock_wrapper(void) +{ + __nsec now; + flexos_gate_r(libukplat, now, ukplat_wall_clock); + return now; +} + +void ukplat_lcpu_halt_to(__snsec until); +static inline void ukplat_lcpu_halt_to_wrapper(__snsec until) +{ + flexos_gate(libukplat, ukplat_lcpu_halt_to, until); +} + +static inline void uk_sched_thread_sleep_wrapper(__nsec nsec) +{ + flexos_gate(libuksched, uk_sched_thread_sleep, nsec); +} + +#define ukplat_monotonic_clock(...) ukplat_monotonic_clock_wrapper() +#define ukplat_wall_clock(...) ukplat_wall_clock_wrapper() +#define ukplat_lcpu_halt_to(until) ukplat_lcpu_halt_to_wrapper(until) +#define uk_sched_thread_sleep(nsec) uk_sched_thread_sleep_wrapper(nsec) + int utime(const char *filename __unused, const struct utimbuf *times __unused) { return 0; @@ -192,3 +228,8 @@ int setitimer(int which __unused, const struct itimerval *new_value __unused, WARN_STUBBED(); return 0; } + +#undef ukplat_monotonic_clock +#undef ukplat_wall_clock +#undef ukplat_lcpu_halt_to +#undef uk_sched_thread_sleep diff --git a/lib/vfscore/Config.uk b/lib/vfscore/Config.uk index 212575c0be..ef969bf694 100644 --- a/lib/vfscore/Config.uk +++ b/lib/vfscore/Config.uk @@ -9,6 +9,18 @@ config LIBVFSCORE if LIBVFSCORE menu "vfscore: Configuration" +config LIBCPIO + bool "cpio: general cpio archive extraction" + default n + +config LIBINITRAMFS + bool "initramfs: extract the given cpio file to /" + default n + select LIBRAMFS + select LIBUKLIBPARAM + select LIBVFSCORE_AUTOMOUNT_ROOTFS + select LIBCPIO + config LIBVFSCORE_PIPE_SIZE_ORDER int "Pipe size order" default 16 diff --git a/lib/vfscore/dentry.c b/lib/vfscore/dentry.c index 43ee2fefa9..a3b2a9d64f 100644 --- a/lib/vfscore/dentry.c +++ b/lib/vfscore/dentry.c @@ -34,6 +34,7 @@ #include #include +#include #include #include @@ -45,7 +46,7 @@ static struct uk_hlist_head dentry_hash_table[DENTRY_BUCKETS]; static UK_HLIST_HEAD(fake); -static struct uk_mutex dentry_hash_lock = UK_MUTEX_INITIALIZER(dentry_hash_lock); +static struct uk_mutex dentry_hash_lock __attribute__((flexos_whitelist)) = UK_MUTEX_INITIALIZER(dentry_hash_lock); /* * Get the hash value from the mount point and path name. 
@@ -69,13 +70,19 @@ struct dentry * dentry_alloc(struct dentry *parent_dp, struct vnode *vp, const char *path) { struct mount *mp = vp->v_mount; - struct dentry *dp = (struct dentry*)calloc(sizeof(*dp), 1); + struct dentry *dp = (struct dentry*)flexos_calloc_whitelist(sizeof(*dp), 1, libuksched); if (!dp) { return NULL; } - dp->d_path = strdup(path); + //flexos_gate_r(libc, dp->d_path, strdup, path); + size_t size = strlen(path) + 1; + dp->d_path = malloc(size); + if (dp->d_path) { + memcpy(dp->d_path, path, size); + } + if (!dp->d_path) { free(dp); return NULL; @@ -91,19 +98,19 @@ dentry_alloc(struct dentry *parent_dp, struct vnode *vp, const char *path) if (parent_dp) { dref(parent_dp); - uk_mutex_lock(&parent_dp->d_lock); + flexos_gate(uklock, uk_mutex_lock, &parent_dp->d_lock); // Insert dp into its parent's children list. uk_list_add(&dp->d_child_link, &parent_dp->d_child_list); - uk_mutex_unlock(&parent_dp->d_lock); + flexos_gate(uklock, uk_mutex_unlock, &parent_dp->d_lock); } dp->d_parent = parent_dp; vn_add_name(vp, dp); - uk_mutex_lock(&dentry_hash_lock); + flexos_gate(uklock, uk_mutex_lock, &dentry_hash_lock); uk_hlist_add_head(&dp->d_link, &dentry_hash_table[dentry_hash(mp, path)]); - uk_mutex_unlock(&dentry_hash_lock); + flexos_gate(uklock, uk_mutex_unlock, &dentry_hash_lock); return dp; }; @@ -112,15 +119,15 @@ dentry_lookup(struct mount *mp, char *path) { struct dentry *dp; - uk_mutex_lock(&dentry_hash_lock); + flexos_gate(uklock, uk_mutex_lock, &dentry_hash_lock); uk_hlist_for_each_entry(dp, &dentry_hash_table[dentry_hash(mp, path)], d_link) { if (dp->d_mount == mp && !strncmp(dp->d_path, path, PATH_MAX)) { dp->d_refcnt++; - uk_mutex_unlock(&dentry_hash_lock); + flexos_gate(uklock, uk_mutex_unlock, &dentry_hash_lock); return dp; } } - uk_mutex_unlock(&dentry_hash_lock); + flexos_gate(uklock, uk_mutex_unlock, &dentry_hash_lock); return NULL; /* not found */ } @@ -128,13 +135,13 @@ static void dentry_children_remove(struct dentry *dp) { struct dentry *entry = NULL; - uk_mutex_lock(&dp->d_lock); + flexos_gate(uklock, uk_mutex_lock, &dp->d_lock); uk_list_for_each_entry(entry, &dp->d_child_list, d_child_link) { UK_ASSERT(entry); UK_ASSERT(entry->d_refcnt > 0); uk_hlist_del(&entry->d_link); } - uk_mutex_unlock(&dp->d_lock); + flexos_gate(uklock, uk_mutex_unlock, &dp->d_lock); } @@ -151,22 +158,22 @@ dentry_move(struct dentry *dp, struct dentry *parent_dp, char *path) } if (old_pdp) { - uk_mutex_lock(&old_pdp->d_lock); + flexos_gate(uklock, uk_mutex_lock, &old_pdp->d_lock); // Remove dp from its old parent's children list. uk_list_del(&dp->d_child_link); - uk_mutex_unlock(&old_pdp->d_lock); + flexos_gate(uklock, uk_mutex_unlock, &old_pdp->d_lock); } if (parent_dp) { dref(parent_dp); - uk_mutex_lock(&parent_dp->d_lock); + flexos_gate(uklock, uk_mutex_lock, &parent_dp->d_lock); // Insert dp into its new parent's children list. uk_list_add(&dp->d_child_link, &parent_dp->d_child_list); - uk_mutex_unlock(&parent_dp->d_lock); + flexos_gate(uklock, uk_mutex_unlock, &parent_dp->d_lock); } - uk_mutex_lock(&dentry_hash_lock); + flexos_gate(uklock, uk_mutex_lock, &dentry_hash_lock); // Remove all dp's child dentries from the hashtable. dentry_children_remove(dp); // Remove dp with outdated hash info from the hashtable. @@ -178,7 +185,7 @@ dentry_move(struct dentry *dp, struct dentry *parent_dp, char *path) // Insert dp updated hash info into the hashtable. 
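+	/* Note (editorial, assumption about the FlexOS annotations): dentry_hash_lock
+	 * is marked __attribute__((flexos_whitelist)), i.e. shared across compartments,
+	 * and every lock/unlock on it is routed through flexos_gate(uklock, ...) so the
+	 * mutex operation itself executes in the uklock domain; the rehash below keeps
+	 * the same pattern as the surrounding lock/unlock calls. */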
uk_hlist_add_head(&dp->d_link, &dentry_hash_table[dentry_hash(dp->d_mount, path)]); - uk_mutex_unlock(&dentry_hash_lock); + flexos_gate(uklock, uk_mutex_unlock, &dentry_hash_lock); if (old_pdp) { drele(old_pdp); @@ -191,11 +198,11 @@ dentry_move(struct dentry *dp, struct dentry *parent_dp, char *path) void dentry_remove(struct dentry *dp) { - uk_mutex_lock(&dentry_hash_lock); + flexos_gate(uklock, uk_mutex_lock, &dentry_hash_lock); uk_hlist_del(&dp->d_link); /* put it on a fake list for drele() to work*/ uk_hlist_add_head(&dp->d_link, &fake); - uk_mutex_unlock(&dentry_hash_lock); + flexos_gate(uklock, uk_mutex_unlock, &dentry_hash_lock); } void @@ -204,9 +211,9 @@ dref(struct dentry *dp) UK_ASSERT(dp); UK_ASSERT(dp->d_refcnt > 0); - uk_mutex_lock(&dentry_hash_lock); + flexos_gate(uklock, uk_mutex_lock, &dentry_hash_lock); dp->d_refcnt++; - uk_mutex_unlock(&dentry_hash_lock); + flexos_gate(uklock, uk_mutex_unlock, &dentry_hash_lock); } void @@ -215,21 +222,21 @@ drele(struct dentry *dp) UK_ASSERT(dp); UK_ASSERT(dp->d_refcnt > 0); - uk_mutex_lock(&dentry_hash_lock); + flexos_gate(uklock, uk_mutex_lock, &dentry_hash_lock); if (--dp->d_refcnt) { - uk_mutex_unlock(&dentry_hash_lock); + flexos_gate(uklock, uk_mutex_unlock, &dentry_hash_lock); return; } uk_hlist_del(&dp->d_link); vn_del_name(dp->d_vnode, dp); - uk_mutex_unlock(&dentry_hash_lock); + flexos_gate(uklock, uk_mutex_unlock, &dentry_hash_lock); if (dp->d_parent) { - uk_mutex_lock(&dp->d_parent->d_lock); + flexos_gate(uklock, uk_mutex_lock, &dp->d_parent->d_lock); // Remove dp from its parent's children list. uk_list_del(&dp->d_child_link); - uk_mutex_unlock(&dp->d_parent->d_lock); + flexos_gate(uklock, uk_mutex_unlock, &dp->d_parent->d_lock); drele(dp->d_parent); } @@ -237,7 +244,7 @@ drele(struct dentry *dp) vrele(dp->d_vnode); free(dp->d_path); - free(dp); + flexos_free_whitelist(dp, libuksched); } void diff --git a/lib/vfscore/extra.ld b/lib/vfscore/extra.ld index 173b23503f..3bd74e4e13 100644 --- a/lib/vfscore/extra.ld +++ b/lib/vfscore/extra.ld @@ -1,8 +1,10 @@ SECTIONS { .uk_fs_list : { + . = ALIGN(0x1000); PROVIDE(uk_fslist_start = .); KEEP (*(.uk_fs_list)) + . = ALIGN(0x1000); PROVIDE(uk_fslist_end = .); } } diff --git a/lib/vfscore/file.c b/lib/vfscore/file.c index c5c4d3f744..0583117add 100644 --- a/lib/vfscore/file.c +++ b/lib/vfscore/file.c @@ -31,6 +31,7 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ +#include #include #include #include @@ -58,7 +59,7 @@ int fdrop(struct vfscore_file *fp) if (vfs_close(fp) != 0) drele(fp->f_dentry); - free(fp); + flexos_free_whitelist(fp); return 1; } diff --git a/lib/vfscore/include/vfscore/file.h b/lib/vfscore/include/vfscore/file.h index 42b7e00f32..979c5b9be3 100644 --- a/lib/vfscore/include/vfscore/file.h +++ b/lib/vfscore/include/vfscore/file.h @@ -37,6 +37,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { @@ -60,8 +61,15 @@ struct vfscore_file { struct uk_mutex f_lock; }; -#define FD_LOCK(fp) uk_mutex_lock(&(fp->f_lock)) -#define FD_UNLOCK(fp) uk_mutex_unlock(&(fp->f_lock)) +static inline void FD_LOCK(struct vfscore_file *fp) +{ + flexos_gate(uklock, uk_mutex_lock, &(fp->f_lock)); +} + +static inline void FD_UNLOCK(struct vfscore_file *fp) +{ + flexos_gate(uklock, uk_mutex_unlock, &(fp->f_lock)); +} int vfscore_alloc_fd(void); int vfscore_reserve_fd(int fd); diff --git a/lib/vfscore/main.c b/lib/vfscore/main.c index 9af0c121fe..b0b5d8223f 100644 --- a/lib/vfscore/main.c +++ b/lib/vfscore/main.c @@ -32,6 +32,7 @@ #define _GNU_SOURCE +#include #include #include #include @@ -56,6 +57,18 @@ int vfs_debug = VFSDB_FLAGS; #endif +static inline +void _VFSCORE_SET_ERRNO(int errcode) +{ + errno = errcode; +} + +static inline +void VFSCORE_SET_ERRNO(int errcode) +{ + flexos_gate(libc, _VFSCORE_SET_ERRNO, errcode); +} + /* This macro is for defining an alias of the 64bit version of a * syscall to the regular one. It seams we can make the logic which is * choosing the right call simpler then in common libc. @@ -69,7 +82,7 @@ static mode_t global_umask = S_IWGRP | S_IWOTH; static inline int libc_error(int err) { - errno = err; + VFSCORE_SET_ERRNO(err); return -1; } @@ -146,7 +159,7 @@ int open(const char *pathname, int flags, ...) out_fput: fdrop(fp); out_errno: - errno = error; + VFSCORE_SET_ERRNO(error); trace_vfs_open_err(error); return -1; } @@ -170,7 +183,7 @@ int openat(int dirfd, const char *pathname, int flags, ...) struct vfscore_file *fp; int error = fget(dirfd, &fp); if (error) { - errno = error; + VFSCORE_SET_ERRNO(error); return -1; } @@ -673,7 +686,7 @@ int ioctl(int fd, unsigned long int request, ...) 
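	/* As defined above, errno is libc state, so vfscore no longer assigns it
	 * directly: VFSCORE_SET_ERRNO(err) issues
	 * flexos_gate(libc, _VFSCORE_SET_ERRNO, err) so the store happens inside the
	 * libc domain. The conversions in the error paths below are mechanical,
	 * e.g. `errno = error;` becomes `VFSCORE_SET_ERRNO(error);`. */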
out_errno: trace_vfs_ioctl_err(error); - errno = error; + VFSCORE_SET_ERRNO(error); return -1; } @@ -735,7 +748,7 @@ int __fxstat(int ver __unused, int fd, struct stat *st) out_errno: trace_vfs_fstat_err(error); - errno = error; + VFSCORE_SET_ERRNO(error); return -1; } @@ -763,7 +776,7 @@ int __fxstatat(int ver __unused, int dirfd, const char *pathname, struct stat *s struct vfscore_file *fp; int error = fget(dirfd, &fp); if (error) { - errno = error; + VFSCORE_SET_ERRNO(error); return -1; } @@ -841,7 +854,7 @@ DIR *opendir(const char *path) dir = malloc(sizeof(*dir)); if (!dir) { - errno = ENOMEM; + VFSCORE_SET_ERRNO(ENOMEM); goto out_err; } @@ -853,7 +866,7 @@ DIR *opendir(const char *path) goto out_free_dir; if (!S_ISDIR(st.st_mode)) { - errno = ENOTDIR; + VFSCORE_SET_ERRNO(ENOTDIR); goto out_free_dir; } @@ -873,12 +886,12 @@ DIR *fdopendir(int fd) return NULL; } if (!S_ISDIR(st.st_mode)) { - errno = ENOTDIR; + VFSCORE_SET_ERRNO(ENOTDIR); return NULL; } dir = malloc(sizeof(*dir)); if (!dir) { - errno = ENOMEM; + VFSCORE_SET_ERRNO(ENOMEM); return NULL; } dir->fd = fd; @@ -941,7 +954,7 @@ int scandir(const char *path, struct dirent ***res, free(names); return -1; } - errno = old_errno; + VFSCORE_SET_ERRNO(old_errno); if (cmp) qsort(names, cnt, sizeof *names, (int (*)(const void *, const void *))cmp); @@ -958,7 +971,7 @@ struct dirent *readdir(DIR *dir) if (ret) return ERR2PTR(-ret); - errno = 0; + VFSCORE_SET_ERRNO(0); return result; } @@ -1068,7 +1081,7 @@ mkdir(const char *pathname, mode_t mode) return 0; out_errno: trace_vfs_mkdir_err(error); - errno = error; + VFSCORE_SET_ERRNO(error); return -1; } @@ -1307,14 +1320,14 @@ int symlink(const char *oldpath, const char *newpath) error = ENOENT; if (oldpath == NULL || newpath == NULL) { - errno = ENOENT; + VFSCORE_SET_ERRNO(ENOENT); trace_vfs_symlink_err(error); return (-1); } error = sys_symlink(oldpath, newpath); if (error) { - errno = error; + VFSCORE_SET_ERRNO(error); trace_vfs_symlink_err(error); return (-1); } @@ -1347,7 +1360,7 @@ int unlink(const char *pathname) return 0; out_errno: trace_vfs_unlink_err(error); - errno = error; + VFSCORE_SET_ERRNO(error); return -1; } @@ -1375,7 +1388,7 @@ int __xstat(int ver __unused, const char *pathname, struct stat *st) out_errno: trace_vfs_stat_err(error); - errno = error; + VFSCORE_SET_ERRNO(error); return -1; } @@ -1384,7 +1397,7 @@ LFS64(__xstat); int stat(const char *pathname, struct stat *st) { if (!pathname) { - errno = EINVAL; + VFSCORE_SET_ERRNO(EINVAL); return -1; } return __xstat(1, pathname, st); @@ -1454,7 +1467,7 @@ int __statfs(const char *pathname, struct statfs *buf) return 0; out_errno: trace_vfs_statfs_err(error); - errno = error; + VFSCORE_SET_ERRNO(error); return -1; } __weak_alias(__statfs, statfs); @@ -1485,7 +1498,7 @@ int __fstatfs(int fd, struct statfs *buf) out_errno: trace_vfs_fstatfs_err(error); - errno = error; + VFSCORE_SET_ERRNO(error); return -1; } __weak_alias(__fstatfs, fstatfs); @@ -1603,7 +1616,7 @@ int dup(int oldfd) fdrop(fp); out_errno: trace_vfs_dup_err(error); - errno = error; + VFSCORE_SET_ERRNO(error); return -1; } @@ -1690,7 +1703,7 @@ int fcntl(int fd, int cmd, ...) struct vfscore_file *fp; int ret = 0, error; #if defined(FIONBIO) && defined(FIOASYNC) - int tmp; + int tmp __attribute__((flexos_whitelist)); #endif va_start(ap, cmd); @@ -1751,19 +1764,19 @@ int fcntl(int fd, int cmd, ...) 
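	/* Format strings passed through a gate are wrapped in FLEXOS_SHARED_LITERAL(...)
	 * so the literal is placed where the callee domain can read it; the stubbed-fcntl
	 * warnings below follow the pattern used throughout this patch, e.g.
	 * `uk_pr_warn("...")` -> `flexos_gate(ukdebug, uk_pr_warn, FLEXOS_SHARED_LITERAL("..."))`. */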
fp->f_flags |= O_CLOEXEC; break; case F_SETLK: - uk_pr_warn("fcntl(F_SETLK) stubbed\n"); + flexos_gate(ukdebug, uk_pr_warn, FLEXOS_SHARED_LITERAL("fcntl(F_SETLK) stubbed\n")); break; case F_GETLK: - uk_pr_warn("fcntl(F_GETLK) stubbed\n"); + flexos_gate(ukdebug, uk_pr_warn, FLEXOS_SHARED_LITERAL("fcntl(F_GETLK) stubbed\n")); break; case F_SETLKW: - uk_pr_warn("fcntl(F_SETLKW) stubbed\n"); + flexos_gate(ukdebug, uk_pr_warn, FLEXOS_SHARED_LITERAL("fcntl(F_SETLKW) stubbed\n")); break; case F_SETOWN: - uk_pr_warn("fcntl(F_SETOWN) stubbed\n"); + flexos_gate(ukdebug, uk_pr_warn, FLEXOS_SHARED_LITERAL("fcntl(F_SETOWN) stubbed\n")); break; default: - uk_pr_err("unsupported fcntl cmd 0x%x\n", cmd); + flexos_gate(ukdebug, uk_pr_err, FLEXOS_SHARED_LITERAL("unsupported fcntl cmd 0x%x\n"), cmd); error = EINVAL; } @@ -1775,7 +1788,7 @@ int fcntl(int fd, int cmd, ...) out_errno: trace_vfs_fcntl_err(error); - errno = error; + VFSCORE_SET_ERRNO(error); return -1; } @@ -1971,7 +1984,7 @@ UK_SYSCALL_DEFINE(ssize_t, readlink, const char *, pathname, char *, buf, size_t return size; out_errno: - errno = error; + VFSCORE_SET_ERRNO(error); return -1; } @@ -2068,7 +2081,7 @@ UK_SYSCALL_DEFINE(int, futimesat, int, dirfd, const char*, pathname, const struc return 0; out_errno: - errno = error; + VFSCORE_SET_ERRNO(error); return -1; } @@ -2086,7 +2099,7 @@ UK_SYSCALL_DEFINE(int, utimensat, int, dirfd, const char*, pathname, const struc if (error) { trace_vfs_utimensat_err(error); - errno = error; + VFSCORE_SET_ERRNO(error); return -1; } @@ -2105,7 +2118,7 @@ int futimens(int fd, const struct timespec times[2]) int error = sys_futimens(fd, times); if (error) { trace_vfs_futimens_err(error); - errno = error; + VFSCORE_SET_ERRNO(error); return -1; } @@ -2347,7 +2360,7 @@ fs_noop(void) int chroot(const char *path __unused) { WARN_STUBBED(); - errno = ENOSYS; + VFSCORE_SET_ERRNO(ENOSYS); return -1; } diff --git a/lib/vfscore/mount.c b/lib/vfscore/mount.c index a6df9983e8..8b20f95500 100644 --- a/lib/vfscore/mount.c +++ b/lib/vfscore/mount.c @@ -52,6 +52,7 @@ #include #include #include +#include /* * List for VFS mount points. @@ -62,7 +63,7 @@ UK_LIST_HEAD(mount_list); /* * Global lock to access mount point. */ -static struct uk_mutex mount_lock = UK_MUTEX_INITIALIZER(mount_lock); +static struct uk_mutex mount_lock __section(".data_shared") = UK_MUTEX_INITIALIZER(mount_lock); extern const struct vfscore_fs_type *uk_fslist_start; extern const struct vfscore_fs_type *uk_fslist_end; @@ -119,7 +120,7 @@ mount(const char *dev, const char *dir, const char *fsname, unsigned long flags, struct vnode *vp = NULL; int error; - uk_pr_info("VFS: mounting %s at %s\n", fsname, dir); + // flexos_gate(ukdebug, uk_pr_info, FLEXOS_SHARED_LITERAL("VFS: mounting %s at %s\n"), fsname, dir); if (!dir || *dir == '\0') return ENOENT; @@ -148,16 +149,16 @@ mount(const char *dev, const char *dir, const char *fsname, unsigned long flags, /* static mutex sys_mount_lock; */ /* SCOPE_LOCK(sys_mount_lock); */ - uk_mutex_lock(&mount_lock); + flexos_gate(uklock, uk_mutex_lock, &mount_lock); uk_list_for_each_entry(mp, &mount_list, mnt_list) { if (!strcmp(mp->m_path, dir) || (device && mp->m_dev == device)) { error = EBUSY; /* Already mounted */ - uk_mutex_unlock(&mount_lock); + flexos_gate(uklock, uk_mutex_unlock, &mount_lock); goto err1; } } - uk_mutex_unlock(&mount_lock); + flexos_gate(uklock, uk_mutex_unlock, &mount_lock); /* * Create VFS mount entry. 
*/ @@ -205,7 +206,7 @@ mount(const char *dev, const char *dir, const char *fsname, unsigned long flags, vp->v_flags = VROOT; vp->v_mode = S_IFDIR | S_IRUSR | S_IWUSR | S_IXUSR; - mp->m_root = dentry_alloc(NULL, vp, "/"); + mp->m_root = dentry_alloc(NULL, vp, FLEXOS_SHARED_LITERAL("/")); if (!mp->m_root) { error = ENOMEM; vput(vp); @@ -225,9 +226,9 @@ mount(const char *dev, const char *dir, const char *fsname, unsigned long flags, /* * Insert to mount list */ - uk_mutex_lock(&mount_lock); + flexos_gate(uklock, uk_mutex_lock, &mount_lock); uk_list_add_tail(&mp->mnt_list, &mount_list); - uk_mutex_unlock(&mount_lock); + flexos_gate(uklock, uk_mutex_unlock, &mount_lock); return 0; /* success */ err4: @@ -262,9 +263,9 @@ umount2(const char *path, int flags) struct mount *mp, *tmp; int error, pathlen; - uk_pr_info("VFS: unmounting %s\n", path); + flexos_gate(ukdebug, uk_pr_info, FLEXOS_SHARED_LITERAL("VFS: unmounting %s\n"), path); - uk_mutex_lock(&mount_lock); + flexos_gate(uklock, uk_mutex_lock, &mount_lock); pathlen = strlen(path); if (pathlen >= MAXPATHLEN) { @@ -305,7 +306,7 @@ umount2(const char *path, int flags) device_close(mp->m_dev); free(mp); out: - uk_mutex_unlock(&mount_lock); + flexos_gate(uklock, uk_mutex_unlock, &mount_lock); return error; } @@ -368,7 +369,7 @@ sys_pivot_root(const char *new_root, const char *put_old) void sync(void) { struct mount *mp; - uk_mutex_lock(&mount_lock); + flexos_gate(uklock, uk_mutex_lock, &mount_lock); /* Call each mounted file system. */ uk_list_for_each_entry(mp, &mount_list, mnt_list) { @@ -377,7 +378,7 @@ void sync(void) #ifdef HAVE_BUFFERS bio_sync(); #endif - uk_mutex_unlock(&mount_lock); + flexos_gate(uklock, uk_mutex_unlock, &mount_lock); } /* @@ -425,7 +426,7 @@ vfs_findroot(const char *path, struct mount **mp, char **root) return -1; /* Find mount point from nearest path */ - uk_mutex_lock(&mount_lock); + flexos_gate(uklock, uk_mutex_lock, &mount_lock); uk_list_for_each_entry(tmp, &mount_list, mnt_list) { len = count_match(path, tmp->m_path); if (len > max_len) { @@ -433,7 +434,7 @@ vfs_findroot(const char *path, struct mount **mp, char **root) m = tmp; } } - uk_mutex_unlock(&mount_lock); + flexos_gate(uklock, uk_mutex_unlock, &mount_lock); if (m == NULL) return -1; *root = (char *)(path + max_len); @@ -484,15 +485,15 @@ void vfscore_mount_dump(void) { struct mount *mp; - uk_mutex_lock(&mount_lock); + flexos_gate(uklock, uk_mutex_lock, &mount_lock); - uk_pr_debug("vfscore_mount_dump\n"); - uk_pr_debug("dev count root\n"); - uk_pr_debug("-------- ----- --------\n"); + flexos_gate(ukdebug, uk_pr_debug, FLEXOS_SHARED_LITERAL("vfscore_mount_dump\n")); + flexos_gate(ukdebug, uk_pr_debug, FLEXOS_SHARED_LITERAL("dev count root\n")); + flexos_gate(ukdebug, uk_pr_debug, FLEXOS_SHARED_LITERAL("-------- ----- --------\n")); uk_list_for_each_entry(mp, &mount_list, mnt_list) { - uk_pr_debug("%8p %5d %s\n", mp->m_dev, mp->m_count, mp->m_path); + flexos_gate(ukdebug, uk_pr_debug, FLEXOS_SHARED_LITERAL("%8p %5d %s\n"), mp->m_dev, mp->m_count, mp->m_path); } - uk_mutex_unlock(&mount_lock); + flexos_gate(uklock, uk_mutex_unlock, &mount_lock); } #endif diff --git a/lib/vfscore/pipe.c b/lib/vfscore/pipe.c index 09597dbcb4..cae72a8169 100644 --- a/lib/vfscore/pipe.c +++ b/lib/vfscore/pipe.c @@ -32,6 +32,7 @@ */ #include +#include #include #include #include @@ -87,7 +88,8 @@ static struct pipe_buf *pipe_buf_alloc(int capacity) UK_ASSERT(POWER_OF_2(capacity)); - pipe_buf = malloc(sizeof(*pipe_buf)); + /* FIXME FLEXOS this is too much shared data */ + pipe_buf = 
flexos_malloc_whitelist(sizeof(*pipe_buf), libuksched); if (!pipe_buf) return NULL; @@ -100,10 +102,10 @@ static struct pipe_buf *pipe_buf_alloc(int capacity) pipe_buf->capacity = capacity; pipe_buf->cons = 0; pipe_buf->prod = 0; - uk_mutex_init(&pipe_buf->rdlock); - uk_mutex_init(&pipe_buf->wrlock); - uk_waitq_init(&pipe_buf->rdwq); - uk_waitq_init(&pipe_buf->wrwq); + flexos_gate(uklock, uk_mutex_init,&pipe_buf->rdlock); + flexos_gate(uklock, uk_mutex_init,&pipe_buf->wrlock); + flexos_gate(libuksched, uk_waitq_init, &pipe_buf->rdwq); + flexos_gate(libuksched, uk_waitq_init, &pipe_buf->wrwq); return pipe_buf; } @@ -254,7 +256,7 @@ static int pipe_write(struct vnode *vnode, return -EPIPE; } - uk_mutex_lock(&pipe_buf->wrlock); + flexos_gate(uklock, uk_mutex_lock, &pipe_buf->wrlock); while (data_available && uio_idx < buf->uio_iovcnt) { struct iovec *iovec = &buf->uio_iov[uio_idx]; unsigned long off = 0; @@ -272,10 +274,33 @@ static int pipe_write(struct vnode *vnode, } else { /* Wait until data available */ while (!pipe_buf_can_write(pipe_buf)) { - uk_mutex_unlock(&pipe_buf->wrlock); - uk_waitq_wait_event(&pipe_buf->wrwq, - pipe_buf_can_write(pipe_buf)); - uk_mutex_lock(&pipe_buf->wrlock); + flexos_gate(uklock, uk_mutex_unlock, &pipe_buf->wrlock); + do { + struct uk_thread *__current; + unsigned long flags; + DEFINE_WAIT(__wait); + if (pipe_buf_can_write(pipe_buf)) + break; + for (;;) { + __current = uk_thread_current(); + /* protect the list */ + flags = ukplat_lcpu_save_irqf(); + flexos_gate(libuksched, uk_waitq_add, &pipe_buf->wrwq, __wait); + flexos_gate(libuksched, uk_thread_set_wakeup_time, __current, 0); + flexos_gate(libuksched, clear_runnable, __current); + flexos_gate(libuksched, uk_sched_thread_blocked, __current); + ukplat_lcpu_restore_irqf(flags); + if (pipe_buf_can_write(pipe_buf)) + break; + flexos_gate(libuksched, uk_sched_yield); + } + flags = ukplat_lcpu_save_irqf(); + /* need to wake up */ + flexos_gate(libuksched, uk_thread_wake, __current); + flexos_gate(libuksched, uk_waitq_remove, &pipe_buf->wrwq, __wait); + ukplat_lcpu_restore_irqf(flags); + } while (0); + flexos_gate(uklock, uk_mutex_lock, &pipe_buf->wrlock); } } @@ -286,13 +311,13 @@ static int pipe_write(struct vnode *vnode, off += written_bytes; /* wake some readers */ - uk_waitq_wake_up(&pipe_buf->rdwq); + flexos_gate(libuksched, uk_waitq_wake_up, &pipe_buf->rdwq); } } uio_idx++; } - uk_mutex_unlock(&pipe_buf->wrlock); + flexos_gate(uklock, uk_mutex_unlock, &pipe_buf->wrlock); return 0; } @@ -307,9 +332,9 @@ static int pipe_read(struct vnode *vnode, bool data_available = true; int uio_idx = 0; - uk_mutex_lock(&pipe_buf->rdlock); + flexos_gate(uklock, uk_mutex_lock, &pipe_buf->rdlock); if (nonblocking && !pipe_buf_can_read(pipe_buf)) { - uk_mutex_unlock(&pipe_buf->rdlock); + flexos_gate(uklock, uk_mutex_unlock, &pipe_buf->rdlock); return EAGAIN; } @@ -330,10 +355,33 @@ static int pipe_read(struct vnode *vnode, } else { /* Wait until data available */ while (!pipe_buf_can_read(pipe_buf)) { - uk_mutex_unlock(&pipe_buf->rdlock); - uk_waitq_wait_event(&pipe_buf->rdwq, - pipe_buf_can_read(pipe_buf)); - uk_mutex_lock(&pipe_buf->rdlock); + flexos_gate(uklock, uk_mutex_unlock, &pipe_buf->rdlock); + do { + struct uk_thread *__current; + unsigned long flags; + DEFINE_WAIT(__wait); + if (pipe_buf_can_read(pipe_buf)) + break; + for (;;) { + __current = uk_thread_current(); + /* protect the list */ + flags = ukplat_lcpu_save_irqf(); + flexos_gate(libuksched, uk_waitq_add, &pipe_buf->rdwq, __wait); + flexos_gate(libuksched, 
uk_thread_set_wakeup_time, __current, 0); + flexos_gate(libuksched, clear_runnable, __current); + flexos_gate(libuksched, uk_sched_thread_blocked, __current); + ukplat_lcpu_restore_irqf(flags); + if (pipe_buf_can_read(pipe_buf)) + break; + flexos_gate(libuksched, uk_sched_yield); + } + flags = ukplat_lcpu_save_irqf(); + /* need to wake up */ + flexos_gate(libuksched, uk_thread_wake, __current); + flexos_gate(libuksched, uk_waitq_remove, &pipe_buf->rdwq, __wait); + ukplat_lcpu_restore_irqf(flags); + } while (0); + flexos_gate(uklock, uk_mutex_lock, &pipe_buf->rdlock); } } @@ -344,13 +392,13 @@ static int pipe_read(struct vnode *vnode, off += read_bytes; /* wake some writers */ - uk_waitq_wake_up(&pipe_buf->wrwq); + flexos_gate(libuksched, uk_waitq_wake_up, &pipe_buf->wrwq); } } uio_idx++; } - uk_mutex_unlock(&pipe_buf->rdlock); + flexos_gate(uklock, uk_mutex_unlock, &pipe_buf->rdlock); return 0; } @@ -392,9 +440,9 @@ static int pipe_ioctl(struct vnode *vnode, switch (com) { case FIONREAD: - uk_mutex_lock(&pipe_buf->rdlock); + flexos_gate(uklock, uk_mutex_lock, &pipe_buf->rdlock); *((int *) data) = pipe_buf_get_available(pipe_buf); - uk_mutex_unlock(&pipe_buf->rdlock); + flexos_gate(uklock, uk_mutex_unlock, &pipe_buf->rdlock); return 0; default: return -EINVAL; @@ -478,7 +526,7 @@ static int pipe_fd_alloc(struct pipe_file *pipe_file, int flags) } /* Allocate file, dentry, and vnode */ - vfs_file = calloc(1, sizeof(*vfs_file)); + vfs_file = flexos_calloc_whitelist(1, sizeof(*vfs_file)); if (!vfs_file) { ret = -ENOMEM; goto ERR_MALLOC_VFS_FILE; @@ -492,7 +540,7 @@ static int pipe_fd_alloc(struct pipe_file *pipe_file, int flags) goto ERR_ALLOC_VNODE; } - uk_mutex_unlock(&p_vnode->v_lock); + flexos_gate(uklock, uk_mutex_unlock, &p_vnode->v_lock); p_dentry = dentry_alloc(NULL, p_vnode, "/"); if (!p_dentry) { @@ -526,7 +574,7 @@ static int pipe_fd_alloc(struct pipe_file *pipe_file, int flags) ERR_ALLOC_DENTRY: vrele(p_vnode); ERR_ALLOC_VNODE: - free(vfs_file); + flexos_free_whitelist(vfs_file); ERR_MALLOC_VFS_FILE: vfscore_put_fd(vfs_fd); ERR_EXIT: diff --git a/lib/vfscore/rootfs.c b/lib/vfscore/rootfs.c index 8a6e099f4a..e21bf6f5fb 100644 --- a/lib/vfscore/rootfs.c +++ b/lib/vfscore/rootfs.c @@ -40,6 +40,12 @@ #include #include #include +#ifdef CONFIG_LIBINITRAMFS +#include +#include +#include +#endif +#include static const char *rootfs = CONFIG_LIBVFSCORE_ROOTFS; @@ -66,6 +72,29 @@ UK_LIB_PARAM_STR(rootdev); UK_LIB_PARAM_STR(rootopts); UK_LIB_PARAM(rootflags, __u64); +static inline int _rootfs_initramfs() +{ + struct ukplat_memregion_desc memregion_desc __attribute__((flexos_whitelist)); + int initrd; + enum cpio_error error; + + flexos_gate_r(libukplat, initrd, ukplat_memregion_find_initrd0, &memregion_desc); + if (initrd != -1) { + flexos_gate(libukplat, ukplat_memregion_get, initrd, &memregion_desc); + if (mount("", "/", "ramfs", 0, NULL) < 0) + return -CPIO_MOUNT_FAILED; + + error = + cpio_extract("/", memregion_desc.base, memregion_desc.len); + if (error < 0) + flexos_gate(ukdebug, uk_pr_err, FLEXOS_SHARED_LITERAL("Failed to mount initrd\n")); + return error; + } + flexos_gate(ukdebug, uk_pr_err, FLEXOS_SHARED_LITERAL("Failed to mount initrd\n")); + return -CPIO_NO_MEMREGION; +} + +__attribute__((libukboot_callback)) static int vfscore_rootfs(void) { /* @@ -74,21 +103,19 @@ static int vfscore_rootfs(void) * have to be mounted later. 
*/ if (!rootfs || rootfs[0] == '\0') { - uk_pr_crit("Parameter 'vfs.rootfs' is invalid\n"); + flexos_gate(ukdebug, uk_pr_crit, FLEXOS_SHARED_LITERAL("Parameter 'vfs.rootfs' is invalid\n")); return -1; } - uk_pr_info("Mount %s to /...\n", rootfs); +#ifdef CONFIG_LIBINITRAMFS + return _rootfs_initramfs(); +#else + flexos_gate(ukdebug, uk_pr_info, "Mount %s to /...\n", rootfs); if (mount(rootdev, "/", rootfs, rootflags, rootopts) != 0) { - uk_pr_crit("Failed to mount /: %d\n", errno); + flexos_gate(ukdebug, uk_pr_crit, FLEXOS_SHARED_LITERAL("Failed to mount /: %d\n"), errno); return -1; } - - /* - * TODO: Alternatively we could extract an archive found - * as initrd to a ramfs '/' if we have got fsname 'initrd' - */ - +#endif return 0; } diff --git a/lib/vfscore/stdio.c b/lib/vfscore/stdio.c index b59a9ee3d9..519e247bf4 100644 --- a/lib/vfscore/stdio.c +++ b/lib/vfscore/stdio.c @@ -41,10 +41,12 @@ #include #include #include +#include static int __write_fn(void *dst __unused, void *src, size_t *cnt) { - int ret = ukplat_coutk(src, *cnt); + int ret; + flexos_gate_r(libukplat, ret, ukplat_coutk, src, *cnt); if (ret < 0) /* TODO: remove -1 when vfscore switches to negative @@ -184,7 +186,12 @@ static struct vnops stdio_vnops = { stdio_symlink, /* symbolic link */ }; -static struct vnode stdio_vnode = { +/* FIXME FLEXOS: this vnode shouldn't be shared when using VM/EPT */ +static struct vnode stdio_vnode +#if CONFIG_LIBFLEXOS_INTELPKU +__section(".data_shared") +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ += { .v_ino = 1, .v_op = &stdio_vnops, .v_lock = UK_MUTEX_INITIALIZER(stdio_vnode.v_lock), @@ -215,7 +222,7 @@ void init_stdio(void) UK_ASSERT(fd == 0); vfscore_install_fd(0, &stdio_file); if (dup2(0, 1) != 1) - uk_pr_err("failed to dup to stdin\n"); + flexos_gate(ukdebug, uk_pr_err, FLEXOS_SHARED_LITERAL("failed to dup to stdin\n")); if (dup2(0, 2) != 2) - uk_pr_err("failed to dup to stderr\n"); + flexos_gate(ukdebug, uk_pr_err, FLEXOS_SHARED_LITERAL("failed to dup to stderr\n")); } diff --git a/lib/vfscore/syscalls.c b/lib/vfscore/syscalls.c index c54819057c..66a2792d28 100644 --- a/lib/vfscore/syscalls.c +++ b/lib/vfscore/syscalls.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -197,7 +198,7 @@ sys_open(char *path, int flags, mode_t mode, struct vfscore_file **fpp) goto out_vn_unlock; } - fp = calloc(sizeof(struct vfscore_file), 1); + fp = flexos_calloc_whitelist(sizeof(struct vfscore_file), 1); if (!fp) { error = ENOMEM; goto out_vn_unlock; @@ -213,7 +214,7 @@ sys_open(char *path, int flags, mode_t mode, struct vfscore_file **fpp) fp->f_dentry = dp; dp = NULL; - uk_mutex_init(&fp->f_lock); + flexos_gate(uklock, uk_mutex_init, &fp->f_lock); error = VOP_OPEN(vp, fp); if (error) { @@ -271,7 +272,7 @@ sys_read(struct vfscore_file *fp, const struct iovec *iov, size_t niov, return 0; } - struct uio uio; + struct uio uio __attribute__((flexos_whitelist)); /* TODO: is it necessary to copy iov within Unikraft? * OSv did this, mentioning this reason: * @@ -279,7 +280,7 @@ sys_read(struct vfscore_file *fp, const struct iovec *iov, size_t niov, * zeros the iov_len fields when it reads from disk, so we * have to copy iov. " */ - copy_iov = calloc(sizeof(struct iovec), niov); + copy_iov = flexos_calloc_whitelist(sizeof(struct iovec), niov); if (!copy_iov) return ENOMEM; memcpy(copy_iov, iov, sizeof(struct iovec)*niov); @@ -292,7 +293,7 @@ sys_read(struct vfscore_file *fp, const struct iovec *iov, size_t niov, error = vfs_read(fp, &uio, (offset == -1) ? 
0 : FOF_OFFSET); *count = bytes - uio.uio_resid; - free(copy_iov); + flexos_free_whitelist(copy_iov); return error; } @@ -320,7 +321,7 @@ sys_write(struct vfscore_file *fp, const struct iovec *iov, size_t niov, return 0; } - struct uio uio; + struct uio uio __attribute__((flexos_whitelist)); /* TODO: same note as in sys_read. Original comment: * @@ -328,7 +329,7 @@ sys_write(struct vfscore_file *fp, const struct iovec *iov, size_t niov, * iov_len fields when it writes to disk, so we have to copy iov. */ /* std::vector copy_iov(iov, iov + niov); */ - copy_iov = calloc(sizeof(struct iovec), niov); + copy_iov = flexos_calloc_whitelist(sizeof(struct iovec), niov); if (!copy_iov) return ENOMEM; memcpy(copy_iov, iov, sizeof(struct iovec)*niov); @@ -341,7 +342,7 @@ sys_write(struct vfscore_file *fp, const struct iovec *iov, size_t niov, error = vfs_write(fp, &uio, (offset == -1) ? 0 : FOF_OFFSET); *count = bytes - uio.uio_resid; - free(copy_iov); + flexos_free_whitelist(copy_iov); return error; } diff --git a/lib/vfscore/vnode.c b/lib/vfscore/vnode.c index f3f164490c..8e6001140f 100644 --- a/lib/vfscore/vnode.c +++ b/lib/vfscore/vnode.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -87,9 +88,17 @@ static struct uk_list_head vnode_table[VNODE_BUCKETS]; * If a vnode is already locked, there is no need to * lock this global lock to access internal data. */ -static struct uk_mutex vnode_lock = UK_MUTEX_INITIALIZER(vnode_lock); -#define VNODE_LOCK() uk_mutex_lock(&vnode_lock) -#define VNODE_UNLOCK() uk_mutex_unlock(&vnode_lock) +static struct uk_mutex vnode_lock __section(".data_shared") = UK_MUTEX_INITIALIZER(vnode_lock); + +static inline void VNODE_LOCK() +{ + flexos_gate(uklock, uk_mutex_lock, &vnode_lock); +} + +static inline void VNODE_UNLOCK() +{ + flexos_gate(uklock, uk_mutex_unlock, &vnode_lock); +} /* TODO: implement mutex_owned */ #define VNODE_OWNED() (1) @@ -120,7 +129,7 @@ vn_lookup(struct mount *mp, uint64_t ino) uk_list_for_each_entry(vp, &vnode_table[vn_hash(mp, ino)], v_link) { if (vp->v_mount == mp && vp->v_ino == ino) { vp->v_refcnt++; - uk_mutex_lock(&vp->v_lock); + flexos_gate(uklock, uk_mutex_lock, &vp->v_lock); return vp; } } @@ -150,7 +159,7 @@ vn_lock(struct vnode *vp) UK_ASSERT(vp); UK_ASSERT(vp->v_refcnt > 0); - uk_mutex_lock(&vp->v_lock); + flexos_gate(uklock, uk_mutex_lock, &vp->v_lock); DPRINTF(VFSDB_VNODE, ("vn_lock: %s\n", vn_path(vp))); } @@ -163,7 +172,7 @@ vn_unlock(struct vnode *vp) UK_ASSERT(vp); UK_ASSERT(vp->v_refcnt >= 0); - uk_mutex_unlock(&vp->v_lock); + flexos_gate(uklock, uk_mutex_unlock, &vp->v_lock); DPRINTF(VFSDB_VNODE, ("vn_lock: %s\n", vn_path(vp))); } @@ -191,9 +200,10 @@ vfscore_vget(struct mount *mp, uint64_t ino, struct vnode **vpp) return 1; } - vp = calloc(1, sizeof(*vp)); + vp = flexos_calloc_whitelist(1, sizeof(*vp)); if (!vp) { VNODE_UNLOCK(); + flexos_free_whitelist(vp); return 0; } @@ -202,17 +212,17 @@ vfscore_vget(struct mount *mp, uint64_t ino, struct vnode **vpp) vp->v_mount = mp; vp->v_refcnt = 1; vp->v_op = mp->m_op->vfs_vnops; - uk_mutex_init(&vp->v_lock); + flexos_gate(uklock, uk_mutex_init,&vp->v_lock); /* * Request to allocate fs specific data for vnode. 
*/ if ((error = VFS_VGET(mp, vp)) != 0) { VNODE_UNLOCK(); - free(vp); + flexos_free_whitelist(vp); return 0; } vfs_busy(vp->v_mount); - uk_mutex_lock(&vp->v_lock); + flexos_gate(uklock, uk_mutex_lock, &vp->v_lock); uk_list_add(&vp->v_link, &vnode_table[vn_hash(mp, ino)]); VNODE_UNLOCK(); @@ -248,8 +258,8 @@ vput(struct vnode *vp) if (vp->v_op->vop_inactive) VOP_INACTIVE(vp); vfs_unbusy(vp->v_mount); - uk_mutex_unlock(&vp->v_lock); - free(vp); + flexos_gate(uklock, uk_mutex_unlock, &vp->v_lock); + flexos_free_whitelist(vp); } /* @@ -294,7 +304,7 @@ vrele(struct vnode *vp) */ VOP_INACTIVE(vp); vfs_unbusy(vp->v_mount); - free(vp); + flexos_free_whitelist(vp); } /* @@ -454,16 +464,16 @@ vnode_dump(void) VNODE_LOCK(); - uk_pr_debug("Dump vnode\n"); - uk_pr_debug(" vnode mount type refcnt path\n"); - uk_pr_debug(" ---------------- ---------------- ----- ------ ------------------------------\n"); + flexos_gate(ukdebug, uk_pr_debug, FLEXOS_SHARED_LITERAL("Dump vnode\n")); + flexos_gate(ukdebug, uk_pr_debug, FLEXOS_SHARED_LITERAL(" vnode mount type refcnt path\n")); + flexos_gate(ukdebug, uk_pr_debug, FLEXOS_SHARED_LITERAL(" ---------------- ---------------- ----- ------ ------------------------------\n")); for (i = 0; i < VNODE_BUCKETS; i++) { uk_list_for_each_entry(vp, &vnode_table[i], v_link) { mp = vp->v_mount; - uk_pr_debug(" %016lx %016lx %s %6d %s%s\n", + flexos_gate(ukdebug, uk_pr_debug, FLEXOS_SHARED_LITERAL(" %016lx %016lx %s %6d %s%s\n"), (unsigned long) vp, (unsigned long) mp, type[vp->v_type], vp->v_refcnt, @@ -471,7 +481,7 @@ vnode_dump(void) vn_path(vp)); } } - uk_pr_debug("\n"); + flexos_gate(ukdebug, uk_pr_debug, FLEXOS_SHARED_LITERAL("\n")); VNODE_UNLOCK(); } #endif diff --git a/plat/Config.uk b/plat/Config.uk index 76a3f31875..747c42e8ef 100644 --- a/plat/Config.uk +++ b/plat/Config.uk @@ -5,6 +5,22 @@ config UKPLAT_MEMRNAME help Enable name field in memory region descriptors +config PT_API + bool "Virtual memory management API" + default n + depends on ARCH_X86_64 + help + Provide functions for managing virtual memory + +config DYNAMIC_PT + bool "Boot-time initialization of page tables" + default n + depends on ARCH_X86_64 + help + Build page tables at boot time and provide API for managing + virtual mappings. + select PT_API + config EARLY_PRINT_PL011_UART_ADDR hex "Early debug console pl011 serial address" default 0x09000000 diff --git a/plat/common/mm.c b/plat/common/mm.c new file mode 100644 index 0000000000..8621e3efad --- /dev/null +++ b/plat/common/mm.c @@ -0,0 +1,1024 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Authors: Stefan Teodorescu + * + * Copyright (c) 2021, University Politehnica of Bucharest. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +static unsigned long pt_bitmap_start_addr; +static unsigned long pt_bitmap_length; + +static unsigned long pt_mem_start_addr; +static unsigned long pt_mem_length; + +static unsigned long stack_bitmap_start_addr[UK_BITS_TO_LONGS(STACK_COUNT)]; +static unsigned long stack_bitmap_length = UK_BITS_TO_LONGS(STACK_COUNT); + +size_t _phys_mem_region_list_size; +struct phys_mem_region _phys_mem_region_list[MEM_REGIONS_NUMBER]; + +unsigned long _virt_offset; + +/* + * Variable used in the initialization phase during booting when allocating + * page tables does not use the page table API function uk_pt_alloc_table. + * The initial page tables are allocated sequantially and this variable is the + * counter of used page tables. + */ +static size_t _used_pts_count; + +/* TODO fix duplicate with POSIX mmap */ +static unsigned long get_free_virtual_area(unsigned long start, size_t length, + unsigned long interval_end) +{ + unsigned long page; + + if (!PAGE_ALIGNED(length)) + return -1; + + while (start <= interval_end - length) { + for (page = start; page < start + length; page += PAGE_SIZE) { + if (PAGE_PRESENT(uk_virt_to_pte(page))) + break; + } + + if (page == start + length) + return start; + + start = page + PAGE_SIZE; + } + + return -1; +} + +/** + * Allocate a page table for a given level (in the PT hierarchy). + * + * @param level: the level of the needed page table. + * + * @return: virtual address of newly allocated page table or PAGE_INVALID + * on failure. + */ +static unsigned long uk_pt_alloc_table(size_t level, int is_initmem) +{ + unsigned long offset, pt_vaddr; +#ifdef CONFIG_PARAVIRT + int rc; +#endif /* CONFIG_PARAVIRT */ + + offset = uk_bitmap_find_next_zero_area( + (unsigned long *) pt_bitmap_start_addr, + pt_bitmap_length, + 0 /* start */, 1 /* nr */, 0 /* align_mask */); + + if (offset * PAGE_SIZE > pt_mem_length) { + uk_pr_err("Filled up all available space for page tables\n"); + return PAGE_INVALID; + } + + uk_bitmap_set((unsigned long *) pt_bitmap_start_addr, offset, 1); + pt_vaddr = pt_mem_start_addr + (offset << PAGE_SHIFT) + _virt_offset; + +#ifdef CONFIG_PARAVIRT + rc = uk_page_set_prot(pt_vaddr, PAGE_PROT_READ | PAGE_PROT_WRITE); + if (rc) + return PAGE_INVALID; +#endif /* CONFIG_PARAVIRT */ + + memset((void *) pt_vaddr, 0, + sizeof(unsigned long) * pagetable_entries[level - 1]); + + /* Xen requires that PTs are mapped read-only */ +#ifdef CONFIG_PARAVIRT + /* + * When using this function on Xen for the initmem part, the page + * must not be set to read-only, as we are currently writing + * directly into it. 
All page tables will be set later to read-only + * before setting the new pt_base. + */ + if (!is_initmem) { + rc = uk_page_set_prot(pt_vaddr, PAGE_PROT_READ); + if (rc) + return PAGE_INVALID; + } +#endif /* CONFIG_PARAVIRT */ + + /* + * This is an L(n + 1) entry, so we set L(n + 1) flags + * (Index in pagetable_protections is level of PT - 1) + */ + return (pt_virt_to_mfn(pt_vaddr) << PAGE_SHIFT) + | pagetable_protections[level]; +} + +static int uk_pt_release_if_unused(unsigned long vaddr, unsigned long pt, + unsigned long parent_pt, size_t level) +{ + unsigned long offset; + size_t i; + int rc; + + if (!PAGE_ALIGNED(pt) || !PAGE_ALIGNED(parent_pt)) { + uk_pr_err("Table's address must be aligned to page size\n"); + return -1; + } + + for (i = 0; i < pagetable_entries[level - 1]; i++) { + if (PAGE_PRESENT(ukarch_pte_read(pt, i, level))) + return 0; + } + + rc = ukarch_pte_write(parent_pt, Lx_OFFSET(vaddr, level + 1), 0, + level + 1); + if (rc) + return -1; + + ukarch_flush_tlb_entry(parent_pt); + + offset = (pt - pt_mem_start_addr - _virt_offset) >> PAGE_SHIFT; + uk_bitmap_clear((unsigned long *) pt_bitmap_start_addr, offset, 1); + + return 0; +} + +static int _page_map(unsigned long pt, unsigned long vaddr, unsigned long paddr, + unsigned long prot, unsigned long flags, int is_initmem, + int (*pte_write)(unsigned long, size_t, unsigned long, size_t)) +{ + unsigned long pte; + int rc; + + if (!PAGE_ALIGNED(vaddr)) { + uk_pr_err("Virt address must be aligned to page size\n"); + return -1; + } + if (flags & PAGE_FLAG_LARGE && !PAGE_LARGE_ALIGNED(vaddr)) { + uk_pr_err("Virt ddress must be aligned to large page size\n"); + return -1; + } + +#ifdef CONFIG_PARAVIRT + if (flags & PAGE_FLAG_LARGE) { + uk_pr_err("Large pages are not supported on PV guest\n"); + return -1; + } +#endif /* CONFIG_PARAVIRT */ + + if (paddr == PAGE_PADDR_ANY) { + paddr = uk_get_next_free_frame(flags); + + if (paddr == PAGE_INVALID) + return -1; + } else if (!PAGE_ALIGNED(paddr)) { + uk_pr_err("Phys address must be aligned to page size\n"); + return -1; + } else if ((flags & PAGE_FLAG_LARGE) && !PAGE_LARGE_ALIGNED(paddr)) { + uk_pr_err("Phys address must be aligned to large page size\n"); + return -1; + } + + /* + * XXX: On 64-bits architectures (x86_64 and arm64) the hierarchical + * page tables have a 4 level layout. This implementation will need a + * revision when introducing support for 32-bits architectures, since + * there are only 3 levels of page tables. 
+ */ + pte = ukarch_pte_read(pt, L4_OFFSET(vaddr), 4); + if (!PAGE_PRESENT(pte)) { + pte = uk_pt_alloc_table(3, is_initmem); + if (pte == PAGE_INVALID) + return -1; + + rc = pte_write(pt, L4_OFFSET(vaddr), pte, 4); + if (rc) + return -1; + } + + pt = (unsigned long) pt_pte_to_virt(pte); + pte = ukarch_pte_read(pt, L3_OFFSET(vaddr), 3); + if (!PAGE_PRESENT(pte)) { + pte = uk_pt_alloc_table(2, is_initmem); + if (pte == PAGE_INVALID) + return -1; + + rc = pte_write(pt, L3_OFFSET(vaddr), pte, 3); + if (rc) + return -1; + } + + pt = (unsigned long) pt_pte_to_virt(pte); + pte = ukarch_pte_read(pt, L2_OFFSET(vaddr), 2); + if (flags & PAGE_FLAG_LARGE) { + if (PAGE_PRESENT(pte)) + return -1; + + pte = ukarch_pte_create(PTE_REMOVE_FLAGS(paddr), prot, 2); + rc = pte_write(pt, L2_OFFSET(vaddr), pte, 2); + if (rc) + return -1; + + uk_frame_reserve(paddr, PAGE_LARGE_SIZE, 1); + return 0; + } + if (!PAGE_PRESENT(pte)) { + pte = uk_pt_alloc_table(1, is_initmem); + if (pte == PAGE_INVALID) + return -1; + + rc = pte_write(pt, L2_OFFSET(vaddr), pte, 2); + if (rc) + return -1; + } + + pt = (unsigned long) pt_pte_to_virt(pte); + pte = ukarch_pte_read(pt, L1_OFFSET(vaddr), 1); + /* TODO rethink here */ + //if (!PAGE_PRESENT(pte)) { + if (1) { + pte = ukarch_pte_create( + paddr, + prot, 1); + rc = pte_write(pt, L1_OFFSET(vaddr), pte, 1); + if (rc) + return -1; + uk_frame_reserve(mframe_to_pframe(paddr), PAGE_SIZE, 1); + } else { + uk_pr_info("Virtual address 0x%08lx is already mapped\n", + vaddr); + return -1; + } + + return 0; +} + +int _initmem_page_map(unsigned long pt, unsigned long vaddr, + unsigned long paddr, unsigned long prot, + unsigned long flags) +{ + return _page_map(pt, vaddr, paddr, prot, flags, 1, + _ukarch_pte_write_raw); +} + +int uk_page_map(unsigned long vaddr, unsigned long paddr, unsigned long prot, + unsigned long flags) +{ + return _page_map(ukarch_read_pt_base(), vaddr, paddr, prot, flags, 0, + ukarch_pte_write); +} + +static int _page_unmap(unsigned long pt, unsigned long vaddr, + int (*pte_write)(unsigned long, size_t, unsigned long, size_t)) +{ + unsigned long l1_table, l2_table, l3_table, l4_table, pte; + unsigned long pfn; + unsigned long frame_size = PAGE_SIZE; + int rc; + + if (!PAGE_ALIGNED(vaddr)) { + uk_pr_err("Address must be aligned to page size\n"); + return -1; + } + + l4_table = pt; + pte = ukarch_pte_read(l4_table, L4_OFFSET(vaddr), 4); + if (!PAGE_PRESENT(pte)) + return -1; + + l3_table = (unsigned long) pt_pte_to_virt(pte); + pte = ukarch_pte_read(l3_table, L3_OFFSET(vaddr), 3); + if (!PAGE_PRESENT(pte)) + return -1; + + l2_table = (unsigned long) pt_pte_to_virt(pte); + pte = ukarch_pte_read(l2_table, L2_OFFSET(vaddr), 2); + if (!PAGE_PRESENT(pte)) + return -1; + if (PAGE_LARGE(pte)) { + if (!PAGE_LARGE_ALIGNED(vaddr)) + return -1; + + pfn = pte_to_pfn(pte); + rc = pte_write(l2_table, L2_OFFSET(vaddr), 0, 2); + if (rc) + return -1; + frame_size = PAGE_LARGE_SIZE; + } else { + l1_table = (unsigned long) pt_pte_to_virt(pte); + pte = ukarch_pte_read(l1_table, L1_OFFSET(vaddr), 1); + if (!PAGE_PRESENT(pte)) + return -1; + + pfn = pte_to_pfn(pte); + rc = pte_write(l1_table, L1_OFFSET(vaddr), 0, 1); + if (rc) + return -1; + rc = uk_pt_release_if_unused(vaddr, l1_table, l2_table, 1); + if (rc) + return -1; + } + + ukarch_flush_tlb_entry(vaddr); + + uk_frame_reserve(pfn << PAGE_SHIFT, frame_size, 0); + + rc = uk_pt_release_if_unused(vaddr, l2_table, l3_table, 2); + if (rc) + return -1; + rc = uk_pt_release_if_unused(vaddr, l3_table, l4_table, 3); + if (rc) + return -1; + + 
return 0; +} + +int uk_page_unmap(unsigned long vaddr) +{ + return _page_unmap(ukarch_read_pt_base(), vaddr, ukarch_pte_write); +} + +static int _map_region(unsigned long pt, unsigned long vaddr, + unsigned long paddr, unsigned long pages, unsigned long prot, + unsigned long flags, int is_initmem, + int (*pte_write)(unsigned long, size_t, unsigned long, size_t)) +{ + size_t i; + unsigned long increment; + int rc; + + if (flags & PAGE_FLAG_LARGE) + increment = PAGE_LARGE_SIZE; + else + increment = PAGE_SIZE; + + for (i = 0; i < pages; i++) { + unsigned long current_paddr; + + if (paddr == PAGE_PADDR_ANY) + current_paddr = PAGE_PADDR_ANY; + else + current_paddr = pfn_to_mfn((paddr + i * increment) >> PAGE_SHIFT) << PAGE_SHIFT; + + rc = _page_map(pt, vaddr + i * increment, current_paddr, prot, + flags, is_initmem, pte_write); + if (rc) { + size_t j; + + uk_pr_err("Could not map page 0x%08lx\n", + vaddr + i * increment); + + for (j = 0; j < i; j++) + _page_unmap(pt, vaddr, pte_write); + return -1; + } + } + + return 0; +} + +int uk_map_region(unsigned long vaddr, unsigned long paddr, + unsigned long pages, unsigned long prot, unsigned long flags) +{ + return _map_region(ukarch_read_pt_base(), vaddr, paddr, pages, + prot, flags, 0, ukarch_pte_write); +} + +int _initmem_map_region(unsigned long pt, unsigned long vaddr, + unsigned long paddr, unsigned long pages, unsigned long prot, + unsigned long flags) +{ + return _map_region(pt, vaddr, paddr, pages, prot, flags, 1, + _ukarch_pte_write_raw); +} + +int _page_set_prot(unsigned long pt, unsigned long vaddr, + unsigned long new_prot, + int (*pte_write)(unsigned long, size_t, unsigned long, size_t)) +{ + unsigned long pte, new_pte; + int rc; + + if (!PAGE_ALIGNED(vaddr)) { + uk_pr_info("Address must be aligned to page size\n"); + return -1; + } + + pte = ukarch_pte_read(pt, L4_OFFSET(vaddr), 4); + if (!PAGE_PRESENT(pte)) + return -1; + + pt = (unsigned long) pt_pte_to_virt(pte); + pte = ukarch_pte_read(pt, L3_OFFSET(vaddr), 3); + if (!PAGE_PRESENT(pte)) + return -1; + + pt = (unsigned long) pt_pte_to_virt(pte); + pte = ukarch_pte_read(pt, L2_OFFSET(vaddr), 2); + if (!PAGE_PRESENT(pte)) + return -1; + if (PAGE_LARGE(pte)) { + new_pte = ukarch_pte_create(PTE_REMOVE_FLAGS(pte), new_prot, 2); + rc = pte_write(pt, L2_OFFSET(vaddr), new_pte, 2); + if (rc) + return -1; + ukarch_flush_tlb_entry(vaddr); + + return 0; + } + + pt = (unsigned long) pt_pte_to_virt(pte); + pte = ukarch_pte_read(pt, L1_OFFSET(vaddr), 1); + if (!PAGE_PRESENT(pte)) + return -1; + + new_pte = ukarch_pte_create(PTE_REMOVE_FLAGS(pte), new_prot, 1); + rc = pte_write(pt, L1_OFFSET(vaddr), new_pte, 1); + if (rc) + return -1; + ukarch_flush_tlb_entry(vaddr); + + return 0; +} + +int uk_page_set_prot(unsigned long vaddr, unsigned long new_prot) +{ + return _page_set_prot(ukarch_read_pt_base(), vaddr, new_prot, + ukarch_pte_write); +} + +int _initmem_page_set_prot(unsigned long pt, unsigned long vaddr, + unsigned long new_prot) +{ + return _page_set_prot(pt, vaddr, new_prot, _ukarch_pte_write_raw); +} + +unsigned long _virt_to_pte(unsigned long pt, unsigned long vaddr) +{ + unsigned long pt_entry; + + if (!PAGE_ALIGNED(vaddr)) { + uk_pr_err("Address must be aligned to page size\n"); + return PAGE_NOT_MAPPED; + } + + pt_entry = ukarch_pte_read(pt, L4_OFFSET(vaddr), 4); + if (!PAGE_PRESENT(pt_entry)) + return PAGE_NOT_MAPPED; + + pt = (unsigned long) pt_pte_to_virt(pt_entry); + pt_entry = ukarch_pte_read(pt, L3_OFFSET(vaddr), 3); + if (!PAGE_PRESENT(pt_entry)) + return PAGE_NOT_MAPPED; + if 
(PAGE_HUGE(pt_entry)) + return pt_entry; + + pt = (unsigned long) pt_pte_to_virt(pt_entry); + pt_entry = ukarch_pte_read(pt, L2_OFFSET(vaddr), 2); + if (!PAGE_PRESENT(pt_entry)) + return PAGE_NOT_MAPPED; + if (PAGE_LARGE(pt_entry)) + return pt_entry; + + pt = (unsigned long) pt_pte_to_virt(pt_entry); + pt_entry = ukarch_pte_read(pt, L1_OFFSET(vaddr), 1); + + return pt_entry; +} + +unsigned long uk_virt_to_pte(unsigned long vaddr) +{ + return _virt_to_pte(ukarch_read_pt_base(), vaddr); +} + +static unsigned long _initmem_pt_get(unsigned long paddr_start) +{ + unsigned long frame = paddr_start + (_used_pts_count++) * PAGE_SIZE + + PAGETABLES_AREA_START - BOOKKEEP_AREA_START; + + // TODO + // After Xen boots, the initially usable memory ends at some address X. + // Beyond X only 512K are guaranteed, and only align_up(X, 4MB) is mapped + // at first. Frames are handed out here starting at X + 2MB, so it is + // quite possible that there is not enough room. + // uk_pr_err("frame is 0x%08lx\n", frame); + + memset((void *) frame, 0, PAGE_SIZE); + + return frame; +} + +/** + * Create page tables that have mappings for themselves. Any other mappings + * can then be created using the API, after the value returned by this function + * is set as the PT base. + * @return PT base, the physical address of the 4th level page table. + */ +static unsigned long _pt_create(unsigned long paddr_start) +{ + unsigned long pt_l4, pt_l3, pt_l2, pt_l1; + unsigned long prev_l4_offset, prev_l3_offset, prev_l2_offset; + unsigned long page, frame; + + pt_l4 = _initmem_pt_get(paddr_start); + pt_l3 = _initmem_pt_get(paddr_start); + pt_l2 = _initmem_pt_get(paddr_start); + pt_l1 = _initmem_pt_get(paddr_start); + + prev_l4_offset = L4_OFFSET(BOOKKEEP_AREA_START); + prev_l3_offset = L3_OFFSET(BOOKKEEP_AREA_START); + prev_l2_offset = L2_OFFSET(BOOKKEEP_AREA_START); + + _ukarch_pte_write_raw(pt_l4, prev_l4_offset, + (pfn_to_mfn(pt_l3 >> PAGE_SHIFT) << PAGE_SHIFT) + | L4_PROT, 4); + _ukarch_pte_write_raw(pt_l3, prev_l3_offset, + (pfn_to_mfn(pt_l2 >> PAGE_SHIFT) << PAGE_SHIFT) + | L3_PROT, 3); + _ukarch_pte_write_raw(pt_l2, prev_l2_offset, + (pfn_to_mfn(pt_l1 >> PAGE_SHIFT) << PAGE_SHIFT) + | L2_PROT, 2); + + for (page = BOOKKEEP_AREA_START; + page < BOOKKEEP_AREA_START + BOOKKEEP_AREA_SIZE; + page += PAGE_SIZE) { + if (L4_OFFSET(page) != prev_l4_offset) { + pt_l3 = _initmem_pt_get(paddr_start); + _ukarch_pte_write_raw(pt_l4, L4_OFFSET(page), + (pfn_to_mfn(pt_l3 >> PAGE_SHIFT) << PAGE_SHIFT) + | L4_PROT, 4); + prev_l4_offset = L4_OFFSET(page); + } + + if (L3_OFFSET(page) != prev_l3_offset) { + pt_l2 = _initmem_pt_get(paddr_start); + _ukarch_pte_write_raw(pt_l3, L3_OFFSET(page), + (pfn_to_mfn(pt_l2 >> PAGE_SHIFT) << PAGE_SHIFT) + | L3_PROT, 3); + prev_l3_offset = L3_OFFSET(page); + } + + if (L2_OFFSET(page) != prev_l2_offset) { + pt_l1 = _initmem_pt_get(paddr_start); + _ukarch_pte_write_raw(pt_l2, L2_OFFSET(page), + (pfn_to_mfn(pt_l1 >> PAGE_SHIFT) << PAGE_SHIFT) + | L2_PROT, 2); + prev_l2_offset = L2_OFFSET(page); + } + + frame = pfn_to_mfn((page - BOOKKEEP_AREA_START + paddr_start) >> PAGE_SHIFT) << PAGE_SHIFT; + _ukarch_pte_write_raw(pt_l1, L1_OFFSET(page), + frame | L1_PROT, 1); + } + + return pt_l4; +} + +void uk_pt_init(unsigned long pt_start_paddr, unsigned long paddr_start, + size_t len) +{ + unsigned long offset; + + unsigned long phys_bitmap_start_addr; + size_t phys_bitmap_length; + + unsigned long phys_mem_start_addr; + size_t phys_mem_length; + + if (!PAGE_ALIGNED(pt_start_paddr) || + (paddr_start != PAGE_PADDR_ANY + && !PAGE_ALIGNED(paddr_start))) { + uk_pr_err("Addresses 
must be aligned to page size\n"); + return; + } + + /* + * The needed bookkeeping internal structures are: + * - a physical address bitmap, to keep track of all available physical + * addresses (which will have a bit for every frame, so the size + * phys_mem_length / PAGE_SIZE) + * - a memory area where page tables are stored + * - a bitmap for pages used as page tables + */ + phys_mem_length = len; + if (paddr_start == PAGE_PADDR_ANY) + phys_mem_length -= BOOKKEEP_AREA_SIZE; + + /* + * If no specific area is given to be managed, the remaining memory is + * considered the actual usable memory. + */ + if (paddr_start == PAGE_PADDR_ANY) { + phys_mem_start_addr = + PAGE_ALIGN_UP(pt_start_paddr + PAGETABLES_AREA_START + - BOOKKEEP_AREA_START + + PAGETABLES_AREA_SIZE); + } else { + phys_mem_start_addr = paddr_start; + } + + offset = phys_mem_start_addr + - PAGE_LARGE_ALIGN_DOWN(phys_mem_start_addr); + phys_mem_start_addr -= offset; + + /* + * Need to bookkeep |phys_mem_length| bytes of physical + * memory, starting from |phys_mem_start_addr|. This is the + * physical memory given by the hypervisor. + * + * In Xen's case, the bitmap keeps the pseudo-physical addresses, the + * translation to machine frames being done later. + */ + + pt_mem_start_addr = pt_start_paddr + PAGETABLES_AREA_START + - BOOKKEEP_AREA_START; + pt_mem_length = PAGETABLES_AREA_SIZE; + + + /* Bookkeeping free pages used for PT allocations */ + pt_bitmap_start_addr = pt_start_paddr; + pt_bitmap_length = pt_mem_length >> PAGE_SHIFT; + uk_bitmap_zero((unsigned long *) pt_bitmap_start_addr, + pt_bitmap_length); + uk_bitmap_set((unsigned long *) pt_bitmap_start_addr, 0, + _used_pts_count); + + phys_bitmap_start_addr = PAGE_ALIGN_UP(pt_bitmap_start_addr + pt_bitmap_length); + phys_bitmap_length = (phys_mem_length + offset) >> PAGE_SHIFT; + + _phys_mem_region_list[_phys_mem_region_list_size].start_addr = + phys_mem_start_addr; + _phys_mem_region_list[_phys_mem_region_list_size].length = phys_mem_length; + _phys_mem_region_list[_phys_mem_region_list_size].bitmap_start_addr = + phys_bitmap_start_addr; + _phys_mem_region_list[_phys_mem_region_list_size].bitmap_length = + phys_bitmap_length; + + _phys_mem_region_list_size++; +} + +int uk_pt_add_mem(unsigned long paddr_start, unsigned long len) +{ + unsigned long bitmap_start_paddr; + unsigned long bitmap_start_vaddr; + size_t bitmap_length; + + unsigned long mem_start_addr; + size_t mem_length; + + if (!_phys_mem_region_list_size) { + uk_pr_err("When initializing the first chunk of physical memory, use uk_pt_init\n"); + return -1; + } + + paddr_start = PAGE_ALIGN_UP(paddr_start); + len = PAGE_ALIGN_DOWN(len); + + /* + * mem_length + bitmap_length = len + */ + mem_length = len * PAGE_SIZE / (PAGE_SIZE + 1); + bitmap_start_paddr = paddr_start; + bitmap_length = PAGE_ALIGN_UP(mem_length) >> PAGE_SHIFT; + mem_start_addr = PAGE_ALIGN_UP(bitmap_start_paddr + bitmap_length); + + bitmap_start_vaddr = PAGE_ALIGN_UP(_phys_mem_region_list[_phys_mem_region_list_size - 1].bitmap_start_addr + + _phys_mem_region_list[_phys_mem_region_list_size - 1].bitmap_length); + if (bitmap_start_vaddr + bitmap_length > PAGETABLES_AREA_START) { + uk_pr_err("Not enough bookkeeping space\n"); + return -1; + } + + uk_pr_err("bitmap start vaddr is 0x%08lx\n", bitmap_start_vaddr); + uk_map_region(bitmap_start_vaddr, bitmap_start_paddr, bitmap_length >> PAGE_SHIFT, PAGE_PROT_READ | PAGE_PROT_WRITE, 0); + + uk_bitmap_zero((unsigned long *) bitmap_start_vaddr, bitmap_length); + + 
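[Editor's illustration of the split computed above, not part of the patch; assumes 4 KiB pages and a 64-bit unsigned long. The division sets aside roughly one bookkeeping slot per managed frame:

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_SHIFT	12

int main(void)
{
	unsigned long len = 64UL << 20;	/* 64 MiB handed to uk_pt_add_mem() */
	unsigned long mem_length = len * PAGE_SIZE / (PAGE_SIZE + 1);
	/* open-coded PAGE_ALIGN_UP(mem_length) >> PAGE_SHIFT */
	unsigned long bitmap_length =
		((mem_length + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1)) >> PAGE_SHIFT;

	/* prints: mem_length=67092483 bitmap_length=16381 */
	printf("mem_length=%lu bitmap_length=%lu\n", mem_length, bitmap_length);
	return 0;
}
]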
_phys_mem_region_list[_phys_mem_region_list_size].start_addr = + mem_start_addr; + _phys_mem_region_list[_phys_mem_region_list_size].length = PAGE_ALIGN_DOWN(mem_length); + _phys_mem_region_list[_phys_mem_region_list_size].bitmap_start_addr = + bitmap_start_vaddr; + _phys_mem_region_list[_phys_mem_region_list_size].bitmap_length = + bitmap_length; + + _phys_mem_region_list_size++; + + return 0; +} + +#ifdef CONFIG_PLAT_KVM +static int _mmap_kvm_areas(unsigned long pt_base) +{ + unsigned long mbinfo_pages, vgabuffer_pages; + + mbinfo_pages = DIV_ROUND_UP(MBINFO_AREA_SIZE, PAGE_SIZE); + vgabuffer_pages = DIV_ROUND_UP(VGABUFFER_AREA_SIZE, PAGE_SIZE); + if (_initmem_map_region(pt_base, MBINFO_AREA_START, MBINFO_AREA_START, + mbinfo_pages, PAGE_PROT_READ, 0)) + return -1; + + if (_initmem_map_region(pt_base, VGABUFFER_AREA_START, + VGABUFFER_AREA_START, vgabuffer_pages, + PAGE_PROT_READ | PAGE_PROT_WRITE, 0)) + return -1; + + return 0; +} +#endif /* CONFIG_PLAT_KVM */ + +static int _mmap_kernel(unsigned long pt_base, + unsigned long kernel_start_vaddr, + unsigned long kernel_start_paddr, + unsigned long kernel_area_size) +{ + unsigned long kernel_pages; + + UK_ASSERT(PAGE_ALIGNED(kernel_start_vaddr)); + UK_ASSERT(PAGE_ALIGNED(kernel_start_paddr)); + +#ifdef CONFIG_PLAT_KVM + if (_mmap_kvm_areas(pt_base)) + return -1; +#endif /* CONFIG_PLAT_KVM */ + + /* TODO: break down into RW regions and RX regions */ + kernel_pages = DIV_ROUND_UP(kernel_area_size, PAGE_SIZE); + if (_initmem_map_region(pt_base, kernel_start_vaddr, + kernel_start_paddr, kernel_pages, + PAGE_PROT_READ | PAGE_PROT_WRITE | PAGE_PROT_EXEC, 0)) + return -1; + + /* + * It is safe to return from this function, since we are still on the + * bootstrap stack, which is in the bss section, in the binary. + * The switch to another stack is done later. 
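[Editor's note: one hypothetical shape for the RW/RX split mentioned in the TODO above, assuming the usual Unikraft section symbols __TEXT, __ERODATA and __END; this is a coarse sketch, and read-only data would deserve its own non-writable, non-executable region:

	unsigned long rx_pages = DIV_ROUND_UP((size_t) __ERODATA - (size_t) __TEXT,
					      PAGE_SIZE);
	unsigned long rw_pages = DIV_ROUND_UP((size_t) __END - (size_t) __ERODATA,
					      PAGE_SIZE);

	if (_initmem_map_region(pt_base, (unsigned long) __TEXT,
				(unsigned long) __TEXT, rx_pages,
				PAGE_PROT_READ | PAGE_PROT_EXEC, 0))
		return -1;
	if (_initmem_map_region(pt_base, (unsigned long) __ERODATA,
				(unsigned long) __ERODATA, rw_pages,
				PAGE_PROT_READ | PAGE_PROT_WRITE, 0))
		return -1;
]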
+ */ + return 0; +} + +static int _initmem_set_prot_region(unsigned long pt_base, unsigned long vaddr, + unsigned long len, unsigned long new_prot) +{ + unsigned long page; + int rc; + + for (page = vaddr; page < vaddr + len; page += PAGE_SIZE) { + rc = _initmem_page_set_prot(pt_base, page, new_prot); + if (rc) + return -1; + } + + return 0; +} + +static int uk_set_prot_region(unsigned long vaddr, unsigned long len, + unsigned long new_prot) +{ + unsigned long page; + int rc; + + for (page = vaddr; page < vaddr + len; page += PAGE_SIZE) { + rc = uk_page_set_prot(page, new_prot); + if (rc) + return -1; + } + + return 0; +} + +void uk_pt_build(unsigned long paddr_start, unsigned long len, + unsigned long kernel_start_vaddr, + unsigned long kernel_start_paddr, + unsigned long kernel_area_size) +{ + unsigned long pt_base; + + UK_ASSERT(PAGE_ALIGNED(paddr_start)); + UK_ASSERT(PAGE_ALIGNED(len)); + + uk_pr_err("paddr_start is 0x%08lx\n", paddr_start); + pt_base = _pt_create(paddr_start); + uk_pt_init(paddr_start, PAGE_PADDR_ANY, len); + if (_mmap_kernel(pt_base, kernel_start_vaddr, kernel_start_paddr, + kernel_area_size)) + UK_CRASH("Could not map kernel\n"); + +#ifdef CONFIG_PARAVIRT + _initmem_page_map(pt_base, SHAREDINFO_PAGE, + PTE_REMOVE_FLAGS(uk_virt_to_pte(SHAREDINFO_PAGE)), + PAGE_PROT_READ | PAGE_PROT_WRITE, 0); + /* All pagetables must be set to read only before writing new pt_base */ + _initmem_set_prot_region(pt_base, PAGETABLES_AREA_START, + PAGETABLES_AREA_SIZE, PAGE_PROT_READ); + uk_set_prot_region(pt_base, PAGETABLES_AREA_SIZE, PAGE_PROT_READ); +#endif /* CONFIG_PARAVIRT */ + + ukarch_write_pt_base(pt_base); + _virt_offset = PAGETABLES_AREA_START - pt_base; + _phys_mem_region_list[0].bitmap_start_addr += _virt_offset; + pt_bitmap_start_addr += _virt_offset; + + uk_bitmap_zero((unsigned long *) _phys_mem_region_list[0].bitmap_start_addr, + _phys_mem_region_list[0].bitmap_length); + uk_bitmap_set((unsigned long *) _phys_mem_region_list[0].bitmap_start_addr, 0, + (_phys_mem_region_list[0].start_addr - PAGE_LARGE_ALIGN_DOWN(_phys_mem_region_list[0].start_addr))>> PAGE_SHIFT); + uk_bitmap_zero((unsigned long *) stack_bitmap_start_addr, + stack_bitmap_length); +} + +void *uk_stack_alloc() +{ + unsigned long stack_start_vaddr; + unsigned long offset; + + offset = uk_bitmap_find_next_zero_area( + (unsigned long *) stack_bitmap_start_addr, + stack_bitmap_length, + 0 /* start */, 1 /* nr */, 0 /* align_mask */); + + if (offset > STACK_COUNT) { + uk_pr_err("No more stacks available\n"); + return NULL; + } + + uk_bitmap_set((unsigned long *) stack_bitmap_start_addr, offset, 1); + + /* Map stack in regular pages */ + stack_start_vaddr = STACK_AREA_START + offset * __STACK_SIZE; + if (uk_map_region(stack_start_vaddr, PAGE_PADDR_ANY, + __STACK_SIZE >> PAGE_SHIFT, + PAGE_PROT_READ | PAGE_PROT_WRITE, 0)) + return NULL; + + return (void *) stack_start_vaddr; +} + +int uk_stack_free(void *vaddr) +{ + unsigned long pages; + size_t i; + + if ((unsigned long) vaddr < STACK_AREA_START + || (unsigned long) vaddr > (STACK_AREA_END - __STACK_SIZE) + || (((unsigned long) vaddr) & (__STACK_SIZE - 1))) + return -1; + + pages = __STACK_SIZE >> PAGE_SHIFT; + for (i = 0; i < pages; i++) { + if (uk_page_unmap(((unsigned long) vaddr) + i * PAGE_SIZE)) { + uk_pr_err("Page 0x%08lx not previously mapped\n", + ((unsigned long) vaddr) + i * PAGE_SIZE); + + return -1; + } + } + + return 0; +} + +int uk_heap_map(unsigned long vaddr, unsigned long len) +{ + unsigned long heap_pages, heap_large_pages; + + if (vaddr < 
HEAP_AREA_START || vaddr + len > HEAP_AREA_END) + return -1; + +/* note: we do not want to use large pages for the heap in the case of Intel + * PKU; large pages prevent us from being able to set page protections at a + * 4K granularity and result in unintuitive and undesired behavior. */ +#if (CONFIG_PARAVIRT || CONFIG_LIBFLEXOS_INTELPKU) + if (uk_map_region(vaddr, PAGE_PADDR_ANY, + len >> PAGE_SHIFT, PAGE_PROT_READ | PAGE_PROT_WRITE, 0)) + return -1; +#else /* CONFIG_PARAVIRT */ + /* Map heap in large and regular pages */ + heap_large_pages = len >> PAGE_LARGE_SHIFT; + + if (uk_map_region(vaddr, PAGE_PADDR_ANY, + heap_large_pages, + PAGE_PROT_READ | PAGE_PROT_WRITE, + PAGE_FLAG_LARGE)) + return -1; + + /* + * If the heap is not properly aligned to PAGE_LARGE_SIZE, + * map the rest in regular pages + */ + if ((heap_large_pages << PAGE_LARGE_SHIFT) < len) { + heap_pages = (len - (heap_large_pages << PAGE_LARGE_SHIFT)) + >> PAGE_SHIFT; + } else { + heap_pages = 0; + } + + if (uk_map_region(vaddr + (heap_large_pages << PAGE_LARGE_SHIFT), + PAGE_PADDR_ANY, heap_pages, + PAGE_PROT_READ | PAGE_PROT_WRITE, 0)) + return -1; +#endif /* CONFIG_PARAVIRT */ + + return 0; +} + +void dump_pt(unsigned long pt, unsigned long vaddr) +{ + unsigned long pt_entry; + size_t i; + + if (!PAGE_ALIGNED(vaddr)) { + uk_pr_err("Address must be aligned to page size\n"); + return; + } + + printf("L4 table for address 0x%08lx is 0x%08lx\n", vaddr, pt); + for (i = 0; i < L4_OFFSET(vaddr) || i < 2; i += 2) + printf("0x%08lx: 0x%08lx 0x%08lx\n", pt + 8 * i, + *((unsigned long *) pt + i), + *((unsigned long *) pt + i + 1)); + + pt_entry = ukarch_pte_read(pt, L4_OFFSET(vaddr), 4); + if (!PAGE_PRESENT(pt_entry)) + return; + + pt = (unsigned long) pt_pte_to_virt(pt_entry); + + printf("L3 table for address 0x%08lx is 0x%08lx\n", vaddr, pt); + for (i = 0; i < L3_OFFSET(vaddr) || i < 2; i += 2) + printf("0x%08lx: 0x%08lx 0x%08lx\n", pt + 8 * i, + *((unsigned long *) pt + i), + *((unsigned long *) pt + i + 1)); + + pt_entry = ukarch_pte_read(pt, L3_OFFSET(vaddr), 3); + if (!PAGE_PRESENT(pt_entry)) + return; + if (PAGE_HUGE(pt_entry)) { + printf("PTE for vaddr 0x%08lx is 0x%08lx\n", vaddr, pt_entry); + return; + } + + pt = (unsigned long) pt_pte_to_virt(pt_entry); + + printf("L2 table for address 0x%08lx is 0x%08lx\n", vaddr, pt); + for (i = 0; i < L2_OFFSET(vaddr) || i < 2; i += 2) + printf("0x%08lx: 0x%08lx 0x%08lx\n", pt + 8 * i, + *((unsigned long *) pt + i), + *((unsigned long *) pt + i + 1)); + + pt_entry = ukarch_pte_read(pt, L2_OFFSET(vaddr), 2); + if (!PAGE_PRESENT(pt_entry)) + return; + if (PAGE_LARGE(pt_entry)) { + printf("Large page PTE for vaddr 0x%08lx is 0x%08lx\n", + vaddr, pt_entry); + return; + } + + pt = (unsigned long) pt_pte_to_virt(pt_entry); + + printf("L1 table for address 0x%08lx is 0x%08lx\n", vaddr, pt); + for (i = 0; i < L1_OFFSET(vaddr) || i < 2; i += 2) + printf("0x%08lx: 0x%08lx 0x%08lx\n", pt + 8 * i, + *((unsigned long *) pt + i), + *((unsigned long *) pt + i + 1)); + + pt_entry = ukarch_pte_read(pt, L1_OFFSET(vaddr), 1); + + printf("PTE for vaddr 0x%08lx is 0x%08lx\n", vaddr, pt_entry); +} + + diff --git a/plat/common/sw_ctx.c b/plat/common/sw_ctx.c index 2c4fc613d4..ac81dc6875 100644 --- a/plat/common/sw_ctx.c +++ b/plat/common/sw_ctx.c @@ -80,9 +80,13 @@ static void sw_ctx_start(void *ctx) UK_ASSERT(sw_ctx != NULL); - set_tls_pointer(sw_ctx->tlsp); + volatile unsigned long tlsp = sw_ctx->tlsp; + volatile unsigned long sp = sw_ctx->sp; + volatile unsigned long ip = sw_ctx->ip; + + 
set_tls_pointer(tlsp); /* Switch stacks and run the thread */ - asm_ctx_start(sw_ctx->sp, sw_ctx->ip); + asm_ctx_start(sp, ip); UK_CRASH("Thread did not start."); } @@ -94,10 +98,21 @@ static void sw_ctx_switch(void *prevctx, void *nextctx) struct sw_ctx *p = prevctx; struct sw_ctx *n = nextctx; + /* Hack need access to both domains */ +#if CONFIG_LIBFLEXOS_INTELPKU + unsigned long pkru = rdpkru(); + wrpkru(0x0); +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ + save_extregs(p); restore_extregs(n); set_tls_pointer(n->tlsp); asm_sw_ctx_switch(prevctx, nextctx); + +#if CONFIG_LIBFLEXOS_INTELPKU + /* TODO FLEXOS: revisit, possible fuck up because of stack switches... */ + wrpkru(pkru); +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ } void sw_ctx_callbacks_init(struct ukplat_ctx_callbacks *ctx_cbs) diff --git a/plat/common/x86/traps.c b/plat/common/x86/traps.c index f638db5eb7..9784502097 100644 --- a/plat/common/x86/traps.c +++ b/plat/common/x86/traps.c @@ -36,9 +36,15 @@ #include #include #include +#include #include #include +#if CONFIG_LIBFLEXOS_INTELPKU +#include +#include +#endif + /* A general word of caution when writing trap handlers. The platform trap * entry code is set up to properly save general-purpose registers (e.g., rsi, * rdi, rax, r8, ...), but it does NOT save any floating-point or SSE/AVX @@ -72,6 +78,11 @@ DECLARE_TRAP (simd_error, "SIMD coprocessor error") void do_unhandled_trap(int trapnr, char *str, struct __regs *regs, unsigned long error_code) { +#if CONFIG_LIBFLEXOS_INTELPKU + /* Reset PKU key to avoid double fault */ + wrpkru(0x0); +#endif + uk_pr_crit("Unhandled Trap %d (%s), error code=0x%lx\n", trapnr, str, error_code); uk_pr_info("Regs address %p\n", regs); @@ -99,6 +110,11 @@ static void fault_prologue(void) void do_gp_fault(struct __regs *regs, long error_code) { +#if CONFIG_LIBFLEXOS_INTELPKU + /* Reset PKU key to avoid double fault */ + wrpkru(0x0); +#endif + fault_prologue(); uk_pr_crit("GPF rip: %lx, error_code=%lx\n", regs->rip, error_code); @@ -113,6 +129,12 @@ void do_gp_fault(struct __regs *regs, long error_code) void do_page_fault(struct __regs *regs, unsigned long error_code) { +#if CONFIG_LIBFLEXOS_INTELPKU + /* Reset PKU key to avoid double fault. Save its value because we + * want to dump it later. */ + unsigned long pku = rdpkru(); + wrpkru(0x0); +#endif unsigned long addr = read_cr2(); fault_prologue(); @@ -120,7 +142,32 @@ void do_page_fault(struct __regs *regs, unsigned long error_code) "regs %p, sp %lx, our_sp %p, code %lx\n", addr, regs->rip, regs, regs->rsp, &addr, error_code); +#if CONFIG_LIBFLEXOS_INTELPKU + /* PKU fault? */ + if (error_code & (1 << 5)) { + struct ukplat_memregion_desc md; + char *mdname = NULL; + uk_pr_crit("PF_PK: protection key block access (%s)\n", + error_code & 0x2 ? 
"WRITE":"READ"); + ukplat_memregion_foreach(&md, NULL) { + if (addr >= md.base && addr < (size_t)md.base + md.len) { + mdname = md.name; + } + } + if (!mdname) + mdname = "???"; + uk_pr_crit("Target page %p (section .%s) had key %d\n", + (void*) addr, mdname, flexos_intelpku_mem_get_key( + addr & ~((__PAGE_SIZE) - 1))); + } +#endif + dump_regs(regs); + +#if CONFIG_LIBFLEXOS_INTELPKU + uk_pr_crit("PKU: %016lx\n", pku); +#endif + stack_walk_for_frame(regs->rbp); uk_asmdumpk(KLVL_CRIT, (void *) regs->rip, 6); dump_mem(regs->rsp); diff --git a/plat/drivers/virtio/virtio_net.c b/plat/drivers/virtio/virtio_net.c index b5979d1e26..3202246b65 100644 --- a/plat/drivers/virtio/virtio_net.c +++ b/plat/drivers/virtio/virtio_net.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -156,7 +157,9 @@ struct virtio_net_device { /* The mtu */ __u16 mtu; /* The hw address of the netdevice */ - struct uk_hwaddr hw_addr; + /* NOTE FLEXOS: as a pointer here, we want to be able to share + * this with lwip */ + struct uk_hwaddr *hw_addr; /* Netdev state */ __u8 state; /* RX promiscuous mode. */ @@ -818,7 +821,7 @@ static const struct uk_hwaddr *virtio_net_mac_get(struct uk_netdev *n) UK_ASSERT(n); d = to_virtionetdev(n); - return &d->hw_addr; + return d->hw_addr; } static __u16 virtio_net_mtu_get(struct uk_netdev *n) @@ -865,7 +868,7 @@ static int virtio_netdev_feature_negotiate(struct virtio_net_device *vndev) */ hw_len = virtio_config_get(vndev->vdev, __offsetof(struct virtio_net_config, mac), - &vndev->hw_addr.addr_bytes[0], + &vndev->hw_addr->addr_bytes[0], UK_NETDEV_HWADDR_LEN, 1); if (unlikely(hw_len != UK_NETDEV_HWADDR_LEN)) { uk_pr_err("Failed to retrieve the mac address from device\n"); @@ -1131,6 +1134,10 @@ static int virtio_net_add_dev(struct virtio_dev *vdev) vndev->netdev.rx_one = virtio_netdev_recv; vndev->netdev.tx_one = virtio_netdev_xmit; vndev->netdev.ops = &virtio_netdev_ops; + vndev->hw_addr = flexos_calloc_whitelist(1, sizeof(*(vndev->hw_addr))); + /* TODO FLEXOS: investigate, can we actually put this in lwip's domain + * instead of the shared one? 
*/ + vndev->netdev.scratch_pad = flexos_calloc_whitelist(1, __PAGE_SIZE); rc = uk_netdev_drv_register(&vndev->netdev, a, drv_name); if (rc < 0) { @@ -1148,6 +1155,8 @@ static int virtio_net_add_dev(struct virtio_dev *vdev) exit: return rc; err_netdev_data: + flexos_free_whitelist(vndev->hw_addr); + flexos_free_whitelist(vndev->netdev.scratch_pad); uk_free(a, vndev); err_out: goto exit; diff --git a/plat/kvm/Makefile.uk b/plat/kvm/Makefile.uk index 94321e0c63..0d086bd6f3 100644 --- a/plat/kvm/Makefile.uk +++ b/plat/kvm/Makefile.uk @@ -24,9 +24,9 @@ LIBKVMPLAT_CINCLUDES-y += -I$(LIBKVMPLAT_BASE)/include LIBKVMPLAT_CINCLUDES-y += -I$(UK_PLAT_COMMON_BASE)/include LIBKVMPLAT_CINCLUDES-y += -I$(UK_PLAT_DRIVERS_BASE)/include -LIBKVMPLAT_ASFLAGS += -DKVMPLAT -LIBKVMPLAT_CFLAGS += -DKVMPLAT -LIBKVMPLAT_CXXFLAGS += -DKVMPLAT +LIBKVMPLAT_ASFLAGS += -DKVMPLAT -fno-sanitize=kernel-address +LIBKVMPLAT_CFLAGS += -DKVMPLAT -fno-sanitize=kernel-address +LIBKVMPLAT_CXXFLAGS += -DKVMPLAT -fno-sanitize=kernel-address ## ## Default Linker script @@ -42,6 +42,9 @@ endif ## ## Architecture library definitions for x86_64 ## +ifeq ($(CONFIG_PT_API),y) +LIBKVMPLAT_SRCS-$(CONFIG_ARCH_X86_64) += $(UK_PLAT_COMMON_BASE)/mm.c|common +endif LIBKVMPLAT_SRCS-$(CONFIG_ARCH_X86_64) += $(UK_PLAT_COMMON_BASE)/x86/trace.c|common LIBKVMPLAT_SRCS-$(CONFIG_ARCH_X86_64) += $(UK_PLAT_COMMON_BASE)/x86/traps.c|isr LIBKVMPLAT_SRCS-$(CONFIG_ARCH_X86_64) += $(UK_PLAT_COMMON_BASE)/x86/cpu_features.c|common diff --git a/plat/kvm/arm/entry64.S b/plat/kvm/arm/entry64.S index 104fd8b645..73eed751ff 100644 --- a/plat/kvm/arm/entry64.S +++ b/plat/kvm/arm/entry64.S @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include .global page_table_size diff --git a/plat/kvm/arm/setup.c b/plat/kvm/arm/setup.c index 41e63755d8..8513b48c55 100644 --- a/plat/kvm/arm/setup.c +++ b/plat/kvm/arm/setup.c @@ -20,7 +20,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/plat/kvm/io.c b/plat/kvm/io.c index 14a8c01a90..8854d29eb2 100644 --- a/plat/kvm/io.c +++ b/plat/kvm/io.c @@ -1,8 +1,10 @@ /* SPDX-License-Identifier: BSD-3-Clause */ /* * Authors: Sharan Santhanam + * Stefan Teodorescu * * Copyright (c) 2018, NEC Europe Ltd., NEC Corporation. All rights reserved. + * Copyright (c) 2020, University Politehnica of Bucharest., NEC Corporation. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -31,13 +33,29 @@ */ #include +#include + +#ifdef CONFIG_PT_API +#include +#endif -/** - * TODO: - * For our kvm platform, the guest virtual address == guest physical address. - * We may have to reconsider this implementation when condition changes. 
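[Editor's note on the PT-API variant of ukplat_virt_to_phys() introduced just below, illustrative only: the physical address is rebuilt from the page-table entry as PTE_REMOVE_FLAGS(pte) plus the offset of the address within its page (4 KiB, or 2 MiB for large mappings). For example, vaddr = 0x1234567 mapped by a regular PTE whose frame base is 0x9abc000 yields 0x9abc000 + 0x567 = 0x9abc567.]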
- */ __phys_addr ukplat_virt_to_phys(const volatile void *address) { +#ifdef CONFIG_PT_API + unsigned long vaddr = address; + unsigned long pte = uk_virt_to_pte(PAGE_ALIGN_DOWN(vaddr)); + unsigned long offset; + + /* TODO: add support for huge pages */ + if (PAGE_LARGE(pte)) { + offset = vaddr - PAGE_LARGE_ALIGN_DOWN(vaddr); + } else { + offset = vaddr - PAGE_ALIGN_DOWN(vaddr); + } + + + return PTE_REMOVE_FLAGS(pte) + offset; +#else return (__phys_addr)address; +#endif } diff --git a/plat/kvm/irq.c b/plat/kvm/irq.c index 8f43aa6f0f..faa09a377e 100644 --- a/plat/kvm/irq.c +++ b/plat/kvm/irq.c @@ -80,6 +80,22 @@ extern unsigned long sched_have_pending_events; void _ukplat_irq_handle(unsigned long irq) { +#if CONFIG_LIBFLEXOS_INTELPKU + /* save PKRU state */ + unsigned long pkru = rdpkru(); + + /* Interrupt handlers can execute at any point in time. Such + * handlers can access data in a variety of compartments. An + * ideal approach would be to establish a mapping such as + * (interrupt, compartment), in which case we would do a domain + * transition now. However, for the sake of simplicity, this + * prototype simply trusts the handler. + * + * TODO FLEXOS: interrupt handler domain transitions. + */ + wrpkru(0x0); +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ + struct irq_handler *h; UK_SLIST_FOREACH(h, &irq_handlers[irq], entries) { @@ -110,6 +126,10 @@ void _ukplat_irq_handle(unsigned long irq) exit_ack: intctrl_ack_irq(irq); +#if CONFIG_LIBFLEXOS_INTELPKU + /* restore PKRU state */ + wrpkru(pkru); +#endif /* CONFIG_LIBFLEXOS_INTELPKU */ } int ukplat_irq_init(struct uk_alloc *a) diff --git a/plat/kvm/memory.c b/plat/kvm/memory.c index 1d9269ec5d..a0ce87d328 100644 --- a/plat/kvm/memory.c +++ b/plat/kvm/memory.c @@ -19,7 +19,7 @@ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#include +#include #include #include #include @@ -27,7 +27,7 @@ int ukplat_memregion_count(void) { - return (9 + return (10 + ((_libkvmplat_cfg.initrd.len > 0) ? 1 : 0) + ((_libkvmplat_cfg.heap2.len > 0) ? 
1 : 0)); } @@ -91,7 +91,7 @@ int ukplat_memregion_get(int i, struct ukplat_memregion_desc *m) #endif ret = 0; break; - case 5: /* data */ + case 5: /* shared */ m->base = (void *) __DATA; m->len = (size_t) __EDATA - (size_t) __DATA; m->flags = (UKPLAT_MEMRF_RESERVED @@ -102,7 +102,18 @@ int ukplat_memregion_get(int i, struct ukplat_memregion_desc *m) #endif ret = 0; break; - case 6: /* bss */ + case 6: /* data */ + m->base = (void *) __DATA; + m->len = (size_t) __EDATA - (size_t) __DATA; + m->flags = (UKPLAT_MEMRF_RESERVED + | UKPLAT_MEMRF_READABLE + | UKPLAT_MEMRF_WRITABLE); +#if CONFIG_UKPLAT_MEMRNAME + m->name = "data"; +#endif + ret = 0; + break; + case 7: /* bss */ m->base = (void *) __BSS_START; m->len = (size_t) __END - (size_t) __BSS_START; m->flags = (UKPLAT_MEMRF_RESERVED @@ -113,7 +124,7 @@ int ukplat_memregion_get(int i, struct ukplat_memregion_desc *m) #endif ret = 0; break; - case 7: /* heap */ + case 8: /* heap */ m->base = (void *) _libkvmplat_cfg.heap.start; m->len = _libkvmplat_cfg.heap.len; m->flags = UKPLAT_MEMRF_ALLOCATABLE; @@ -122,7 +133,7 @@ int ukplat_memregion_get(int i, struct ukplat_memregion_desc *m) #endif ret = 0; break; - case 8: /* stack */ + case 9: /* stack */ m->base = (void *) _libkvmplat_cfg.bstack.start; m->len = _libkvmplat_cfg.bstack.len; m->flags = (UKPLAT_MEMRF_RESERVED @@ -133,7 +144,7 @@ int ukplat_memregion_get(int i, struct ukplat_memregion_desc *m) m->name = "bstack"; #endif break; - case 9: /* initrd */ + case 10: /* initrd */ if (_libkvmplat_cfg.initrd.len) { m->base = (void *) _libkvmplat_cfg.initrd.start; m->len = _libkvmplat_cfg.initrd.len; @@ -146,7 +157,7 @@ int ukplat_memregion_get(int i, struct ukplat_memregion_desc *m) break; } /* fall-through */ - case 10: /* heap2 + case 11: /* heap2 * NOTE: heap2 could only exist if initrd was there, * otherwise we fall through */ if (_libkvmplat_cfg.initrd.len && _libkvmplat_cfg.heap2.len) { diff --git a/plat/kvm/x86/cpu_vectors_x86_64.S b/plat/kvm/x86/cpu_vectors_x86_64.S index 35aa58d2a8..688d65d3cc 100644 --- a/plat/kvm/x86/cpu_vectors_x86_64.S +++ b/plat/kvm/x86/cpu_vectors_x86_64.S @@ -64,6 +64,8 @@ popq %rdi .endm +/* IMPORTANT NOTE: the interrupt/exception handler MUST be able to write + * the stack. Otherwise the kernel will triple fault! */ .macro TRAP_ENTRY trapname, has_ec ENTRY(ASM_TRAP_SYM(\trapname)) cld diff --git a/plat/kvm/x86/link64.lds.S b/plat/kvm/x86/link64.lds.S index eb4a19d28b..ccfe92f2e0 100644 --- a/plat/kvm/x86/link64.lds.S +++ b/plat/kvm/x86/link64.lds.S @@ -31,17 +31,36 @@ SECTIONS { . = 0x100000; + .multiboot_header : + { + KEEP (*(.data.boot)) + } + + . = ALIGN(__PAGE_SIZE); + _sshared = .; + .data_shared : + { + KEEP (*(.data_shared .data_shared.*)) + . = ALIGN(__PAGE_SIZE); + /* this padding might not be actually needed */ + . = . + 0x1000; + } + _eshared = .; + /* Code */ _text = .; .text : { /* prevent linker gc from removing multiboot header */ - KEEP (*(.data.boot)) *(.text.boot) + *(.text_shared) *(.text) *(.text.*) } + + /* __FLEXOS MARKER__: insert compartment text sections here. */ + _etext = .; EXCEPTION_SECTIONS @@ -72,19 +91,24 @@ SECTIONS . = ALIGN(0x8); .init_array : { PROVIDE_HIDDEN (__init_array_start = .); - KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*))) - KEEP (*(.init_array .ctors)) + /* __FLEXOS MARKER__: insert compartment init array sections here. 
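[Editor's note, illustrative: the __FLEXOS MARKER__ comments are placeholders rewritten by the FlexOS toolchain at build time. By analogy with the .text_shared, .data_comp0 and .bss_comp0 rules that appear verbatim in this script, an additional compartment would be expected to contribute input sections with generated names (for instance .data_comp1, .bss_comp1) at these markers; the exact names are produced by the toolchain and are hypothetical here.]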
*/ PROVIDE_HIDDEN (__init_array_end = .); } _ectors = .; TLS_SECTIONS + /* -- compartment data sections begin -- */ + /* __FLEXOS MARKER__: insert compartment data sections here. */ + /* -- compartment data sections end -- */ + /* Read-write data (initialized) */ . = ALIGN(__PAGE_SIZE); _data = .; .data : { + *(.data_comp0) + *(.data_comp0.*) *(.data) *(.data.*) } @@ -95,16 +119,23 @@ SECTIONS __bss_start = .; .bss : { + *(.bss_comp0) + *(.bss_comp0.*) *(.bss) *(.bss.*) *(COMMON) . = ALIGN(__PAGE_SIZE); } + /* -- compartment bss sections begin -- */ + /* __FLEXOS MARKER__: insert compartment bss sections here. */ + /* -- compartment bss sections end -- */ + /* We keep the interrupt stack on a different section * given that it may have a big alignment and it would * change the entire binary layout */ + _intrstack_start = .; .intrstack : { *(.intrstack) diff --git a/plat/kvm/x86/setup.c b/plat/kvm/x86/setup.c index fbc5c0d212..1cb8448068 100644 --- a/plat/kvm/x86/setup.c +++ b/plat/kvm/x86/setup.c @@ -27,7 +27,7 @@ */ #include -#include +#include #include #include #include @@ -36,11 +36,16 @@ #include #include #include +#include #include #include #include #include +#if CONFIG_PT_API +#include +#endif /* CONFIG_PT_API */ + #define PLATFORM_MEM_START 0x100000 #define PLATFORM_MAX_MEM_ADDR 0x40000000 @@ -73,6 +78,37 @@ static inline void _mb_get_cmdline(struct multiboot_info *mi) cmdline[(sizeof(cmdline) - 1)] = '\0'; } +static inline void _mb_init_initrd2(struct multiboot_info *mi) +{ + multiboot_module_t *mod1; + + /* + * Search for initrd (called boot module according multiboot) + */ + if (mi->mods_count == 0) { + uk_pr_debug("No initrd present\n"); + return; + } + + /* + * NOTE: We are only taking the first boot module as initrd. + * Initrd arguments and further modules are ignored. + */ + UK_ASSERT(mi->mods_addr); + + mod1 = (multiboot_module_t *)((uintptr_t) mi->mods_addr); + UK_ASSERT(mod1->mod_end >= mod1->mod_start); + + if (mod1->mod_end == mod1->mod_start) { + uk_pr_debug("Ignoring empty initrd\n"); + return; + } + + _libkvmplat_cfg.initrd.start = (uintptr_t) mod1->mod_start; + _libkvmplat_cfg.initrd.end = (uintptr_t) mod1->mod_end; + _libkvmplat_cfg.initrd.len = (size_t) (mod1->mod_end - mod1->mod_start); +} + static inline void _mb_init_mem(struct multiboot_info *mi) { multiboot_memory_map_t *m; @@ -96,8 +132,10 @@ static inline void _mb_init_mem(struct multiboot_info *mi) * page tables for. */ max_addr = m->addr + m->len; +#ifndef CONFIG_DYNAMIC_PT if (max_addr > PLATFORM_MAX_MEM_ADDR) max_addr = PLATFORM_MAX_MEM_ADDR; +#endif /* CONFIG_DYNAMIC_PT */ UK_ASSERT((size_t) __END <= max_addr); /* @@ -106,13 +144,68 @@ static inline void _mb_init_mem(struct multiboot_info *mi) if ((max_addr - m->addr) < __STACK_SIZE) UK_CRASH("Not enough memory to allocate boot stack\n"); + _mb_init_initrd2(mi); + +#if CONFIG_DYNAMIC_PT + _libkvmplat_cfg.heap.start = HEAP_AREA_START; + _libkvmplat_cfg.heap.end = HEAP_AREA_START + + PAGE_LARGE_ALIGN_DOWN( + m->len + - STACK_AREA_SIZE + - KERNEL_AREA_SIZE + - BOOKKEEP_AREA_SIZE + - _libkvmplat_cfg.initrd.len); + _libkvmplat_cfg.heap.len = _libkvmplat_cfg.heap.end + - _libkvmplat_cfg.heap.start; +#if CONFIG_LIBPOSIX_MMAP + /* TODO: implement a way to dynamically resize the heap (e.g. 
brk()) */ + _libkvmplat_cfg.heap.len /= 2; + _libkvmplat_cfg.heap.end = _libkvmplat_cfg.heap.start + + _libkvmplat_cfg.heap.len; +#endif /* CONFIG_LIBPOSIX_MMAP */ + uk_pt_build(PAGE_ALIGN_UP(m->addr + KERNEL_AREA_SIZE + _libkvmplat_cfg.initrd.len), + m->len, KERNEL_AREA_START, KERNEL_AREA_START, KERNEL_AREA_SIZE); + + _libkvmplat_cfg.bstack.start = (uintptr_t) uk_stack_alloc(); + _libkvmplat_cfg.bstack.end = _libkvmplat_cfg.bstack.start + + __STACK_SIZE; + _libkvmplat_cfg.bstack.len = __STACK_SIZE; + +#else _libkvmplat_cfg.heap.start = ALIGN_UP((uintptr_t) __END, __PAGE_SIZE); +#if CONFIG_PT_API + uk_pt_init(_libkvmplat_cfg.heap.start + _libkvmplat_cfg.initrd.len, PLATFORM_MAX_MEM_ADDR, + m->addr + m->len - max_addr); + + _libkvmplat_cfg.heap.start += BOOKKEEP_AREA_SIZE; +#endif /* CONFIG_PT_API */ _libkvmplat_cfg.heap.end = (uintptr_t) max_addr - __STACK_SIZE; _libkvmplat_cfg.heap.len = _libkvmplat_cfg.heap.end - _libkvmplat_cfg.heap.start; _libkvmplat_cfg.bstack.start = _libkvmplat_cfg.heap.end; _libkvmplat_cfg.bstack.end = max_addr; _libkvmplat_cfg.bstack.len = __STACK_SIZE; +#endif /* CONFIG_DYNAMIC_PT */ + + /* TODO rewrite initrd code here nicely */ + uk_pr_info("Mapping initrd: %p - %p\n", (void*) _libkvmplat_cfg.initrd.start, + (void*) ((uintptr_t) _libkvmplat_cfg.initrd.start + + (uintptr_t) ALIGN_UP((_libkvmplat_cfg.initrd.len), __PAGE_SIZE))); + + if (uk_map_region(_libkvmplat_cfg.initrd.start, _libkvmplat_cfg.initrd.start, + ALIGN_UP((_libkvmplat_cfg.initrd.len), __PAGE_SIZE) >> PAGE_SHIFT, + PAGE_PROT_READ | PAGE_PROT_WRITE, 0)) + uk_pr_err("Couldn't map initrd\n"); + + for (offset = 0; offset < mi->mmap_length; + offset += m->size + sizeof(m->size)) { + m = (void *)(__uptr)(mi->mmap_addr + offset); + if (m->addr > PLATFORM_MEM_START + && m->type == MULTIBOOT_MEMORY_AVAILABLE) { + uk_pt_add_mem(m->addr, m->len); + } + } + } static inline void _mb_init_initrd(struct multiboot_info *mi) diff --git a/plat/kvm/x86/traps.c b/plat/kvm/x86/traps.c index 18b510c412..b7d9c3fa70 100644 --- a/plat/kvm/x86/traps.c +++ b/plat/kvm/x86/traps.c @@ -31,7 +31,8 @@ #include #include -static struct seg_desc32 cpu_gdt64[GDT_NUM_ENTRIES] __align64b; +__section(".intrstack") +struct seg_desc32 cpu_gdt64[GDT_NUM_ENTRIES] __align64b; /* * The monitor (ukvm) or bootloader + bootstrap (virtio) starts us up with a @@ -59,13 +60,22 @@ static void gdt_init(void) */ } -static struct tss64 cpu_tss; +/* FIXME FLEXOS: like the interrupt stack, the CPU task state segment + * has to be reachable as part of the interrupt handling process. If the + * TSS is protected with privileged PKU keys the CPU will silently (!) + * triple fault. This looks like a CPU design issue. We should investigate + * further on the consequences of leaving this accessible to all, and + * try to find a workaround. 
+ */ +__section(".intrstack") +struct tss64 cpu_tss; __section(".intrstack") __align(STACK_SIZE) char cpu_intr_stack[STACK_SIZE]; /* IST1 */ __section(".intrstack") __align(STACK_SIZE) char cpu_trap_stack[STACK_SIZE]; /* IST2 */ -static char cpu_nmi_stack[4096]; /* IST3 */ +__section(".intrstack") +char cpu_nmi_stack[4096]; /* IST3 */ static void tss_init(void) { @@ -101,7 +111,8 @@ DECLARE_TRAP_EC(double_fault, "double fault") DECLARE_TRAP_EC(virt_error, "virtualization error") -static struct seg_gate_desc64 cpu_idt[IDT_NUM_ENTRIES] __align64b; +__section(".intrstack") +struct seg_gate_desc64 cpu_idt[IDT_NUM_ENTRIES] __align64b; static void idt_fillgate(unsigned int num, void *fun, unsigned int ist) { diff --git a/plat/linuxu/include/linuxu/setup.h b/plat/linuxu/include/linuxu/setup.h index a4f23f37eb..7eea2ee251 100644 --- a/plat/linuxu/include/linuxu/setup.h +++ b/plat/linuxu/include/linuxu/setup.h @@ -36,11 +36,14 @@ #include +struct liblinuxuplat_memregion { + void *base; + size_t len; +}; + struct liblinuxuplat_opts { - struct { - void *base; - size_t len; - } heap; + struct liblinuxuplat_memregion heap; + struct liblinuxuplat_memregion initrd; }; extern struct liblinuxuplat_opts _liblinuxuplat_opts; diff --git a/plat/linuxu/include/linuxu/syscall-arm_32.h b/plat/linuxu/include/linuxu/syscall-arm_32.h index 399e167e8e..b3b27cc17f 100644 --- a/plat/linuxu/include/linuxu/syscall-arm_32.h +++ b/plat/linuxu/include/linuxu/syscall-arm_32.h @@ -44,6 +44,7 @@ #define __SC_MUNMAP 91 #define __SC_EXIT 1 #define __SC_IOCTL 54 +#define __SC_FSTAT 108 #define __SC_RT_SIGPROCMASK 126 #define __SC_ARCH_PRCTL 172 #define __SC_RT_SIGACTION 174 diff --git a/plat/linuxu/include/linuxu/syscall-x86_64.h b/plat/linuxu/include/linuxu/syscall-x86_64.h index 3e43b3f17f..990eb109b3 100644 --- a/plat/linuxu/include/linuxu/syscall-x86_64.h +++ b/plat/linuxu/include/linuxu/syscall-x86_64.h @@ -40,6 +40,7 @@ #define __SC_WRITE 1 #define __SC_OPEN 2 #define __SC_CLOSE 3 +#define __SC_FSTAT 5 #define __SC_MMAP 9 #define __SC_MUNMAP 11 #define __SC_RT_SIGACTION 13 diff --git a/plat/linuxu/include/linuxu/syscall.h b/plat/linuxu/include/linuxu/syscall.h index 949c04253f..267bfa625c 100644 --- a/plat/linuxu/include/linuxu/syscall.h +++ b/plat/linuxu/include/linuxu/syscall.h @@ -46,6 +46,13 @@ #error "Unsupported architecture" #endif + +static inline int sys_open(const char *pathname, int flags, mode_t mode) +{ + return (int)syscall3(__SC_OPEN, (long)pathname, (long)flags, + (long)mode); +} + static inline ssize_t sys_read(int fd, const char *buf, size_t len) { return (ssize_t) syscall3(__SC_READ, @@ -62,6 +69,13 @@ static inline ssize_t sys_write(int fd, const char *buf, size_t len) (long) (len)); } +struct stat; + +static inline int sys_fstat(int fd, struct stat *statbuf) +{ + return (int)syscall2(__SC_FSTAT, (long)(fd), (long)(statbuf)); +} + static inline int sys_exit(int status) { return (int) syscall1(__SC_EXIT, @@ -79,6 +93,7 @@ static inline int sys_clock_gettime(k_clockid_t clk_id, struct k_timespec *tp) * Please note that on failure sys_mmap() is returning -errno */ #define MAP_SHARED (0x01) +#define MAP_PRIVATE (0x02) #define MAP_ANONYMOUS (0x20) #define PROT_NONE (0x0) #define PROT_READ (0x1) diff --git a/plat/linuxu/memory.c b/plat/linuxu/memory.c index 5b3df9b3ac..e37c7bffea 100644 --- a/plat/linuxu/memory.c +++ b/plat/linuxu/memory.c @@ -40,11 +40,19 @@ #include #include -#define MB2B (1024 * 1024) +#include +#include +#include +#include + +#define MB2B (1024 * 1024) static __u32 heap_size = 
CONFIG_LINUXU_DEFAULT_HEAPMB; UK_LIB_PARAM(heap_size, __u32); +static const char *initrd_file = NULL; +UK_LIB_PARAM_STR(initrd_file); + static int __linuxu_plat_heap_init(void) { void *pret; @@ -61,32 +69,86 @@ static int __linuxu_plat_heap_init(void) if (PTRISERR(pret)) { rc = PTR2ERR(pret); uk_pr_err("Failed to allocate memory for heap: %d\n", - rc); + rc); } else _liblinuxuplat_opts.heap.base = pret; } return rc; +} +static int __linuxu_plat_initrd_init(void) +{ + void *pret; + int rc = 0; + struct stat file_info; + + if (initrd_file == NULL) { + uk_pr_debug("No initrd present.\n"); + } else { + uk_pr_debug("Mapping in initrd file: %s\n", initrd_file); + int initrd_fd = sys_open(initrd_file, O_RDONLY, 0); + + if (initrd_fd < 0) { + uk_pr_err("Failed to open intrd file"); + return -1; + } + + /** + * Find initrd file size + */ + if (sys_fstat(initrd_fd, &file_info) < 0) { + uk_pr_err("sys_fstat failed for initrd file"); + close(initrd_fd); + return -1; + } + _liblinuxuplat_opts.initrd.len = file_info.st_size; + /** + * Allocate initrd memory + */ + if (_liblinuxuplat_opts.initrd.len > 0) { + pret = sys_mmap((void *)_liblinuxuplat_opts.heap.len, + _liblinuxuplat_opts.initrd.len, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE, initrd_fd, 0); + if (PTRISERR(pret)) { + rc = PTR2ERR(pret); + uk_pr_err("Failed to allocate memory for initrd: %d\n", + rc); + close(initrd_fd); + return -1; + } + _liblinuxuplat_opts.initrd.base = pret; + } else { + uk_pr_err("Empty initrd file given.\n"); + close(initrd_fd); + return -1; + } + } + return rc; } int ukplat_memregion_count(void) { static int have_heap = 0; + static int have_initrd = 0; int rc = 0; + /* + * NOTE: The heap size and initrd file can be changed by a + * library parameter. We assume that those ones are processed + * by the boot library shortly before memory regions are + * scanned. This is why we initialize the heap here. + */ if (!have_heap) { - /* - * NOTE: The heap size can be changed by a library parameter. - * We assume that those ones are processed by the boot library - * shortly before memory regions are scanned. This is why - * we initialize the heap here. - */ rc = __linuxu_plat_heap_init(); have_heap = (rc == 0) ? 1 : 0; } - - return (have_heap) ? 1 : 0; + if (!have_initrd) { + rc = __linuxu_plat_initrd_init(); + have_initrd = (rc == 0) ? 
1 : 0; + } + return have_heap + have_initrd; } int ukplat_memregion_get(int i, struct ukplat_memregion_desc *m) @@ -96,20 +158,31 @@ int ukplat_memregion_get(int i, struct ukplat_memregion_desc *m) UK_ASSERT(m); if (i == 0 && _liblinuxuplat_opts.heap.base) { - m->base = _liblinuxuplat_opts.heap.base; - m->len = _liblinuxuplat_opts.heap.len; + m->base = _liblinuxuplat_opts.heap.base; + m->len = _liblinuxuplat_opts.heap.len; m->flags = UKPLAT_MEMRF_ALLOCATABLE; #if CONFIG_UKPLAT_MEMRNAME - m->name = "heap"; + m->name = "heap"; +#endif + ret = 0; + } else if ((i == 0 && !_liblinuxuplat_opts.heap.base + && _liblinuxuplat_opts.initrd.base) + || (i == 1 && _liblinuxuplat_opts.heap.base + && _liblinuxuplat_opts.initrd.base)) { + m->base = _liblinuxuplat_opts.initrd.base; + m->len = _liblinuxuplat_opts.initrd.len; + m->flags = UKPLAT_MEMRF_INITRD | UKPLAT_MEMRF_WRITABLE; +#if CONFIG_UKPLAT_MEMRNAME + m->name = "initrd"; #endif ret = 0; } else { /* invalid memory region index or no heap allocated */ - m->base = __NULL; - m->len = 0; + m->base = __NULL; + m->len = 0; m->flags = 0x0; #if CONFIG_UKPLAT_MEMRNAME - m->name = __NULL; + m->name = __NULL; #endif ret = -1; } diff --git a/plat/xen/Config.uk b/plat/xen/Config.uk index 2421e182d4..573026ba70 100644 --- a/plat/xen/Config.uk +++ b/plat/xen/Config.uk @@ -9,6 +9,7 @@ menuconfig PLAT_XEN select LIBUKTIME if !HAVE_LIBC && ARCH_X86_64 select LIBFDT if ARCH_ARM_32 select XEN_DBGEMERGENCY if ARCH_ARM_32 + select DYNAMIC_PT help Create a Unikraft image that runs as a Xen guest diff --git a/plat/xen/Makefile.uk b/plat/xen/Makefile.uk index 2a8cdbf630..1a7c4de3ba 100644 --- a/plat/xen/Makefile.uk +++ b/plat/xen/Makefile.uk @@ -46,6 +46,10 @@ LIBXENPLAT_SRCS-y += $(UK_PLAT_COMMON_BASE)/lcpu.c|common LIBXENPLAT_SRCS-y += $(UK_PLAT_COMMON_BASE)/memory.c|common +ifeq ($(CONFIG_PT_API),y) +LIBXENPLAT_MM_FLAGS-y += -I$(LIBXENPLAT_BASE)/include +LIBXENPLAT_SRCS-$(CONFIG_ARCH_X86_64) += $(UK_PLAT_COMMON_BASE)/mm.c|common +endif LIBXENPLAT_SRCS-$(CONFIG_ARCH_X86_64) += $(UK_PLAT_COMMON_BASE)/x86/trace.c|common LIBXENPLAT_SRCS-$(CONFIG_ARCH_X86_64) += $(UK_PLAT_COMMON_BASE)/x86/traps.c|common LIBXENPLAT_SRCS-$(CONFIG_ARCH_X86_64) += $(UK_PLAT_COMMON_BASE)/x86/cpu_features.c|common diff --git a/plat/xen/arm/setup.c b/plat/xen/arm/setup.c index 2df3b46c37..bcbc939eb4 100644 --- a/plat/xen/arm/setup.c +++ b/plat/xen/arm/setup.c @@ -25,7 +25,7 @@ /* Ported from Mini-OS */ #include -#include +#include #include #include #include diff --git a/plat/xen/drivers/9p/9pfront.c b/plat/xen/drivers/9p/9pfront.c index 6c9f232c00..d32906ed38 100644 --- a/plat/xen/drivers/9p/9pfront.c +++ b/plat/xen/drivers/9p/9pfront.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #if CONFIG_LIBUKSCHED @@ -168,7 +169,7 @@ static void p9front_handler(evtchn_port_t evtchn, uk_9pdev_xmit_notify(ring->dev->p9dev); #if CONFIG_LIBUKSCHED UK_WRITE_ONCE(ring->data_avail, true); - uk_waitq_wake_up(&ring->bh_wq); + flexos_gate(libuksched, uk_waitq_wake_up, &ring->bh_wq); #else p9front_recv(ring); #endif @@ -183,7 +184,7 @@ static void p9front_free_dev_ring(struct p9front_dev *p9fdev, int idx) if (ring->bh_thread_name) free(ring->bh_thread_name); - uk_thread_kill(ring->bh_thread); + flexos_gate(libuksched, uk_thread_kill, ring->bh_thread); unbind_evtchn(ring->evtchn); for (i = 0; i < (1 << p9fdev->ring_order); i++) gnttab_end_access(ring->intf->ref[i]); @@ -259,7 +260,7 @@ static int p9front_allocate_dev_ring(struct p9front_dev *p9fdev, int idx) #if CONFIG_LIBUKSCHED /* Allocate 
bottom-half thread. */ ring->data_avail = false; - uk_waitq_init(&ring->bh_wq); + flexos_gate(libuksched, uk_waitq_init, &ring->bh_wq); rc = asprintf(&ring->bh_thread_name, DRIVER_NAME"-recv-%s-%u", p9fdev->tag, idx); @@ -289,7 +290,7 @@ static int p9front_allocate_dev_ring(struct p9front_dev *p9fdev, int idx) out_free_thread: if (ring->bh_thread_name) free(ring->bh_thread_name); - uk_thread_kill(ring->bh_thread); + flexos_gate(libuksched, uk_thread_kill, ring->bh_thread); out_free_grants: for (i = 0; i < (1 << p9fdev->ring_order); i++) gnttab_end_access(ring->intf->ref[i]); diff --git a/plat/xen/events.c b/plat/xen/events.c index 3a1d155765..ff6d4ba1d6 100644 --- a/plat/xen/events.c +++ b/plat/xen/events.c @@ -335,3 +335,12 @@ int ukplat_irq_init(struct uk_alloc *a __unused) /* Nothing for now */ return 0; } + +/* For some reason, the inline version in the header file doesn't work */ +int notify_remote_via_evtchn2(evtchn_port_t port) +{ + evtchn_send_t op; + + op.port = port; + return HYPERVISOR_event_channel_op(EVTCHNOP_send, &op); +} diff --git a/plat/xen/include/xen-arm/mm.h b/plat/xen/include/xen-arm/mm.h index 659de84333..bbd31ddd5c 100644 --- a/plat/xen/include/xen-arm/mm.h +++ b/plat/xen/include/xen-arm/mm.h @@ -28,7 +28,7 @@ #define _ARCH_MM_H_ #include -#include +#include #include typedef uint64_t paddr_t; diff --git a/plat/xen/include/xen-x86/hypercall64.h b/plat/xen/include/xen-x86/hypercall64.h index 467d8fae89..f7d993dd5f 100644 --- a/plat/xen/include/xen-x86/hypercall64.h +++ b/plat/xen/include/xen-x86/hypercall64.h @@ -40,9 +40,7 @@ #include #include -#define PAGE_SHIFT __PAGE_SHIFT -#define PAGE_MASK __PAGE_MASK -#define PAGE_SIZE __PAGE_SIZE +#include #define STACK_SIZE_PAGE_ORDER __STACK_SIZE_PAGE_ORDER #define STACK_SIZE __STACK_SIZE diff --git a/plat/xen/include/xen-x86/mm.h b/plat/xen/include/xen-x86/mm.h index ffbedb09b1..2fa6731e83 100644 --- a/plat/xen/include/xen-x86/mm.h +++ b/plat/xen/include/xen-x86/mm.h @@ -25,7 +25,7 @@ #ifndef _ARCH_MM_H_ #define _ARCH_MM_H_ -#include +#include #ifndef __ASSEMBLY__ #include #if defined(__i386__) @@ -46,6 +46,8 @@ #include #endif +#include + /* * Physical address space usage: * @@ -108,22 +110,8 @@ typedef uint64_t pgentry_t; #elif defined(__x86_64__) -#define L2_PAGETABLE_SHIFT 21 -#define L3_PAGETABLE_SHIFT 30 -#define L4_PAGETABLE_SHIFT 39 - -#define L1_PAGETABLE_ENTRIES 512 -#define L2_PAGETABLE_ENTRIES 512 -#define L3_PAGETABLE_ENTRIES 512 -#define L4_PAGETABLE_ENTRIES 512 - -#define PAGETABLE_LEVELS 4 - /* These are page-table limitations. Current CPUs support only 40-bit phys. 
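[Editor's note on the recurring rewrite in the 9pfront hunks above (and in the xenbus hunks further below), illustrative only: direct calls into the scheduler library are replaced by gate invocations of the form

	/* before */ uk_waitq_wake_up(&ring->bh_wq);
	/* after  */ flexos_gate(libuksched, uk_waitq_wake_up, &ring->bh_wq);

that is, flexos_gate(target library, callee, arguments...), so that the call crosses into the libuksched compartment rather than touching its wait queues from the caller's domain.]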
  */
 
-#define PADDR_BITS 52
 #define VADDR_BITS 48
 
-#define PADDR_MASK ((1UL << PADDR_BITS)-1)
-#define VADDR_MASK ((1UL << VADDR_BITS)-1)
 #define L2_MASK ((1UL << L3_PAGETABLE_SHIFT) - 1)
 #define L3_MASK ((1UL << L4_PAGETABLE_SHIFT) - 1)
@@ -165,38 +153,18 @@ typedef unsigned long pgentry_t;
 	(((_a) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1))
 #endif
 
-#define _PAGE_PRESENT CONST(0x001)
-#define _PAGE_RW CONST(0x002)
-#define _PAGE_USER CONST(0x004)
-#define _PAGE_PWT CONST(0x008)
-#define _PAGE_PCD CONST(0x010)
-#define _PAGE_ACCESSED CONST(0x020)
-#define _PAGE_DIRTY CONST(0x040)
-#define _PAGE_PAT CONST(0x080)
-#define _PAGE_PSE CONST(0x080)
-#define _PAGE_GLOBAL CONST(0x100)
-
 #if defined(__i386__)
 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
 #define L1_PROT_RO (_PAGE_PRESENT|_PAGE_ACCESSED)
 #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY |_PAGE_USER)
 #define L3_PROT (_PAGE_PRESENT)
-#elif defined(__x86_64__)
-#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
-#define L1_PROT_RO (_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_USER)
-#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
-#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
-#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
-#endif /* __i386__ || __x86_64__ */
+#endif
 
 /* flags for ioremap */
 #define IO_PROT (L1_PROT)
 #define IO_PROT_NOCACHE (L1_PROT | _PAGE_PCD)
 
 #include
 
-#define PAGE_SIZE __PAGE_SIZE
-#define PAGE_SHIFT __PAGE_SHIFT
-#define PAGE_MASK __PAGE_MASK
 
 #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> L1_PAGETABLE_SHIFT)
 #define PFN_DOWN(x) ((x) >> L1_PAGETABLE_SHIFT)
@@ -244,7 +212,8 @@ static __inline__ paddr_t machine_to_phys(maddr_t machine)
 #define to_virt(x) ((void *)((unsigned long)(x)+VIRT_START))
 
 #define virt_to_pfn(_virt) (PFN_DOWN(to_phys(_virt)))
-#define virt_to_mfn(_virt) (pfn_to_mfn(virt_to_pfn(_virt)))
+// TODO(fane)
+#define virt_to_mfn(virt) (PTE_REMOVE_FLAGS(uk_virt_to_pte(PAGE_ALIGN_DOWN((unsigned long) virt))) >> L1_PAGETABLE_SHIFT)
 #define mach_to_virt(_mach) (to_virt(machine_to_phys(_mach)))
 #define virt_to_mach(_virt) (phys_to_machine(to_phys(_virt)))
 #define mfn_to_virt(_mfn) (to_virt(mfn_to_pfn(_mfn) << PAGE_SHIFT))
diff --git a/plat/xen/memory.c b/plat/xen/memory.c
index b390aab624..271f44fb52 100644
--- a/plat/xen/memory.c
+++ b/plat/xen/memory.c
@@ -32,7 +32,7 @@
  */
 
 #include
-#include
+#include
 #include
 
 #if (defined __X86_32__) || (defined __X86_64__)
diff --git a/plat/xen/x86/mm.c b/plat/xen/x86/mm.c
index e006ab7f01..9fb6ee8843 100644
--- a/plat/xen/x86/mm.c
+++ b/plat/xen/x86/mm.c
@@ -36,7 +36,7 @@
  */
 
 #include
-#include
+#include
 #include
 #include
 #include
@@ -226,45 +226,6 @@ void _init_mem_build_pagetable(unsigned long *start_pfn, unsigned long *max_pfn)
 	*start_pfn = pt_pfn;
 }
 
-/*
- * Get the PTE for virtual address va if it exists. Otherwise NULL.
- */
-static pgentry_t *get_pte(unsigned long va)
-{
-	unsigned long mfn;
-	pgentry_t *tab;
-	unsigned int offset;
-
-	tab = pt_base;
-
-#if defined(__x86_64__)
-	offset = l4_table_offset(va);
-	if (!(tab[offset] & _PAGE_PRESENT))
-		return NULL;
-
-	mfn = pte_to_mfn(tab[offset]);
-	tab = mfn_to_virt(mfn);
-#endif
-	offset = l3_table_offset(va);
-	if (!(tab[offset] & _PAGE_PRESENT))
-		return NULL;
-
-	mfn = pte_to_mfn(tab[offset]);
-	tab = mfn_to_virt(mfn);
-	offset = l2_table_offset(va);
-	if (!(tab[offset] & _PAGE_PRESENT))
-		return NULL;
-
-	if (tab[offset] & _PAGE_PSE)
-		return &tab[offset];
-
-	mfn = pte_to_mfn(tab[offset]);
-	tab = mfn_to_virt(mfn);
-	offset = l1_table_offset(va);
-
-	return &tab[offset];
-}
-
 /*
  * Return a valid PTE for a given virtual address.
  * If PTE does not exist, allocate page-table pages.
@@ -433,18 +394,12 @@ unsigned long allocate_ondemand(unsigned long n, unsigned long align)
 		unsigned long addr = demand_map_area_start +
 			page_idx * PAGE_SIZE;
-		pgentry_t *pte = get_pte(addr);
 
 		for (contig = 0; contig < n; contig++, addr += PAGE_SIZE) {
-			if (!(addr & L1_MASK))
-				pte = get_pte(addr);
-
-			if (pte) {
-				if (*pte & _PAGE_PRESENT)
-					break;
+			unsigned long pte = uk_virt_to_pte(addr);
 
-				pte++;
-			}
+			if (pte & _PAGE_PRESENT)
+				break;
 		}
 
 		if (contig == n)
@@ -483,16 +438,29 @@ unsigned long allocate_ondemand(unsigned long n, unsigned long align)
 void *map_frames_ex(const unsigned long *mfns, unsigned long n,
 		unsigned long stride, unsigned long incr,
 		unsigned long alignment,
-		domid_t id, int *err, unsigned long prot,
-		struct uk_alloc *a)
+		domid_t id __unused, int *err, unsigned long prot,
+		struct uk_alloc *a __unused)
 {
 	unsigned long va = allocate_ondemand(n, alignment);
+	size_t i;
+	int rc;
 
 	if (!va)
 		return NULL;
 
+	for (i = 0; i < n; i++) {
+		rc = uk_page_map(va + i * PAGE_SIZE, (mfns[i * stride] + i * incr) << PAGE_SHIFT, prot, 0);
+		if (rc) {
+			*err = rc;
+			return NULL;
+		}
+	}
+
+	/* TODO(fane) */
+	/*
 	if (do_map_frames(va, mfns, n, stride, incr, id, err, prot, a))
 		return NULL;
+	*/
 
 	return (void *) va;
 }
@@ -604,7 +572,7 @@ void _init_mem_set_readonly(void *text, void *etext)
 		page = tab[offset];
 		mfn = pte_to_mfn(page);
 		tab = to_virt(mfn_to_pfn(mfn) << PAGE_SHIFT);
-		offset = l2_table_offset(start_address);
+		offset = l2_table_offset(start_address);
 
 		if ( !(tab[offset] & _PAGE_PSE) ) {
 			page = tab[offset];
@@ -617,7 +585,7 @@ void _init_mem_set_readonly(void *text, void *etext)
 		if ( start_address != (unsigned long)&_libxenplat_shared_info ) {
 #ifdef CONFIG_PARAVIRT
-			mmu_updates[count].ptr =
+			mmu_updates[count].ptr =
 				((pgentry_t)mfn << PAGE_SHIFT) + sizeof(pgentry_t) * offset;
 			mmu_updates[count].val = tab[offset] & ~_PAGE_RW;
 			count++;
@@ -631,7 +599,7 @@ void _init_mem_set_readonly(void *text, void *etext)
 		start_address += page_size;
 
 #ifdef CONFIG_PARAVIRT
-		if ( count == L1_PAGETABLE_ENTRIES ||
+		if ( count == L1_PAGETABLE_ENTRIES ||
 		     start_address + page_size > end_address ) {
 			rc = HYPERVISOR_mmu_update(mmu_updates, count, NULL,
 						   DOMID_SELF);
@@ -704,7 +672,7 @@ void _arch_init_p2m(struct uk_alloc *a)
 	for (pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES) {
 		if (!(pfn % (P2M_ENTRIES * P2M_ENTRIES))) {
 			l2_list = uk_palloc(a, 1);
-			l3_list[L3_P2M_IDX(pfn)] = virt_to_mfn(l2_list);
+			l3_list[L3_P2M_IDX(pfn)] = pte_to_mfn(uk_virt_to_pte((unsigned long) l2_list));
 			l2_list_pages[L3_P2M_IDX(pfn)] = l2_list;
 		}
@@ -712,7 +680,7 @@
 			virt_to_mfn(phys_to_machine_mapping + pfn);
 	}
 	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
-		virt_to_mfn(l3_list);
+		pte_to_mfn(uk_virt_to_pte((unsigned long) l3_list));
 	HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
 }
diff --git a/plat/xen/x86/setup.c b/plat/xen/x86/setup.c
index 486b467084..af55812fc5 100644
--- a/plat/xen/x86/setup.c
+++ b/plat/xen/x86/setup.c
@@ -74,6 +74,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
@@ -138,29 +140,37 @@ static inline void _init_mem(void)
 	uk_pr_info(" start_pfn: %lx\n", start_pfn);
 	uk_pr_info(" max_pfn: %lx\n", max_pfn);
 
-	_init_mem_build_pagetable(&start_pfn, &max_pfn);
-	_init_mem_clear_bootstrap();
-	_init_mem_set_readonly((void *)__TEXT, (void *)__ERODATA);
+	//_init_mem_build_pagetable(&start_pfn, &max_pfn);
+	//_init_mem_clear_bootstrap();
+	//_init_mem_set_readonly((void *)__TEXT, (void *)__ERODATA);
+	/* TODO(fane) */
+	/* TODO: what to do with initrd on Xen? */
+	uk_pt_build(start_pfn << PAGE_SHIFT, (max_pfn - start_pfn) << PAGE_SHIFT, 0x2000, 0x2000, ukarch_read_pt_base() - 0x2000);
 
 	/* Fill out mrd array */
 	/* heap */
-	_libxenplat_mrd[0].base = to_virt(start_pfn << __PAGE_SHIFT);
+	_libxenplat_mrd[0].base = HEAP_AREA_START;
 	_libxenplat_mrd[0].len = (size_t) to_virt(max_pfn << __PAGE_SHIFT)
-		- (size_t) to_virt(start_pfn << __PAGE_SHIFT);
+		- (size_t) to_virt(start_pfn << __PAGE_SHIFT) - BOOKKEEP_AREA_SIZE;
 	_libxenplat_mrd[0].flags = (UKPLAT_MEMRF_ALLOCATABLE);
+
+#if CONFIG_LIBPOSIX_MMAP
+	/* TODO(fane) */
+	_libxenplat_mrd[0].len /= 2;
+#endif /* CONFIG_LIBPOSIX_MMAP */
+
 #if CONFIG_UKPLAT_MEMRNAME
 	_libxenplat_mrd[0].name = "heap";
 #endif
 
 	/* demand area */
-	_libxenplat_mrd[1].base = (void *) VIRT_DEMAND_AREA;
-	_libxenplat_mrd[1].len = DEMAND_MAP_PAGES * PAGE_SIZE;
+	_libxenplat_mrd[1].base = (void *) MAPPINGS_AREA_START;
+	_libxenplat_mrd[1].len = MAPPINGS_AREA_SIZE;
 	_libxenplat_mrd[1].flags = UKPLAT_MEMRF_RESERVED;
 #if CONFIG_UKPLAT_MEMRNAME
 	_libxenplat_mrd[1].name = "demand";
 #endif
 	_init_mem_demand_area((unsigned long) _libxenplat_mrd[1].base,
-			      DEMAND_MAP_PAGES);
+			      MAPPINGS_AREA_SIZE >> PAGE_SHIFT);
 
 	_libxenplat_mrd_num = 2;
 }
diff --git a/plat/xen/xenbus/client.c b/plat/xen/xenbus/client.c
index 20e671ec16..858bf52e07 100644
--- a/plat/xen/xenbus/client.c
+++ b/plat/xen/xenbus/client.c
@@ -40,6 +40,7 @@
 
 #include
 #include
+#include
 #include
 #include
 #include
@@ -126,7 +127,7 @@ int xenbus_watch_notify_event(struct xenbus_watch *watch)
 
 	ukarch_spin_lock(&watch->lock);
 	watch->pending_events++;
-	uk_waitq_wake_up(&watch->wq);
+	flexos_gate(libuksched, uk_waitq_wake_up, &watch->wq);
 	ukarch_spin_unlock(&watch->lock);
 
 	return 0;
diff --git a/plat/xen/xenbus/xs_comms.c b/plat/xen/xenbus/xs_comms.c
index b81d16bd21..51615d49b4 100644
--- a/plat/xen/xenbus/xs_comms.c
+++ b/plat/xen/xenbus/xs_comms.c
@@ -39,6 +39,7 @@
  */
 
 #include
+#include
 #include
 #include
 #include
@@ -129,13 +130,13 @@ static void xs_request_pool_init(struct xs_request_pool *pool)
 	pool->num_live = 0;
 	pool->last_probed = -1;
 	ukarch_spin_lock_init(&pool->lock);
-	uk_waitq_init(&pool->waitq);
+	flexos_gate(libuksched, uk_waitq_init, &pool->waitq);
 	UK_TAILQ_INIT(&pool->queued);
 	uk_bitmap_zero(pool->entries_bm, XS_REQ_POOL_SIZE);
 	for (int i = 0; i < XS_REQ_POOL_SIZE; i++) {
 		xs_req = &pool->entries[i];
 		xs_req->hdr.req_id = i;
-		uk_waitq_init(&xs_req->waitq);
+		flexos_gate(libuksched, uk_waitq_init, &xs_req->waitq);
 	}
 }
@@ -191,7 +192,7 @@ static void xs_request_put(struct xs_request *xs_req)
 	xs_req_pool.num_live--;
 
 	if (xs_req_pool.num_live == XS_REQ_POOL_SIZE - 1)
-		uk_waitq_wake_up(&xs_req_pool.waitq);
+		flexos_gate(libuksched, uk_waitq_wake_up, &xs_req_pool.waitq);
 
 	ukarch_spin_unlock(&xs_req_pool.lock);
 }
@@ -358,7 +359,7 @@ int xs_msg_reply(enum xsd_sockmsg_type msg_type, xenbus_transaction_t xbt,
 	/* enqueue the request */
 	xs_request_enqueue(xs_req);
 	/* wake xenstore thread to send it */
-	uk_waitq_wake_up(&xsh.waitq);
+	flexos_gate(libuksched, uk_waitq_wake_up, &xsh.waitq);
 
 	/* wait reply */
 	uk_waitq_wait_event(&xs_req->waitq,
@@ -453,7 +454,7 @@ static void process_reply(struct xsd_sockmsg *hdr, char *payload)
 	xs_req->reply.recvd = 1;
 
 	/* notify waiting requester */
-	uk_waitq_wake_up(&xs_req->waitq);
+	flexos_gate(libuksched, uk_waitq_wake_up, &xs_req->waitq);
 }
 
 /* Process an incoming xs watch event */
@@ -576,7 +577,7 @@ static void xs_evtchn_handler(evtchn_port_t port, struct __regs *regs __unused,
 			      void *ign __unused)
 {
 	UK_ASSERT(xsh.evtchn == port);
-	uk_waitq_wake_up(&xsh.waitq);
+	flexos_gate(libuksched, uk_waitq_wake_up, &xsh.waitq);
 }
 
 int xs_comms_init(void)
@@ -586,7 +587,7 @@ int xs_comms_init(void)
 
 	xs_request_pool_init(&xs_req_pool);
 
-	uk_waitq_init(&xsh.waitq);
+	flexos_gate(libuksched, uk_waitq_init, &xsh.waitq);
 
 	thread = uk_thread_create("xenstore", xs_thread_func, NULL);
 	if (PTRISERR(thread))
@@ -615,6 +616,6 @@ void xs_comms_fini(void)
 	xsh.buf = NULL;
 
 	/* TODO stop thread, instead of killing it */
-	uk_thread_kill(xsh.thread);
+	flexos_gate(libuksched, uk_thread_kill, xsh.thread);
 	xsh.thread = NULL;
 }
diff --git a/plat/xen/xenbus/xs_watch.c b/plat/xen/xenbus/xs_watch.c
index b958b58622..b315b6b31f 100644
--- a/plat/xen/xenbus/xs_watch.c
+++ b/plat/xen/xenbus/xs_watch.c
@@ -34,6 +34,7 @@
 #include
 #include
 #include
+#include
 #include "xs_watch.h"
 
 /* Watches list */
@@ -64,7 +65,7 @@ struct xs_watch *xs_watch_create(const char *path)
 	ukarch_spin_lock_init(&xsw->base.lock);
 
 	xsw->base.pending_events = 0;
-	uk_waitq_init(&xsw->base.wq);
+	flexos_gate(libuksched, uk_waitq_init, &xsw->base.wq);
 
 	/* set path */
 	tmpstr = (char *) (xsw + 1);
diff --git a/support/build/Makefile.rules b/support/build/Makefile.rules
index 50e61fe133..ad86eadd25 100644
--- a/support/build/Makefile.rules
+++ b/support/build/Makefile.rules
@@ -408,7 +408,7 @@ $(4): $(2) | prepare
 		$$($(call vprefix_lib,$(1),ASINCLUDES)) $$($(call vprefix_lib,$(1),ASINCLUDES-y)) \
 		$$($(call vprefix_src,$(1),$(2),$(3),INCLUDES)) $$($(call vprefix_src,$(1),$(2),$(3),INCLUDES-y)) \
 		$$($(call vprefix_glb,$(3),ARCHFLAGS)) $$($(call vprefix_glb,$(3),ARCHFLAGS-y)) \
-		$$(ASFLAGS) $$(ASFLAGS-y) \
+		$$(ASFLAGS) $$(ASFLAGS-y) $$(ASFLAGS_EXTRA) \
 		$$($(call vprefix_lib,$(1),ASFLAGS)) $$($(call vprefix_lib,$(1),ASFLAGS-y)) \
 		$$($(call vprefix_src,$(1),$(2),$(3),FLAGS)) $$($(call vprefix_src,$(1),$(2),$(3),FLAGS-y)) \
 		$(5) \
@@ -435,7 +435,7 @@ $(4): $(2) | prepare
 		$$($(call vprefix_lib,$(1),ASINCLUDES)) $$($(call vprefix_lib,$(1),ASINCLUDES-y)) \
 		$$($(call vprefix_src,$(1),$(2),$(3),INCLUDES)) $$($(call vprefix_src,$(1),$(2),$(3),INCLUDES-y)) \
 		$$($(call vprefix_glb,$(3),ARCHFLAGS)) $$($(call vprefix_glb,$(3),ARCHFLAGS-y)) \
-		$$(ASFLAGS) $$(ASFLAGS-y) \
+		$$(ASFLAGS) $$(ASFLAGS-y) $$(ASFLAGS_EXTRA) \
 		$$($(call vprefix_lib,$(1),ASFLAGS)) $$($(call vprefix_lib,$(1),ASFLAGS-y)) \
 		$$($(call vprefix_src,$(1),$(2),$(3),FLAGS)) $$($(call vprefix_src,$(1),$(2),$(3),FLAGS-y)) \
 		$(5) \
@@ -460,7 +460,7 @@ $(4): $(2) | prepare
 		$$($(call vprefix_lib,$(1),CINCLUDES)) $$($(call vprefix_lib,$(1),CINCLUDES-y)) \
 		$$($(call vprefix_src,$(1),$(2),$(3),INCLUDES)) $$($(call vprefix_src,$(1),$(2),$(3),INCLUDES-y)) \
 		$$($(call vprefix_glb,$(3),ARCHFLAGS)) $$($(call vprefix_glb,$(3),ARCHFLAGS-y)) \
-		$$(CFLAGS) $$(CFLAGS-y) \
+		$$(CFLAGS) $$(CFLAGS-y) $$(CFLAGS_EXTRA) \
 		$$($(call vprefix_lib,$(1),CFLAGS)) $$($(call vprefix_lib,$(1),CFLAGS-y)) \
 		$$($(call vprefix_src,$(1),$(2),$(3),FLAGS)) $$($(call vprefix_src,$(1),$(2),$(3),FLAGS-y)) \
 		$(5) \
@@ -484,7 +484,7 @@ $(4): $(2) | prepare
 		$$($(call vprefix_lib,$(1),CXXINCLUDES)) $$($(call vprefix_lib,$(1),CXXINCLUDES-y)) \
 		$$($(call vprefix_src,$(1),$(2),$(3),INCLUDES)) $$($(call vprefix_src,$(1),$(2),$(3),INCLUDES-y)) \
 		$$($(call vprefix_glb,$(3),ARCHFLAGS)) $$($(call vprefix_glb,$(3),ARCHFLAGS-y)) \
-		$$(CXXFLAGS) $$(CXXFLAGS-y) \
+		$$(CXXFLAGS) $$(CXXFLAGS-y) $$(CXXFLAGS_EXTRA) \
 		$$($(call vprefix_lib,$(1),CXXFLAGS)) $$($(call vprefix_lib,$(1),CXXFLAGS-y)) \
 		$$($(call vprefix_src,$(1),$(2),$(3),FLAGS)) $$($(call vprefix_src,$(1),$(2),$(3),FLAGS-y)) \
 		$(5) \
@@ -516,7 +516,7 @@ $(4): $(2) | prepare
 		$$($(call vprefix_lib,$(1),GOCINCLUDES)) $$($(call vprefix_lib,$(1),GOCINCLUDES-y)) \
 		$$($(call vprefix_src,$(1),$(2),$(3),INCLUDES)) $$($(call vprefix_src,$(1),$(2),$(3),INCLUDES-y)) \
 		$$($(call vprefix_glb,$(3),ARCHFLAGS)) $$($(call vprefix_glb,$(3),ARCHFLAGS-y)) \
-		$$(GOCFLAGS) $$(GOCFLAGS-y) \
+		$$(GOCFLAGS) $$(GOCFLAGS-y) $$(GOCFLAGS_EXTRA) \
 		$$($(call vprefix_lib,$(1),GOCFLAGS)) $$($(call vprefix_lib,$(1),GOCFLAGS-y)) \
 		$$($(call vprefix_src,$(1),$(2),$(3),FLAGS)) $$($(call vprefix_src,$(1),$(2),$(3),FLAGS-y)) \
 		$(5) \
@@ -532,6 +532,24 @@ $(eval $(call vprefix_lib,$(1),OBJS-y) += $(4))
 $(eval $(call vprefix_lib,$(1),CLEAN-y) += $(call build_clean,$(4)) $(call out2dep,$(4)))
 endef
 
+# NOTE: We are not using most of the flags such as COMPFLAGS due to incompatibilities between rustc and GCC.
+define buildrule_rs =
+$(4): $(2) | preprocess
+	$(call build_cmd,RUSTC,$(1),$(4),\
+		$(RUSTC) $$(RUSTCFLAGS) $$(RUSTCFLAGS-y) \
+		$$($(call vprefix_lib,$(1),RUSTCFLAGS)) $$($(call vprefix_lib,$(1),RUSTCFLAGS-y)) \
+		$$($(call vprefix_src,$(1),$(2),$(3),FLAGS)) $$($(call vprefix_src,$(1),$(2),$(3),FLAGS-y)) \
+		--cfg '__LIBNAME__="$(1)"' --cfg '__BASENAME__="$(notdir $(2))"' $(if $(3),--cfg '__VARIANT__="$(3)"') \
+		$(2) -o $(4)
+	)
+
+UK_SRCS-y += $(2)
+UK_DEPS-y += $(call out2dep,$(4))
+UK_OBJS-y += $(4)
+$(eval $(call vprefix_lib,$(1),OBJS-y) += $(4))
+$(eval $(call vprefix_lib,$(1),CLEAN-y) += $(call build_clean,$(4)) $(call out2dep,$(4)))
+endef
+
 define add_lds_to_plat =
 $(eval $(call uc,$(2))_LD_SCRIPT-y += $(1))
 endef
@@ -547,8 +565,8 @@ $(4): $(2) | prepare
 		$$(ASINCLUDES) $$(ASINCLUDES-y) \
 		$$($(call vprefix_lib,$(1),ASINCLUDES)) $$($(call vprefix_lib,$(1),ASINCLUDES-y)) \
 		$$($(call vprefix_src,$(1),$(2),$(3),INCLUDES)) $$($(call vprefix_src,$(1),$(2),$(3),INCLUDES-y)) \
-		$$(ARCHFLAGS) $$(ARCHFLAGS-y) \
-		$$(ASFLAGS) $$(ASFLAGS-y) \
+		$$(ARCHFLAGS) $$(ARCHFLAGS-y) $$(ARCHFLAGS_EXTRA) \
+		$$(ASFLAGS) $$(ASFLAGS-y) $$(ASFLAGS_EXTRA) \
 		$$($(call vprefix_lib,$(1),ASFLAGS)) $$($(call vprefix_lib,$(1),ASFLAGS-y)) \
 		$$($(call vprefix_src,$(1),$(2),$(3),FLAGS)) $$($(call vprefix_src,$(1),$(2),$(3),FLAGS-y)) \
 		$(5) \
@@ -632,7 +650,7 @@ $(3): $(2) | prepare
 	$(call build_cmd,M4,$(1),$(3),\
 		$(M4) $(M4INCLUDES) $(M4INCLUDES-y) \
 		$($(call vprefix_lib,$(1),M4INCLUDES)) $($(call vprefix_lib,$(1),M4INCLUDES-y)) \
-		$(M4FLAGS) $(M4FLAGS-y) \
+		$(M4FLAGS) $(M4FLAGS-y) $(M4FLAGS_EXTRA) \
 		$($(call vprefix_lib,$(1),M4FLAGS)) $($(call vprefix_lib,$(1),M4FLAGS-y)) \
 		$(4) \
 		$(2) > $(3)