diff --git a/examples/virtio/Makefile b/examples/virtio/Makefile index 0f4367a5b..f50181769 100644 --- a/examples/virtio/Makefile +++ b/examples/virtio/Makefile @@ -1,5 +1,5 @@ # -# Copyright 2024, UNSW +# Copyright 2025, UNSW # # SPDX-License-Identifier: BSD-2-Clause # @@ -33,7 +33,7 @@ else ifeq ($(strip $(MICROKIT_BOARD)), qemu_virt_aarch64) export GIC_DT_OVERLAY := gic_v2_overlay.dts QEMU := qemu-system-aarch64 - export BLK_NUM_PART = 1 + export BLK_NUM_PART = 2 export BLK_SIZE = 512 # 16MiB of disk space export BLK_MEM ?= 16777216 @@ -48,7 +48,7 @@ export PARTITION_ARG := --partition $(PARTITION) endif # All platforms use the same Linux and initrd images. -export LINUX := a3f4bf9e2eb24fa8fc0d3d8cd02e4d8097062e8b-linux +export LINUX := 372b2c56f55f2d461e4570d79332298e0f7ecce4-linux export INITRD := b6a276df6a0e39f76bc8950e975daa2888ad83df-rootfs.cpio.gz export BUILD_DIR:=$(abspath $(BUILD_DIR)) diff --git a/examples/virtio/README.md b/examples/virtio/README.md index fb7fb59cc..843489ea6 100644 --- a/examples/virtio/README.md +++ b/examples/virtio/README.md @@ -1,24 +1,29 @@ -# Using multiple virtIO devices with a Linux guest +# Using multiple virtIO devices with Linux guests -This example shows off the virtIO support that libvmm provides using the -[seL4 Device Driver Framework (sDDF)](https://github.com/au-ts/sddf) to talk to -the actual hardware. +This example shows off the multi-guest and virtIO support that +libvmm provides using the +[seL4 Device Driver Framework (sDDF)](https://github.com/au-ts/sddf) +to talk to the actual hardware. This example makes use of the following virtIO devices emulated by libvmm: * console * block * network +* socket (also known as 'vsock' and does not interact with any real hardware) -All of the virtIO devices are emulated with their corresponding native drivers -from sDDF. +Note that while we demo all virtIO devices together in one unified example system,
+their implementations are independent of each other and thus can be deployed +independently. -The example currently works on the following platforms: +All of the virtIO devices (except socket) are emulated with their corresponding +native drivers from sDDF. +The example currently works on the following platforms: * QEMU virt AArch64 * Avnet MaaXBoard @@ -28,7 +33,7 @@ Unlike the other examples, this one uses a metaprogram (`meta.py`) with the [sdfgen](https://github.com/au-ts/microkit_sdf_gen) tooling to generate the System Description File (SDF) and other necessary artefacts. Previously, SDFs were written manually, along with C headers for sDDF-specific configurations, -but this approach was tedious and error-prone. Wit this tooling, we can describe +but this approach was tedious and error-prone. With this tooling, we can describe the system at a higher level, automating the generation of system-specific data. ## Dependencies @@ -39,7 +44,7 @@ dependencies are needed: * sdfgen (for generating the System Description File with a metaprogram). ### Linux - + On apt based Linux distributions run the following commands: ```sh sudo apt-get install dosfstools @@ -104,14 +109,18 @@ system running the whole system. ### virtIO console -This example makes use of the virtIO console device so that the guest has access +This example makes use of the virtIO console device so that guests have access to the serial device on the platform. The virtIO console support in libvmm talks to the sDDF serial sub-system which contains a driver for input/output to the physical serial device. +Since only one guest can read from the console at any given time, by default console +reading is granted to the first guest (guest #0). You can switch the console input +by pressing `Ctrl` + `\`, then the guest's index (`0` or `1` in this example).
+ ### virtIO block -The guest also doubles as a client in the block system that talks virtIO to a native +The guests also double as clients in the block system that talks virtIO to a native block device. The requests from the guest are multiplexed through the additional block virtualiser component. @@ -139,17 +148,17 @@ starting block number that is a multiple of sDDF block's transfer size of 4096 b divided by the disk's logical size. Partitions that do not follow this restriction are unsupported. -By default on QEMU virt AArch64, we mount the first partition of the disk image, -on Avnet MaaXBoard we mount the third partition of the SD Card. You can change the partition mounted -by passing `PARTITION=n` when executing the Makefile. +By default on QEMU virt AArch64, we mount the first and second partitions of the disk image to +guest #0 and #1 respectively. On Avnet MaaXBoard we mount the second and third partitions +of the SD Card. You can change the partition mounted by passing `PARTITION=n` when executing the Makefile. ### virtIO net -In addition to virtIO console and block, the guest can also talk with the native +In addition to virtIO console and block, the guests can also talk with the native sDDF network driver via virtIO for in-guest networking. Packets in and out of -the guest are multiplexed through the network virtualiser components. +the guests are multiplexed through the network virtualiser components. -When the guest boots up, you must bring up the network device. First check the +When the guests boot up, you must bring up the network device.
First check the name of the network device, it should be called `eth0`: ``` # ip link show @@ -172,7 +181,7 @@ To obtain an IP address, initiate DHCP with: udhcpc ``` -Now the guest network, you can try to ping Google DNS with: +Now that the guest can talk on the network, you can try to ping Google DNS with: ``` # ping 8.8.8.8 PING 8.8.8.8 (8.8.8.8): 56 data bytes @@ -189,6 +198,72 @@ round-trip min/avg/max = 5.361/9.776/18.560 ms The guest has a DNS resolver so you can also ping a URL. +### virtIO socket +The virtIO socket device allows inter-guest communication without Ethernet +or IP protocols. In this example, the two guests act as a sender and a receiver +with corresponding user-level programs packaged into the ramdisk. Guest #0 +and #1 are allocated CID 3 and 4 respectively. + +``` +--------- VM --------- --------- VM --------- +Userland: Userland: + vsock_recv vsock_send + ^ | + | v +Kernel: Kernel: + virtio MMIO vsock driver virtio MMIO vsock driver + ^ | + | v +--------- VMM -------- --------- VMM -------- +virtio vsock device /-- virtio vsock device + ^ / + | / +rx buffer <----------------/ rx buffer +``` + +In the sender VM, `vsock_send` will send 32k worth of data through the virtIO socket +to `vsock_recv` in the receiver VM. Then, the receiver VM will verify that the +data are all correct and both programs will quit. + +Each virtIO socket device has a small receive buffer that the peer can write to +to send data. The buffer size is advertised to both the sender and receiver via the +`buf_alloc` field of the packet header for the guest driver to split up packets +as necessary. + +Here is a demo of this process happening.
Red (-) is guest #0 while +green (+) is guest #1: +```diff ++ Welcome to Buildroot ++ buildroot login: +- Welcome to Buildroot +- buildroot login: root +- login[262]: root login on 'console' +- # ./vsock_recv 3 +- VSOCK RECV|INFO: starting +- VSOCK RECV|INFO: creating socket to wait on CID 3 +VIRT_RX|LOG: switching to client 1 + ++ Welcome to Buildroot ++ buildroot login: root ++ login[260]: root login on 'console' ++ # ./vsock_send 3 ++ VSOCK SEND|INFO: starting ++ VSOCK SEND|INFO: creating socket to send on CID 3 +- VSOCK RECV|INFO: peer connected ++ VSOCK SEND|INFO: connected, preparing payload ++ VSOCK SEND|INFO: now sending 32768 bytes! +- VSOCK RECV|INFO: Accumulatively received 4050 bytes +- VSOCK RECV|INFO: Accumulatively received 8100 bytes +- VSOCK RECV|INFO: Accumulatively received 12150 bytes +- VSOCK RECV|INFO: Accumulatively received 16200 bytes +- VSOCK RECV|INFO: Accumulatively received 20250 bytes +- VSOCK RECV|INFO: Accumulatively received 24300 bytes +- VSOCK RECV|INFO: Accumulatively received 28350 bytes +- VSOCK RECV|INFO: Accumulatively received 32400 bytes +- VSOCK RECV|INFO: Total bytes received 32768, verifying data... 
+- VSOCK RECV|INFO: All is well in the universe +``` + ### QEMU set up When running on QEMU, read and writes go to an emulated ramdisk instead of to your diff --git a/examples/virtio/client_vm/buildroot_config b/examples/virtio/client_vm/buildroot_config index 0fafac540..4c582ee6b 100644 --- a/examples/virtio/client_vm/buildroot_config +++ b/examples/virtio/client_vm/buildroot_config @@ -1080,7 +1080,7 @@ BR2_PACKAGE_QT6_ARCH_SUPPORTS=y # BR2_PACKAGE_BRLTTY is not set # -# cc-tool needs a toolchain w/ C++, threads, wchar, gcc >= 4.9 +# cc-tool needs a toolchain w/ C++, threads, wchar, gcc >= 4.9 # # BR2_PACKAGE_CDRKIT is not set BR2_PACKAGE_CPUBURN_ARM_ARCH_SUPPORTS=y @@ -3270,35 +3270,35 @@ BR2_PACKAGE_MONGREL2_LIBC_SUPPORTS=y # # -# Please note: +# Please note: # # -# - Buildroot does *not* generate binary packages, +# - Buildroot does *not* generate binary packages, # # -# - Buildroot does *not* install any package database. +# - Buildroot does *not* install any package database. # # -# * +# * # # -# It is up to you to provide those by yourself if you +# It is up to you to provide those by yourself if you # # -# want to use any of those package managers. +# want to use any of those package managers. 
# # -# * +# * # # -# See the manual: +# See the manual: # # diff --git a/examples/virtio/client_vm/linux.dts b/examples/virtio/client_vm/linux.dts index 664a115ad..ad518770a 100644 --- a/examples/virtio/client_vm/linux.dts +++ b/examples/virtio/client_vm/linux.dts @@ -66,4 +66,10 @@ reg = <0x00 0x160000 0x00 0x200>; interrupts = <0x00 44 0x04>; }; + + virtio-socket@170000 { + compatible = "virtio,mmio"; + reg = <0x00 0x170000 0x00 0x200>; + interrupts = <0x00 45 0x04>; + }; }; diff --git a/examples/virtio/client_vm/linux_config b/examples/virtio/client_vm/linux_config index af48c31ed..fe5c31c2f 100644 --- a/examples/virtio/client_vm/linux_config +++ b/examples/virtio/client_vm/linux_config @@ -1090,7 +1090,11 @@ CONFIG_DEFAULT_TCP_CONG="cubic" CONFIG_DNS_RESOLVER=y # CONFIG_BATMAN_ADV is not set # CONFIG_OPENVSWITCH is not set -# CONFIG_VSOCKETS is not set +CONFIG_VSOCKETS=y +CONFIG_VSOCKETS_DIAG=y +CONFIG_VSOCKETS_LOOPBACK=y +CONFIG_VIRTIO_VSOCKETS=y +CONFIG_VIRTIO_VSOCKETS_COMMON=y # CONFIG_NETLINK_DIAG is not set # CONFIG_MPLS is not set # CONFIG_NET_NSH is not set @@ -2299,6 +2303,7 @@ CONFIG_VIRTIO_MMIO=y # CONFIG_VDPA is not set CONFIG_VHOST_MENU=y # CONFIG_VHOST_NET is not set +# CONFIG_VHOST_VSOCK is not set # CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set # diff --git a/examples/virtio/client_vm_userlevel/vsock_recv.c b/examples/virtio/client_vm_userlevel/vsock_recv.c new file mode 100644 index 000000000..22669c6e9 --- /dev/null +++ b/examples/virtio/client_vm_userlevel/vsock_recv.c @@ -0,0 +1,104 @@ +/* + * Copyright 2025, UNSW + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define NUMS_TO_RECV (16384) +/* 32k buffers of 2-bytes unsigneds.
*/ +uint16_t nums[NUMS_TO_RECV]; + +_Static_assert(NUMS_TO_RECV <= UINT16_MAX, "sequence index must fit in uint16_t"); + +/* + * Receiver half of the vsock demo. Binds to port 9999 on the CID given in + * argv[1], accepts one peer, receives sizeof(nums) bytes and verifies that they + * hold the sequence 0..NUMS_TO_RECV-1. Returns 0 on success, 1 on any failure. + */ +int main(int argc, char *argv[]) +{ + printf("VSOCK RECV|INFO: starting\n"); + + if (argc != 2) { + printf("./vsock_recv.elf \n"); + return 1; + } + uint32_t cid = (uint32_t)strtoul(argv[1], NULL, 10); + printf("VSOCK RECV|INFO: creating socket to wait on CID %u\n", cid); + + int s = socket(AF_VSOCK, SOCK_STREAM, 0); + if (s < 0) { + printf("VSOCK RECV|ERROR: socket failed with '%s'\n", strerror(errno)); + return 1; + } + + struct sockaddr_vm addr; + memset(&addr, 0, sizeof(struct sockaddr_vm)); + addr.svm_family = AF_VSOCK; + addr.svm_port = 9999; + addr.svm_cid = cid; + + int r = bind(s, (struct sockaddr *)&addr, sizeof(struct sockaddr_vm)); + if (r) { + printf("VSOCK RECV|ERROR: bind failed with '%s'\n", strerror(errno)); + return 1; + } + + r = listen(s, 0); + if (r) { + printf("VSOCK RECV|ERROR: listen failed with '%s'\n", strerror(errno)); + return 1; + } + + struct sockaddr_vm peer_addr; + socklen_t peer_addr_size = sizeof(struct sockaddr_vm); + int peer_fd = accept(s, (struct sockaddr *)&peer_addr, &peer_addr_size); + if (peer_fd < 0) { + printf("VSOCK RECV|ERROR: accept failed with '%s'\n", strerror(errno)); + return 1; + } + + printf("VSOCK RECV|INFO: peer connected\n"); + + /* Each recv() is capped at the space still free in nums so the peer can + * never write past the end of the buffer; errors and early EOF abort. */ + size_t bytes_recved = 0; + char *buf = (char *) nums; + while (bytes_recved < sizeof(nums)) { + ssize_t n = recv(peer_fd, &buf[bytes_recved], sizeof(nums) - bytes_recved, 0); + if (n < 0) { + printf("VSOCK RECV|ERROR: recv failed with '%s'\n", strerror(errno)); + return 1; + } + if (n == 0) { + printf("VSOCK RECV|ERROR: peer closed after only %zu bytes\n", bytes_recved); + return 1; + } + bytes_recved += (size_t)n; + if (bytes_recved < sizeof(nums)) { + printf("VSOCK RECV|INFO: Accumulatively received %zu bytes\n", bytes_recved); + } + } + + printf("VSOCK RECV|INFO: Total bytes received %zu, verifying data...\n", sizeof(nums)); + + for (uint16_t i = 0; i < NUMS_TO_RECV; i++) { + if (nums[i] != i) { + printf("VSOCK RECV|ERROR: at sequence %u, but got %u!\n", i, nums[i]); + return 1; + } + } + + printf("VSOCK RECV|INFO: All is well in the universe\n"); + + return 0; +} diff --git a/examples/virtio/client_vm_userlevel/vsock_send.c b/examples/virtio/client_vm_userlevel/vsock_send.c new file mode 100644 index 000000000..a9daf5979 --- /dev/null +++ b/examples/virtio/client_vm_userlevel/vsock_send.c @@ -0,0 +1,65 @@ +/* + * Copyright 2025, UNSW + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include
+ +#define NUMS_TO_SEND (16384) +/* 32k buffers of 2-bytes unsigneds. */ +uint16_t nums[NUMS_TO_SEND]; + +_Static_assert(NUMS_TO_SEND <= UINT16_MAX, "sequence index must fit in uint16_t"); +/* Sender half of the vsock demo: connects to port 9999 on the CID in argv[1] and sends nums; returns 0 on success, 1 on failure. */ +int main(int argc, char *argv[]) +{ + printf("VSOCK SEND|INFO: starting\n"); + + if (argc != 2) { + printf("./vsock_send.elf \n"); + return 1; + } + uint32_t cid = (uint32_t)strtoul(argv[1], NULL, 10); + printf("VSOCK SEND|INFO: creating socket to send on CID %u\n", cid); + + int s = socket(AF_VSOCK, SOCK_STREAM, 0); + + struct sockaddr_vm addr; + memset(&addr, 0, sizeof(struct sockaddr_vm)); + addr.svm_family = AF_VSOCK; + addr.svm_port = 9999; + addr.svm_cid = cid; + + int r; + + r = connect(s, (struct sockaddr *)&addr, sizeof(struct sockaddr_vm)); + if (r < 0) { + printf("VSOCK SEND|ERROR: connect failed %d with '%s'\n", r, strerror(errno)); + return 1; + } + + printf("VSOCK SEND|INFO: connected, preparing payload\n"); + for (uint16_t i = 0; i < NUMS_TO_SEND; i++) { + nums[i] = i; + } + printf("VSOCK SEND|INFO: now sending %zu bytes!\n", sizeof(nums)); + + r = send(s, nums, sizeof(nums), 0); + if (r < 0) { + printf("VSOCK SEND|ERROR: send failed with '%s'\n", strerror(errno)); + return 1; + } + + close(s); + + return 0; +} diff --git a/examples/virtio/client_vmm.c b/examples/virtio/client_vmm.c index e42254b87..188b7ebad 100644 --- a/examples/virtio/client_vmm.c +++ b/examples/virtio/client_vmm.c @@ -53,6 +53,9 @@ net_queue_handle_t net_rx_queue; net_queue_handle_t net_tx_queue; static struct virtio_net_device virtio_net; +/* Virtio Socket */ +static struct virtio_vsock_device virtio_vsock; + void init(void) { assert(serial_config_check_magic(&serial_config)); @@ -96,46 +99,34 @@ void init(void) return; } - /* Find the details of VirtIO console, net and block devices from sdfgen */ - int console_vdev_idx = -1; - int blk_vdev_idx = -1; - int net_vdev_idx = -1; - assert(vmm_config.num_virtio_mmio_devices == 3); - for (int i = 0; i < vmm_config.num_virtio_mmio_devices; i += 1) { - switch (vmm_config.virtio_mmio_devices[i].type) { - case
VIRTIO_DEVICE_ID_CONSOLE: - console_vdev_idx = i; - break; - case VIRTIO_DEVICE_ID_BLOCK: - blk_vdev_idx = i; - break; - case VIRTIO_DEVICE_ID_NET: - net_vdev_idx = i; - break; - } - } - assert(console_vdev_idx != -1); - assert(blk_vdev_idx != -1); - assert(net_vdev_idx != -1); - + /* Initialise sDDF serial queues */ serial_queue_init(&serial_rx_queue, serial_config.rx.queue.vaddr, serial_config.rx.data.size, serial_config.rx.data.vaddr); serial_queue_init(&serial_tx_queue, serial_config.tx.queue.vaddr, serial_config.tx.data.size, serial_config.tx.data.vaddr); + /* Fetch VirtIO console device details from sdfgen */ + assert(vmm_config.num_virtio_mmio_console_devices == 1); + vmm_config_virtio_console_device_t *mmio_console_dev = &vmm_config.virtio_mmio_console_devices[0]; + /* Initialise virtIO console device */ success = virtio_mmio_console_init(&virtio_console, - vmm_config.virtio_mmio_devices[console_vdev_idx].base, - vmm_config.virtio_mmio_devices[console_vdev_idx].size, - vmm_config.virtio_mmio_devices[console_vdev_idx].irq, + mmio_console_dev->regs.base, + mmio_console_dev->regs.size, + mmio_console_dev->regs.irq, &serial_rx_queue, &serial_tx_queue, serial_config.tx.id); + assert(success); + + /* Fetch VirtIO block device details from sdfgen */ + assert(vmm_config.num_virtio_mmio_block_devices == 1); + vmm_config_virtio_block_device_t *mmio_block_dev = &vmm_config.virtio_mmio_block_devices[0]; /* Initialise virtIO block device */ success = virtio_mmio_blk_init(&virtio_blk, - vmm_config.virtio_mmio_devices[blk_vdev_idx].base, - vmm_config.virtio_mmio_devices[blk_vdev_idx].size, - vmm_config.virtio_mmio_devices[blk_vdev_idx].irq, + mmio_block_dev->regs.base, + mmio_block_dev->regs.size, + mmio_block_dev->regs.irq, (uintptr_t)blk_config.data.vaddr, blk_config.data.size, storage_info, @@ -143,6 +134,10 @@ void init(void) blk_config.virt.id); assert(success); + /* Fetch VirtIO net device details from sdfgen */ + assert(vmm_config.num_virtio_mmio_net_devices == 1); 
+ vmm_config_virtio_net_device_t *mmio_net_dev = &vmm_config.virtio_mmio_net_devices[0]; + /* Initialise virtIO net device */ net_queue_init(&net_rx_queue, net_config.rx.free_queue.vaddr, net_config.rx.active_queue.vaddr, net_config.rx.num_buffers); @@ -150,9 +145,9 @@ void init(void) net_config.tx.num_buffers); net_buffers_init(&net_tx_queue, 0); success = virtio_mmio_net_init(&virtio_net, - vmm_config.virtio_mmio_devices[net_vdev_idx].base, - vmm_config.virtio_mmio_devices[net_vdev_idx].size, - vmm_config.virtio_mmio_devices[net_vdev_idx].irq, + mmio_net_dev->regs.base, + mmio_net_dev->regs.size, + mmio_net_dev->regs.irq, &net_rx_queue, &net_tx_queue, (uintptr_t)net_config.rx_data.vaddr, (uintptr_t)net_config.tx_data.vaddr, net_config.rx.id, net_config.tx.id, @@ -160,6 +155,22 @@ void init(void) ); assert(success); + /* Fetch VirtIO socket device details from sdfgen */ + assert(vmm_config.num_virtio_mmio_socket_devices == 1); + vmm_config_virtio_socket_device_t *mmio_socket_dev = &vmm_config.virtio_mmio_socket_devices[0]; + + /* Initialise virtIO socket device */ + success = virtio_mmio_vsock_init(&virtio_vsock, + mmio_socket_dev->regs.base, + mmio_socket_dev->regs.size, + mmio_socket_dev->regs.irq, + mmio_socket_dev->cid, + mmio_socket_dev->shared_buffer_size, + mmio_socket_dev->buffer_our, + mmio_socket_dev->buffer_peer, + mmio_socket_dev->peer_ch); + assert(success); + /* Finally start the guest */ guest_start(GUEST_VCPU_ID, kernel_pc, vmm_config.dtb, vmm_config.initrd); LOG_VMM("%s is ready\n", microkit_name); @@ -175,6 +186,8 @@ void notified(microkit_channel ch) virtio_blk_handle_resp(&virtio_blk); } else if (ch == net_config.rx.id) { virtio_net_handle_rx(&virtio_net); + } else if (ch == vmm_config.virtio_mmio_socket_devices[0].peer_ch) { + virtio_vsock_handle_rx(&virtio_vsock); } else { LOG_VMM_ERR("Unexpected channel, ch: 0x%lx\n", ch); } diff --git a/examples/virtio/meta.py b/examples/virtio/meta.py index a49d8edd4..1283850c7 100644 --- 
a/examples/virtio/meta.py +++ b/examples/virtio/meta.py @@ -24,9 +24,10 @@ class Board: timer: str blk: str guest_blk: str + partition: int net: str guest_net: str - partition: int + guest_vsock: str BOARDS: List[Board] = [ @@ -39,9 +40,10 @@ class Board: timer=None, blk="virtio_mmio@a003e00", guest_blk="virtio-blk@150000", + partition=0, net="virtio_mmio@a003c00", guest_net="virtio-net@160000", - partition=0 + guest_vsock="virtio-socket@170000", ), Board( name="maaxboard", @@ -52,20 +54,27 @@ class Board: timer="soc@0/bus@30000000/timer@302d0000", blk="soc@0/bus@30800000/mmc@30b40000", guest_blk="virtio-blk@150000", + partition=0, net="soc@0/bus@30800000/ethernet@30be0000", guest_net="virtio-net@160000", - partition=0 + guest_vsock="virtio-socket@170000", ), ] def generate(sdf_file: str, output_dir: str, dtb: DeviceTree, client_dtb: DeviceTree): - # Client VM - vmm_client0 = ProtectionDomain("CLIENT_VMM", "client_vmm.elf", priority=100) - vm_client0 = VirtualMachine("client_linux", [VirtualMachine.Vcpu(id=0)]) + # Client VM 0 + vmm_client0 = ProtectionDomain("CLIENT_VMM0", "client_vmm0.elf", priority=100) + vm_client0 = VirtualMachine("client_linux0", [VirtualMachine.Vcpu(id=0)]) client0 = Vmm(sdf, vmm_client0, vm_client0, client_dtb) sdf.add_pd(vmm_client0) + # Client VM 1 + vmm_client1 = ProtectionDomain("CLIENT_VMM1", "client_vmm1.elf", priority=99) + vm_client1 = VirtualMachine("client_linux1", [VirtualMachine.Vcpu(id=0)]) + client1 = Vmm(sdf, vmm_client1, vm_client1, client_dtb) + sdf.add_pd(vmm_client1) + # Serial subsystem serial_driver = ProtectionDomain("serial_driver", "serial_driver.elf", priority=200) serial_virt_tx = ProtectionDomain("serial_virt_tx", "serial_virt_tx.elf", priority=199) @@ -80,6 +89,7 @@ def generate(sdf_file: str, output_dir: str, dtb: DeviceTree, client_dtb: Device serial_system = Sddf.Serial(sdf, serial_node, serial_driver, serial_virt_tx, virt_rx=serial_virt_rx) client0.add_virtio_mmio_console(guest_serial_node, serial_system) + 
client1.add_virtio_mmio_console(guest_serial_node, serial_system) pds = [ serial_driver, @@ -101,18 +111,22 @@ def generate(sdf_file: str, output_dir: str, dtb: DeviceTree, client_dtb: Device net_virt_rx = ProtectionDomain("net_virt_rx", "network_virt_rx.elf", priority=99) net_system = Sddf.Net(sdf, net_node, eth_driver, net_virt_tx, net_virt_rx) client0_net_copier = ProtectionDomain( - "client0_net_copier", "network_copy.elf", priority=98, budget=20000) + "client0_net_copier", "network_copy_vmm0.elf", priority=98, budget=20000) + client1_net_copier = ProtectionDomain( + "client1_net_copier", "network_copy_vmm1.elf", priority=98, budget=20000) pds = [ eth_driver, net_virt_rx, net_virt_tx, client0_net_copier, + client1_net_copier, ] for pd in pds: sdf.add_pd(pd) client0.add_virtio_mmio_net(guest_net_node, net_system, client0_net_copier) + client1.add_virtio_mmio_net(guest_net_node, net_system, client1_net_copier) # Block subsystem blk_driver = ProtectionDomain("blk_driver", "blk_driver.elf", priority=200) @@ -126,6 +140,7 @@ def generate(sdf_file: str, output_dir: str, dtb: DeviceTree, client_dtb: Device blk_system = Sddf.Blk(sdf, blk_node, blk_driver, blk_virt) partition = int(args.partition) if args.partition else board.partition client0.add_virtio_mmio_blk(guest_blk_node, blk_system, partition=partition) + client1.add_virtio_mmio_blk(guest_blk_node, blk_system, partition=partition + 1) pds = [ blk_driver, blk_virt @@ -147,6 +162,15 @@ def generate(sdf_file: str, output_dir: str, dtb: DeviceTree, client_dtb: Device assert timer_system.connect() assert timer_system.serialise_config(output_dir) + # virtIO socket + guest_vsock_node = client_dtb.node(board.guest_vsock) + assert guest_vsock_node is not None + cid_a = 3 + cid_b = 4 + vsock_connection = Vmm.VmmVirtioSocketConnection( + sdf, guest_vsock_node, client0, cid_a, client1, cid_b) + + assert vsock_connection.connect() assert serial_system.connect() assert serial_system.serialise_config(output_dir) assert 
blk_system.connect() @@ -155,6 +179,8 @@ def generate(sdf_file: str, output_dir: str, dtb: DeviceTree, client_dtb: Device assert net_system.serialise_config(output_dir) assert client0.connect() assert client0.serialise_config(output_dir) + assert client1.connect() + assert client1.serialise_config(output_dir) with open(f"{output_dir}/{sdf_file}", "w+") as f: f.write(sdf.render()) diff --git a/examples/virtio/virtio.mk b/examples/virtio/virtio.mk index 6bb28f9e8..ee7313954 100644 --- a/examples/virtio/virtio.mk +++ b/examples/virtio/virtio.mk @@ -1,5 +1,5 @@ # -# Copyright 2024, UNSW +# Copyright 2025, UNSW # # SPDX-License-Identifier: BSD-2-Clause # @@ -32,6 +32,7 @@ CLIENT_DTB := client_vm/vm.dtb METAPROGRAM := $(VIRTIO_EXAMPLE)/meta.py CLIENT_VM_USERLEVEL_INIT := blk_client_init +CLIENT_VM_USERLEVEL_HOME := vsock_send vsock_recv vpath %.c $(SDDF) $(LIBVMM) $(VIRTIO_EXAMPLE) $(NETWORK_COMPONENTS) @@ -75,8 +76,8 @@ include $(LIBVMM)/vmm.mk include $(LIBVMM_TOOLS)/linux/uio/uio.mk include $(LIBVMM_TOOLS)/linux/blk/blk_init.mk -IMAGES := client_vmm.elf timer_driver.elf blk_driver.elf blk_virt.elf serial_driver.elf serial_virt_tx.elf serial_virt_rx.elf \ - network_virt_rx.elf network_virt_tx.elf eth_driver.elf network_copy.elf +IMAGES := client_vmm0.elf client_vmm1.elf timer_driver.elf blk_driver.elf blk_virt.elf serial_driver.elf serial_virt_tx.elf serial_virt_rx.elf \ + network_virt_rx.elf network_virt_tx.elf eth_driver.elf network_copy_vmm0.elf network_copy_vmm1.elf CHECK_FLAGS_BOARD_MD5:=.board_cflags-$(shell echo -- $(CFLAGS) $(BOARD) $(MICROKIT_CONFIG) | shasum | sed 's/ *-//') @@ -88,6 +89,11 @@ all: loader.img -include vmm.d +network_copy_vmm0.elf: network_copy.elf + cp $< $@ +network_copy_vmm1.elf: network_copy.elf + cp $< $@ + $(IMAGES): libsddf_util_debug.a libvmm.a $(DTB_FILE): $(DTS_FILE) @@ -100,22 +106,31 @@ ifeq ($(MICROKIT_BOARD), maaxboard) $(OBJCOPY) --update-section .device_resources=timer_driver_device_resources.data timer_driver.elf $(OBJCOPY) 
--update-section .timer_client_config=timer_client_blk_driver.data blk_driver.elf endif + $(OBJCOPY) --update-section .device_resources=blk_driver_device_resources.data blk_driver.elf $(OBJCOPY) --update-section .blk_driver_config=blk_driver.data blk_driver.elf $(OBJCOPY) --update-section .blk_virt_config=blk_virt.data blk_virt.elf - $(OBJCOPY) --update-section .blk_client_config=blk_client_CLIENT_VMM.data client_vmm.elf + $(OBJCOPY) --update-section .blk_client_config=blk_client_CLIENT_VMM0.data client_vmm0.elf + $(OBJCOPY) --update-section .blk_client_config=blk_client_CLIENT_VMM1.data client_vmm1.elf + $(OBJCOPY) --update-section .device_resources=serial_driver_device_resources.data serial_driver.elf $(OBJCOPY) --update-section .serial_driver_config=serial_driver_config.data serial_driver.elf $(OBJCOPY) --update-section .serial_virt_rx_config=serial_virt_rx.data serial_virt_rx.elf $(OBJCOPY) --update-section .serial_virt_tx_config=serial_virt_tx.data serial_virt_tx.elf - $(OBJCOPY) --update-section .serial_client_config=serial_client_CLIENT_VMM.data client_vmm.elf - $(OBJCOPY) --update-section .vmm_config=vmm_CLIENT_VMM.data client_vmm.elf + $(OBJCOPY) --update-section .serial_client_config=serial_client_CLIENT_VMM0.data client_vmm0.elf + $(OBJCOPY) --update-section .serial_client_config=serial_client_CLIENT_VMM1.data client_vmm1.elf + $(OBJCOPY) --update-section .device_resources=eth_driver_device_resources.data eth_driver.elf $(OBJCOPY) --update-section .net_driver_config=net_driver.data eth_driver.elf $(OBJCOPY) --update-section .net_virt_rx_config=net_virt_rx.data network_virt_rx.elf $(OBJCOPY) --update-section .net_virt_tx_config=net_virt_tx.data network_virt_tx.elf - $(OBJCOPY) --update-section .net_copy_config=net_copy_client0_net_copier.data network_copy.elf network_copy.elf - $(OBJCOPY) --update-section .net_client_config=net_client_CLIENT_VMM.data client_vmm.elf + $(OBJCOPY) --update-section .net_copy_config=net_copy_client0_net_copier.data 
network_copy_vmm0.elf + $(OBJCOPY) --update-section .net_copy_config=net_copy_client1_net_copier.data network_copy_vmm1.elf + $(OBJCOPY) --update-section .net_client_config=net_client_CLIENT_VMM0.data client_vmm0.elf + $(OBJCOPY) --update-section .net_client_config=net_client_CLIENT_VMM1.data client_vmm1.elf + + $(OBJCOPY) --update-section .vmm_config=vmm_CLIENT_VMM0.data client_vmm0.elf + $(OBJCOPY) --update-section .vmm_config=vmm_CLIENT_VMM1.data client_vmm1.elf $(IMAGE_FILE) $(REPORT_FILE): $(IMAGES) $(SYSTEM_FILE) $(MICROKIT_TOOL) $(SYSTEM_FILE) --search-path $(BUILD_DIR) --board $(MICROKIT_BOARD) \ @@ -137,11 +152,18 @@ ${INITRD}: tar xf $@.tar.gz -C initrd_download_dir cp initrd_download_dir/${INITRD}/rootfs.cpio.gz ${INITRD} +vsock_send: $(VIRTIO_EXAMPLE)/client_vm_userlevel/vsock_send.c + zig cc $^ -target aarch64-linux-musl -o $@ + +vsock_recv: $(VIRTIO_EXAMPLE)/client_vm_userlevel/vsock_recv.c + zig cc $^ -target aarch64-linux-musl -o $@ + client_vm/rootfs.cpio.gz: ${INITRD} \ - $(CLIENT_VM_USERLEVEL_INIT) |client_vm + $(CLIENT_VM_USERLEVEL_INIT) $(CLIENT_VM_USERLEVEL_HOME) |client_vm $(LIBVMM)/tools/packrootfs ${INITRD} \ client_vm/rootfs_staging -o $@ \ - --startup $(CLIENT_VM_USERLEVEL_INIT) + --startup $(CLIENT_VM_USERLEVEL_INIT) \ + --home $(CLIENT_VM_USERLEVEL_HOME) blk_storage: $(LIBVMM_TOOLS)/mkvirtdisk $@ $(BLK_NUM_PART) $(BLK_SIZE) $(BLK_MEM) @@ -165,7 +187,9 @@ client_vm/images.o: $(LIBVMM)/tools/package_guest_images.S $(CHECK_FLAGS_BOARD_M -target $(TARGET) \ $(LIBVMM)/tools/package_guest_images.S -o $@ -client_vmm.elf: client_vm/vmm.o client_vm/images.o |vm_dir +client_vmm0.elf: client_vm/vmm.o client_vm/images.o |vm_dir + $(LD) $(LDFLAGS) $^ $(LIBS) -o $@ +client_vmm1.elf: client_vm/vmm.o client_vm/images.o |vm_dir $(LD) $(LDFLAGS) $^ $(LIBS) -o $@ # Stop make from deleting intermediate files diff --git a/include/libvmm/config.h b/include/libvmm/config.h index 136abe5f9..3584f5639 100644 --- a/include/libvmm/config.h +++ 
b/include/libvmm/config.h @@ -13,19 +13,43 @@ static char VMM_MAGIC[VMM_MAGIC_LEN] = { 'v', 'm', 'm' }; #define VMM_MAX_IRQS 32 #define VMM_MAX_VCPUS 32 #define VMM_MAX_UIOS 16 -#define VMM_MAX_VIRTIO_MMIO_DEVICES 32 +#define MAX_VIRTIO_MMIO_DEVICES_PER_TYPE 4 typedef struct vmm_config_irq { uint8_t id; uint32_t irq; } vmm_config_irq_t; -typedef struct vmm_config_virtio_mmio_device { - uint8_t type; +typedef struct vmm_config_virtio_mmio_device_regs { uint64_t base; uint32_t size; uint32_t irq; -} vmm_config_virtio_mmio_device_t; +} vmm_config_virtio_mmio_device_regs_t; + +typedef struct vmm_config_virtio_console_device { + vmm_config_virtio_mmio_device_regs_t regs; +} vmm_config_virtio_console_device_t; + +typedef struct vmm_config_virtio_block_device { + vmm_config_virtio_mmio_device_regs_t regs; +} vmm_config_virtio_block_device_t; + +typedef struct vmm_config_virtio_net_device { + vmm_config_virtio_mmio_device_regs_t regs; +} vmm_config_virtio_net_device_t; + +typedef struct vmm_config_virtio_socket_device { + vmm_config_virtio_mmio_device_regs_t regs; + uint32_t cid; + uint32_t shared_buffer_size; + uintptr_t buffer_our; + uintptr_t buffer_peer; + uint32_t peer_ch; +} vmm_config_virtio_socket_device_t; + +typedef struct vmm_config_virtio_sound_device { + vmm_config_virtio_mmio_device_regs_t regs; +} vmm_config_virtio_sound_device_t; typedef struct vmm_config_vcpu { uint8_t id; @@ -50,8 +74,22 @@ typedef struct vmm_config { vmm_config_irq_t irqs[VMM_MAX_IRQS]; uint8_t num_vcpus; vmm_config_vcpu_t vcpus[VMM_MAX_VCPUS]; - uint8_t num_virtio_mmio_devices; - vmm_config_virtio_mmio_device_t virtio_mmio_devices[VMM_MAX_VIRTIO_MMIO_DEVICES]; + + uint8_t num_virtio_mmio_console_devices; + vmm_config_virtio_console_device_t virtio_mmio_console_devices[MAX_VIRTIO_MMIO_DEVICES_PER_TYPE]; + + uint8_t num_virtio_mmio_block_devices; + vmm_config_virtio_block_device_t virtio_mmio_block_devices[MAX_VIRTIO_MMIO_DEVICES_PER_TYPE]; + + uint8_t num_virtio_mmio_net_devices; + 
vmm_config_virtio_net_device_t virtio_mmio_net_devices[MAX_VIRTIO_MMIO_DEVICES_PER_TYPE]; + + uint8_t num_virtio_mmio_socket_devices; + vmm_config_virtio_socket_device_t virtio_mmio_socket_devices[MAX_VIRTIO_MMIO_DEVICES_PER_TYPE]; + + uint8_t num_virtio_mmio_sound_devices; + vmm_config_virtio_sound_device_t virtio_mmio_sound_devices[MAX_VIRTIO_MMIO_DEVICES_PER_TYPE]; + uint8_t num_uio_regions; vmm_config_uio_region_t uios[VMM_MAX_UIOS]; } vmm_config_t; diff --git a/include/libvmm/virtio/mmio.h b/include/libvmm/virtio/mmio.h index abe0f5d63..5a187e1d0 100644 --- a/include/libvmm/virtio/mmio.h +++ b/include/libvmm/virtio/mmio.h @@ -46,6 +46,7 @@ #define VIRTIO_DEVICE_ID_NET 1 #define VIRTIO_DEVICE_ID_BLOCK 2 #define VIRTIO_DEVICE_ID_CONSOLE 3 +#define VIRTIO_DEVICE_ID_SOCKET 19 #define VIRTIO_DEVICE_ID_SOUND 25 /* The maximum size (number of elements) of a virtqueue. It is set diff --git a/include/libvmm/virtio/socket.h b/include/libvmm/virtio/socket.h new file mode 100644 index 000000000..1ebcd6e43 --- /dev/null +++ b/include/libvmm/virtio/socket.h @@ -0,0 +1,146 @@ +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Copyright (C) Red Hat, Inc., 2013-2015 + * Copyright (C) Asias He , 2013 + * Copyright (C) Stefan Hajnoczi , 2015 + */ +/* + * SPDX-License-Identifier: BSD-3-Clause + */ + +#pragma once + +#include + +#define VIRTIO_VSOCK_RX_QUEUE 0 +#define VIRTIO_VSOCK_TX_QUEUE 1 +#define VIRTIO_VSOCK_EVENT_QUEUE 2 + +/* Regardless of the configuration of the VSock, there are always 3 virtqueues */ +#define VIRTIO_VSOCK_NUM_VIRTQ 3 + +/* The feature bitmap for virtio vsock */ +#define VIRTIO_VSOCK_F_STREAM 0 /* SOCK_STREAM supported */ +#define VIRTIO_VSOCK_F_SEQPACKET 1 /* SOCK_SEQPACKET supported */ + +struct virtio_vsock_config { + uint64_t guest_cid; +} __attribute__((packed)); + +/* vsock config starts after PCI config */ +#define VIRTIO_VSOCK_CFG_GUEST_CID (VIRTIO_PCI_CONFIG_OFF(false)) + +enum virtio_vsock_event_id { + VIRTIO_VSOCK_EVENT_TRANSPORT_RESET = 0, +}; + +struct virtio_vsock_event { + uint32_t id; +} __attribute__((packed)); + +struct virtio_vsock_hdr { + uint64_t src_cid; /* Note that the upper 32-bits are reserved and zeroed. */ + uint64_t dst_cid; /* Note that the upper 32-bits are reserved and zeroed.
*/ + uint32_t src_port; + uint32_t dst_port; + uint32_t len; + uint16_t type; /* enum virtio_vsock_type */ + uint16_t op; /* enum virtio_vsock_op */ + uint32_t flags; + uint32_t buf_alloc; + uint32_t fwd_cnt; +} __attribute__((packed)); + +enum virtio_vsock_type { + VIRTIO_VSOCK_TYPE_STREAM = 1, + VIRTIO_VSOCK_TYPE_SEQPACKET = 2, +}; + +enum virtio_vsock_op { + VIRTIO_VSOCK_OP_INVALID = 0, + /* Connect operations */ + VIRTIO_VSOCK_OP_REQUEST = 1, + VIRTIO_VSOCK_OP_RESPONSE = 2, + VIRTIO_VSOCK_OP_RST = 3, + VIRTIO_VSOCK_OP_SHUTDOWN = 4, + /* To send payload */ + VIRTIO_VSOCK_OP_RW = 5, + /* Tell the peer our credit info */ + VIRTIO_VSOCK_OP_CREDIT_UPDATE = 6, + /* Request the peer to send the credit info to us */ + VIRTIO_VSOCK_OP_CREDIT_REQUEST = 7, +}; + +/* VIRTIO_VSOCK_OP_SHUTDOWN flags values */ +enum virtio_vsock_shutdown { + VIRTIO_VSOCK_SHUTDOWN_RCV = 1, + VIRTIO_VSOCK_SHUTDOWN_SEND = 2, +}; + +/* VIRTIO_VSOCK_OP_RW flags values */ +enum virtio_vsock_rw { + VIRTIO_VSOCK_SEQ_EOM = 1, + VIRTIO_VSOCK_SEQ_EOR = 2, +}; + +struct virtio_vsock_packet { + struct virtio_vsock_hdr hdr; + uint8_t data[]; +}; + +struct virtio_vsock_device { + struct virtio_device virtio_device; + struct virtio_queue_handler vqs[VIRTIO_VSOCK_NUM_VIRTQ]; + uint32_t guest_cid; + uint32_t shared_buffer_size; + uint32_t payload_size; /* shared_buffer_size minus the recv-space metadata and packet header */ + uintptr_t buffer_our; /* our struct virtio_vsock_recv_space: packets from the peer arrive here */ + uintptr_t buffer_peer; /* the peer's struct virtio_vsock_recv_space, written by us on TX */ + uint32_t peer_ch; /* channel passed to microkit_notify() to signal the peer */ +}; + +/* This is to receive data from the peer. To send data, we write data to the equivalent + in the peer */ +struct virtio_vsock_recv_space { + struct virtio_vsock_recv_space_metadata { + bool dirty; // True if the receiver has not processed the payload yet. + bool signal_required; // True if the owner of this space still has packets to send and wants the peer to signal it.
+ } metadata; + struct virtio_vsock_packet packet; +}; + +bool virtio_mmio_vsock_init(struct virtio_vsock_device *vsock, + uintptr_t region_base, + uintptr_t region_size, + size_t virq, + uint32_t guest_cid, + uint32_t shared_buffer_size, + uintptr_t buffer_our, + uintptr_t buffer_peer, + microkit_channel peer_ch); +bool virtio_vsock_handle_rx(struct virtio_vsock_device *vsock); diff --git a/include/libvmm/virtio/virtio.h b/include/libvmm/virtio/virtio.h index d9a0f3e95..ad5eced3e 100644 --- a/include/libvmm/virtio/virtio.h +++ b/include/libvmm/virtio/virtio.h @@ -10,6 +10,7 @@ #include #include #include +#include #include /* diff --git a/src/virtio/socket.c b/src/virtio/socket.c new file mode 100644 index 000000000..b9def261f --- /dev/null +++ b/src/virtio/socket.c @@ -0,0 +1,392 @@ +/* + * Copyright 2025, UNSW + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include +#include +#include +#include + +/* Uncomment this to enable debug logging */ +// #define DEBUG_VSOCK + +#if defined(DEBUG_VSOCK) +#define LOG_VSOCK(...) do{ printf("%s|VIRTIO(VSOCK): ", microkit_name); printf(__VA_ARGS__); }while(0) +#else +#define LOG_VSOCK(...) do{}while(0) +#endif + +#define LOG_VSOCK_ERR(...) do{ printf("%s|VIRTIO(VSOCK)|ERROR: ", microkit_name); printf(__VA_ARGS__); }while(0) + +static bool virtio_vsock_valid_cid(uint32_t cid) +{ + // Page 159 of 282 VirtIO spec 1.2. + switch (cid) { + case 0: + case 1: + case 2: + case 0xffffffff: + return false; + default: + return true; + } +} + +static void virtio_vsock_features_print(uint32_t features) +{ + /* Dump the features given in a human-readable format */ + LOG_VSOCK("Dumping features (0x%lx):\n", features); + LOG_VSOCK("feature VIRTIO_VSOCK_F_STREAM set to %s\n", + BIT_LOW(VIRTIO_VSOCK_F_STREAM) & features ? "true" : "false"); + LOG_VSOCK("feature VIRTIO_VSOCK_F_SEQPACKET set to %s\n", + BIT_LOW(VIRTIO_VSOCK_F_SEQPACKET) & features ? 
"true" : "false"); +} + +static void virtio_vsock_reset(struct virtio_device *dev) +{ + LOG_VSOCK("operation: reset\n"); + + for (int i = 0; i < dev->num_vqs; i++) { + dev->vqs[i].ready = false; + dev->vqs[i].last_idx = 0; + } +} + +static bool virtio_vsock_get_device_features(struct virtio_device *dev, uint32_t *features) +{ + LOG_VSOCK("operation: get device features\n"); + + switch (dev->data.DeviceFeaturesSel) { + case 0: + *features = BIT_LOW(VIRTIO_VSOCK_F_STREAM); + break; + case 1: + *features = BIT_HIGH(VIRTIO_F_VERSION_1); + break; + default: + // @ivanv: audit + LOG_VSOCK_ERR("driver sets DeviceFeaturesSel to 0x%x, which doesn't make sense\n", dev->data.DeviceFeaturesSel); + return false; + } + + return true; +} + +static bool virtio_vsock_set_driver_features(struct virtio_device *dev, uint32_t features) +{ + LOG_VSOCK("operation: set driver features\n"); + virtio_vsock_features_print(features); + + bool success = true; + + switch (dev->data.DriverFeaturesSel) { + // feature bits 0 to 31 + case 0: + break; + // features bits 32 to 63 + case 1: + success = (features == BIT_HIGH(VIRTIO_F_VERSION_1)); + break; + default: + LOG_VSOCK_ERR("driver sets DriverFeaturesSel to 0x%x, which doesn't make sense\n", dev->data.DriverFeaturesSel); + return false; + } + + if (success) { + dev->data.features_happy = 1; + LOG_VSOCK("device is feature happy\n"); + } + + return true; +} + +static bool virtio_vsock_get_device_config(struct virtio_device *dev, uint32_t offset, uint32_t *config) +{ + LOG_VSOCK("operation: get device config at offset 0x%lx\n", offset - REG_VIRTIO_MMIO_CONFIG); + + struct virtio_vsock_device *vsock = (struct virtio_vsock_device *)dev->device_data; + + switch (offset) { + case REG_VIRTIO_MMIO_CONFIG: + *config = vsock->guest_cid; + break; + case REG_VIRTIO_MMIO_CONFIG + 0x4: + /* The upper 32-bits of the CID are reserved and zeroed. 
*/ + *config = 0; + break; + default: + LOG_VSOCK_ERR("get config at unknown register offset 0x%x\n", offset); + return false; + } + + return true; +} + +static bool virtio_vsock_set_device_config(struct virtio_device *dev, uint32_t offset, uint32_t config) +{ + LOG_VSOCK("operation: set device config\n"); + return false; /* every config write is rejected */ +} + +static void virtio_vsock_handle_tx(struct virtio_device *dev) +{ + struct virtio_vsock_device *vsock = (struct virtio_vsock_device *) dev->device_data; + struct virtio_queue_handler *vq = &dev->vqs[VIRTIO_VSOCK_TX_QUEUE]; + struct virtio_vsock_recv_space *peer_buf = (struct virtio_vsock_recv_space *) vsock->buffer_peer; + struct virtio_vsock_recv_space *our_buf = (struct virtio_vsock_recv_space *) vsock->buffer_our; + + /* Process 1 transmit buffer if one exists. */ + if (vq->last_idx != vq->virtq.avail->idx) { + uint16_t desc_idx = vq->virtq.avail->ring[vq->last_idx % vq->virtq.num]; + struct virtq_desc desc; + desc = vq->virtq.desc[desc_idx]; + struct virtio_vsock_packet *packet = (struct virtio_vsock_packet *) desc.addr; + + /* Before doing anything, check to make sure the other side can actually receive data. */ + if (peer_buf->metadata.dirty) { + /* But there is a special condition, if the packet is a reset packet, it must be sent + no matter what.
(Virtio spec v1.2 section 5.10.6.4.2) */ + if (packet->hdr.op != VIRTIO_VSOCK_OP_RST) { + return; /* NOTE(review): our signal_required is not set on this path, so the packet stays queued until handle_tx runs again - confirm */ + } + } + + LOG_VSOCK("======== TRANSMITTING ========\n"); + LOG_VSOCK("src_cid: 0x%lx\n", packet->hdr.src_cid); + LOG_VSOCK("dst_cid: 0x%lx\n", packet->hdr.dst_cid); + LOG_VSOCK("src_port: 0x%x\n", packet->hdr.src_port); + LOG_VSOCK("dst_port: 0x%x\n", packet->hdr.dst_port); + LOG_VSOCK("len: 0x%x\n", packet->hdr.len); + LOG_VSOCK("type: 0x%x\n", packet->hdr.type); + LOG_VSOCK("op: 0x%x\n", packet->hdr.op); + LOG_VSOCK("flags: 0x%x\n", packet->hdr.flags); + LOG_VSOCK("buf_alloc: 0x%x\n", packet->hdr.buf_alloc); + LOG_VSOCK("fwd_cnt: 0x%x\n", packet->hdr.fwd_cnt); + + /* We only support VIRTIO_VSOCK_TYPE_STREAM */ + assert(packet->hdr.type == VIRTIO_VSOCK_TYPE_STREAM); + + /* Check the guest (src) and dest CIDs are valid */ + assert(virtio_vsock_valid_cid(packet->hdr.src_cid)); + assert(virtio_vsock_valid_cid(packet->hdr.dst_cid)); + assert(packet->hdr.src_cid == vsock->guest_cid); + + switch (packet->hdr.op) { + case VIRTIO_VSOCK_OP_REQUEST: + case VIRTIO_VSOCK_OP_RESPONSE: + case VIRTIO_VSOCK_OP_RST: + case VIRTIO_VSOCK_OP_SHUTDOWN: + case VIRTIO_VSOCK_OP_CREDIT_UPDATE: + case VIRTIO_VSOCK_OP_CREDIT_REQUEST: + /* These requests don't have any payloads. */ + assert(packet->hdr.len == 0); + break; + case VIRTIO_VSOCK_OP_RW: { + /* Only this operation has a payload because it actually sends data */ + /* Sanity check that the payload does not exceed the device's buffer. Shouldn't + trip as we've previously informed the other side of our real buffer size. */ + assert(packet->hdr.len <= vsock->payload_size); + break; + } + default: { + LOG_VSOCK_ERR("invalid operation %d\n", packet->hdr.op); + break; + } + } + + /* Copy the header to the receiver's RX buffer.
*/ + memcpy(&peer_buf->packet.hdr, &packet->hdr, sizeof(struct virtio_vsock_hdr)); + + /* Then doctor the header to tell the receiver the size of our device receive buffer */ + peer_buf->packet.hdr.buf_alloc = vsock->payload_size; + + /* Copy the payload if the request is a send */ + if (packet->hdr.op == VIRTIO_VSOCK_OP_RW) { + uintptr_t payload = (uintptr_t) &packet->data; + if (vq->virtq.desc[desc_idx].flags & VIRTQ_DESC_F_NEXT) { + /* Linux tends to put the payload in a separate descriptor for zero-copy TX so we + need to handle accordingly. */ + payload = vq->virtq.desc[desc.next].addr; + assert(vq->virtq.desc[desc.next].len == packet->hdr.len); + } else { + assert(vq->virtq.desc[desc_idx].len == sizeof(struct virtio_vsock_hdr) + packet->hdr.len); + } + memcpy(&peer_buf->packet.data, (void *) payload, packet->hdr.len); /* NOTE(review): hdr.len is bounded only by the assert in the op switch above; confirm asserts stay enabled in release builds */ + } + + /* Prevent further TX to the receiver until they have consumed this packet */ + peer_buf->metadata.dirty = true; + + struct virtq_used_elem used_hdr_elem = {desc_idx, sizeof(struct virtio_vsock_hdr) + packet->hdr.len}; + vq->virtq.used->ring[vq->virtq.used->idx % vq->virtq.num] = used_hdr_elem; + vq->virtq.used->idx++; + vq->last_idx++; + + if (vq->last_idx != vq->virtq.avail->idx) { + /* Still got packets to send, but out of buffer for now.
*/ + LOG_VSOCK("=> Sender requesting signal\n"); + our_buf->metadata.signal_required = true; + } else { + LOG_VSOCK("=> Sender processed all TX\n"); + our_buf->metadata.signal_required = false; + } + + microkit_notify(vsock->peer_ch); + } +} + +bool virtio_vsock_handle_rx(struct virtio_vsock_device *vsock) +{ + struct virtio_device *dev = &vsock->virtio_device; + struct virtio_vsock_recv_space *our_buf = (struct virtio_vsock_recv_space *) vsock->buffer_our; + struct virtio_vsock_recv_space *peer_buf = (struct virtio_vsock_recv_space *) vsock->buffer_peer; + + if (our_buf->metadata.signal_required) { + /* If our signal required bit is set, it means that previously we had packets to send while the + receiver's buffer was dirty. Now that we got notified, it means that the receiver is ready for + our next packet. Send it! */ + virtio_vsock_handle_tx(dev); + } else { + /* Normal receive */ + struct virtio_queue_handler *vq = &dev->vqs[VIRTIO_VSOCK_RX_QUEUE]; + struct virtio_vsock_packet *packet = (struct virtio_vsock_packet *) &our_buf->packet; + + LOG_VSOCK("======== RECEIVING ========\n"); + LOG_VSOCK("src_cid: 0x%lx\n", packet->hdr.src_cid); + LOG_VSOCK("dst_cid: 0x%lx\n", packet->hdr.dst_cid); + LOG_VSOCK("src_port: 0x%x\n", packet->hdr.src_port); + LOG_VSOCK("dst_port: 0x%x\n", packet->hdr.dst_port); + LOG_VSOCK("len: 0x%x\n", packet->hdr.len); + LOG_VSOCK("type: 0x%x\n", packet->hdr.type); + LOG_VSOCK("op: 0x%x\n", packet->hdr.op); + LOG_VSOCK("flags: 0x%x\n", packet->hdr.flags); + LOG_VSOCK("buf_alloc: 0x%x\n", packet->hdr.buf_alloc); + LOG_VSOCK("fwd_cnt: 0x%x\n", packet->hdr.fwd_cnt); + + /* Grab an available descriptor and copy over the packet's header. NOTE(review): nothing here checks that last_idx != avail->idx, i.e. that the guest has posted an RX buffer - confirm. */ + uint16_t desc_head = vq->virtq.avail->ring[vq->last_idx % vq->virtq.num]; + struct virtq_desc desc = vq->virtq.desc[desc_head]; + memcpy((void *) desc.addr, &packet->hdr, sizeof(struct virtio_vsock_hdr)); + + if (packet->hdr.op == VIRTIO_VSOCK_OP_RW) { + void *payload_dest = (void *)((uintptr_t)
desc.addr + sizeof(struct virtio_vsock_hdr)); + /* Some versions of Linux chain RX descriptors for zero-copy RX, so we have to handle that + here as well. */ + if (desc.flags & VIRTQ_DESC_F_NEXT) { + payload_dest = (void *) vq->virtq.desc[desc.next].addr; + assert(vq->virtq.desc[desc.next].len >= packet->hdr.len); + } else { + assert(desc.len >= sizeof(struct virtio_vsock_hdr) + packet->hdr.len); + } + memcpy(payload_dest, packet->data, packet->hdr.len); + } + + struct virtq_used_elem used_hdr_elem = {desc_head, sizeof(struct virtio_vsock_hdr) + packet->hdr.len}; + vq->virtq.used->ring[vq->virtq.used->idx % vq->virtq.num] = used_hdr_elem; + vq->virtq.used->idx++; + vq->last_idx++; + + /* Indicate to our peer that we are ready to accept the next packet. */ + our_buf->metadata.dirty = false; + + /* The peer might have stopped sending because our buffer was dirty, + or we might have stopped sending because the peer buffer was dirty. + If the peer asked to be signalled, notify it that we are ready + to receive the next packet. */ + if (peer_buf->metadata.signal_required) { + LOG_VSOCK("=> Peer requested signal\n"); + microkit_notify(vsock->peer_ch); + } + + /* Inject an IRQ to tell the guest that a packet has been received.
*/ + dev->data.InterruptStatus = BIT_LOW(0); + LOG_VSOCK("operation: injecting virq %d\n", dev->virq); + bool success = virq_inject(GUEST_VCPU_ID, dev->virq); + assert(success); + } + + return true; +} + +static bool virtio_vsock_handle_queue_notify(struct virtio_device *dev) +{ + LOG_VSOCK("operation: handle queue_notify on %d\n", dev->data.QueueNotify); + + size_t vq_idx = dev->data.QueueNotify; + if (vq_idx >= dev->num_vqs) { + LOG_VSOCK_ERR("invalid virtq index %lu\n", (unsigned long)vq_idx); + return false; + } + + if (vq_idx != VIRTIO_VSOCK_TX_QUEUE) { + return true; /* only TX kicks need work; kicks on the other queues are ignored */ + } + virtio_vsock_handle_tx(dev); + + /* As of virtIO v1.2, the event vq isn't important in our device implementation + because it is used to account for guest migrating to a different host. */ + + return true; +} + +static virtio_device_funs_t functions = { + .device_reset = virtio_vsock_reset, + .get_device_features = virtio_vsock_get_device_features, + .set_driver_features = virtio_vsock_set_driver_features, + .get_device_config = virtio_vsock_get_device_config, + .set_device_config = virtio_vsock_set_device_config, + .queue_notify = virtio_vsock_handle_queue_notify, +}; + +bool virtio_mmio_vsock_init(struct virtio_vsock_device *vsock, + uintptr_t region_base, + uintptr_t region_size, + size_t virq, + uint32_t guest_cid, + uint32_t shared_buffer_size, + uintptr_t buffer_our, + uintptr_t buffer_peer, + microkit_channel peer_ch) +{ + assert(shared_buffer_size >= 0x1000); + assert(buffer_our != buffer_peer); + + /* First check whether or not we have a valid guest CID */ + if (!virtio_vsock_valid_cid(guest_cid)) { + LOG_VSOCK_ERR("attempted to init vsock device with invalid guest CID %u\n", guest_cid); + return false; + } + LOG_VSOCK("registering vsock device with cid %u\n", guest_cid); + + struct virtio_device *dev = &vsock->virtio_device; + dev->data.DeviceID = VIRTIO_DEVICE_ID_SOCKET; + dev->data.VendorID = VIRTIO_MMIO_DEV_VENDOR_ID; + dev->funs = &functions; + dev->vqs = vsock->vqs; + dev->num_vqs =
VIRTIO_VSOCK_NUM_VIRTQ; + dev->virq = virq; + dev->device_data = vsock; + + vsock->guest_cid = guest_cid; + vsock->shared_buffer_size = shared_buffer_size; + vsock->payload_size = shared_buffer_size - sizeof(struct virtio_vsock_recv_space_metadata) - sizeof( + struct virtio_vsock_hdr); /* bytes left for packet data in one recv space */ + vsock->buffer_our = buffer_our; + vsock->buffer_peer = buffer_peer; + vsock->peer_ch = peer_ch; + + struct virtio_vsock_recv_space *our_buf = (struct virtio_vsock_recv_space *) vsock->buffer_our; + our_buf->metadata.dirty = false; + our_buf->metadata.signal_required = false; + + if (!virtio_mmio_register_device(dev, region_base, region_size, virq)) { + LOG_VSOCK_ERR("Couldn't register mmio device in init().\n"); + return false; + } + + return true; +} diff --git a/vmm.mk b/vmm.mk index c3b39dd3c..c7f051bcd 100644 --- a/vmm.mk +++ b/vmm.mk @@ -48,6 +48,7 @@ ARCH_INDEP_FILES := src/util/printf.c \ src/virtio/console.c \ src/virtio/mmio.c \ src/virtio/net.c \ + src/virtio/socket.c \ src/virtio/sound.c \ src/guest.c