diff --git a/components/Init/src/apic_timer.c b/components/Init/src/apic_timer.c
new file mode 100644
index 00000000..5a52f8cb
--- /dev/null
+++ b/components/Init/src/apic_timer.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2022, UNSW (ABN 57 195 873 179)
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+/**
+ * This file abstracts the time_server implementation away from the
+ * x86 APIC timer. It also provides all the functions necessary for the APIC
+ * timer to be emulated. See: sel4vmmplatsupport/arch/drivers/timer_emul.h
+ */
+#include
+
+#include "timers.h"
+
+/* Forwards to init_timer_tsc_frequency() and returns its result unchanged. */
+uint64_t apic_tsc_freq(void)
+{
+    return init_timer_tsc_frequency();
+}
+
+/* Forwards ns to init_timer_oneshot_absolute() for the TIMER_APIC timer id. */
+int apic_oneshot_absolute(uint64_t ns)
+{
+    return init_timer_oneshot_absolute(TIMER_APIC, ns);
+}
+
+/* Forwards ns to init_timer_oneshot_relative() for the TIMER_APIC timer id. */
+int apic_oneshot_relative(uint64_t ns)
+{
+    return init_timer_oneshot_relative(TIMER_APIC, ns);
+}
+
+/* Forwards to init_timer_stop() for the TIMER_APIC timer id. */
+int apic_timer_stop(void)
+{
+    return init_timer_stop(TIMER_APIC);
+}
diff --git a/components/Init/src/camkes_vm_interfaces.h b/components/Init/src/camkes_vm_interfaces.h
index 0af0c022..d92a23a6 100644
--- a/components/Init/src/camkes_vm_interfaces.h
+++ b/components/Init/src/camkes_vm_interfaces.h
@@ -39,6 +39,7 @@ int guest_mappings_num_guestmaps();
 int guest_mappings_get_guest_map(int num, uint64_t *frame, uint64_t *size);
 seL4_CPtr guest_mappings_get_mapping_mem_frame(uintptr_t paddr);
 
-int irqs_num_irqs();
-const char *irqs_get_irq(int irq, seL4_CPtr *irq_handler, uint8_t *ioapic, uint8_t *source, int *level_trig,
-                         int *active_low, uint8_t *dest);
+int irqs_ioapic_num_irqs(void);
+int irqs_msi_num_irqs(void);
+const char * irqs_ioapic_get_irq(int i, seL4_CPtr *irq_handler, uint8_t *ioapic, uint8_t *source, int *level_trig, int *active_low, uint8_t *dest);
+const char * irqs_msi_get_irq(int i, seL4_CPtr *irq_handler, uint8_t *irq, uint8_t *source);
diff --git a/components/Init/src/main.c b/components/Init/src/main.c
index 1e726e26..107156c0 100644
--- a/components/Init/src/main.c
+++ b/components/Init/src/main.c
@@ -41,11 +41,15
@@ #include #include +#include +#include +#include #include #include #include #include +#include #include "vm.h"
@@ -68,6 +72,20 @@ extern void *muslc_brk_reservation_start;
 extern vspace_t *muslc_this_vspace;
 static sel4utils_res_t muslc_brk_reservation_memory;
 
+uint64_t apic_tsc_freq(void);
+int apic_oneshot_absolute(uint64_t ns);
+int apic_oneshot_relative(uint64_t ns);
+int apic_timer_stop(void);
+
+/* Timer callbacks handed to vm_assign_vcpu_timer() in main_continued().
+ * NOTE(review): oneshot_relative stays NULL although apic_oneshot_relative() is declared - confirm intended. */
+static struct timer_functions timer_emul = {
+    .tsc_freq = apic_tsc_freq,
+    .oneshot_absolute = apic_oneshot_absolute,
+    .oneshot_relative = NULL,
+    .stop = apic_timer_stop
+};
+
 seL4_CPtr intready_notification();
 
 static seL4_CPtr get_async_event_notification()
@@ -439,7 +457,7 @@ void pit_timer_interrupt(void);
 void rtc_timer_interrupt(uint32_t);
 void serial_timer_interrupt(uint32_t);
 
-static seL4_Word irq_badges[16] = {
+static seL4_Word irq_badges[VM_NUM_IRQS] = {
     VM_PIC_BADGE_IRQ_0,
     VM_PIC_BADGE_IRQ_1,
     VM_PIC_BADGE_IRQ_2,
@@ -455,7 +473,11 @@ static seL4_Word irq_badges[16] = {
     VM_PIC_BADGE_IRQ_12,
     VM_PIC_BADGE_IRQ_13,
     VM_PIC_BADGE_IRQ_14,
-    VM_PIC_BADGE_IRQ_15
+    VM_PIC_BADGE_IRQ_15,
+    VM_APIC_BADGE_IRQ_16,
+    VM_APIC_BADGE_IRQ_17,
+    VM_APIC_BADGE_IRQ_18,
+    VM_APIC_BADGE_IRQ_19,
 };
 
 void serial_character_interrupt(void);
@@ -472,9 +494,18 @@ static int handle_async_event(vm_t *vm, seL4_Word badge, UNUSED seL4_MessageInfo
     if (badge & BIT(27)) {
         if ((badge & init_timer_notification_badge()) == init_timer_notification_badge()) {
             uint32_t completed = init_timer_completed();
+
+            /*
+             * We need both the PIT and APIC timer emulated as the PIT is used
+             * for calibration during early stages of Linux booting
+             */
             if (completed & BIT(TIMER_PIT)) {
                 pit_timer_interrupt();
             }
+            if (completed & BIT(TIMER_APIC)) {
+                vm_inject_timer_irq(vm->vcpus[BOOT_VCPU]);
+            }
+
             if (completed & (BIT(TIMER_PERIODIC_TIMER) | BIT(TIMER_COALESCED_TIMER) | BIT(TIMER_SECOND_TIMER) | BIT(
                                  TIMER_SECOND_TIMER2))) {
                 rtc_timer_interrupt(completed);
@@ -487,9 +518,10 @@ static int handle_async_event(vm_t *vm, seL4_Word badge, UNUSED seL4_MessageInfo
         if ((badge & serial_getchar_notification_badge()) == serial_getchar_notification_badge()) {
             serial_character_interrupt();
         }
-        for (int i = 0; i < 16; i++) {
+        for (int i = 0; i < VM_NUM_IRQS; i++) {
             if ((badge & irq_badges[i]) == irq_badges[i]) {
-                vm_inject_irq(vm->vcpus[BOOT_VCPU], i);
+                int res = vm_inject_irq(vm->vcpus[BOOT_VCPU], i);
+                ZF_LOGE_IF(res, "Failed to inject irq %d", i);
             }
         }
         for (int i = 0; i < device_notify_list_len; i++) {
@@ -533,45 +565,82 @@ static seL4_CPtr create_async_event_notification_cap(vm_t *vm, seL4_Word badge)
     return minted_ntfn_path.capPtr;
 }
 
-static void irq_ack_hw_irq_handler(vm_vcpu_t *vcpu, int irq, void *cookie)
+/* IRQ ack callback registered by init_irqs_common(); cookie is its x86_irq_cookie_t */
+static void irq_ack_hw_irq_handler(UNUSED vm_vcpu_t *vcpu, int irq, void *cookie)
 {
-    seL4_CPtr handler = (seL4_CPtr) cookie;
-    int UNUSED error = seL4_IRQHandler_Ack(handler);
+    int UNUSED error = seL4_IRQHandler_Ack(((x86_irq_cookie_t *) cookie)->irq_cap);
     assert(!error);
 }
 
-static void init_irqs(vm_t *vm)
+/*
+ * Shared setup for one passthrough interrupt: mint the intready notification
+ * with badge irq_badges[irq], bind it to irq_handler, ack the handler, then
+ * register irq_ack_hw_irq_handler() for irq on the boot vcpu with a malloc'd
+ * cookie holding irq_handler and is_msi. Any failure is fatal (ZF_LOGF_IF).
+ */
+static void init_irqs_common(vm_t *vm, seL4_CPtr irq_handler, int irq, bool is_msi)
 {
-    int error UNUSED;
+    int error;
+    cspacepath_t badge_path;
+    cspacepath_t async_path;
+
+    /* irq comes from the configuration and indexes irq_badges[], so range-check it first */
+    ZF_LOGF_IF(irq < 0 || irq >= VM_NUM_IRQS, "IRQ %d has no notification badge", irq);
+
+    vka_cspace_make_path(&vka, intready_notification(), &async_path);
+    error = vka_cspace_alloc_path(&vka, &badge_path);
+    ZF_LOGF_IF(error, "Failed to alloc cspace path");
+
+    error = vka_cnode_mint(&badge_path, &async_path, seL4_AllRights, irq_badges[irq]);
+    ZF_LOGF_IF(error, "Failed to mint cnode");
+    error = seL4_IRQHandler_SetNotification(irq_handler, badge_path.capPtr);
+    ZF_LOGF_IF(error, "Failed to set notification for irq handler");
+    error = seL4_IRQHandler_Ack(irq_handler);
+    ZF_LOGF_IF(error, "Failed to ack irq handler");
+
+    /* Set up the cookie */
+    x86_irq_cookie_t *cookie = malloc(sizeof(*cookie));
+    ZF_LOGF_IF(!cookie, "Malloc for irq cookie failed");
+    cookie->is_msi = is_msi;
+    cookie->irq_cap = irq_handler;
+
+    error = vm_register_irq(vm->vcpus[BOOT_VCPU], irq, irq_ack_hw_irq_handler, (void *) cookie);
+    ZF_LOGF_IF(error, "Failed to register irq ack handler");
+}
 
-    int num_irqs = irqs_num_irqs();
+/* Hook up every configured IOAPIC and MSI interrupt through init_irqs_common() */
+static void init_irqs(vm_t *vm)
+{
+    int ioapic_num_irqs = irqs_ioapic_num_irqs();
+    int msi_num_irqs = irqs_msi_num_irqs();
 
-    if (camkes_cross_vm_connections_init && num_irqs > get_crossvm_irq_num()) {
+    if (camkes_cross_vm_connections_init && (ioapic_num_irqs + msi_num_irqs) > get_crossvm_irq_num()) {
         ZF_LOGE("Cross vm event irq number not available");
     }
 
-    for (int i = 0; i < num_irqs; i++) {
-        seL4_CPtr irq_handler;
-        uint8_t ioapic;
-        uint8_t source;
-        int level_trig;
-        int active_low;
+    seL4_CPtr irq_handler;
+
+    for (int i = 0; i < ioapic_num_irqs; i++) {
+        UNUSED uint8_t ioapic;
+        UNUSED uint8_t source;
+        UNUSED int level_trig;
+        UNUSED int active_low;
         uint8_t dest;
-        cspacepath_t badge_path;
-        cspacepath_t async_path;
-        irqs_get_irq(i, &irq_handler, &ioapic, &source, &level_trig, &active_low, &dest);
-        vka_cspace_make_path(&vka, intready_notification(), &async_path);
-        error = vka_cspace_alloc_path(&vka, &badge_path);
-        ZF_LOGF_IF(error, "Failed to alloc cspace path");
-
-        error = vka_cnode_mint(&badge_path, &async_path, seL4_AllRights, irq_badges[dest]);
-        ZF_LOGF_IF(error, "Failed to mint cnode");
-        error = seL4_IRQHandler_SetNotification(irq_handler, badge_path.capPtr);
-        ZF_LOGF_IF(error, "Failed to set notification for irq handler");
-        error = seL4_IRQHandler_Ack(irq_handler);
-        ZF_LOGF_IF(error, "Failed to ack irq handler");
-        error = vm_register_irq(vm->vcpus[BOOT_VCPU], dest, irq_ack_hw_irq_handler, (void *)irq_handler);
-        ZF_LOGF_IF(error, "Failed to register irq ack handler");
+
+        irqs_ioapic_get_irq(i, &irq_handler, &ioapic, &source, &level_trig, &active_low, &dest);
+        init_irqs_common(vm, irq_handler, dest, false);
+    }
+
+    for (int i = 0; i < msi_num_irqs; i++) {
+        UNUSED uint8_t irq;
+        uint8_t source;
+        irqs_msi_get_irq(i, &irq_handler, &irq, &source);
+
+        /* Setting up MSIs is a bit different from regular PIC interrupts.
+         * "Source" is the irq number that gets triggered by the MSI, and is
+         * different from pci_line_irq, which serves as a placeholder in the
+         * PCI config space. */
+        init_irqs_common(vm, irq_handler, source, true);
     }
 }
 
@@ -620,6 +689,52 @@ ioport_fault_result_t ioport_callback_handler(vm_vcpu_t *vcpu, unsigned int port
     return result;
 }
 
+/*
+ * Find the interrupt called irq_name, trying the MSI list before the IOAPIC list.
+ *
+ * On a match *is_msi records which list matched and *vmm_irq is set to the irq
+ * number init_irqs() registers for that interrupt (MSI source / IOAPIC dest).
+ * Returns the MSI's pci_irq_line or the IOAPIC dest, which the caller passes to
+ * vmm_pci_create_irq_emulation(); returns -1, leaving both outputs untouched,
+ * when no interrupt has that name.
+ */
+static int pci_device_find_irq(const char *irq_name, bool *is_msi, uint8_t *vmm_irq)
+{
+    /* search for the irq in the msi list first */
+    for (int i = 0; i < irqs_msi_num_irqs(); i++) {
+        seL4_CPtr cap;
+        uint8_t irq;
+        uint8_t source;
+
+        const char *this_name;
+        this_name = irqs_msi_get_irq(i, &cap, &irq, &source);
+        if (strcmp(irq_name, this_name) == 0) {
+            *is_msi = true;
+            *vmm_irq = source;
+            return irq;
+        }
+    }
+
+    /* if the pci device is not an MSI, then it must be a regular IOAPIC int */
+    for (int i = 0; i < irqs_ioapic_num_irqs(); i++) {
+        seL4_CPtr cap;
+        UNUSED uint8_t ioapic;
+        UNUSED uint8_t source;
+        UNUSED int level_trig;
+        UNUSED int active_low;
+        uint8_t dest;
+        const char *this_name;
+        this_name = irqs_ioapic_get_irq(i, &cap, &ioapic, &source, &level_trig, &active_low, &dest);
+        if (strcmp(irq_name, this_name) == 0) {
+            *is_msi = false;
+            *vmm_irq = dest;
+            return dest;
+        }
+    }
+
+    return -1;
+}
+
 void *main_continued(void *arg)
 {
     int error;
@@ -698,6 +813,9 @@ void *main_continued(void *arg)
     ZF_LOGI("RTC pre init");
     rtc_pre_init();
 
+    /* Assign a timer to the boot vcpu */
+    vm_assign_vcpu_timer(vm_vcpu, &timer_emul);
+
     error = vmm_io_port_init(&io_ports, FREE_IOPORT_START);
     if (error) {
         ZF_LOGF_IF(error, "Failed to initialise VMM ioport management");
@@ -767,7 +885,6 @@ void *main_continued(void *arg)
         ZF_LOGF_IF(error, "Failed to initialise VMM PCI");
     }
 
-    /* Perform device discovery and give passthrough device information */
     ZF_LOGI("PCI device discovery");
     for (i = 0; i < pci_devices_num_devices(); i++) {
         uint8_t bus;
@@ -775,30 +892,22 @@ void *main_continued(void *arg)
         uint8_t fun;
         const char *irq_name;
         int irq = -1;
+
+        bool is_msi = false;
+        uint8_t vmm_irq = 0;
+
         seL4_CPtr iospace_cap;
         pci_devices_get_device(i, &bus, &dev, &fun, &iospace_cap);
         irq_name = pci_devices_get_device_irq(i);
-        /* search for the irq */
-        for (int j = 0; j < irqs_num_irqs(); j++) {
-            seL4_CPtr cap;
-            uint8_t ioapic;
-            uint8_t source;
-            int level_trig;
-            int active_low;
-            uint8_t dest;
-            const char *this_name;
-            this_name = irqs_get_irq(j, &cap, &ioapic, &source, &level_trig, &active_low, &dest);
-            if (strcmp(irq_name, this_name) == 0) {
-                irq = dest;
-                break;
-            }
-        }
+        irq = pci_device_find_irq(irq_name, &is_msi, &vmm_irq);
+        assert(irq != -1);
 
         libpci_device_t *device = libpci_find_device_bdf(bus, dev, fun);
         if (!device) {
             LOG_ERROR("Failed to find device %02x:%02x.%d\n", bus, dev, fun);
             return NULL;
         }
+
         /* Allocate resources */
         vmm_pci_bar_t bars[6];
         int num_bars = vmm_pci_helper_map_bars(&vm, &device->cfg, bars);
@@ -810,7 +919,7 @@ void *main_continued(void *arg)
             entry = vmm_pci_create_bar_emulation(entry, num_bars, bars);
         }
         entry = vmm_pci_create_irq_emulation(entry, irq);
-        entry = vmm_pci_no_msi_cap_emulation(entry);
+        entry = vmm_pci_cap_emulation(entry, is_msi, vmm_irq);
         error = vmm_pci_add_entry(pci, entry, NULL);
         assert(!error);
     }
@@ -839,15 +948,19 @@ void *main_continued(void *arg)
             assert(!error);
         }
     }
+
+    /* We only scan non-pci ioports as the pci ioports are picked up
+     * and set up via the pci emulation code.*/
     for (i = 0; i < ioports_num_nonpci_ioports(); i++) {
         uint16_t start;
         uint16_t end;
         const char *desc;
         seL4_CPtr cap;
         desc = ioports_get_nonpci_ioport(i, &cap, &start, &end);
-        error = vm_enable_passthrough_ioport(vm_vcpu, start, end);
+        error = vm_enable_passthrough_ioport(vm_vcpu, start, end - 1);
         assert(!error);
     }
+
     /* config start and end encomposes both addr and data ports */
     vm_ioport_range_t pci_config_range = {X86_IO_PCI_CONFIG_START, X86_IO_PCI_CONFIG_END};
     vm_ioport_interface_t pci_config_interface = {pci, vmm_pci_io_port_in, vmm_pci_io_port_out, "PCI Configuration Space"};
diff --git
a/components/Init/src/timers.h b/components/Init/src/timers.h
index 66cc26d6..c2a50cf4 100644
--- a/components/Init/src/timers.h
+++ b/components/Init/src/timers.h
@@ -24,3 +24,5 @@
 #define TIMER_TRANSMIT_TIMER 6
 #define TIMER_MODEM_STATUS_TIMER 7
 #define TIMER_MORE_CHARS 8
+
+#define TIMER_APIC 9
diff --git a/components/Init/templates/seL4VMIRQs.template.c b/components/Init/templates/seL4VMIRQs.template.c
index a7762313..ce596cf8 100644
--- a/components/Init/templates/seL4VMIRQs.template.c
+++ b/components/Init/templates/seL4VMIRQs.template.c
@@ -11,26 +11,45 @@
 #include
 
 /*- set config_irqs = configuration[me.name].get("vm_irqs") -*/
-/*- set irqs = [] -*/
+/*- set config_pci = configuration[me.name].get("pci_devices") -*/
+/*- set ioapic_irqs = [] -*/
+/*- set msi_irqs = [] -*/
+/*- set irq_to_bdf = {} -*/
+
 /*- set irqnotification_object = alloc_obj('irq_notification_obj', seL4_NotificationObject) -*/
 /*- set irqnotification_object_cap = alloc_cap('irq_notification_obj', irqnotification_object, read=True) -*/
+
+/*# Map irq -> PCI bdf; skipped when no pci_devices are configured, mirroring the config_irqs guard below #*/
+/*- if config_pci is not none -*/
+    /*- for device in config_pci -*/
+        /*- do irq_to_bdf.update({device['irq']:(device['bus'], device['dev'], device['fun'])}) -*/
+    /*- endfor -*/
+/*- endif -*/
+
 /*- if config_irqs is not none -*/
     /*- for irq in config_irqs -*/
-        /*- set cap = alloc('irq_%d_%d' % (irq['ioapic'], irq['source']), seL4_IRQHandler, vector=irq['dest'], ioapic = irq['ioapic'], ioapic_pin = irq['source'], level = irq['level_trig'], polarity = irq['active_low'], notification=my_cnode[irqnotification_object_cap]) -*/
-        /*- do irqs.append( (irq['name'].strip('"'), irq['ioapic'], irq['source'], irq['level_trig'], irq['active_low'], irq['dest'], cap) ) -*/
+        /*- if 'ioapic' in irq -*/
+            /*- set cap = alloc('irq_ioapic%d_%d' % (irq['ioapic'], irq['source']), seL4_IRQHandler, vector=irq['dest'], ioapic = irq['ioapic'], ioapic_pin = irq['source'], level = irq['level_trig'], polarity = irq['active_low'], notification=my_cnode[irqnotification_object_cap]) -*/
+            /*- do ioapic_irqs.append( (irq['name'].strip('"'), irq['ioapic'], irq['source'], irq['level_trig'], irq['active_low'], irq['dest'], cap) ) -*/
+        /*- else -*/
+            /*# assume that an irq is msi if it's not generated by an ioapic pin #*/
+            /*- set bus, dev, fun = irq_to_bdf[irq['name']] -*/
+            /*- set cap = alloc('msi_%s' % (irq['name']), seL4_IRQHandler, vector=irq['source'], pci_bus=bus, pci_dev=dev, pci_fun=fun, handle=irq['handle'], notification=my_cnode[irqnotification_object_cap]) -*/
+            /*- do msi_irqs.append( (irq['name'].strip('"'), irq['pci_irq_line'], irq['source'], cap) ) -*/
+        /*- endif -*/
     /*- endfor -*/
 /*- endif -*/
 
-int irqs_num_irqs() {
-    return /*? len(irqs) ?*/;
+int irqs_ioapic_num_irqs(void) {
+    return /*? len(ioapic_irqs) ?*/;
 }
 
-const char * irqs_get_irq(int irq, seL4_CPtr *irq_handler, uint8_t *ioapic, uint8_t *source, int *level_trig, int *active_low, uint8_t *dest) {
-    /*- if len(irqs) == 0 -*/
+const char * irqs_ioapic_get_irq(int i, seL4_CPtr *irq_handler, uint8_t *ioapic, uint8_t *source, int *level_trig, int *active_low, uint8_t *dest) {
+    /*- if len(ioapic_irqs) == 0 -*/
         return NULL;
     /*- else -*/
-    switch (irq) {
-        /*- for name, ioapic, source, level_trig, active_low, dest, cap in irqs -*/
+    switch (i) {
+        /*- for name, ioapic, source, level_trig, active_low, dest, cap in ioapic_irqs -*/
         case /*? loop.index0 ?*/:
             *irq_handler = /*? cap ?*/;
             *ioapic = /*? ioapic ?*/;
@@ -45,3 +64,25 @@ const char * irqs_get_irq(int irq, seL4_CPtr *irq_handler, uint8_t *ioapic, uint
     }
     /*- endif -*/
 }
+
+int irqs_msi_num_irqs(void) {
+    return /*? len(msi_irqs) ?*/;
+}
+
+const char * irqs_msi_get_irq(int i, seL4_CPtr *irq_handler, uint8_t *irq, uint8_t *source) {
+    /*- if len(msi_irqs) == 0 -*/
+        return NULL;
+    /*- else -*/
+    switch (i) {
+        /*- for name, irq, source, cap in msi_irqs -*/
+        case /*? loop.index0 ?*/:
+            *irq_handler = /*? cap ?*/;
+            *irq = /*? irq ?*/;
+            *source = /*? source ?*/;
+            return "/*? name ?*/";
+        /*- endfor -*/
+        default:
+            return NULL;
+    }
+    /*- endif -*/
+}
diff --git a/components/VM/configurations/vm.h b/components/VM/configurations/vm.h
index 5577fe44..4c53aa4a 100644
--- a/components/VM/configurations/vm.h
+++ b/components/VM/configurations/vm.h
@@ -13,7 +13,6 @@
 /* For all the async sources on the intready endpoint the high bit
  * is set to indicate that an async event occured, and the low bits
  * indicate which async events */
-
 #define VM_PIC_BADGE_IRQ_0 134217730 /* BIT(27) | BIT(1) */
 #define VM_PIC_BADGE_IRQ_1 134217732 /* BIT(27) | BIT(2) */
 #define VM_PIC_BADGE_IRQ_2 134217736 /* BIT(27) | BIT(3) */
@@ -31,6 +30,16 @@
 #define VM_PIC_BADGE_IRQ_14 134250496 /* BIT(27) | BIT(15) */
 #define VM_PIC_BADGE_IRQ_15 134283264 /* BIT(27) | BIT(16) */
 
+/* Only support 4 APIC interrupts for now, we are limited by the number of bits
+ * in a notification badge, so this is just being conservative. */
+#define VM_APIC_BADGE_IRQ_16 134348800 /* BIT(27) | BIT(17) */
+#define VM_APIC_BADGE_IRQ_17 134479872 /* BIT(27) | BIT(18) */
+#define VM_APIC_BADGE_IRQ_18 134742016 /* BIT(27) | BIT(19) */
+#define VM_APIC_BADGE_IRQ_19 135266304 /* BIT(27) | BIT(20) */
+
+/* Number of badged IRQ lines above (16 PIC + 4 APIC); sizes irq_badges[] in Init/src/main.c */
+#define VM_NUM_IRQS 20
+
 /* Base definition of the Init component. This gets
  * extended in the per Vm configuration */
 #define VM_INIT_DEF() \
@@ -102,7 +111,7 @@
     vm##num.serial_getchar_shmem_size = 0x1000; \
     vm##num.simple = true; \
     vm##num.asid_pool = true; \
-    vm##num.global_endpoint_mask = 0x1fffffff & ~0x1fffe; \
+    vm##num.global_endpoint_mask = 0x1fffffff & ~0x1ffffe; \
     vm##num.global_endpoint_base = 1 << 27; \
     VM_MAYBE_ZONE_DMA(num) \
     /**/
@@ -130,7 +139,7 @@
 
 #define VM_CONFIGURATION_DEF() \
     fserv.heap_size = 0x30000; \
-    time_server.timers_per_client = 9; \
+    time_server.timers_per_client = 10; \
     /* Put the entire time server at the highest priority */ \
     time_server.priority = 255; \
     /* The timer server runs better if it can get the true tsc frequency from the kernel */ \