From ad976d8d72c0e4130bc4689b46577d3a654dc2d1 Mon Sep 17 00:00:00 2001 From: William Durand Date: Thu, 10 Feb 2022 21:58:43 +0100 Subject: [PATCH] kernel: add linux compatibility layer --- Makefile | 6 + Makefile-cfg.include | 2 + data/initrd/etc/hosts | 1 + data/initrd/etc/passwd | 1 + include/kernel/proc/descriptor.h | 8 + include/libc/sys/linux_compat.h | 112 ++++++ include/libc/sys/syscall.h | 8 + .../arch/aarch32/board/raspi2/asm/proc.asm | 7 + .../arch/aarch64/board/raspi3/asm/proc.asm | 7 + src/kernel/arch/x86_64/Makefile.include | 2 +- src/kernel/arch/x86_64/asm/boot.asm | 28 +- src/kernel/arch/x86_64/asm/k_syscall.asm | 64 ++- src/kernel/arch/x86_64/asm/proc.asm | 8 + src/kernel/arch/x86_64/kmain.c | 17 + src/kernel/kmain.c | 10 +- src/kernel/net/ipv4.c | 9 + src/kernel/net/udp.c | 2 +- src/kernel/proc/descriptor.c | 33 +- src/kernel/sys/k_execv.c | 115 +++++- src/kernel/sys/k_read.c | 6 + src/kernel/sys/k_recvfrom.c | 30 +- src/kernel/sys/k_sendto.c | 87 +++- src/kernel/sys/k_socket.c | 23 +- src/kernel/sys/k_syscall.c | 370 +++++++++++++++++- src/userland/Makefile.include | 4 + 25 files changed, 893 insertions(+), 67 deletions(-) create mode 100644 data/initrd/etc/hosts create mode 100644 data/initrd/etc/passwd create mode 100644 include/libc/sys/linux_compat.h diff --git a/Makefile b/Makefile index 8201ab8d5..cfaaf4ed2 100644 --- a/Makefile +++ b/Makefile @@ -161,6 +161,11 @@ ifeq ($(CONFIG_USE_FAKE_CLOCK), 1) libk_c_files += $(kernel_src_dir)/time/fake_clock.c endif +ifeq ($(CONFIG_LINUX_COMPAT), 1) + KERNEL_CONFIG += -DCONFIG_LINUX_COMPAT + LIBC_CONFIG += -DCONFIG_LINUX_COMPAT +endif + # This file exists in a Docker container because we copy it in `Dockerfile`. in_docker = $(wildcard /tmp/install-linux-deps) ifneq ($(in_docker),) @@ -435,6 +440,7 @@ what: ## display some information about the current configuration echo "OS_NAME : $(OS_NAME)" echo "ARCH : $(ARCH)" echo "" + echo "CONFIG_LINUX_COMPAT = $(CONFIG_LINUX_COMPAT)" echo "CONFIG_SEMIHOSTING = $(CONFIG_SEMIHOSTING)" echo "CONFIG_USE_DLMALLOC = $(CONFIG_USE_DLMALLOC)" echo "CONFIG_USE_FAKE_CLOCK = $(CONFIG_USE_FAKE_CLOCK)" diff --git a/Makefile-cfg.include b/Makefile-cfg.include index 9f7794b7c..ec370f177 100644 --- a/Makefile-cfg.include +++ b/Makefile-cfg.include @@ -6,6 +6,8 @@ LLVM_SUFFIX ?= # When set to 1, enable the Undefined Behavior SANitizer. UBSAN ?= +# When set to 1, enable Linux (binary) compatibility. +CONFIG_LINUX_COMPAT ?= # When set to 1, enable semi-hosting mode (QEMU, mainly). CONFIG_SEMIHOSTING ?= # When set to 1, use dlmalloc for malloc/free/realloc (instead of liballoc). diff --git a/data/initrd/etc/hosts b/data/initrd/etc/hosts new file mode 100644 index 000000000..988ed358d --- /dev/null +++ b/data/initrd/etc/hosts @@ -0,0 +1 @@ +140.82.121.4 github diff --git a/data/initrd/etc/passwd b/data/initrd/etc/passwd new file mode 100644 index 000000000..038b4bfad --- /dev/null +++ b/data/initrd/etc/passwd @@ -0,0 +1 @@ +root:x:0:0::/:none diff --git a/include/kernel/proc/descriptor.h b/include/kernel/proc/descriptor.h index 1fc452a1e..de27a62ce 100644 --- a/include/kernel/proc/descriptor.h +++ b/include/kernel/proc/descriptor.h @@ -2,9 +2,11 @@ #ifndef PROC_DESCRIPTOR_H #define PROC_DESCRIPTOR_H +#include #include #include #include +#include #include #define STDIN 0 @@ -23,6 +25,8 @@ typedef struct descriptor uint32_t type; uint32_t protocol; uint16_t port; + struct sockaddr_in addr; + socklen_t addr_len; } descriptor_t; /** @@ -82,4 +86,8 @@ int descriptor_udp_lookup(uint16_t port); */ bool is_protocol_supported(uint32_t type, uint32_t protocol); +void duplicate_descriptor(int oldfd, int newfd); + +int descriptor_raw_lookup(uint32_t protocol, in_addr_t src_addr); + #endif diff --git a/include/libc/sys/linux_compat.h b/include/libc/sys/linux_compat.h new file mode 100644 index 000000000..27bc7a555 --- /dev/null +++ b/include/libc/sys/linux_compat.h @@ -0,0 +1,112 @@ +// https://chromium.googlesource.com/chromiumos/docs/+/HEAD/constants/syscalls.md +#ifndef SYS_LINUX_COMPAT_H +#define SYS_LINUX_COMPAT_H + +#ifdef __x86_64__ + +#define SYSCALL_READ 0 +#define SYSCALL_WRITE 1 +#define SYSCALL_OPEN 2 +#define SYSCALL_CLOSE 3 +#define SYSCALL_FSTAT 5 +#define SYSCALL_POLL 7 +#define SYSCALL_LSEEK 8 +#define SYSCALL_MMAP 9 +#define SYSCALL_BRK 12 +#define SYSCALL_RT_SIGACTION 13 +#define SYSCALL_RT_SIGPROCMASK 14 +#define SYSCALL_IOCTL 16 +#define SYSCALL_WRITEV 20 +#define SYSCALL_DUP2 33 +#define SYSCALL_NANOSLEEP 35 +#define SYSCALL_GETPID 39 +#define SYSCALL_SOCKET 41 +#define SYSCALL_CONNECT 42 +#define SYSCALL_SENDTO 44 +#define SYSCALL_RECVFROM 45 +#define SYSCALL_BIND 49 +#define SYSCALL_GETSOCKNAME 51 +#define SYSCALL_SETSOCKOPT 54 +#define SYSCALL_EXECV 59 +#define SYSCALL_EXIT 60 +#define SYSCALL_UNAME 63 +#define SYSCALL_FCNTL 72 +#define SYSCALL_GETCWD 79 +#define SYSCALL_GETTIMEOFDAY 96 +#define SYSCALL_SETUID 105 +#define SYSCALL_SETGID 106 +#define SYSCALL_GETEUID 107 +#define SYSCALL_ARCH_PRCTL 158 +#define SYSCALL_REBOOT 169 +#define SYSCALL_SET_TID_ADDR 218 +#define SYSCALL_EXIT_GROUP 231 +#define SYSCALL_OPENAT 257 + +#elif __arm__ + +#define SYSCALL_EXIT 1 +#define SYSCALL_READ 3 +#define SYSCALL_WRITE 4 +#define SYSCALL_OPEN 5 +#define SYSCALL_CLOSE 6 +#define SYSCALL_EXECV 11 +#define SYSCALL_LSEEK 19 +#define SYSCALL_GETPID 20 +#define SYSCALL_BRK 45 +#define SYSCALL_GETEUID 49 +#define SYSCALL_IOCTL 54 +#define SYSCALL_DUP2 63 +#define SYSCALL_GETTIMEOFDAY 78 +#define SYSCALL_REBOOT 88 +#define SYSCALL_FSTAT 108 +#define SYSCALL_WRITEV 146 +#define SYSCALL_EXIT_GROUP 248 +#define SYSCALL_SET_TID_ADDR 256 +#define SYSCALL_SOCKET 281 +#define SYSCALL_SENDTO 290 +#define SYSCALL_RECVFROM 292 +#define SYSCALL_OPENAT 322 + +// Not available on AArch32: +// +// - SYSCALL_ARCH_PRCTL + +#elif __aarch64__ + +#define SYSCALL_FCNTL 25 +#define SYSCALL_IOCTL 29 +#define SYSCALL_OPENAT 56 +#define SYSCALL_CLOSE 57 +#define SYSCALL_LSEEK 62 +#define SYSCALL_READ 63 +#define SYSCALL_WRITE 64 +#define SYSCALL_WRITEV 66 +#define SYSCALL_FSTAT 80 +#define SYSCALL_EXIT 93 +#define SYSCALL_EXIT_GROUP 94 +#define SYSCALL_SET_TID_ADDR 96 +#define SYSCALL_REBOOT 142 +#define SYSCALL_GETTIMEOFDAY 169 +#define SYSCALL_GETPID 172 +#define SYSCALL_GETEUID 175 +#define SYSCALL_SOCKET 198 +#define SYSCALL_SENDTO 206 +#define SYSCALL_RECVFROM 207 +#define SYSCALL_BRK 214 +#define SYSCALL_MUNMAP 215 +#define SYSCALL_EXECV 221 +#define SYSCALL_MMAP 222 + +// Not available on AArch64: +// +// - SYSCALL_OPEN +// - SYSCALL_DUP2 +// - SYSCALL_ARCH_PRCTL + +#endif + +// Not available outside ArvernOS: +#define SYSCALL_TEST 348 +#define SYSCALL_GETHOSTBYNAME2 349 + +#endif diff --git a/include/libc/sys/syscall.h b/include/libc/sys/syscall.h index 276f7fc41..4d4087ecf 100644 --- a/include/libc/sys/syscall.h +++ b/include/libc/sys/syscall.h @@ -11,6 +11,12 @@ #include #include +#ifdef CONFIG_LINUX_COMPAT + +#include + +#else // CONFIG_LINUX_COMPAT + #define SYSCALL_TEST 1 #define SYSCALL_WRITE 2 #define SYSCALL_READ 3 @@ -29,6 +35,8 @@ #define SYSCALL_EXIT 16 #define SYSCALL_OPENAT 17 +#endif // CONFIG_LINUX_COMPAT + #define SYSCALL_SET_ERRNO() \ if (retval < 0) { \ errno = -retval; \ diff --git a/src/kernel/arch/aarch32/board/raspi2/asm/proc.asm b/src/kernel/arch/aarch32/board/raspi2/asm/proc.asm index 16553cecb..ea533e0b7 100644 --- a/src/kernel/arch/aarch32/board/raspi2/asm/proc.asm +++ b/src/kernel/arch/aarch32/board/raspi2/asm/proc.asm @@ -22,3 +22,10 @@ arch_task_switch: ret_from_fork: bl task_schedule_tail blx r4 + +#ifdef CONFIG_LINUX_COMPAT +.global linux_compat_start +linux_compat_start: + mov sp, r1 + blx r0 +#endif diff --git a/src/kernel/arch/aarch64/board/raspi3/asm/proc.asm b/src/kernel/arch/aarch64/board/raspi3/asm/proc.asm index 16f24c6b0..1805f0a0f 100644 --- a/src/kernel/arch/aarch64/board/raspi3/asm/proc.asm +++ b/src/kernel/arch/aarch64/board/raspi3/asm/proc.asm @@ -32,3 +32,10 @@ arch_task_switch: ldr x30, [x8] mov sp, x9 ret + +#ifdef CONFIG_LINUX_COMPAT +.global linux_compat_start +linux_compat_start: + mov sp, x1 + blr x0 +#endif diff --git a/src/kernel/arch/x86_64/Makefile.include b/src/kernel/arch/x86_64/Makefile.include index f9e963a14..81971bdf2 100644 --- a/src/kernel/arch/x86_64/Makefile.include +++ b/src/kernel/arch/x86_64/Makefile.include @@ -27,7 +27,7 @@ QEMU = qemu-system-x86_64 # Options for the different tools ############################################################################### -QEMU_OPTIONS += -m 512M +QEMU_OPTIONS += -m 512M -cpu IvyBridge QEMU_OPTIONS += -serial file:$(log_file) QEMU_OPTIONS += -netdev user,id=u1,ipv6=off,dhcpstart=10.0.2.20 QEMU_OPTIONS += -device rtl8139,netdev=u1 diff --git a/src/kernel/arch/x86_64/asm/boot.asm b/src/kernel/arch/x86_64/asm/boot.asm index 545004b0c..79ebe3255 100644 --- a/src/kernel/arch/x86_64/asm/boot.asm +++ b/src/kernel/arch/x86_64/asm/boot.asm @@ -28,11 +28,37 @@ start: ; load the 64-bit GDT lgdt [gdt64.pointer] +%ifdef CONFIG_LINUX_COMPAT + call enable_sse + + ; enable the following instructions: rdfsbase, rdgsbase, wrfsbase, wrgsbase + mov eax, cr4 + or eax, 1 << 16 + mov cr4, eax +%endif + jmp gdt64.kernel_code:long_mode_start - ; Should not be reached. + ; should not be reached hlt +enable_sse: + mov eax, 0x1 ; check for SSE + cpuid + test edx, 1 << 25 + jz .no_sse ; after this, SSE can be enabled + mov eax, cr0 + and ax, 0xFFFB ; clear coprocessor emulation CR0.EM + or ax, 0x2 ; set coprocessor monitoring CR0.MP + mov cr0, eax + mov eax, cr4 + or ax, 3 << 9 ; set CR4.OSFXSR and CR4.OSXMMEXCPT at the same time + mov cr4, eax + ret + +.no_sse: + ret + ; ----------------------------------------------------------------------------- ; make sure the kernel was really loaded by a Multiboot compliant bootloader %define MULTIBOOT2_MAGIC_VALUE 0x36d76289 diff --git a/src/kernel/arch/x86_64/asm/k_syscall.asm b/src/kernel/arch/x86_64/asm/k_syscall.asm index b75b4afb2..2ebbee745 100644 --- a/src/kernel/arch/x86_64/asm/k_syscall.asm +++ b/src/kernel/arch/x86_64/asm/k_syscall.asm @@ -1,42 +1,78 @@ global syscall_handler +%define RSP0 0 +%define CS 8 +%define SS 16 + syscall_handler: sti - push rcx ; save the return address - push r11 ; save the flags +%ifdef CONFIG_LINUX_COMPAT + ; Save the current rsp,cs and ss to be used in the iret frame at the end. + mov gs:[RSP0], rsp + mov gs:[CS], cs + mov gs:[SS], ss +%endif + + ; Save the return address, flags, base pointer for sysret. + push rcx + push r11 push rbp + + ; Save callee-saved registers. push rbx - push rdx - push rdi - push rsi - push r8 - push r9 - push r10 push r12 push r13 push r14 push r15 + ; Save the syscall params. + push rdi + push rsi + push rdx + push r10 + push r8 + push r9 + mov rcx, r10 ; fix 3rd syscall arg extern syscall_handlers call [rax * 8 + syscall_handlers] - pop r15 - pop r14 - pop r13 - pop r12 - pop r10 pop r9 pop r8 + pop r10 + pop rdx pop rsi pop rdi - pop rdx + + pop r15 + pop r14 + pop r13 + pop r12 pop rbx + pop rbp pop r11 pop rcx +; This is needed because usermode has been broken recently (when the multi- +; tasking code has been introduced). Therefore, we only have kernel mode (ring +; 0). `sysret` always returns to ring 3 so we cannot use it when a syscall is +; done from ring 0 (which happens with Linux unmodified binaries). That's why +; we use `iretq`. +%ifdef CONFIG_LINUX_COMPAT + push qword gs:[SS] + push qword gs:[RSP0] + push r11 + push qword gs:[CS] + push rcx + + cli + + iretq +%else cli + o64 sysret +%endif diff --git a/src/kernel/arch/x86_64/asm/proc.asm b/src/kernel/arch/x86_64/asm/proc.asm index 34ec1bb0b..0dc710f70 100644 --- a/src/kernel/arch/x86_64/asm/proc.asm +++ b/src/kernel/arch/x86_64/asm/proc.asm @@ -30,3 +30,11 @@ ret_from_fork: call task_schedule_tail call r12 + +%ifdef CONFIG_LINUX_COMPAT +global linux_compat_start + +linux_compat_start: + mov rsp, rsi + jmp rdi +%endif diff --git a/src/kernel/arch/x86_64/kmain.c b/src/kernel/arch/x86_64/kmain.c index 656c9fd15..933631151 100644 --- a/src/kernel/arch/x86_64/kmain.c +++ b/src/kernel/arch/x86_64/kmain.c @@ -27,6 +27,19 @@ void load_modules(multiboot_info_t* mbi); void load_network_config(inish_config_t* kernel_cfg, net_driver_t* driver); void load_system_config(inish_config_t* kernel_cfg); +#ifdef CONFIG_LINUX_COMPAT + +typedef struct cpu_vars +{ + uint64_t rsp0; + uint64_t cs; + uint64_t ss; +} cpu_vars_t; + +static cpu_vars_t cpu_vars = { 0 }; + +#endif // CONFIG_LINUX_COMPAT + static uintptr_t initrd_addr = 0; void load_modules(multiboot_info_t* mbi) @@ -117,6 +130,10 @@ void kmain(uintptr_t addr) kmain_early_start(); +#ifdef CONFIG_LINUX_COMPAT + __asm__("wrgsbase %0" ::"r"(&cpu_vars)); +#endif + tss_init(); frame_init(); paging_init(); diff --git a/src/kernel/kmain.c b/src/kernel/kmain.c index 9284f4916..57a42522a 100644 --- a/src/kernel/kmain.c +++ b/src/kernel/kmain.c @@ -112,16 +112,12 @@ void kinit() argv[_argc] = NULL; free(_cmdline); + INFO("kmain: loading %s...", argv[0]); + if (strcmp(argv[0], "kshell") == 0) { - INFO("kmain: loading %s...", argv[0]); kshell(argc, argv); } else { - // TODO: create task - - // INFO("kmain: switching to usermode... (%s)", argv[0]); - // - // k_execv(argv[0], argv); - WARN("cannot execute: %s", saved_cmdline); + k_execv(argv[0], argv); } k_exit(EXIT_FAILURE); diff --git a/src/kernel/net/ipv4.c b/src/kernel/net/ipv4.c index e2e309428..8fc342cea 100644 --- a/src/kernel/net/ipv4.c +++ b/src/kernel/net/ipv4.c @@ -5,9 +5,11 @@ #include #include #include +#include #include #include #include +#include ipv4_header_t ipv4_create_header(uint8_t src_ip[4], in_addr_t dst_addr, @@ -36,6 +38,13 @@ void ipv4_receive_packet(net_interface_t* interface, src_ip[3], interface->id); + int sockfd = descriptor_raw_lookup(header.proto, header.src_addr); + if (sockfd >= 0) { + // Handle SOCK_RAW. + write(sockfd, data, len); + return; + } + switch (header.proto) { case IPV4_PROTO_ICMP: icmpv4_receive_packet(interface, data, &header); diff --git a/src/kernel/net/udp.c b/src/kernel/net/udp.c index ccf9accca..9a3855049 100644 --- a/src/kernel/net/udp.c +++ b/src/kernel/net/udp.c @@ -34,7 +34,7 @@ void udp_receive_packet(net_interface_t* interface, int sockfd = descriptor_udp_lookup(udp_header.dst_port); NET_DEBUG("got sockfd=%d for dst_port=%d", sockfd, udp_header.dst_port); - if (sockfd > 0) { + if (sockfd >= 0) { write(sockfd, udp_data, udp_header.len - sizeof(udp_header_t)); return; } diff --git a/src/kernel/proc/descriptor.c b/src/kernel/proc/descriptor.c index 92297547d..8417d121e 100644 --- a/src/kernel/proc/descriptor.c +++ b/src/kernel/proc/descriptor.c @@ -1,7 +1,6 @@ #include #include -#include #define NB_SYSTEM_DESCRIPTORS 20 @@ -39,6 +38,12 @@ descriptor_t* get_descriptor(int id) return &descriptors[id]; } +void duplicate_descriptor(int oldfd, int newfd) +{ + memcpy(&descriptors[newfd], get_descriptor(oldfd), sizeof(descriptor_t)); + delete_descriptor(oldfd); +} + void delete_descriptor(int id) { if (id >= NB_SYSTEM_DESCRIPTORS) { @@ -71,6 +76,25 @@ int create_socket_descriptor(inode_t inode, return -1; } +int descriptor_raw_lookup(uint32_t protocol, in_addr_t src_addr) +{ + for (uint8_t fd = 0; fd < NB_SYSTEM_DESCRIPTORS; fd++) { + if (!descriptors[fd].used || descriptors[fd].type != SOCK_RAW || + descriptors[fd].protocol != protocol) { + continue; + } + + struct sockaddr_in sa = { 0 }; + memcpy(&sa, &descriptors[fd].addr, descriptors[fd].addr_len); + + if (sa.sin_addr.s_addr == src_addr) { + return fd; + } + } + + return -1; +} + int descriptor_udp_lookup(uint16_t port) { for (uint8_t fd = 3; fd < NB_SYSTEM_DESCRIPTORS; fd++) { @@ -89,10 +113,17 @@ bool is_protocol_supported(uint32_t type, uint32_t protocol) switch (type) { case SOCK_DGRAM: switch (protocol) { + case IPPROTO_IP: case IPPROTO_UDP: return true; } break; + + case SOCK_RAW: + switch (protocol) { + case IPPROTO_ICMP: + return true; + } } return false; diff --git a/src/kernel/sys/k_execv.c b/src/kernel/sys/k_execv.c index 5fb659d5f..cc4fd2451 100644 --- a/src/kernel/sys/k_execv.c +++ b/src/kernel/sys/k_execv.c @@ -1,11 +1,122 @@ #include #include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_LINUX_COMPAT + +#include + +#define PUSH_TO_STACK(stack, type, value) \ + stack = (char*)stack - sizeof(type); \ + *((type*)stack) = value + +// A trampoline function that jumps to `fn` after having set `rsp` to the value +// of `stack`. +extern void linux_compat_start(uintptr_t fn, uintptr_t stack); + +#endif // CONFIG_LINUX_COMPAT int k_execv(const char* path, char* const argv[]) { - UNUSED(*path); +#ifdef CONFIG_LINUX_COMPAT + int fd = k_open(path, O_RDONLY); + if (fd < 3) { + SYS_DEBUG("%s", "error k_open"); + return fd; + } + + struct stat stat = { 0 }; + + int retval = k_fstat(fd, &stat); + if (retval != 0) { + SYS_DEBUG("%s", "error k_fstat"); + return retval; + } + + uint8_t* buf = (uint8_t*)malloc(stat.st_size * sizeof(uint8_t)); + + retval = k_read(fd, buf, stat.st_size); + if (retval != stat.st_size) { + SYS_DEBUG("%s", "error k_read"); + return retval; + } + + retval = k_close(fd); + if (retval != 0) { + SYS_DEBUG("%s", "error k_close"); + return retval; + } + + elf_header_t* elf = elf_load(buf); + + if (elf == NULL) { + return -ENOEXEC; + } + + strncpy(CURRENT_TASK->name, path, 20); + + uint64_t argc = 0; + while (argv[argc]) { + argc++; + } + + uint64_t s[4096]; + void* stack = (void*)&s[4095]; + + // aux: AT_NULL + PUSH_TO_STACK(stack, uintptr_t, (uintptr_t)NULL); + PUSH_TO_STACK(stack, uintptr_t, (uintptr_t)NULL); + // aux: AT_RANDOM + // TODO: `AT_RANDOM` should really be random... + uint8_t rand_bytes[16] = { + 0xaa, 0xc0, 0xff, 0xee, 0xc0, 0xff, 0xee, 0xc0, + 0xff, 0xee, 0xc0, 0xff, 0xee, 0xc0, 0xff, 0xee, + }; + PUSH_TO_STACK(stack, uintptr_t, (uintptr_t)rand_bytes); + PUSH_TO_STACK(stack, uint64_t, 25); + // aux: AT_PAGESZ + PUSH_TO_STACK(stack, uint64_t, 4096); + PUSH_TO_STACK(stack, uint64_t, 6); + // aux: AT_PHNUM + PUSH_TO_STACK(stack, uint64_t, elf->ph_num); + PUSH_TO_STACK(stack, uint64_t, 5); + // aux: AT_PHENT + PUSH_TO_STACK(stack, uint64_t, elf->ph_size); + PUSH_TO_STACK(stack, uint64_t, 4); + // aux: AT_PHDR + PUSH_TO_STACK(stack, uintptr_t, (uintptr_t)buf + elf->ph_offset); + PUSH_TO_STACK(stack, uint64_t, 3); + + // envp + PUSH_TO_STACK(stack, uintptr_t, (uintptr_t)NULL); + + // argv + PUSH_TO_STACK(stack, uintptr_t, (uintptr_t)NULL); + + for (int i = argc - 1; i >= 0; i--) { + PUSH_TO_STACK(stack, uintptr_t, (uintptr_t)argv[i]); + } + + // argc + PUSH_TO_STACK(stack, uint64_t, argc); + + linux_compat_start((uintptr_t)elf->entry, (uintptr_t)stack); + + elf_unload(elf); + free(buf); + + return retval; +#else // CONFIG_LINUX_COMPAT + WARN("cannot execute: %s", path); UNUSED(*argv); - return k_not_implemented(); + return -ENOSYS; +#endif // CONFIG_LINUX_COMPAT } diff --git a/src/kernel/sys/k_read.c b/src/kernel/sys/k_read.c index 0b4c50d80..c9d4f0180 100644 --- a/src/kernel/sys/k_read.c +++ b/src/kernel/sys/k_read.c @@ -15,7 +15,13 @@ ssize_t k_read(int fd, void* buf, size_t count) SYS_DEBUG("fd=%d buf=%p count=%zu", fd, buf, count); if (fd == STDIN_FILENO) { +#ifdef CONFIG_LINUX_COMPAT + // TODO: This was changed to please `read(STDIN)` for Linux compat' but I + // am not sure that's what we want... + unsigned char c = arch_getchar(true); +#else unsigned char c = arch_getchar(false); +#endif if (c) { ((uint8_t*)buf)[0] = c; diff --git a/src/kernel/sys/k_recvfrom.c b/src/kernel/sys/k_recvfrom.c index fd625a911..dd9e97230 100644 --- a/src/kernel/sys/k_recvfrom.c +++ b/src/kernel/sys/k_recvfrom.c @@ -5,6 +5,7 @@ #include #include #include +#include #include ssize_t k_recvfrom(int sockfd, @@ -14,15 +15,8 @@ ssize_t k_recvfrom(int sockfd, struct sockaddr* src_addr, socklen_t* addrlen) { - UNUSED(addrlen); - UNUSED(src_addr); UNUSED(flags); - if (sockfd < 3) { - SYS_DEBUG("invalid socket descriptor sd=%d", sockfd); - return -ENOTSOCK; - } - descriptor_t* desc = get_descriptor(sockfd); if (desc == NULL) { @@ -30,11 +24,27 @@ ssize_t k_recvfrom(int sockfd, return -EBADF; } - if (desc->domain != AF_INET || desc->type != SOCK_DGRAM || - !is_protocol_supported(desc->type, desc->protocol)) { - SYS_DEBUG("invalid sockfd=%d", sockfd); + if (desc->domain != AF_INET) { + SYS_DEBUG("invalid domain for sockfd=%d", sockfd); + return -EINVAL; + } + + switch (desc->type) { + case SOCK_DGRAM: + case SOCK_RAW: + break; + default: + SYS_DEBUG("invalid type for sockfd=%d", sockfd); + return -EINVAL; + } + + if (!is_protocol_supported(desc->type, desc->protocol)) { + SYS_DEBUG("unsupported protocol for sockfd=%d", sockfd); return -EINVAL; } + memcpy(src_addr, &desc->addr, desc->addr_len); + *addrlen = desc->addr_len; + return vfs_read(desc->inode, buf, len, 0); } diff --git a/src/kernel/sys/k_sendto.c b/src/kernel/sys/k_sendto.c index 824342efa..58698ebcd 100644 --- a/src/kernel/sys/k_sendto.c +++ b/src/kernel/sys/k_sendto.c @@ -17,10 +17,7 @@ ssize_t k_sendto(int sockfd, { UNUSED(flags); - if (sockfd < 3) { - SYS_DEBUG("invalid socket descriptor sd=%d", sockfd); - return -ENOTSOCK; - } + SYS_DEBUG("sockfd=%d buf=%p len=%d flags=%d", sockfd, buf, len, flags); descriptor_t* desc = get_descriptor(sockfd); @@ -29,27 +26,79 @@ ssize_t k_sendto(int sockfd, return -EBADF; } - if (desc->domain != AF_INET || desc->type != SOCK_DGRAM || - !is_protocol_supported(desc->type, desc->protocol)) { - SYS_DEBUG("invalid sockfd=%d", sockfd); + if (desc->domain != AF_INET) { + SYS_DEBUG("invalid domain for sockfd=%d", sockfd); + return -EINVAL; + } + + switch (desc->type) { + case SOCK_DGRAM: + case SOCK_RAW: + break; + default: + SYS_DEBUG("invalid type for sockfd=%d", sockfd); + return -EINVAL; + } + + if (!is_protocol_supported(desc->type, desc->protocol)) { + SYS_DEBUG("unsupported protocol for sockfd=%d", sockfd); return -EINVAL; } + SYS_DEBUG("descriptor: domain=%d type=%d protocol=%d", + desc->domain, + desc->type, + desc->protocol); + + memcpy(&desc->addr, dst_addr, addrlen); + desc->addr_len = addrlen; + net_interface_t* interface = net_get_interface(0); - struct sockaddr_in addr = { 0 }; - memcpy(&addr, dst_addr, addrlen); + switch (desc->type) { + case SOCK_DGRAM: + switch (desc->protocol) { + case IPPROTO_IP: + ipv4_send_packet(interface, + (struct sockaddr_in*)&desc->addr, + IPV4_PROTO_ICMP, + 0, + (uint8_t*)buf, + len); + return len; - switch (desc->protocol) { - case IPPROTO_UDP: - udp_send_packet( - interface, desc->port, interface->mac, &addr, (uint8_t*)buf, len); - break; - default: - // Indicate that something went wrong, even though the type and protocol - // are supported. - len = 0; + case IPPROTO_UDP: + udp_send_packet(interface, + desc->port, + interface->mac, + (struct sockaddr_in*)&desc->addr, + (uint8_t*)buf, + len); + return len; + + default: + // Indicate that something went wrong, even though the type and + // protocol are supported. + return 0; + } + + case SOCK_RAW: + switch (desc->protocol) { + case IPPROTO_ICMP: + ipv4_send_packet(interface, + (struct sockaddr_in*)&desc->addr, + IPV4_PROTO_ICMP, + 0, + (uint8_t*)buf, + len); + return len; + + default: + // Indicate that something went wrong, even though the type and + // protocol are supported. + return 0; + } } - return len; + return 0; } diff --git a/src/kernel/sys/k_socket.c b/src/kernel/sys/k_socket.c index 9e45aa30a..2cc8c851c 100644 --- a/src/kernel/sys/k_socket.c +++ b/src/kernel/sys/k_socket.c @@ -8,15 +8,24 @@ int k_socket(int domain, int type, int protocol) { + SYS_DEBUG("domain=%d type=%d protocol=%d", domain, type, protocol); + if (domain != AF_INET) { + SYS_DEBUG("domain %d not supported", domain); return -EAFNOSUPPORT; } - if (type != SOCK_DGRAM) { - return -ESOCKTNOSUPPORT; + switch (type) { + case SOCK_DGRAM: + case SOCK_RAW: + break; + default: + SYS_DEBUG("type %d not supported", type); + return -ESOCKTNOSUPPORT; } if (!is_protocol_supported(type, protocol)) { + SYS_DEBUG("protocol %d not supported", protocol); return -EPROTONOSUPPORT; } @@ -31,7 +40,7 @@ int k_socket(int domain, int type, int protocol) // before returning it. inode_t inode = vfs_create(parent, NULL, 0); - if (parent == NULL) { + if (inode == NULL) { SYS_DEBUG("%s", "failed to create new inode"); return -EMFILE; } @@ -43,6 +52,12 @@ int k_socket(int domain, int type, int protocol) return -ENFILE; } - SYS_DEBUG("open sd=%d", sd); + SYS_DEBUG("open sd=%d inode=%p domain=%d type=%d protocol=%d", + sd, + inode, + domain, + type, + protocol); + return sd; } diff --git a/src/kernel/sys/k_syscall.c b/src/kernel/sys/k_syscall.c index b72663223..863cd1599 100644 --- a/src/kernel/sys/k_syscall.c +++ b/src/kernel/sys/k_syscall.c @@ -1,15 +1,278 @@ #include #include +#include +#include +#include +#include +#include #include #include +#include