diff --git a/src/cgroup.c b/src/cgroup.c index b7491ba..ebe8c94 100644 --- a/src/cgroup.c +++ b/src/cgroup.c @@ -36,10 +36,10 @@ * TODO: * Add more cgroups support. */ -static void mount_cgroup_v1_memory(void) +static void mount_cgroup_v1(const char *_Nonnull controller) { /* - * Mount Cgroup v1 memory controller. + * Mount Cgroup v1 _any_ controller. * Nothing to return because if this function run failed, * that means cgroup is fully not supported on the device. */ @@ -49,46 +49,12 @@ static void mount_cgroup_v1_memory(void) // Mount /sys/fs/cgroup as tmpfs. mount("tmpfs", "/sys/fs/cgroup", "tmpfs", MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, NULL); // Mount memory controller. - mkdir("/sys/fs/cgroup/memory", S_IRUSR | S_IWUSR); + char cgroup_controller_path[PATH_MAX] = { '\0' }; + sprintf(cgroup_controller_path, "/sys/fs/cgroup/%s", controller); + mkdir(cgroup_controller_path, S_IRUSR | S_IWUSR); usleep(2000); - mount("none", "/sys/fs/cgroup/memory", "cgroup", MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, "memory"); - ruri_log("{base}Tried to mount cgroup v1 memory\n"); -} -static void mount_cgroup_v1_cpu(void) -{ - /* - * Mount Cgroup v1 cpu controller. - * Nothing to return because if this function run failed, - * that means cgroup is fully not supported on the device. - */ - mkdir("/sys/fs/cgroup", S_IRUSR | S_IWUSR); - // Maybe needless. - umount2("/sys/fs/cgroup", MNT_DETACH | MNT_FORCE); - // Mount /sys/fs/cgroup as tmpfs. - mount("tmpfs", "/sys/fs/cgroup", "tmpfs", MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, NULL); - // Mount cpu controller. - mkdir("/sys/fs/cgroup/cpu", S_IRUSR | S_IWUSR); - usleep(2000); - mount("none", "/sys/fs/cgroup/cpu", "cgroup", MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, "cpu"); - ruri_log("{base}Tried to mount cgroup v1 cpu\n"); -} -static void mount_cgroup_v1_cpuset(void) -{ - /* - * Mount Cgroup v1 cpuset controller. 
- * Nothing to return because if this function run failed, - * that means cgroup is fully not supported on the device. - */ - mkdir("/sys/fs/cgroup", S_IRUSR | S_IWUSR); - // Maybe needless. - umount2("/sys/fs/cgroup", MNT_DETACH | MNT_FORCE); - // Mount /sys/fs/cgroup as tmpfs. - mount("tmpfs", "/sys/fs/cgroup", "tmpfs", MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, NULL); - // Mount cpuset controller. - mkdir("/sys/fs/cgroup/cpuset", S_IRUSR | S_IWUSR); - usleep(2000); - mount("none", "/sys/fs/cgroup/cpuset", "cgroup", MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, "cpuset"); - ruri_log("{base}Tried to mount cgroup v1 cpuset\n"); + mount("none", cgroup_controller_path, "cgroup", MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, controller); + ruri_log("{base}Tried to mount cgroup v1 %s\n", controller); } static bool is_cgroupv2_support(const char *_Nonnull type) { @@ -111,10 +77,11 @@ static bool is_cgroupv2_support(const char *_Nonnull type) write(subtree_control_fd, "+cpuset\n", strlen("+cpuset\n")); close(subtree_control_fd); usleep(200); - // Check if we have a controlable cgroup for `type`. + // Check if we have a controllable cgroup for `type`. 
mkdir("/sys/fs/cgroup/ruri", S_IRUSR | S_IWUSR); int fd = open("/sys/fs/cgroup/ruri/cgroup.controllers", O_RDONLY | O_CLOEXEC); if (fd < 0) { + rmdir("/sys/fs/cgroup/ruri"); umount2("/sys/fs/cgroup", MNT_DETACH | MNT_FORCE); ruri_log("{base}Cgroup v2 does not support %s\n", type); ruri_log("{base}cgroup.controllers does not exist\n"); @@ -124,12 +91,7 @@ static bool is_cgroupv2_support(const char *_Nonnull type) ssize_t len = read(fd, buf, 255); if (len <= 0) { close(fd); - umount2("/sys/fs/cgroup", MNT_DETACH | MNT_FORCE); - ruri_log("{base}Cgroup v2 does not support %s\n", type); - ruri_log("{base}cgroup.controllers read failed\n"); - return false; - } - if (len <= 0) { + rmdir("/sys/fs/cgroup/ruri"); umount2("/sys/fs/cgroup", MNT_DETACH | MNT_FORCE); ruri_log("{base}Cgroup v2 does not support %s\n", type); ruri_log("{base}cgroup.controllers read failed\n"); @@ -143,10 +105,12 @@ static bool is_cgroupv2_support(const char *_Nonnull type) // If str_to_find is in buf and str_to_find+1 is space (' ') or end ('\0'). // We return true. if (strstr(buf, str_to_find) != NULL && (strstr(buf, str_to_find)[strlen(str_to_find)] == ' ' || strstr(buf, str_to_find)[strlen(str_to_find)] == '\0')) { + rmdir("/sys/fs/cgroup/ruri"); umount2("/sys/fs/cgroup", MNT_DETACH | MNT_FORCE); ruri_log("{base}Cgroup v2 supports %s\n", type); return true; } + rmdir("/sys/fs/cgroup/ruri"); umount2("/sys/fs/cgroup", MNT_DETACH | MNT_FORCE); ruri_log("{base}Cgroup v2 does not support %s\n", type); return false; @@ -178,6 +142,60 @@ static char *memory_to_bytes(const char *_Nonnull memory) free(memory_dup); return ret; } +// Returns 1 for failed open(). 
//FIXME WIP
+static int cgroup_v1_attach(const struct RURI_CONTAINER *_Nonnull container, const char *_Nonnull controller)
+{
+	// Attach this process to /sys/fs/cgroup/${controller}/${container_id}: 0 on success, 1 if a control file cannot be opened; /sys/fs/cgroup is always unmounted before returning.
+	char buf[128] = { '\0' };
+	char path[PATH_MAX] = { '\0' };
+	snprintf(path, sizeof(path), "/sys/fs/cgroup/%s/%d", controller, container->container_id);
+	mkdir(path, S_IRUSR | S_IWUSR);
+	usleep(200);
+	if (container->memory != NULL) {
+		// Set memory limit. NOTE(review): these paths stay under memory/ regardless of `controller` - confirm callers only pass "memory" when container->memory is set.
+		snprintf(path, sizeof(path), "/sys/fs/cgroup/memory/%d/memory.limit_in_bytes", container->container_id);
+		int fd = open(path, O_RDWR | O_CLOEXEC);
+		if (fd < 0) {
+			if (!container->no_warnings) {
+				ruri_warning("{yellow}Set memory limit failed{clear}\n");
+			}
+			umount2("/sys/fs/cgroup", MNT_DETACH | MNT_FORCE);
+			return 1;
+		}
+		char *memory = memory_to_bytes(container->memory);
+		snprintf(buf, sizeof(buf), "%s\n", memory);
+		if (write(fd, buf, strlen(buf)) < 0 && !container->no_warnings) {
+			ruri_warning("{yellow}Set memory limit failed{clear}\n");
+		}
+		free(memory);
+		close(fd);
+		snprintf(path, sizeof(path), "/sys/fs/cgroup/memory/%d/memory.oom_control", container->container_id);
+		fd = open(path, O_RDWR | O_CLOEXEC);
+		if (fd < 0) {
+			if (!container->no_warnings) {
+				ruri_warning("{yellow}Set memory limit failed{clear}\n");
+			}
+			umount2("/sys/fs/cgroup", MNT_DETACH | MNT_FORCE);
+			return 1;
+		}
+		if (write(fd, "1\n", strlen("1\n")) < 0 && !container->no_warnings) {
+			ruri_warning("{yellow}Set memory limit failed{clear}\n");
+		}
+		close(fd);
+	}
+	// Add pid to the container_id cgroup of `controller` (the same directory created above).
+	snprintf(path, sizeof(path), "/sys/fs/cgroup/%s/%d/cgroup.procs", controller, container->container_id);
+	int fd = open(path, O_RDWR | O_CLOEXEC);
+	if (fd < 0) {
+		umount2("/sys/fs/cgroup", MNT_DETACH | MNT_FORCE);
+		return 1;
+	}
+	snprintf(buf, sizeof(buf), "%d\n", getpid());
+	write(fd, buf, strlen(buf));
+	close(fd);
+	umount2("/sys/fs/cgroup", MNT_DETACH | MNT_FORCE);
+	return 0;
+}
 static void set_cgroup_v1_memory(const struct RURI_CONTAINER *_Nonnull container)
 {
 	/*
@@ -186,7 +204,8 @@ static void set_cgroup_v1_memory(const struct RURI_CONTAINER *_Nonnull container
 	 * Control file:
 	 * /sys/fs/cgroup/memory/${container_id}/memory.limit_in_bytes
 	 */
-	mount_cgroup_v1_memory();
+	mount_cgroup_v1("memory");
+
 	pid_t pid = getpid();
 	char buf[128] = { '\0' };
 	char memory_cgroup_path[PATH_MAX] = { '\0' };
@@ -247,7 +266,7 @@ static void set_cgroup_v1_cpu(const struct RURI_CONTAINER *_Nonnull container)
 	 * /sys/fs/cgroup/cpu/${container_id}/cpu.cfs_quota_us
 	 * /sys/fs/cgroup/cpu/${container_id}/cpu.cfs_period_us
 	 */
-	mount_cgroup_v1_cpu();
+	mount_cgroup_v1("cpu");
 	char cpu_cgroup_path[PATH_MAX] = { '\0' };
 	sprintf(cpu_cgroup_path, "/sys/fs/cgroup/cpu/%d", container->container_id);
 	mkdir(cpu_cgroup_path, S_IRUSR | S_IWUSR);
@@ -304,7 +323,7 @@ static void set_cgroup_v1_cpuset(const struct RURI_CONTAINER *_Nonnull container
 	 * Nothing to return, only warnings to show if cgroup is not supported.
 	 * Control file: /sys/fs/cgroup/cpuset/${container_id}/cpuset.cpus
 	 */
-	mount_cgroup_v1_cpuset();
+	mount_cgroup_v1("cpuset");
 	char cpuset_cgroup_path[PATH_MAX] = { '\0' };
 	sprintf(cpuset_cgroup_path, "/sys/fs/cgroup/cpuset/%d", container->container_id);
 	mkdir(cpuset_cgroup_path, S_IRUSR | S_IWUSR);
@@ -354,6 +373,31 @@ static void set_cgroup_v1_cpuset(const struct RURI_CONTAINER *_Nonnull container
 	// Do not keep the apifs mounted.
 	umount2("/sys/fs/cgroup", MNT_DETACH | MNT_FORCE);
 }
+// Returns 1 for failed open().
+static int cgroup_v2_attach(const struct RURI_CONTAINER *_Nonnull container)
+{
+	char buf[128] = { '\0' };
+	char cgroup_path[PATH_MAX] = { '\0' };
+	snprintf(cgroup_path, sizeof(cgroup_path), "/sys/fs/cgroup/%d", container->container_id);
+	mkdir(cgroup_path, S_IRUSR | S_IWUSR);
+	usleep(200);
+	// Add pid to container_id cgroup; the path buffer is reused for the cgroup.procs control file.
+	snprintf(cgroup_path, sizeof(cgroup_path), "/sys/fs/cgroup/%d/cgroup.procs", container->container_id);
+	int fd = open(cgroup_path, O_RDWR | O_CLOEXEC);
+	if (fd < 0) {
+		if (!container->no_warnings) {
+			ruri_warning("{yellow}Set cgroup.procs failed{clear}\n");
+		}
+		// Report failure for apifs cleanup even when warnings are silenced, so we never write to an invalid fd.
+		return 1;
+	}
+	snprintf(buf, sizeof(buf), "%d\n", getpid());
+	if (write(fd, buf, strlen(buf)) < 0 && !container->no_warnings) {
+		ruri_warning("{yellow}Set cgroup.procs failed{clear}\n");
+	}
+	close(fd);
+	return 0;
+}
 static void set_cgroup_v2_memory(const struct RURI_CONTAINER *_Nonnull container)
 {
 	/*
@@ -366,36 +410,24 @@ static void set_cgroup_v2_memory(const struct RURI_CONTAINER *_Nonnull container
 	umount2("/sys/fs/cgroup", MNT_DETACH | MNT_FORCE);
 	// Mount /sys/fs/cgroup as cgroup2.
 	mount("none", "/sys/fs/cgroup", "cgroup2", MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, NULL);
-	pid_t pid = getpid();
-	char buf[128] = { '\0' };
-	char cgroup_path[PATH_MAX] = { '\0' };
-	sprintf(cgroup_path, "/sys/fs/cgroup/%d", container->container_id);
-	mkdir(cgroup_path, S_IRUSR | S_IWUSR);
-	usleep(200);
-	char cgroup_procs_path[PATH_MAX] = { '\0' };
-	sprintf(cgroup_procs_path, "/sys/fs/cgroup/%d/cgroup.procs", container->container_id);
 	// Add pid to container_id cgroup.
-	int fd = open(cgroup_procs_path, O_RDWR | O_CLOEXEC);
-	if (fd < 0 && !container->no_warnings) {
-		ruri_warning("{yellow}Set cgroup.procs failed{clear}\n");
+	if (cgroup_v2_attach(container)) {
 		// Do not keep the apifs mounted.
umount2("/sys/fs/cgroup", MNT_DETACH | MNT_FORCE); return; } - sprintf(buf, "%d\n", pid); - write(fd, buf, strlen(buf)); - close(fd); if (container->memory != NULL) { // Set memory limit. char cgroup_memlimit_path[PATH_MAX] = { '\0' }; sprintf(cgroup_memlimit_path, "/sys/fs/cgroup/%d/memory.high", container->container_id); - fd = open(cgroup_memlimit_path, O_RDWR | O_CLOEXEC); + int fd = open(cgroup_memlimit_path, O_RDWR | O_CLOEXEC); if (fd < 0 && !container->no_warnings) { ruri_warning("{yellow}Set memory limit failed{clear}\n"); // Do not keep the apifs mounted. umount2("/sys/fs/cgroup", MNT_DETACH | MNT_FORCE); return; } + char buf[256] = { '\0' }; sprintf(buf, "%s\n", container->memory); if (write(fd, buf, strlen(buf)) < 0 && !container->no_warnings) { ruri_warning("{yellow}Set memory limit failed{clear}\n"); @@ -444,38 +476,24 @@ static void set_cgroup_v2_cpuset(const struct RURI_CONTAINER *_Nonnull container umount2("/sys/fs/cgroup", MNT_DETACH | MNT_FORCE); // Mount /sys/fs/cgroup as cgroup2. mount("none", "/sys/fs/cgroup", "cgroup2", MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, NULL); - pid_t pid = getpid(); - char buf[128] = { '\0' }; - char cgroup_path[PATH_MAX] = { '\0' }; - sprintf(cgroup_path, "/sys/fs/cgroup/%d", container->container_id); - mkdir(cgroup_path, S_IRUSR | S_IWUSR); - usleep(200); - char cgroup_procs_path[PATH_MAX] = { '\0' }; - sprintf(cgroup_procs_path, "/sys/fs/cgroup/%d/cgroup.procs", container->container_id); // Add pid to container_id cgroup. - int fd = open(cgroup_procs_path, O_RDWR | O_CLOEXEC); - if (fd < 0 && !container->no_warnings) { - ruri_warning("{yellow}Set cgroup.procs failed{clear}\n"); + if (cgroup_v2_attach(container)) { // Do not keep the apifs mounted. 
umount2("/sys/fs/cgroup", MNT_DETACH | MNT_FORCE); return; } - sprintf(buf, "%d\n", pid); - if (write(fd, buf, strlen(buf)) < 0 && !container->no_warnings) { - ruri_warning("{yellow}Set cgroup.procs failed{clear}\n"); - } - close(fd); if (container->cpuset != NULL) { // Set cpuset limit. char cgroup_cpuset_path[PATH_MAX] = { '\0' }; sprintf(cgroup_cpuset_path, "/sys/fs/cgroup/%d/cpuset.cpus", container->container_id); - fd = open(cgroup_cpuset_path, O_RDWR | O_CLOEXEC); + int fd = open(cgroup_cpuset_path, O_RDWR | O_CLOEXEC); if (fd < 0 && !container->no_warnings) { ruri_warning("{yellow}Set cpuset limit failed{clear}\n"); // Do not keep the apifs mounted. umount2("/sys/fs/cgroup", MNT_DETACH | MNT_FORCE); return; } + char buf[256] = { '\0' }; sprintf(buf, "%s\n", container->cpuset); if (write(fd, buf, strlen(buf)) < 0 && !container->no_warnings) { ruri_warning("{yellow}Set cpuset limit failed{clear}\n"); @@ -497,38 +515,24 @@ static void set_cgroup_v2_cpu(const struct RURI_CONTAINER *_Nonnull container) umount2("/sys/fs/cgroup", MNT_DETACH | MNT_FORCE); // Mount /sys/fs/cgroup as cgroup2. mount("none", "/sys/fs/cgroup", "cgroup2", MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, NULL); - pid_t pid = getpid(); - char buf[128] = { '\0' }; - char cgroup_path[PATH_MAX] = { '\0' }; - sprintf(cgroup_path, "/sys/fs/cgroup/%d", container->container_id); - mkdir(cgroup_path, S_IRUSR | S_IWUSR); - usleep(200); - char cgroup_procs_path[PATH_MAX] = { '\0' }; - sprintf(cgroup_procs_path, "/sys/fs/cgroup/%d/cgroup.procs", container->container_id); // Add pid to container_id cgroup. - int fd = open(cgroup_procs_path, O_RDWR | O_CLOEXEC); - if (fd < 0 && !container->no_warnings) { - ruri_warning("{yellow}Set cgroup.procs failed{clear}\n"); + if (cgroup_v2_attach(container)) { // Do not keep the apifs mounted. 
umount2("/sys/fs/cgroup", MNT_DETACH | MNT_FORCE);
 		return;
 	}
-	sprintf(buf, "%d\n", pid);
-	if (write(fd, buf, strlen(buf)) < 0 && !container->no_warnings) {
-		ruri_warning("{yellow}Set cgroup.procs failed{clear}\n");
-	}
-	close(fd);
 	if (container->cpupercent > 0) {
 		// Set cpuset limit.
 		char cgroup_cpu_path[PATH_MAX] = { '\0' };
 		sprintf(cgroup_cpu_path, "/sys/fs/cgroup/%d/cpu.max", container->container_id);
-		fd = open(cgroup_cpu_path, O_RDWR | O_CLOEXEC);
+		int fd = open(cgroup_cpu_path, O_RDWR | O_CLOEXEC);
 		if (fd < 0 && !container->no_warnings) {
 			ruri_warning("{yellow}Set cpupercent limit failed{clear}\n");
 			// Do not keep the apifs mounted.
 			umount2("/sys/fs/cgroup", MNT_DETACH | MNT_FORCE);
 			return;
 		}
+		char buf[256] = { '\0' };
 		sprintf(buf, "%d 100000\n", container->cpupercent * 1000);
 		if (write(fd, buf, strlen(buf)) < 0 && !container->no_warnings) {
 			ruri_warning("{yellow}Set cpupercent limit failed{clear}\n");
@@ -538,6 +542,23 @@ static void set_cgroup_v2_cpu(const struct RURI_CONTAINER *_Nonnull container)
 	// Do not keep the apifs mounted.
 	umount2("/sys/fs/cgroup", MNT_DETACH | MNT_FORCE);
 }
+void ruri_attach_cgroup_v2(const struct RURI_CONTAINER *_Nonnull container)
+{
+	/*
+	 * Mount cgroupv2 hierarchy and attach process.
+	 * Nothing to return, only warnings to show if cgroup is not supported.
+	 * Control file: /sys/fs/cgroup/${container_id}/cgroup.procs
+	 */
+	mkdir("/sys/fs/cgroup", S_IRUSR | S_IWUSR);
+	// Maybe needless.
+	umount2("/sys/fs/cgroup", MNT_DETACH | MNT_FORCE);
+	// Mount /sys/fs/cgroup as cgroup2.
+	mount("none", "/sys/fs/cgroup", "cgroup2", MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, NULL);
+	// Add pid to container_id cgroup.
+	cgroup_v2_attach(container);
+	// Do not keep the apifs mounted.
+	umount2("/sys/fs/cgroup", MNT_DETACH | MNT_FORCE);
+}
 void ruri_set_limit(const struct RURI_CONTAINER *_Nonnull container)
 {
 	/*
@@ -568,3 +589,43 @@ void ruri_set_limit(const struct RURI_CONTAINER *_Nonnull container)
 		mount("tmpfs", "/sys/fs", "tmpfs", MS_RDONLY, NULL);
 	}
 }
+void ruri_kill_container_with_cgroup_v2(const struct RURI_CONTAINER *_Nonnull container)
+{
+	/*
+	 * Kill all processes in corresponding cgroup v2 structure by writing "1" to
+	 * /sys/fs/cgroup/${container_id}/cgroup.kill. FIXME what should i return?
+	 */
+	// Umount the mask of /sys/fs
+	if (!container->unmask_dirs) {
+		umount2("/sys/fs", MNT_DETACH | MNT_FORCE);
+	}
+	if (!is_cgroupv2_support("kill")) {
+		; // FIXME
+	}
+	mkdir("/sys/fs/cgroup", S_IRUSR | S_IWUSR);
+	// Maybe needless.
+	umount2("/sys/fs/cgroup", MNT_DETACH | MNT_FORCE);
+	// Mount /sys/fs/cgroup as cgroup2.
+	mount("none", "/sys/fs/cgroup", "cgroup2", MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, NULL);
+	char cgroup_path[PATH_MAX] = { '\0' };
+	sprintf(cgroup_path, "/sys/fs/cgroup/%d", container->container_id);
+	// FIXME detect dir existence instead of mkdir
+	usleep(200);
+	char cgroup_kill_path[PATH_MAX] = { '\0' };
+	sprintf(cgroup_kill_path, "/sys/fs/cgroup/%d/cgroup.kill", container->container_id);
+	// Pid should be added beforehand.
+	int fd = open(cgroup_kill_path, O_RDWR | O_CLOEXEC);
+	if ((fd < 0 || write(fd, "1\n", strlen("1\n")) < 0) && !container->no_warnings) {
+		ruri_warning("{yellow}Set cgroup.kill failed{clear}\n");
+	}
+	if (fd >= 0) { // Only close a descriptor that was actually opened.
+		close(fd);
+	}
+	// Do not keep the apifs mounted.
+	umount2("/sys/fs/cgroup", MNT_DETACH | MNT_FORCE);
+	// Mask /sys/fs again.
+	if (!container->unmask_dirs) {
+		mount("tmpfs", "/sys/fs", "tmpfs", MS_RDONLY, NULL);
+	}
+}
+// TODO void container_ps_with_cgroup_v2(const