From 6861d6a495b220909f01f5598a9671ff8b25f179 Mon Sep 17 00:00:00 2001 From: YYS Date: Fri, 21 Jun 2024 18:36:35 +0800 Subject: [PATCH 1/2] container --- .../kvm_watcher/include/bpf/container.h | 127 ++++++++++++++++++ eBPF_Supermarket/kvm_watcher/include/common.h | 12 +- .../kvm_watcher/src/kvm_watcher.bpf.c | 25 ++++ .../kvm_watcher/src/kvm_watcher.c | 22 ++- 4 files changed, 184 insertions(+), 2 deletions(-) create mode 100644 eBPF_Supermarket/kvm_watcher/include/bpf/container.h diff --git a/eBPF_Supermarket/kvm_watcher/include/bpf/container.h b/eBPF_Supermarket/kvm_watcher/include/bpf/container.h new file mode 100644 index 000000000..b68ded094 --- /dev/null +++ b/eBPF_Supermarket/kvm_watcher/include/bpf/container.h @@ -0,0 +1,127 @@ +// Copyright 2023 The LMP Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://github.com/linuxkerneltravel/lmp/blob/develop/LICENSE +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// author: yys2020haha@163.com +// +// Kernel space BPF program used for counting container sys_entry/sys_exit info. + +#ifndef __CONTAINER_H +#define __CONTAINER_H + +#include "common.h" +#include "vmlinux.h" +#include +#include +#include +struct { + __uint(type,BPF_MAP_TYPE_HASH); + __uint(max_entries, 8192); + __type(key, pid_t); + __type(value, u64); +}time_info SEC(".maps"); +struct { + __uint(type,BPF_MAP_TYPE_HASH); + __uint(max_entries, 8192); + __type(key, pid_t); + __type(value, u64); +}id SEC(".maps"); +static int trace_container_sys_entry(struct trace_event_raw_sys_enter *args){ + u64 st = bpf_ktime_get_ns(); + pid_t pid = bpf_get_current_pid_tgid(); + u64 syscall_id = (u64)args->id; + bpf_map_update_elem(&time_info,&pid,&st,BPF_ANY); + bpf_map_update_elem(&id,&pid,&syscall_id,BPF_ANY); + return 0; +} +static int trace_container_sys_exit(struct trace_event_raw_sys_exit *args,void *rb,struct common_event *e){ + u64 exit_time = bpf_ktime_get_ns(); + pid_t pid = bpf_get_current_pid_tgid(); + //bpf_printk("pid=%15d\n",pid); + u64 delay,start_time,syscallid; + u64 *st = bpf_map_lookup_elem(&time_info,&pid); + if( st !=0){ + start_time = *st; + delay = (exit_time - start_time)/1000; + bpf_map_delete_elem(&time_info, &pid); + }else{ + return 0; + } + u64 *sc_id = bpf_map_lookup_elem(&id,&pid); + if( sc_id !=0){ + syscallid = *sc_id; + bpf_map_delete_elem(&id, &pid); + }else{ + return 0; + } + RESERVE_RINGBUF_ENTRY(rb, e); + e->syscall_data.delay = delay; + //bpf_get_current_comm(&e->syscall_data.comm, sizeof(e->syscall_data.comm)); + e->syscall_data.pid = pid; + e->syscall_data.syscall_id = syscallid; + bpf_ringbuf_submit(e, 0); + return 0; +} +#define MAX_NODENAME_LEN 64 +struct data_t { + char nodename[MAX_NODENAME_LEN]; +}; +// 字符串比较函数 +static bool str_not_equal(const char *s1, const char *s2) { + #pragma clang loop unroll(full) + for (int i = 0; i < MAX_NODENAME_LEN; i++) { + if (s1[i] != s2[i]) { + return true; + } + if (s1[i] == '\0') { + break; + } + } + return false; +} +static bool is_container_task(){ + struct task_struct *task; + struct nsproxy *ns; + struct uts_namespace *uts; + struct data_t data = {}; + + // 获取当前任务的 task_struct + task = (struct task_struct *)bpf_get_current_task(); + + // 获取 nsproxy + bpf_probe_read_kernel(&ns, sizeof(ns), &task->nsproxy); + if (!ns) { + return false; + } + + // 获取 uts_namespace + bpf_probe_read_kernel(&uts, sizeof(uts), &ns->uts_ns); + if (!uts) { + return false; + } + + // 读取主机名 + bpf_probe_read_kernel_str(&data.nodename, sizeof(data.nodename), uts->name.nodename); + + // 打印主机名 + //bpf_printk("Hostname: %s\n", data.nodename); + const char target_nodename[] = "yys-virtual-machine"; + if (str_not_equal(data.nodename, target_nodename)) { + bpf_printk("Hostname: %s\n", data.nodename); + return true; + } else { + return false; + } + +} +#endif /* __CONTAINER_H */ diff --git a/eBPF_Supermarket/kvm_watcher/include/common.h b/eBPF_Supermarket/kvm_watcher/include/common.h index 8c5f89006..6ecb7c261 100644 --- a/eBPF_Supermarket/kvm_watcher/include/common.h +++ b/eBPF_Supermarket/kvm_watcher/include/common.h @@ -1,4 +1,4 @@ -// Copyright 2023 The LMP Authors. +// Copyright 2023 The LMP Authors.#define TASK_COMM_ // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -94,6 +94,7 @@ static const char binary_path[] = "/bin/qemu-system-x86_64"; #define PFERR_RSVD_MASK (1UL << 3) // mmio + // 定时器模式 #define APIC_LVT_TIMER_ONESHOT (0 << 17) // 单次触发 #define APIC_LVT_TIMER_PERIODIC (1 << 17) // 周期性触发模式 @@ -232,6 +233,7 @@ struct process { char comm[TASK_COMM_LEN]; }; + enum EventType { NONE_TYPE, VCPU_WAKEUP, @@ -244,6 +246,7 @@ enum EventType { IRQ_INJECT, HYPERCALL, IOCTL, + CONTAINER_SYSCALL, TIMER, } event_type; @@ -342,6 +345,13 @@ struct common_event { __u32 vcpu_id; // HYPERCALL 特有成员 } hypercall_data; + + struct{ + __u64 pid; + __u64 syscall_id; + __u64 delay; + + } syscall_data; }; }; diff --git a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.bpf.c b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.bpf.c index 8e57cb7ae..b95b051e6 100644 --- a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.bpf.c +++ b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.bpf.c @@ -27,6 +27,7 @@ #include "kvm_mmu.h" #include "kvm_irq.h" #include "kvm_hypercall.h" +#include "container.h" char LICENSE[] SEC("license") = "Dual BSD/GPL"; @@ -243,3 +244,27 @@ int BPF_KPROBE(kp_start_sw_timer, struct kvm_lapic *apic) { CHECK_PID(vm_pid); return trace_start_sw_timer(apic); } + +//采集容器的系统用调用信息 +SEC("tracepoint/raw_syscalls/sys_enter") +int tp_container_sys_entry(struct trace_event_raw_sys_enter *args){ + //过滤进程 + bool is_container = is_container_task(); + if(is_container){ + return trace_container_sys_entry(args); + }else{ + return 0; + } + +} +SEC("tracepoint/raw_syscalls/sys_exit") +int tracepoint__syscalls__sys_exit(struct trace_event_raw_sys_exit *args){ + //过滤进程 + bool is_container = is_container_task(); + if(is_container){ + return trace_container_sys_exit(args,&rb,e); + }else{ + return 0; + } + +} \ No newline at end of file diff --git a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c index 1d08fb50f..c254037a0 100644 --- a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c +++ b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c @@ -329,6 +329,7 @@ static struct env { bool execute_timer; bool verbose; bool show; + bool execute_container_syscall; int monitoring_time; pid_t vm_pid; enum EventType event_type; @@ -349,6 +350,7 @@ static struct env { .monitoring_time = 0, .vm_pid = -1, .show = false, + .execute_container_syscall = false, .event_type = NONE_TYPE, }; @@ -359,6 +361,7 @@ int option_selected = 0; // 功能标志变量,确保激活子功能 // 具体解释命令行参数 static const struct argp_option opts[] = { {"vcpu_wakeup", 'w', NULL, 0, "Monitoring the wakeup of vcpu."}, + {"container_syscall", 'a', NULL, 0, "Monitoring the syscall of container."}, {"vcpu_load", 'o', NULL, 0, "Monitoring the load of vcpu."}, {"vm_exit", 'e', NULL, 0, "Monitoring the event of vm exit(including exiting to KVM and user " @@ -392,6 +395,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) { case 's': env.show = true; break; + case 'a': + SET_OPTION_AND_CHECK_USAGE(option_selected, env.execute_container_syscall); + break; case 'H': argp_state_help(state, stderr, ARGP_HELP_STD_HELP); break; @@ -516,6 +522,8 @@ static int determineEventType(struct env *env) { env->event_type = VCPU_LOAD; } else if (env->execute_timer) { env->event_type = TIMER; + }else if(env->execute_container_syscall){ + env->event_type = CONTAINER_SYSCALL; } else { env->event_type = NONE_TYPE; // 或者根据需要设置一个默认的事件类型 } @@ -544,6 +552,11 @@ static int handle_event(void *ctx, void *data, size_t data_sz) { case VCPU_LOAD: { break; } + case CONTAINER_SYSCALL:{ + printf("%-15u %-15lld %-15lld \n", + e->syscall_data.pid,e->syscall_data.delay,e->syscall_data.syscall_id); + break; + } case HALT_POLL: { // 使用 e->halt_poll_data 访问 HALT_POLL 特有成员 printf("%-18.6f %-15s %-6d/%-8d %-10s %-7d %-7d --> %d \n", @@ -754,6 +767,10 @@ static int print_event_head(struct env *env) { "DUR_HALT(ms)", "COMM", "PID/TID", "VCPU_ID", "WAIT/POLL", "VAILD?"); break; + case CONTAINER_SYSCALL: + printf("%-8s %-18s %6s %15s\n", "PID", + "DELAY(ns)", "SyscallID", "COMM"); + break; case EXIT: //可视化调整输出格式 // printf("Waiting vm_exit ... \n"); @@ -862,7 +879,10 @@ static void set_disable_load(struct kvm_watcher_bpf *skel) { if (env.execute_hypercall) { SET_KP_OR_FENTRY_LOAD(kvm_emulate_hypercall, kvm); } - + bpf_program__set_autoload(skel->progs.tp_container_sys_entry, + env.execute_container_syscall ? true : false); + bpf_program__set_autoload(skel->progs.tracepoint__syscalls__sys_exit, + env.execute_container_syscall ? true : false); bpf_program__set_autoload(skel->progs.tp_vcpu_wakeup, env.execute_vcpu_wakeup ? true : false); bpf_program__set_autoload(skel->progs.tp_exit, From e088d4662c392c2ef2231e749523678532818db5 Mon Sep 17 00:00:00 2001 From: YYS Date: Fri, 5 Jul 2024 16:06:48 +0800 Subject: [PATCH 2/2] container proc --- .../eBPF_proc_image/bpf/syscall_image.bpf.c | 66 +++++++++++++++++-- .../eBPF_proc_image/controller.c | 10 ++- .../eBPF_proc_image/include/proc_image.h | 1 + .../eBPF_proc_image/proc_image.c | 16 ++++- 4 files changed, 86 insertions(+), 7 deletions(-) diff --git a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/bpf/syscall_image.bpf.c b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/bpf/syscall_image.bpf.c index c33ef5f88..b608d77be 100644 --- a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/bpf/syscall_image.bpf.c +++ b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/bpf/syscall_image.bpf.c @@ -23,8 +23,9 @@ #include "proc_image.h" char LICENSE[] SEC("license") = "Dual BSD/GPL"; - +#define MAX_NODENAME_LEN 64 const volatile pid_t ignore_tgid = -1; +const volatile char hostname[MAX_NODENAME_LEN] = ""; const int key = 0; pid_t pre_target_pid = -1;//上一个监测的进程; int pre_target_tgid = -1;//上一个监测的进程组; @@ -48,14 +49,69 @@ struct { __uint(max_entries,256 * 10240); } syscall_rb SEC(".maps"); +struct { + __uint(type,BPF_MAP_TYPE_HASH); + __uint(max_entries, 8192); + __type(key, pid_t); + __type(value,struct container_id); +}container_id_map SEC(".maps"); + +struct container_id{ + char container_id[20]; +}; + +struct data_t { + char nodename[MAX_NODENAME_LEN]; +}; + +static bool is_container_task(const volatile char hostname[MAX_NODENAME_LEN]){ + struct task_struct *task; + struct nsproxy *ns; + struct uts_namespace *uts; + struct data_t data = {}; + // 获取当前任务的 task_struct + task = (struct task_struct *)bpf_get_current_task(); + // 获取 nsproxy + bpf_probe_read_kernel(&ns, sizeof(ns), &task->nsproxy); + if (!ns) { + return false; + } + // 获取 uts_namespace + bpf_probe_read_kernel(&uts, sizeof(uts), &ns->uts_ns); + if (!uts) { + return false; + } + // 读取主机名 + bpf_probe_read_kernel_str(&data.nodename, sizeof(data.nodename), uts->name.nodename); + // 打印主机名 + bool is_equal = true; + for(int i = 0;isc_func) + if(!sc_ctrl || !sc_ctrl->sc_func) return 0; - + if(sc_ctrl->is_container) + if(!is_container_task(hostname)) + return 0; pid_t pid = bpf_get_current_pid_tgid(); int tgid = bpf_get_current_pid_tgid() >> 32; @@ -104,7 +160,9 @@ int sys_exit(struct trace_event_raw_sys_exit *args) sc_ctrl = bpf_map_lookup_elem(&sc_ctrl_map,&key); if(!sc_ctrl || !sc_ctrl->sc_func) return 0; - + if(sc_ctrl->is_container) + if(!is_container_task(hostname)) + return 0; pid_t pid = bpf_get_current_pid_tgid(); int tgid = bpf_get_current_pid_tgid() >> 32; diff --git a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/controller.c b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/controller.c index 121a8fdb1..718feda25 100644 --- a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/controller.c +++ b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/controller.c @@ -49,6 +49,7 @@ static struct env { bool enable_lock; bool enable_syscall; bool enable_schedule; + bool is_container; } env = { .usemode = 0, .pid = -1, @@ -68,6 +69,7 @@ static struct env { .enable_lock = false, .enable_syscall = false, .enable_schedule = false, + .is_container = false, }; const char argp_program_doc[] ="Trace process to get process image.\n"; @@ -78,6 +80,7 @@ static const struct argp_option opts[] = { { "finish", 'f', NULL, 0, "Finish to run eBPF tool" }, { "pid", 'p', "PID", 0, "Process ID to trace" }, { "tgid", 'P', "TGID", 0, "Thread group to trace" }, + { "containerproc", 'o', NULL, 0, "Thread of containerproc to trace" }, { "cpuid", 'c', "CPUID", 0, "Set For Tracing per-CPU Process(other processes don't need to set this parameter)" }, { "time", 't', "TIME-SEC", 0, "Max Running Time(0 for infinite)" }, { "myproc", 'm', NULL, 0, "Trace the process of the tool itself (not tracked by default)" }, @@ -143,6 +146,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case 'm': env.enable_myproc = true; break; + case 'o': + env.is_container = true; + break; case 'r': env.enable_resource = true; break; @@ -201,7 +207,7 @@ int deactivate_mode(){ } if(env.enable_syscall){ - struct sc_ctrl sc_ctrl = {false,false,-1,-1,0}; + struct sc_ctrl sc_ctrl = {false,false,false,-1,-1,0}; err = update_sc_ctrl_map(sc_ctrl); if(err < 0) return err; } @@ -257,7 +263,7 @@ int main(int argc, char **argv) } if(env.enable_syscall){ - struct sc_ctrl sc_ctrl = {true,env.enable_myproc,env.pid,env.tgid,env.syscalls}; + struct sc_ctrl sc_ctrl = {true,env.enable_myproc, env.is_container,env.pid,env.tgid,env.syscalls}; err = update_sc_ctrl_map(sc_ctrl); if(err < 0) return err; } diff --git a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/include/proc_image.h b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/include/proc_image.h index ecfc1905f..589ca5063 100644 --- a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/include/proc_image.h +++ b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/include/proc_image.h @@ -65,6 +65,7 @@ struct total_rsc{ struct sc_ctrl { bool sc_func; bool enable_myproc; + bool is_container; pid_t target_pid; pid_t target_tgid; int syscalls; diff --git a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/proc_image.c b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/proc_image.c index b52a77712..9577b3482 100644 --- a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/proc_image.c +++ b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/proc_image.c @@ -65,6 +65,7 @@ static struct env { int lock_prev_tgid; int sched_prev_tgid; int sc_prev_tgid; + char hostname[64]; } env = { .output_resourse = false, .output_schedule = false, @@ -88,6 +89,7 @@ static struct env { .lock_prev_tgid = 0, .sched_prev_tgid = 0, .sc_prev_tgid = 0, + .hostname = "", }; struct hashmap *map = NULL; @@ -723,6 +725,16 @@ static void sig_handler(int signo) exiting = true; } +void get_hostname() { + char hostname[64]; + int result = gethostname(hostname, sizeof(hostname)); + if (result == 0) { + strcpy(env.hostname,hostname); + } else { + perror("gethostname"); + } +} + int main(int argc, char **argv) { struct resource_image_bpf *resource_skel = NULL; @@ -802,7 +814,9 @@ int main(int argc, char **argv) } syscall_skel->rodata->ignore_tgid = env.ignore_tgid; - + get_hostname(); + strcpy(syscall_skel->rodata->hostname,env.hostname); + err = syscall_image_bpf__load(syscall_skel); if (err) { fprintf(stderr, "Failed to load and verify BPF syscall skeleton\n");