From de36de67f3a61ed2467f111ef7f537435d66cb8c Mon Sep 17 00:00:00 2001 From: hov1417 Date: Mon, 22 May 2023 11:46:58 +0400 Subject: [PATCH 1/3] Add: eBPF profiling --- .gitignore | 2 + CMakeLists.txt | 4 +- Dockerfile | 22 +- README.md | 6 + cmake/leveldb.cmake | 2 + ebpf/ebpf.py | 521 ++++++++++++++++++++++++++++++++++++++++++ ebpf/ebpf_main.c | 419 +++++++++++++++++++++++++++++++++ run.py | 107 +++++++-- src/bench.cxx | 30 +++ src/core/settings.hpp | 1 + 10 files changed, 1075 insertions(+), 39 deletions(-) create mode 100644 ebpf/ebpf.py create mode 100644 ebpf/ebpf_main.c diff --git a/.gitignore b/.gitignore index 314bde06..f1ed02af 100644 --- a/.gitignore +++ b/.gitignore @@ -56,3 +56,5 @@ code-review*.bak *.exe *.out *.app + +**/__pycache__/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 2f138cf1..26f58d5d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -131,11 +131,11 @@ add_executable(ucsb_bench ./src/bench.cxx) if(${UCSB_BUILD_USTORE}) # Choose engine. Available engines: UCSET, ROCKSDB, LEVELDB, UDISK - set(USTORE_ENGINE_NAME UCSET) + set(USTORE_ENGINE_NAME UCSET) include("${CMAKE_MODULE_PATH}/ustore.cmake") list(APPEND UCSB_DB_LIBS "ustore") - target_compile_definitions(ucsb_bench PUBLIC UCSB_HAS_USTORE=1) + target_compile_definitions(ucsb_bench PUBLIC UCSB_HAS_USTORE=1) endif() if(${UCSB_BUILD_ROCKSDB}) diff --git a/Dockerfile b/Dockerfile index 140f2847..06249493 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,36 +4,34 @@ MAINTAINER unum@cloud.com WORKDIR ./ -RUN apt-get update +RUN apt update ENV DEBIAN_FRONTEND noninteractive # Install tools RUN apt install -y python3-pip RUN pip3 install cmake -RUN pip3 install conan RUN apt install -y gcc-10 RUN apt install -y g++-10 -RUN apt-get install -y libexplain-dev -RUN apt-get install -y libsnappy-dev -RUN apt-get install -yq pkg-config -RUN apt-get install -y git +RUN apt install -y libexplain-dev +RUN apt install -y libsnappy-dev +RUN apt install -y pkg-config +RUN apt install -y git # Build WiredTiger (latest) -RUN git clone git://github.com/wiredtiger/wiredtiger.git +RUN git clone --depth 1 git@github.com:wiredtiger/wiredtiger.git RUN mkdir ./wiredtiger/build WORKDIR "./wiredtiger/build" -RUN cmake ../. +RUN cmake .. RUN make install WORKDIR / # Build UCSB -RUN git clone https://github.com/unum-cloud/ucsb.git +RUN git clone --depth 1 git@github.com:unum-cloud/ucsb.git WORKDIR "./ucsb/" RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 10 RUN update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 10 -RUN bash -i setup.sh RUN bash -i build_release.sh -RUN rm -rf ./bench +RUN rm -rf ./bench/results -ENTRYPOINT ["./build_release/bin/ucsb_bench"] +ENTRYPOINT ["./build_release/build/bin/ucsb_bench"] diff --git a/README.md b/README.md index 9dc279a4..46596e6d 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,12 @@ The outputs will be placed in the `bench/results/` folder. git clone https://github.com/unum-cloud/ucsb.git && cd ucsb && ./run.py ``` +### eBPF + +There is an optional eBPF-based profiling, which can be enabled with `--with-ebpf` and `--with-ebpf-memory` flags, +before execution make sure you have `bcc` [installed](https://github.com/iovisor/bcc/blob/master/INSTALL.md) at least version 0.21.0. + + ## Supported Databases Key-Value Stores and NoSQL databases differ in supported operations. 
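Since the profiler is driven through bcc's Python bindings, a quick pre-flight check that the bindings are importable by the same interpreter that runs `run.py` can save a failed run. A minimal sketch, assuming the bindings are installed as the `bcc` module and that the build exposes a `__version__` attribute (older packages may not):

```python
#!/usr/bin/env python3
# Pre-flight check before `./run.py --with-ebpf [--with-ebpf-memory]`.
# Assumes the bcc Python bindings import as `bcc`; the __version__ attribute
# is present in recent bcc releases but may be absent in older builds.
import sys

try:
    import bcc
except ImportError:
    sys.exit("bcc Python bindings not found; see the bcc install guide linked above")

version = getattr(bcc, "__version__", None)
if version is None:
    print("bcc imported, but it does not report a version; make sure it is >= 0.21.0")
else:
    print(f"bcc {version} detected")
```

Note that `--with-ebpf-memory` only adds the memory probes on top of the base profiling, so it has to be combined with `--with-ebpf`, and loading the probes typically requires root privileges.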
diff --git a/cmake/leveldb.cmake b/cmake/leveldb.cmake index ea21444f..9e210799 100644 --- a/cmake/leveldb.cmake +++ b/cmake/leveldb.cmake @@ -27,4 +27,6 @@ if(NOT leveldb_POPULATED) add_subdirectory(${leveldb_SOURCE_DIR} ${leveldb_BINARY_DIR} EXCLUDE_FROM_ALL) endif() +add_compile_options(-g) + include_directories(${leveldb_SOURCE_DIR}/include) diff --git a/ebpf/ebpf.py b/ebpf/ebpf.py new file mode 100644 index 00000000..0e233e5a --- /dev/null +++ b/ebpf/ebpf.py @@ -0,0 +1,521 @@ +#!/usr/bin/env python3 + +import argparse +import json +import logging +import os +import signal +import resource +import sys +from time import sleep, strftime, time +from typing import Optional, Tuple + +import pexpect +from bcc import BPF +from bcc.syscall import syscall_name +from pexpect import spawn + +logging.basicConfig(filename='/tmp/ebpf.log', encoding='utf-8', level=logging.DEBUG) + + +def get_size_filter(min_size, max_size): + if min_size is not None and max_size is not None: + return "if (size < %d || size > %d) return 0;" % (min_size, max_size) + elif min_size is not None: + return "if (size < %d) return 0;" % min_size + elif max_size is not None: + return "if (size > %d) return 0;" % max_size + else: + return "" + + +class Allocation(object): + def __init__(self, stack, size, address): + self.stack = stack + self.count = 1 + self.size = size + self.address = address + + def update(self, size): + self.count += 1 + self.size += size + + +def print_outstanding(bpf, pid, top, min_age_ns): + print(f"Top {top} stacks with outstanding allocations:") + alloc_info = {} + allocs = bpf["allocs"] + stack_traces = bpf["stack_traces"] + for address, info in sorted(allocs.items(), key=lambda a: a[1].size): + if BPF.monotonic_time() - min_age_ns < info.timestamp_ns or info.stack_id < 0: + continue + if info.stack_id in alloc_info: + alloc_info[info.stack_id].update(info.size) + else: + stack = list(stack_traces.walk(info.stack_id)) + combined = [] + for addr in stack: + func_name = bpf.sym(addr, pid, show_module=True, show_offset=True) + formatted_address = ('0x' + format(addr, '016x') + '\t').encode('utf-8') + combined.append(formatted_address + func_name) + alloc_info[info.stack_id] = Allocation(combined, info.size, address.value) + print(f"\taddr = {address.value} size = {info.size}") + to_show = sorted(alloc_info.values(), key=lambda a: a.size)[-top:] + for alloc in to_show: + stack = b"\n\t\t".join(alloc.stack).decode("ascii") + print(f"\t{alloc.size} bytes in {alloc.count} allocations from stack\n\t\t{stack}") + + +def get_outstanding(bpf, pid, min_age_ns, top): + alloc_info = {} + allocs = bpf["allocs"] + stack_traces = bpf["stack_traces"] + for address, info in sorted(allocs.items(), key=lambda a: a[1].size): + if BPF.monotonic_time() - min_age_ns < info.timestamp_ns or info.stack_id < 0: + continue + if info.stack_id in alloc_info: + alloc_info[info.stack_id].update(info.size) + else: + stack = list(stack_traces.walk(info.stack_id)) + combined = [] + for addr in stack: + func_name = bpf.sym(addr, pid, show_module=True, show_offset=True) + formatted_address = ('0x' + format(addr, '016x') + ' ').encode('utf-8') + combined.append(formatted_address + func_name) + alloc_info[info.stack_id] = Allocation(combined, info.size, address.value) + + sorted_stacks = sorted(alloc_info.values(), key=lambda a: -a.size)[:top] + return list( + map(lambda alloc: {'stack': [s.decode('ascii') for s in alloc.stack], 'size': alloc.size, 'count': alloc.count}, + sorted_stacks)) + + +class CombinedAlloc(object): + def 
__init__(self, item): + self.stack_id = item[0] + self.free_size = item[1].free_size + self.alloc_size = item[1].alloc_size + self.number_of_frees = item[1].number_of_frees + self.number_of_allocs = item[1].number_of_allocs + + def key(self): + return self.alloc_size - self.free_size + + def __str__(self): + return f"CombinedAlloc(stack_id={self.stack_id},\n" \ + f"\t free_size={self.free_size},\n" \ + f"\t alloc_size={self.alloc_size},\n" \ + f"\t number_of_frees={self.number_of_frees},\n" \ + f"\t number_of_allocs={self.number_of_allocs})\n" + + def __repr__(self): + return self.__str__() + + def is_positive(self): + return self.alloc_size > self.free_size + + +def print_memory_statistics(bpf, pid, top): + stack_traces = bpf["stack_traces"] + print("stack traces", len(list(stack_traces.items()))) + combined_alloc = list( + sorted( + map(CombinedAlloc, bpf["combined_allocs"].items()), + key=lambda a: a.key(), + ) + ) + memory = sum((c.alloc_size - c.free_size for c in combined_alloc)) / 1024 + print("overall, allocated", memory, "kb in", len(combined_alloc), "allocations") + entries = [] + for allocation in combined_alloc[:top]: + trace = get_trace_info(bpf, pid, stack_traces, allocation.stack_id.value) + entry = f"\t{allocation.alloc_size - allocation.free_size} bytes in " \ + f"{allocation.number_of_allocs - allocation.number_of_frees}" \ + f" allocations from stack ({allocation.number_of_allocs + allocation.number_of_frees} allocs/frees)" \ + f"\n\t\t{trace}" + entries.append(entry) + + print(f"Top {top} stacks with outstanding allocations:") + print('\n'.join(reversed(entries))) + + +def get_statistics(bpf, pid): + stack_traces = bpf["stack_traces"] + combined_alloc = list( + sorted( + map(CombinedAlloc, bpf["combined_allocs"].items()), + key=lambda a: a.key(), + ) + ) + memory = sum((c.alloc_size - c.free_size for c in combined_alloc)) + entries = [] + for allocation in combined_alloc: + entries.append({ + 'alloc_size': allocation.alloc_size, + 'free_size': allocation.free_size, + 'number_of_allocs': allocation.number_of_allocs, + 'number_of_frees': allocation.number_of_frees, + 'trace': get_trace_info_as_list(bpf, pid, stack_traces, allocation.stack_id.value), + }) + return { + "memory": memory, + "combined_allocs": list(reversed(entries)), + "stack_traces": len(list(stack_traces.items())) + } + + +def get_trace_info(bpf, pid, stack_traces, stack_id): + trace = [] + for addr in walk_trace(stack_traces, stack_id): + sym = bpf.sym(addr, pid, show_module=False, show_offset=True) + trace.append(sym.decode()) + + trace = "\n\t\t".join(trace) + if not trace: + trace = "stack information lost" + return trace + + +def get_trace_info_as_list(bpf, pid, stack_traces, stack_id): + trace = [] + for addr in walk_trace(stack_traces, stack_id): + sym = bpf.sym(addr, pid, show_module=False, show_offset=True) + trace.append(sym.decode()) + + return trace + + +def walk_trace(stack_traces, stack_id): + try: + return stack_traces.walk(stack_id) + except KeyError: + return [] + + +def print_outstanding_kernel_cache(bpf, top): + kernel_cache_allocs = list( + sorted(filter(lambda a: a[1].alloc_size > a[1].free_size, bpf['kernel_cache_counts'].items()), + key=lambda a: a[1].alloc_size - a[1].free_size) + )[:top] + if not kernel_cache_allocs: + return + print("---------------- Kernel Caches ---------------") + for (k, v) in kernel_cache_allocs: + print("Cache", str(k.name, "utf-8"), v.alloc_count - v.free_count, v.alloc_size - v.free_size) + + +def gernel_kernel_cache(bpf, top): + kernel_cache_allocs = list( 
+ sorted(filter(lambda a: a[1].alloc_size > a[1].free_size, bpf['kernel_cache_counts'].items()), + key=lambda a: a[1].alloc_size - a[1].free_size) + )[:top] + if not kernel_cache_allocs: + return + caches = [] + to_remove = [] + for (k, v) in kernel_cache_allocs: + caches.append({ + 'name': str(k.name, "utf-8"), + 'alloc_count': v.alloc_count, + 'free_count': v.free_count, + 'alloc_size': v.alloc_size, + 'free_size': v.free_size, + }) + if v.alloc_count >= v.free_count: + to_remove.append(k) + if len(to_remove) > 0: + arr = (type(to_remove[0]) * len(to_remove))(*to_remove) + bpf['kernel_cache_counts'].items_delete_batch(arr) + return caches + + +def print_syscalls(bpf, top): + syscall_counts = bpf["syscall_counts"] + print("SYSCALL COUNT TIME") + for k, v in sorted(syscall_counts.items(), key=lambda kv: -kv[1].total_ns)[:top]: + print("%-22s %8d %16.3f" % (system_call_name(k), v.count, v.total_ns / 1e3)) + syscall_counts.clear() + + +def get_syscalls(bpf, top): + syscall_counts = bpf["syscall_counts"] + syscalls = [] + for k, v in sorted(syscall_counts.items(), key=lambda kv: -kv[1].total_ns)[:top]: + syscalls.append({ + 'name': system_call_name(k), + 'count': v.count, + 'total_ns': v.total_ns, + }) + syscall_counts.clear() + return syscalls + + +def system_call_name(k): + return syscall_name(k.value).decode('ascii') + + +def print_time(): + print("[%s]" % strftime("%H:%M:%S")) + + +def save_snapshot( + bpf: BPF, + pid: int, + min_age_ns: int, + top: int, + snapshot_dir: str, + with_memory: bool, + snapshot_prefix: Optional[str] = None +): + current_time_millis = int(round(time() * 1000)) + stats = get_statistics(bpf, pid) if with_memory else {} + outstanding = get_outstanding(bpf, pid, min_age_ns, top) if with_memory else [] + kernel_caches = gernel_kernel_cache(bpf, top) if with_memory else [] + syscalls = get_syscalls(bpf, top) + snapshot = { + 'time': current_time_millis, + 'stats': stats, + 'outstanding': outstanding, + 'kernel_caches': kernel_caches, + 'syscalls': syscalls, + } + os.makedirs(snapshot_dir, exist_ok=True) + with open(f'{snapshot_dir}/{snapshot_prefix or "snapshot"}-{current_time_millis}.json', 'w') as outfile: + json.dump(snapshot, outfile) + + +class Arguments: + def __init__(self, args): + self.pid = args.pid + self.command = args.command + self.interval = args.interval + self.min_age_ns = 1e6 * args.older + self.alloc_sample_every_n = args.alloc_sample_rate + self.top = args.top + self.min_alloc_size = args.min_alloc_size + self.max_alloc_size = args.max_alloc_size + + if args.snapshots is None: + self.save_snapshots = './snapshots' + elif args.snapshots == -1: + self.save_snapshots = None + else: + self.save_snapshots = args.snapshots + + if self.min_alloc_size is not None and self.max_alloc_size is not None \ + and self.min_alloc_size > self.max_alloc_size: + print("min_size (-z) can't be greater than max_size (-Z)") + exit(1) + + if self.command is None and self.pid is None: + print("Either -p or -c must be specified") + exit(1) + + +def parse(): + parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument("-p", "--pid", type=int, default=-1, + help="the PID to trace; if not specified, trace kernel allocs") + parser.add_argument("interval", nargs="?", default=5, type=int, + help="interval in seconds to print outstanding allocations") + parser.add_argument("-o", "--older", default=500, type=int, + help="prune allocations younger than this age in milliseconds") + parser.add_argument("-c", "--command", + 
help="execute and trace the specified command") + parser.add_argument("-s", "--alloc-sample-rate", default=1, type=int, + help="sample every N-th allocation to decrease the overhead") + parser.add_argument("-T", "--top", type=int, default=10, + help="display only this many top stats") + parser.add_argument("-z", "--min-alloc-size", type=int, + help="capture only allocations larger than this size") + parser.add_argument("-Z", "--max-alloc-size", type=int, + help="capture only allocations smaller than this size") + parser.add_argument("-S", "--snapshots", default=-1, type=str, nargs='?', + help="save statistics snapshots to the specified directory") + return Arguments(parser.parse_args()) + + +def attach_ebpf(pid: int = -1, + process: Optional[spawn] = None, + command: Optional[str] = None, + top: int = 10, + interval: int = 5, + alloc_sample_every_n: int = 1, + max_alloc_size: Optional[int] = None, + min_age_ns: int = 500, + min_alloc_size: Optional[int] = None, + save_snapshots: Optional[str] = None, + with_memory: bool = False): + (bpf, pid, process) = attach_probes(pid, process, command, alloc_sample_every_n, max_alloc_size, min_alloc_size, + with_memory) + + harvest_ebpf(bpf, pid, process, top, interval, min_age_ns, save_snapshots) + + +def attach_probes( + pid: int = -1, + process: Optional[spawn] = None, + command: Optional[str] = None, + alloc_sample_every_n: int = 1, + max_alloc_size: Optional[int] = None, + min_alloc_size: Optional[int] = None, + with_memory: bool = False, + communicate_with_signals: bool = False +) -> Optional[Tuple[BPF, int, spawn]]: + if pid == -1 and command is None and process is None: + logging.info("Either pid or command or process must be specified") + return + + if command is not None: + logging.info(f"Executing '{command}' and tracing the resulting process.") + process = pexpect.spawn(command) + pid = process.pid + elif process is not None: + pid = process.pid + + if communicate_with_signals: + signal.signal(signal.SIGUSR2, signal_handler) + + # Constructing probes + bpf = BPF(src_file='./ebpf/ebpf_main.c', + usdt_contexts=[], + cflags=[ + "-Wno-macro-redefined", + f"-DPROCESS_ID={pid}", + f"-DSAMPLE_EVERY_N={alloc_sample_every_n}", + f"-DPAGE_SIZE={resource.getpagesize()}", + f"-DFILTER_BY_SIZE={get_size_filter(min_alloc_size, max_alloc_size)}", + "-DWITH_MEMORY" if with_memory else "", + ]) + + # Attaching probes + + logging.info(f"Attaching to pid {pid}") + + wait_time = 0 + wait_interval = 0.01 + timeout = 5 + while not process.isalive(): + sleep(wait_interval) + wait_time += wait_interval + if process.terminated: + print(process.readline().decode('utf-8')) + raise Exception(f'Process is already terminated, with status code {process.exitstatus}') + if wait_time > timeout: + raise Exception('Process is not alive') + + if with_memory: + for sym in ["malloc", "calloc", "realloc", "mmap", "posix_memalign", "valloc", "memalign", "pvalloc", + "aligned_alloc"]: + bpf.attach_uprobe(name="c", sym=sym, fn_name=sym + "_enter", pid=pid) + bpf.attach_uretprobe(name="c", sym=sym, fn_name=sym + "_exit", pid=pid) + + bpf.attach_uprobe(name="c", sym="free", fn_name="free_enter", pid=pid) + bpf.attach_uprobe(name="c", sym="munmap", fn_name="munmap_enter", pid=pid) + + # kernel cache probes + bpf.attach_kprobe(event='kmem_cache_alloc_lru', fn_name='trace_cache_alloc') + bpf.attach_kprobe(event='kmem_cache_alloc_bulk', fn_name='trace_cache_alloc') + bpf.attach_kprobe(event='kmem_cache_alloc_node', fn_name='trace_cache_alloc') + + 
bpf.attach_kprobe(event='kmem_cache_free', fn_name='trace_cache_free') + bpf.attach_kprobe(event='kmem_cache_free_bulk', fn_name='trace_cache_free') + + return bpf, pid, process + + +SIGUSR2_received = False + + +def signal_handler(_sig_no, _stack_frame): + global SIGUSR2_received + SIGUSR2_received = True + + +def is_terminated(pid: int, process: Optional[spawn]): + if process is None: + try: + # Sending signal 0 to a pid will raise OSError if the process does not exist + os.kill(pid, 0) + except OSError: + return True + else: + return False + return SIGUSR2_received or not process.isalive() + + +def sleep_and_check( + interval: float, + pid: int, + process: Optional[spawn], + check_delay: float +): + wait_til = time() + interval + while time() < wait_til: + sleep(check_delay) + if is_terminated(pid, process): + break + + +def print_ebpf_info(bpf, pid, min_age_ns, top): + print_time() + print_memory_statistics(bpf, pid, top) + print_outstanding(bpf, pid, top, min_age_ns) + print_outstanding_kernel_cache(bpf, top) + print_syscalls(bpf, top) + print() + sys.stdout.flush() + + +def harvest_ebpf( + bpf: BPF, + pid: int = -1, + process: Optional[spawn] = None, + top: int = 10, + interval: int = 5, + min_age_ns: int = 500, + with_memory: bool = False, + save_snapshots: Optional[str] = None, + snapshot_prefix: Optional[str] = None, + communicate_with_signals: bool = False, +): + if pid == -1 and process is None: + raise ValueError("Either pid or process must be specified") + + if process is not None: + pid = process.pid + + while True: + logging.info(f"Sleeping for {interval} seconds...") + try: + sleep_and_check(interval, pid, process, check_delay=0.2) + except KeyboardInterrupt: + break + if save_snapshots: + save_snapshot(bpf, pid, min_age_ns, top, save_snapshots, with_memory, snapshot_prefix) + else: + print_ebpf_info(bpf, pid, min_age_ns, top) + if is_terminated(pid, process): + break + + if communicate_with_signals: + # Sending SIGUSR1 to the process will notify that the tracing is done + os.kill(pid, signal.SIGUSR1) + + logging.info("Detaching...") + bpf.cleanup() + + +if __name__ == "__main__": + arguments = parse() + attach_ebpf( + pid=arguments.pid, + command=arguments.command, + top=arguments.top, + interval=arguments.interval, + alloc_sample_every_n=arguments.alloc_sample_every_n, + max_alloc_size=arguments.max_alloc_size, + min_age_ns=arguments.min_age_ns, + min_alloc_size=arguments.min_alloc_size, + save_snapshots=arguments.save_snapshots, + ) diff --git a/ebpf/ebpf_main.c b/ebpf/ebpf_main.c new file mode 100644 index 00000000..da9974fe --- /dev/null +++ b/ebpf/ebpf_main.c @@ -0,0 +1,419 @@ +#include +#include +#include +#include +#include +#include + +#define FILTER_BY_PID u32 __pid = bpf_get_current_pid_tgid() >> 32;if (__pid != PROCESS_ID) {return 0;} + +#ifdef WITH_MEMORY + +struct alloc_info_t { + u64 size; + u64 timestamp_ns; + int stack_id; +}; + +struct combined_alloc_info_t { + u64 alloc_size; + u64 free_size; + u64 number_of_allocs; + u64 number_of_frees; +}; + +// count of allocations per stack trace +BPF_HASH(combined_allocs, u64, struct combined_alloc_info_t, 10240); + +BPF_HASH(sizes, u64); +BPF_HASH(allocs, u64, struct alloc_info_t, 1000000); +BPF_HASH(memptrs, u64, u64); + +BPF_STACK_TRACE(stack_traces, 10240); + +static inline void update_statistics_add(u64 stack_id, u64 sz) { + struct combined_alloc_info_t *existing_cinfo; + struct combined_alloc_info_t cinfo = {0}; + + existing_cinfo = combined_allocs.lookup(&stack_id); + if (existing_cinfo != 0) + cinfo = 
*existing_cinfo; + + lock_xadd(&cinfo.alloc_size, sz); + lock_xadd(&cinfo.number_of_allocs, 1); + + combined_allocs.update(&stack_id, &cinfo); +} + +static inline void update_statistics_del(u64 stack_id, u64 sz) { + struct combined_alloc_info_t *existing_cinfo; + struct combined_alloc_info_t cinfo = {0}; + + existing_cinfo = combined_allocs.lookup(&stack_id); + if (existing_cinfo != 0) + cinfo = *existing_cinfo; + + lock_xadd(&cinfo.free_size, sz); + lock_xadd(&cinfo.number_of_frees, 1); + + + combined_allocs.update(&stack_id, &cinfo); +} + +static inline int gen_alloc_enter(struct pt_regs *ctx, size_t size) { + FILTER_BY_PID + + FILTER_BY_SIZE + if (SAMPLE_EVERY_N > 1) { + u64 ts = bpf_ktime_get_ns(); + if (ts % SAMPLE_EVERY_N != 0) + return 0; + } + + u64 pid = bpf_get_current_pid_tgid(); + u64 size64 = size; + sizes.update(&pid, &size64); + + return 0; +} + +static inline int gen_alloc_exit2(struct pt_regs *ctx, u64 address) { + FILTER_BY_PID + + u64 pid = bpf_get_current_pid_tgid(); + u64 * size64 = sizes.lookup(&pid); + struct alloc_info_t info = {0}; + + if (size64 == 0) + return 0; // missed alloc entry + + info.size = *size64; + sizes.delete(&pid); + + if (address != 0) { + info.timestamp_ns = bpf_ktime_get_ns(); + info.stack_id = stack_traces.get_stackid(ctx, BPF_F_USER_STACK); + allocs.update(&address, &info); + update_statistics_add(info.stack_id, info.size); + } + + return 0; +} + +static inline int gen_alloc_exit(struct pt_regs *ctx) { + return gen_alloc_exit2(ctx, PT_REGS_RC(ctx)); +} + +static inline int gen_free_enter(struct pt_regs *ctx, void *address) { + FILTER_BY_PID + + u64 addr = (u64)address; + struct alloc_info_t *info = allocs.lookup(&addr); + if (info == 0) + return 0; + + allocs.delete(&addr); + update_statistics_del(info->stack_id, info->size); + + return 0; +} + +/** Probes */ + +int malloc_enter(struct pt_regs *ctx, size_t size) { + return gen_alloc_enter(ctx, size); +} + +int malloc_exit(struct pt_regs *ctx) { return gen_alloc_exit(ctx); } + +int free_enter(struct pt_regs *ctx, void *address) { + return gen_free_enter(ctx, address); +} + +int calloc_enter(struct pt_regs *ctx, size_t nmemb, size_t size) { + return gen_alloc_enter(ctx, nmemb * size); +} + +int calloc_exit(struct pt_regs *ctx) { return gen_alloc_exit(ctx); } + +int realloc_enter(struct pt_regs *ctx, void *ptr, size_t size) { + gen_free_enter(ctx, ptr); + return gen_alloc_enter(ctx, size); +} + +int realloc_exit(struct pt_regs *ctx) { return gen_alloc_exit(ctx); } + +int mmap_enter(struct pt_regs *ctx) { + size_t size = (size_t) PT_REGS_PARM2(ctx); + return gen_alloc_enter(ctx, size); +} + +int mmap_exit(struct pt_regs *ctx) { return gen_alloc_exit(ctx); } + +int munmap_enter(struct pt_regs *ctx, void *address) { + return gen_free_enter(ctx, address); +} + +int posix_memalign_enter(struct pt_regs *ctx, void **memptr, size_t alignment, + size_t size) { + u64 memptr64 = (u64)(size_t)memptr; + u64 pid = bpf_get_current_pid_tgid(); + + memptrs.update(&pid, &memptr64); + return gen_alloc_enter(ctx, size); +} + +int posix_memalign_exit(struct pt_regs *ctx) { + u64 pid = bpf_get_current_pid_tgid(); + u64 *memptr64 = memptrs.lookup(&pid); + void *addr; + + if (memptr64 == 0) + return 0; + + memptrs.delete(&pid); + + if (bpf_probe_read_user(&addr, sizeof(void *), (void *) (size_t) * memptr64)) + return 0; + + u64 addr64 = (u64)(size_t) addr; + return gen_alloc_exit2(ctx, addr64); +} + +int aligned_alloc_enter(struct pt_regs *ctx, size_t alignment, size_t size) { + return gen_alloc_enter(ctx, size); +} + 
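+/*
+ * Every *_enter/*_exit pair here follows the same contract: the uprobe
+ * handler stashes the requested size in the per-thread `sizes` map (keyed by
+ * bpf_get_current_pid_tgid()), and the matching uretprobe handler reads the
+ * returned address via PT_REGS_RC() to record the allocation, which is why
+ * the Python side attaches both a uprobe and a uretprobe for each symbol.
+ */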
+int aligned_alloc_exit(struct pt_regs *ctx) { return gen_alloc_exit(ctx); } + +int valloc_enter(struct pt_regs *ctx, size_t size) { + return gen_alloc_enter(ctx, size); +} + +int valloc_exit(struct pt_regs *ctx) { return gen_alloc_exit(ctx); } + +int memalign_enter(struct pt_regs *ctx, size_t alignment, size_t size) { + return gen_alloc_enter(ctx, size); +} + +int memalign_exit(struct pt_regs *ctx) { return gen_alloc_exit(ctx); } + +int pvalloc_enter(struct pt_regs *ctx, size_t size) { + return gen_alloc_enter(ctx, size); +} + +int pvalloc_exit(struct pt_regs *ctx) { return gen_alloc_exit(ctx); } + +/** Tracepoints + Function names are in tracepoint__{{category}}__{{event}} + alternatively we can use attach_tracepoint function in python api +*/ + +int tracepoint__kmem__kmalloc(struct tracepoint__kmem__kmalloc *args) { + gen_alloc_enter((struct pt_regs *) args, args->bytes_alloc); + return gen_alloc_exit2((struct pt_regs *) args, (size_t) args->ptr); +} + +int tracepoint__kmem__kfree(struct tracepoint__kmem__kfree *args) { + return gen_free_enter((struct pt_regs *) args, (void *) args->ptr); +} + +int tracepoint__kmem__kmem_cache_alloc( + struct tracepoint__kmem__kmem_cache_alloc *args) { + gen_alloc_enter((struct pt_regs *) args, args->bytes_alloc); + return gen_alloc_exit2((struct pt_regs *) args, (size_t) args->ptr); +} + +int tracepoint__kmem__kmem_cache_free( + struct tracepoint__kmem__kmem_cache_free *args) { + return gen_free_enter((struct pt_regs *) args, (void *) args->ptr); +} + +int tracepoint__kmem__mm_page_alloc( + struct tracepoint__kmem__mm_page_alloc *args) { + gen_alloc_enter((struct pt_regs *) args, PAGE_SIZE << args->order); + return gen_alloc_exit2((struct pt_regs *) args, args->pfn); +} + +int tracepoint__kmem__mm_page_free( + struct tracepoint__kmem__mm_page_free *args) { + return gen_free_enter((struct pt_regs *) args, (void *) args->pfn); +} + +int tracepoint__kmem__kmalloc_node(struct tracepoint__kmem__kmalloc_node *args) { + gen_alloc_enter((struct pt_regs *) args, args->bytes_alloc); + return gen_alloc_exit2((struct pt_regs *) args, (size_t) args->ptr); +} + +int tracepoint__kmem__kmem_cache_alloc_node(struct tracepoint__kmem__kmem_cache_alloc_node *args) { + gen_alloc_enter((struct pt_regs *) args, args->bytes_alloc); + return gen_alloc_exit2((struct pt_regs *) args, (size_t) args->ptr); +} + +/** kernel cache */ + +// to resolve undefined error +// taken from bcc slabratetop tool +struct slab { + unsigned long __page_flags; +#if defined(CONFIG_SLAB) + struct kmem_cache *slab_cache; + union { + struct { + struct list_head slab_list; + void *freelist; /* array of free object indexes */ + void *s_mem; /* first object */ + }; + struct rcu_head rcu_head; + }; + unsigned int active; +#elif defined(CONFIG_SLUB) + struct kmem_cache *slab_cache; + union { + struct { + union { + struct list_head slab_list; +#ifdef CONFIG_SLUB_CPU_PARTIAL + struct { + struct slab *next; + int slabs; /* Nr of slabs left */ + }; +#endif + }; + /* Double-word boundary */ + void *freelist; /* first free object */ + union { + unsigned long counters; + struct { + unsigned inuse:16; + unsigned objects:15; + unsigned frozen:1; + }; + }; + }; + struct rcu_head rcu_head; + }; + unsigned int __unused; +#elif defined(CONFIG_SLOB) + struct list_head slab_list; + void *__unused_1; + void *freelist; /* first free block */ + long units; + unsigned int __unused_2; +#else +#error "Unexpected slab allocator configured" +#endif + atomic_t __page_refcount; +#ifdef CONFIG_MEMCG + unsigned long memcg_data; 
+#endif +}; + +// slab_address() will not be used, and NULL will be returned directly, which +// can avoid adaptation of different kernel versions +static inline void *slab_address(const struct slab *slab) { + return NULL; +} + +#ifdef CONFIG_SLUB + +#include + +#else + +#include + +#endif + +struct key_t { + char name[32]; +}; + +struct val_t { + u64 alloc_count; + u64 alloc_size; + u64 free_count; + u64 free_size; +}; + +BPF_HASH(kernel_cache_counts, +struct key_t, struct val_t); + +int trace_cache_alloc(struct pt_regs *ctx, struct kmem_cache *cachep) { + FILTER_BY_PID + + u64 size = cachep->size; + + FILTER_BY_SIZE + + struct key_t key = {}; + bpf_probe_read_kernel(&key.name, sizeof(key.name), cachep->name); + + struct val_t empty_val_t = {}; + + struct val_t *val = kernel_cache_counts.lookup_or_try_init(&key, &empty_val_t); + if (val) { + val->alloc_count++; + val->alloc_size += size; + } + + return 0; +} + +int trace_cache_free(struct pt_regs *ctx, struct kmem_cache *cachep) { + FILTER_BY_PID + + u64 size = cachep->size; + + FILTER_BY_SIZE + + struct key_t key = {}; + bpf_probe_read_kernel(&key.name, sizeof(key.name), cachep->name); + + struct val_t empty_val_t = {}; + struct val_t *val = kernel_cache_counts.lookup_or_try_init(&key, &empty_val_t); + if (val) { + val->free_count ++; + val->free_size += size; + } + + return 0; +} + +#endif + + +/** System Calls */ + +struct sys_call_data_t { + u64 count; + u64 total_ns; +}; +BPF_HASH(syscall_start, u64, u64); +BPF_HASH(syscall_counts, u32, struct sys_call_data_t); + +int tracepoint__raw_syscalls__sys_enter(struct tracepoint__raw_syscalls__sys_enter *args) { + FILTER_BY_PID + u64 pid_tgid = bpf_get_current_pid_tgid(); + u64 t = bpf_ktime_get_ns(); + syscall_start.update(&pid_tgid, &t); + return 0; +} + +int tracepoint__raw_syscalls__sys_exit(struct tracepoint__raw_syscalls__sys_exit *args) { + FILTER_BY_PID + + u64 pid_tgid = bpf_get_current_pid_tgid(); + + struct sys_call_data_t *val, zero = {}; + u64 *start_ns = syscall_start.lookup(&pid_tgid); + if (!start_ns) + return 0; + u32 key = args->id; + val = syscall_counts.lookup_or_try_init(&key, &zero); + if (val) { + lock_xadd(&val->count, 1); + lock_xadd(&val->total_ns, bpf_ktime_get_ns() - *start_ns); + } + return 0; +} \ No newline at end of file diff --git a/run.py b/run.py index 35364d8a..469c3452 100755 --- a/run.py +++ b/run.py @@ -1,13 +1,16 @@ #!/usr/bin/env python3 +import argparse import os -import sys -import time +import pathlib import shutil import signal +import sys +import time +from threading import Thread +from typing import List + import pexpect -import pathlib -import argparse import termcolor """ @@ -53,6 +56,8 @@ drop_caches = False cleanup_previous = False run_in_docker_container = False +with_ebpf = False +with_ebpf_memory = False main_dir_path = "./db_main/" storage_disk_paths = [ @@ -76,21 +81,20 @@ def get_db_main_dir_path(db_name: str, size: str, main_dir_path: str) -> str: return os.path.join(main_dir_path, db_name, size, "") -def get_db_storage_dir_paths(db_name: str, size: str, storage_disk_paths: str) -> list: +def get_db_storage_dir_paths(db_name: str, size: str, storage_disk_paths: List[str]) -> list: db_storage_dir_paths = [] for storage_disk_path in storage_disk_paths: db_storage_dir_paths.append(os.path.join(storage_disk_path, db_name, size, "")) return db_storage_dir_paths -def get_results_file_path( - db_name: str, - size: str, - drop_caches: bool, - transactional: bool, - storage_disk_paths: str, - threads_count: int, -) -> str: +def 
get_results_file_path(db_name: str, + size: str, + drop_caches: bool, + transactional: bool, + storage_disk_paths: List[str], + threads_count: int, + ) -> str: root_dir_path = "" if drop_caches: if transactional: @@ -128,17 +132,19 @@ def drop_system_caches(): def run( - db_name: str, - size: str, - workload_names: list, - main_dir_path: str, - storage_disk_paths: str, - transactional: bool, - drop_caches: bool, - run_in_docker_container: bool, - threads_count: bool, - run_index: int, - runs_count: int, + db_name: str, + size: str, + workload_names: list, + main_dir_path: str, + storage_disk_paths: List[str], + transactional: bool, + drop_caches: bool, + run_in_docker_container: bool, + threads_count: int, + run_index: int, + runs_count: int, + with_ebpf: bool, + with_ebpf_memory: bool ) -> None: db_config_file_path = get_db_config_file_path(db_name, size) workloads_file_path = get_workloads_file_path(size) @@ -149,6 +155,7 @@ def run( ) transactional_flag = "-t" if transactional else "" + lazy_flag = '-l' if with_ebpf else '' filter = ",".join(workload_names) db_storage_dir_paths = ",".join(db_storage_dir_paths) @@ -161,8 +168,30 @@ def run( raise Exception("First, please build the runner: `build_release.sh`") process = pexpect.spawn( - f'{runner} -db {db_name} {transactional_flag} -cfg "{db_config_file_path}" -wl "{workloads_file_path}" -md "{db_main_dir_path}" -sd "{db_storage_dir_paths}" -res "{results_file_path}" -th {threads_count} -fl {filter} -ri {run_index} -rc {runs_count}' + f'{runner} -db {db_name} {transactional_flag} {lazy_flag} -cfg "{db_config_file_path}" -wl "{workloads_file_path}" -md "{db_main_dir_path}" -sd "{db_storage_dir_paths}" -res "{results_file_path}" -th {threads_count} -fl {filter} -ri {run_index} -rc {runs_count}' ) + thread = None + if with_ebpf: + from ebpf.ebpf import attach_probes, harvest_ebpf + bpf, pid, process = attach_probes( + process=process, + with_memory=with_ebpf_memory, + communicate_with_signals=True, + ) + # Send SIGUSR1 to the process to notify it that the probes are attached + os.kill(process.pid, signal.SIGUSR1) + thread = Thread(target=harvest_ebpf, args=(bpf,), kwargs={ + 'process': process, + 'interval': 5, + 'with_memory': with_ebpf_memory, + 'snapshot_prefix': "-".join(workload_names), + 'save_snapshots': f'./bench/ebpf/snapshots/{db_name}_{size}', + 'communicate_with_signals': True, + }) + thread.start() + if with_ebpf: + thread.join() + process.interact() process.close() @@ -189,6 +218,8 @@ def parse_args(): global cleanup_previous global drop_caches global run_in_docker_container + global with_ebpf + global with_ebpf_memory parser = argparse.ArgumentParser() @@ -268,6 +299,22 @@ def parse_args(): action=argparse.BooleanOptionalAction, default=run_in_docker_container, ) + parser.add_argument( + "-eb", + "--with-ebpf", + help="Runs ebpf benchmarks", + default=with_ebpf, + dest="with_ebpf", + action=argparse.BooleanOptionalAction + ) + parser.add_argument( + "-em", + "--with-ebpf-memory", + help="Enable memory related ebpf benchmarks", + default=with_ebpf_memory, + dest="with_ebpf_memory", + action=argparse.BooleanOptionalAction + ) args = parser.parse_args() db_names = args.db_names @@ -280,6 +327,8 @@ def parse_args(): cleanup_previous = args.cleanup_previous drop_caches = args.drop_caches run_in_docker_container = args.run_docker + with_ebpf = args.with_ebpf + with_ebpf_memory = args.with_ebpf_memory def check_args(): @@ -293,6 +342,10 @@ def check_args(): sys.exit("Database size(s) not specified") if not workload_names: 
sys.exit("Workload name(s) not specified") + if run_in_docker_container and with_ebpf: + sys.exit('Running ebpf benchmarks in docker container is not supported') + if with_ebpf_memory and not with_ebpf: + sys.exit('Memory related ebpf benchmarks require ebpf benchmarks to be enabled, run with --with-ebpf flag') def main() -> None: @@ -364,6 +417,8 @@ def main() -> None: threads_count, i, len(workload_names), + with_ebpf, + with_ebpf_memory ) else: run( @@ -378,6 +433,8 @@ def main() -> None: threads_count, 0, 1, + with_ebpf, + with_ebpf_memory ) diff --git a/src/bench.cxx b/src/bench.cxx index 9f5f3900..24cf0af8 100644 --- a/src/bench.cxx +++ b/src/bench.cxx @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -37,6 +38,7 @@ void parse_and_validate_args(int argc, char* argv[], settings_t& settings) { argparse::ArgumentParser program(argv[0]); program.add_argument("-db", "--db-name").required().help("Database name"); program.add_argument("-t", "--transaction").default_value(false).implicit_value(true).help("Transactional"); + program.add_argument("-l", "--lazy").default_value(false).implicit_value(true).help("Wait for a SIGUSR1"); program.add_argument("-cfg", "--config-path").required().help("Database configuration file path"); program.add_argument("-wl", "--workload-path").required().help("Workloads file path"); program.add_argument("-res", "--results-path").required().help("Results file path"); @@ -53,6 +55,7 @@ void parse_and_validate_args(int argc, char* argv[], settings_t& settings) { settings.db_name = program.get("db-name"); settings.transactional = program.get("transaction"); + settings.lazy = program.get("lazy"); settings.db_config_file_path = program.get("config-path"); settings.workloads_file_path = program.get("workload-path"); settings.results_file_path = program.get("results-path"); @@ -502,6 +505,21 @@ void bench(bm::State& state, workload_t const& workload, db_t& db, bool transact } } +void wait_for_SIGUSR1() { + int sig; + sigset_t set; + + // Create a signal set containing SIGUSR1 + sigemptyset(&set); + sigaddset(&set, SIGUSR1); + + // Block all signals in the set so that they don't terminate the program + sigprocmask(SIG_BLOCK, &set, NULL); + + // Wait for SIGUSR1 signal + sigwait(&set, &sig); +} + int main(int argc, char** argv) { try { @@ -509,6 +527,10 @@ int main(int argc, char** argv) { settings_t settings; parse_and_validate_args(argc, argv, settings); + if (settings.lazy) { + wait_for_SIGUSR1(); + } + // Resolve results paths fs::path final_results_file_path = settings.results_file_path; if (final_results_file_path.string().back() == '/') @@ -597,6 +619,14 @@ int main(int argc, char** argv) { file_reporter_t::merge_results(in_progress_results_file_path, final_results_file_path); fs::remove(in_progress_results_file_path); + + if (settings.lazy) { + __pid_t parent_pid = getppid(); + // Notify parent that we finished + kill(parent_pid, SIGUSR2); + wait_for_SIGUSR1(); + } + } catch (exception_t const& ex) { fmt::print("UCSB exception: {}\n", ex.what()); diff --git a/src/core/settings.hpp b/src/core/settings.hpp index 96703cfd..37de7622 100644 --- a/src/core/settings.hpp +++ b/src/core/settings.hpp @@ -9,6 +9,7 @@ namespace ucsb { struct settings_t { std::string db_name; bool transactional = false; + bool lazy = false; fs::path db_config_file_path; fs::path db_main_dir_path; std::vector db_storage_dir_paths; From 7cd35e357b3b45bdef1c1b1f04523e231afb51ff Mon Sep 17 00:00:00 2001 From: hov1417 Date: Sun, 6 Aug 2023 18:04:11 +0400 Subject: [PATCH 2/3] 
Improve: ebpf collect stack info and for syscalls --- ebpf/ebpf.py | 95 ++++++++++++++++++++++++++++++++++++------------ ebpf/ebpf_main.c | 44 ++++++++++++++++++---- run.py | 21 +++++++++-- 3 files changed, 126 insertions(+), 34 deletions(-) diff --git a/ebpf/ebpf.py b/ebpf/ebpf.py index 0e233e5a..02a2a915 100644 --- a/ebpf/ebpf.py +++ b/ebpf/ebpf.py @@ -18,6 +18,13 @@ logging.basicConfig(filename='/tmp/ebpf.log', encoding='utf-8', level=logging.DEBUG) +class SetEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, set): + return list(obj) + return json.JSONEncoder.default(self, obj) + + def get_size_filter(min_size, max_size): if min_size is not None and max_size is not None: return "if (size < %d || size > %d) return 0;" % (min_size, max_size) @@ -139,7 +146,7 @@ def print_memory_statistics(bpf, pid, top): print('\n'.join(reversed(entries))) -def get_statistics(bpf, pid): +def get_statistics(bpf, pid, min_age_ns, top): stack_traces = bpf["stack_traces"] combined_alloc = list( sorted( @@ -160,7 +167,8 @@ def get_statistics(bpf, pid): return { "memory": memory, "combined_allocs": list(reversed(entries)), - "stack_traces": len(list(stack_traces.items())) + "stack_traces": len(list(stack_traces.items())), + "outstanding": get_outstanding(bpf, pid, min_age_ns, top) } @@ -233,16 +241,20 @@ def print_syscalls(bpf, top): syscall_counts = bpf["syscall_counts"] print("SYSCALL COUNT TIME") for k, v in sorted(syscall_counts.items(), key=lambda kv: -kv[1].total_ns)[:top]: - print("%-22s %8d %16.3f" % (system_call_name(k), v.count, v.total_ns / 1e3)) + print("%-22s %8d %16.3f" % (system_call_name(k.value), v.count, v.total_ns / 1e3)) syscall_counts.clear() -def get_syscalls(bpf, top): +def get_syscalls(bpf): syscall_counts = bpf["syscall_counts"] - syscalls = [] - for k, v in sorted(syscall_counts.items(), key=lambda kv: -kv[1].total_ns)[:top]: - syscalls.append({ - 'name': system_call_name(k), + syscalls = {} + for k, v in syscall_counts.items(): + key = k.value + syscall_id = key >> 32 + thread_id = (1 << 32) & key + + syscalls.setdefault(thread_id, []).append({ + 'name': system_call_name(syscall_id), 'count': v.count, 'total_ns': v.total_ns, }) @@ -251,13 +263,36 @@ def get_syscalls(bpf, top): def system_call_name(k): - return syscall_name(k.value).decode('ascii') + if k == 435: + return "clone3" + return syscall_name(k).decode('ascii') def print_time(): print("[%s]" % strftime("%H:%M:%S")) +def get_syscall_stacks(bpf, pid): + syscall_counts_stacks = bpf['syscall_counts_stacks'] + syscalls_per_thread = {} + stack_ids = [] + pid_to_syscall_times = {} + for k, v in syscall_counts_stacks.items(): + stack_set = syscalls_per_thread.setdefault(v.pid_tgid, {}).setdefault(system_call_name(v.id), + {'stacks': set(), 'number': 0}) + stack_set['stacks'].add(v.stack_id) + stack_set['number'] += 1 + stack_ids.append(v.stack_id) + pid_to_syscall_times.setdefault(v.pid_tgid, []).append(k.value) + + stacks = {stack_id: get_trace_info_as_list(bpf, pid, bpf['stack_traces'], stack_id) for stack_id in stack_ids} + return { + 'syscalls': syscalls_per_thread, + 'stacks': stacks, + 'pid_to_syscall_times': pid_to_syscall_times + } + + def save_snapshot( bpf: BPF, pid: int, @@ -265,23 +300,32 @@ def save_snapshot( top: int, snapshot_dir: str, with_memory: bool, + with_syscall_stacks: bool, snapshot_prefix: Optional[str] = None ): current_time_millis = int(round(time() * 1000)) - stats = get_statistics(bpf, pid) if with_memory else {} - outstanding = get_outstanding(bpf, pid, min_age_ns, top) if 
with_memory else [] - kernel_caches = gernel_kernel_cache(bpf, top) if with_memory else [] - syscalls = get_syscalls(bpf, top) - snapshot = { - 'time': current_time_millis, - 'stats': stats, - 'outstanding': outstanding, - 'kernel_caches': kernel_caches, - 'syscalls': syscalls, - } + snapshot = {'time': current_time_millis} + if with_memory: + snapshot['memory_stats'] = get_statistics(bpf, pid, min_age_ns, top) + snapshot['kernel_caches'] = gernel_kernel_cache(bpf, top) + + snapshot['syscalls'] = get_syscalls(bpf) + + if with_syscall_stacks: + snapshot['syscall_stacks'] = get_syscall_stacks(bpf, pid) + os.makedirs(snapshot_dir, exist_ok=True) - with open(f'{snapshot_dir}/{snapshot_prefix or "snapshot"}-{current_time_millis}.json', 'w') as outfile: - json.dump(snapshot, outfile) + with open(get_result_file_name(snapshot_dir, snapshot_prefix or 'snapshot', 'json'), 'w') as outfile: + json.dump(snapshot, outfile, cls=SetEncoder) + + +def get_result_file_name(dir_name: str, prefix: str, suffix: str): + index = 0 + path = f'{dir_name}/{prefix}.{suffix}' + while os.path.isfile(path): + index += 1 + path = f'{dir_name}/{prefix}_{index}.{suffix}' + return path class Arguments: @@ -360,6 +404,7 @@ def attach_probes( max_alloc_size: Optional[int] = None, min_alloc_size: Optional[int] = None, with_memory: bool = False, + syscall_stacks: bool = False, communicate_with_signals: bool = False ) -> Optional[Tuple[BPF, int, spawn]]: if pid == -1 and command is None and process is None: @@ -386,6 +431,7 @@ def attach_probes( f"-DPAGE_SIZE={resource.getpagesize()}", f"-DFILTER_BY_SIZE={get_size_filter(min_alloc_size, max_alloc_size)}", "-DWITH_MEMORY" if with_memory else "", + "-DCOLLECT_SYSCALL_STACK_INFO" if syscall_stacks else "", ]) # Attaching probes @@ -435,7 +481,7 @@ def signal_handler(_sig_no, _stack_frame): def is_terminated(pid: int, process: Optional[spawn]): if process is None: try: - # Sending signal 0 to a pid will raise OSError if the process does not exist + # Sending signal 0 to a process with id {pid} will raise OSError if the process does not exist os.kill(pid, 0) except OSError: return True @@ -475,6 +521,7 @@ def harvest_ebpf( interval: int = 5, min_age_ns: int = 500, with_memory: bool = False, + with_syscall_stacks: bool = False, save_snapshots: Optional[str] = None, snapshot_prefix: Optional[str] = None, communicate_with_signals: bool = False, @@ -492,7 +539,7 @@ def harvest_ebpf( except KeyboardInterrupt: break if save_snapshots: - save_snapshot(bpf, pid, min_age_ns, top, save_snapshots, with_memory, snapshot_prefix) + save_snapshot(bpf, pid, min_age_ns, top, save_snapshots, with_memory, with_syscall_stacks, snapshot_prefix) else: print_ebpf_info(bpf, pid, min_age_ns, top) if is_terminated(pid, process): diff --git a/ebpf/ebpf_main.c b/ebpf/ebpf_main.c index da9974fe..2c7ea57f 100644 --- a/ebpf/ebpf_main.c +++ b/ebpf/ebpf_main.c @@ -5,8 +5,17 @@ #include #include +/** + * bpf_get_current_pid_tgid may be confusing so here is a short explanation: + * it returns + * tgid << 32 | pid + * here pid is the thread id, and tgid (thread group id) is the process id + * so we need to shift by 32 bits to get the process id + */ #define FILTER_BY_PID u32 __pid = bpf_get_current_pid_tgid() >> 32;if (__pid != PROCESS_ID) {return 0;} +BPF_STACK_TRACE(stack_traces, 10000); + #ifdef WITH_MEMORY struct alloc_info_t { @@ -23,14 +32,12 @@ struct combined_alloc_info_t { }; // count of allocations per stack trace -BPF_HASH(combined_allocs, u64, struct combined_alloc_info_t, 10240); 
+BPF_HASH(combined_allocs, u64, struct combined_alloc_info_t, 10000); BPF_HASH(sizes, u64); -BPF_HASH(allocs, u64, struct alloc_info_t, 1000000); +BPF_HASH(allocs, u64, struct alloc_info_t, 10000); BPF_HASH(memptrs, u64, u64); -BPF_STACK_TRACE(stack_traces, 10240); - static inline void update_statistics_add(u64 stack_id, u64 sz) { struct combined_alloc_info_t *existing_cinfo; struct combined_alloc_info_t cinfo = {0}; @@ -380,7 +387,7 @@ int trace_cache_free(struct pt_regs *ctx, struct kmem_cache *cachep) { return 0; } -#endif +#endif // WITH_MEMORY /** System Calls */ @@ -390,13 +397,34 @@ struct sys_call_data_t { u64 total_ns; }; BPF_HASH(syscall_start, u64, u64); -BPF_HASH(syscall_counts, u32, struct sys_call_data_t); +BPF_HASH(syscall_counts, u64, struct sys_call_data_t); + +#ifdef COLLECT_SYSCALL_STACK_INFO + +struct sys_call_stack_t { + u32 id; + int stack_id; + u64 pid_tgid; +}; + +BPF_HASH(syscall_counts_stacks, u64, struct sys_call_stack_t, 10000); + +#endif // COLLECT_SYSCALL_STACK_INFO int tracepoint__raw_syscalls__sys_enter(struct tracepoint__raw_syscalls__sys_enter *args) { FILTER_BY_PID u64 pid_tgid = bpf_get_current_pid_tgid(); u64 t = bpf_ktime_get_ns(); syscall_start.update(&pid_tgid, &t); +#ifdef COLLECT_SYSCALL_STACK_INFO + struct sys_call_stack_t zero = {}; + struct sys_call_stack_t* data = syscall_counts_stacks.lookup_or_try_init(&t, &zero); + if (data) { + data->id = args->id; + data->stack_id = stack_traces.get_stackid(args, BPF_F_USER_STACK); + data->pid_tgid = pid_tgid; + } +#endif // COLLECT_SYSCALL_STACK_INFO return 0; } @@ -409,7 +437,9 @@ int tracepoint__raw_syscalls__sys_exit(struct tracepoint__raw_syscalls__sys_exit u64 *start_ns = syscall_start.lookup(&pid_tgid); if (!start_ns) return 0; - u32 key = args->id; + u64 syscall_id = args->id; + u32 thread_id = pid_tgid; + u64 key = (syscall_id << 32) | thread_id; val = syscall_counts.lookup_or_try_init(&key, &zero); if (val) { lock_xadd(&val->count, 1); diff --git a/run.py b/run.py index 469c3452..eaa8cc72 100755 --- a/run.py +++ b/run.py @@ -58,6 +58,7 @@ run_in_docker_container = False with_ebpf = False with_ebpf_memory = False +with_syscall_stacks = False main_dir_path = "./db_main/" storage_disk_paths = [ @@ -145,7 +146,8 @@ def run( runs_count: int, with_ebpf: bool, with_ebpf_memory: bool -) -> None: +, + with_syscall_stacks: bool) -> None: db_config_file_path = get_db_config_file_path(db_name, size) workloads_file_path = get_workloads_file_path(size) db_main_dir_path = get_db_main_dir_path(db_name, size, main_dir_path) @@ -184,6 +186,7 @@ def run( 'process': process, 'interval': 5, 'with_memory': with_ebpf_memory, + 'with_syscall_stacks': with_syscall_stacks, 'snapshot_prefix': "-".join(workload_names), 'save_snapshots': f'./bench/ebpf/snapshots/{db_name}_{size}', 'communicate_with_signals': True, @@ -220,6 +223,7 @@ def parse_args(): global run_in_docker_container global with_ebpf global with_ebpf_memory + global with_syscall_stacks parser = argparse.ArgumentParser() @@ -315,6 +319,14 @@ def parse_args(): dest="with_ebpf_memory", action=argparse.BooleanOptionalAction ) + parser.add_argument( + "-es", + "--with-ebpf-syscall-stacks", + help="Collect eBPF syscall stack traces", + default=with_syscall_stacks, + dest="with_syscall_stacks", + action=argparse.BooleanOptionalAction + ) args = parser.parse_args() db_names = args.db_names @@ -329,6 +341,7 @@ def parse_args(): run_in_docker_container = args.run_docker with_ebpf = args.with_ebpf with_ebpf_memory = args.with_ebpf_memory + with_syscall_stacks = 
args.with_syscall_stacks def check_args(): @@ -418,7 +431,8 @@ def main() -> None: i, len(workload_names), with_ebpf, - with_ebpf_memory + with_ebpf_memory, + with_syscall_stacks ) else: run( @@ -434,7 +448,8 @@ def main() -> None: 0, 1, with_ebpf, - with_ebpf_memory + with_ebpf_memory, + with_syscall_stacks ) From 18bb8bb57706ac4c6d4c2fb746020307c532e346 Mon Sep 17 00:00:00 2001 From: hov1417 Date: Sun, 6 Aug 2023 18:09:08 +0400 Subject: [PATCH 3/3] Fix: some fixes and optimizations removing eBPF script's stand-alone mode more details about syscalls --- ebpf/ebpf.py | 225 ++++++---------------------------------- run.py | 42 ++++---- src/leveldb/leveldb.hpp | 2 +- 3 files changed, 51 insertions(+), 218 deletions(-) diff --git a/ebpf/ebpf.py b/ebpf/ebpf.py index 02a2a915..913dd1d1 100644 --- a/ebpf/ebpf.py +++ b/ebpf/ebpf.py @@ -1,13 +1,11 @@ #!/usr/bin/env python3 -import argparse import json import logging import os -import signal import resource -import sys -from time import sleep, strftime, time +import signal +from time import sleep, time from typing import Optional, Tuple import pexpect @@ -48,31 +46,6 @@ def update(self, size): self.size += size -def print_outstanding(bpf, pid, top, min_age_ns): - print(f"Top {top} stacks with outstanding allocations:") - alloc_info = {} - allocs = bpf["allocs"] - stack_traces = bpf["stack_traces"] - for address, info in sorted(allocs.items(), key=lambda a: a[1].size): - if BPF.monotonic_time() - min_age_ns < info.timestamp_ns or info.stack_id < 0: - continue - if info.stack_id in alloc_info: - alloc_info[info.stack_id].update(info.size) - else: - stack = list(stack_traces.walk(info.stack_id)) - combined = [] - for addr in stack: - func_name = bpf.sym(addr, pid, show_module=True, show_offset=True) - formatted_address = ('0x' + format(addr, '016x') + '\t').encode('utf-8') - combined.append(formatted_address + func_name) - alloc_info[info.stack_id] = Allocation(combined, info.size, address.value) - print(f"\taddr = {address.value} size = {info.size}") - to_show = sorted(alloc_info.values(), key=lambda a: a.size)[-top:] - for alloc in to_show: - stack = b"\n\t\t".join(alloc.stack).decode("ascii") - print(f"\t{alloc.size} bytes in {alloc.count} allocations from stack\n\t\t{stack}") - - def get_outstanding(bpf, pid, min_age_ns, top): alloc_info = {} allocs = bpf["allocs"] @@ -122,30 +95,6 @@ def is_positive(self): return self.alloc_size > self.free_size -def print_memory_statistics(bpf, pid, top): - stack_traces = bpf["stack_traces"] - print("stack traces", len(list(stack_traces.items()))) - combined_alloc = list( - sorted( - map(CombinedAlloc, bpf["combined_allocs"].items()), - key=lambda a: a.key(), - ) - ) - memory = sum((c.alloc_size - c.free_size for c in combined_alloc)) / 1024 - print("overall, allocated", memory, "kb in", len(combined_alloc), "allocations") - entries = [] - for allocation in combined_alloc[:top]: - trace = get_trace_info(bpf, pid, stack_traces, allocation.stack_id.value) - entry = f"\t{allocation.alloc_size - allocation.free_size} bytes in " \ - f"{allocation.number_of_allocs - allocation.number_of_frees}" \ - f" allocations from stack ({allocation.number_of_allocs + allocation.number_of_frees} allocs/frees)" \ - f"\n\t\t{trace}" - entries.append(entry) - - print(f"Top {top} stacks with outstanding allocations:") - print('\n'.join(reversed(entries))) - - def get_statistics(bpf, pid, min_age_ns, top): stack_traces = bpf["stack_traces"] combined_alloc = list( @@ -200,18 +149,6 @@ def walk_trace(stack_traces, stack_id): return [] 
-def print_outstanding_kernel_cache(bpf, top): - kernel_cache_allocs = list( - sorted(filter(lambda a: a[1].alloc_size > a[1].free_size, bpf['kernel_cache_counts'].items()), - key=lambda a: a[1].alloc_size - a[1].free_size) - )[:top] - if not kernel_cache_allocs: - return - print("---------------- Kernel Caches ---------------") - for (k, v) in kernel_cache_allocs: - print("Cache", str(k.name, "utf-8"), v.alloc_count - v.free_count, v.alloc_size - v.free_size) - - def gernel_kernel_cache(bpf, top): kernel_cache_allocs = list( sorted(filter(lambda a: a[1].alloc_size > a[1].free_size, bpf['kernel_cache_counts'].items()), @@ -237,21 +174,13 @@ def gernel_kernel_cache(bpf, top): return caches -def print_syscalls(bpf, top): - syscall_counts = bpf["syscall_counts"] - print("SYSCALL COUNT TIME") - for k, v in sorted(syscall_counts.items(), key=lambda kv: -kv[1].total_ns)[:top]: - print("%-22s %8d %16.3f" % (system_call_name(k.value), v.count, v.total_ns / 1e3)) - syscall_counts.clear() - - def get_syscalls(bpf): syscall_counts = bpf["syscall_counts"] syscalls = {} for k, v in syscall_counts.items(): key = k.value syscall_id = key >> 32 - thread_id = (1 << 32) & key + thread_id = ((1 << 32) - 1) & key syscalls.setdefault(thread_id, []).append({ 'name': system_call_name(syscall_id), @@ -268,28 +197,28 @@ def system_call_name(k): return syscall_name(k).decode('ascii') -def print_time(): - print("[%s]" % strftime("%H:%M:%S")) - - -def get_syscall_stacks(bpf, pid): +def get_additional_syscall_info(bpf, pid): syscall_counts_stacks = bpf['syscall_counts_stacks'] syscalls_per_thread = {} - stack_ids = [] - pid_to_syscall_times = {} + tid_to_syscall_timestamps = {} for k, v in syscall_counts_stacks.items(): - stack_set = syscalls_per_thread.setdefault(v.pid_tgid, {}).setdefault(system_call_name(v.id), - {'stacks': set(), 'number': 0}) - stack_set['stacks'].add(v.stack_id) - stack_set['number'] += 1 - stack_ids.append(v.stack_id) - pid_to_syscall_times.setdefault(v.pid_tgid, []).append(k.value) - - stacks = {stack_id: get_trace_info_as_list(bpf, pid, bpf['stack_traces'], stack_id) for stack_id in stack_ids} + thread_id = ((1 << 32) - 1) & v.pid_tgid + stack_dict = syscalls_per_thread.setdefault(thread_id, {}).setdefault(system_call_name(v.id), {}) + stack_trace = '\n\t'.join(get_trace_info_as_list(bpf, pid, bpf['stack_traces'], v.stack_id)) + stack_dict[stack_trace] = stack_dict.get(stack_trace, 0) + 1 + tid_to_syscall_timestamps.setdefault(thread_id, []).append(k.value) + + syscalls_per_thread = {thread_id: { + call_name: [ + { + 'stack_trace': trace.split('\n\t'), + 'count': count, + } for (trace, count) in per_syscall.items() + ] for (call_name, per_syscall) in calls.items()} + for (thread_id, calls) in syscalls_per_thread.items()} return { - 'syscalls': syscalls_per_thread, - 'stacks': stacks, - 'pid_to_syscall_times': pid_to_syscall_times + 'syscall_stacks': syscalls_per_thread, + 'tid_to_syscall_timestamps': tid_to_syscall_timestamps } @@ -300,19 +229,20 @@ def save_snapshot( top: int, snapshot_dir: str, with_memory: bool, - with_syscall_stacks: bool, + with_syscall_details: bool, snapshot_prefix: Optional[str] = None ): current_time_millis = int(round(time() * 1000)) snapshot = {'time': current_time_millis} + if with_memory: snapshot['memory_stats'] = get_statistics(bpf, pid, min_age_ns, top) snapshot['kernel_caches'] = gernel_kernel_cache(bpf, top) snapshot['syscalls'] = get_syscalls(bpf) - if with_syscall_stacks: - snapshot['syscall_stacks'] = get_syscall_stacks(bpf, pid) + if 
with_syscall_details: + snapshot['syscall_details'] = get_additional_syscall_info(bpf, pid) os.makedirs(snapshot_dir, exist_ok=True) with open(get_result_file_name(snapshot_dir, snapshot_prefix or 'snapshot', 'json'), 'w') as outfile: @@ -328,74 +258,6 @@ def get_result_file_name(dir_name: str, prefix: str, suffix: str): return path -class Arguments: - def __init__(self, args): - self.pid = args.pid - self.command = args.command - self.interval = args.interval - self.min_age_ns = 1e6 * args.older - self.alloc_sample_every_n = args.alloc_sample_rate - self.top = args.top - self.min_alloc_size = args.min_alloc_size - self.max_alloc_size = args.max_alloc_size - - if args.snapshots is None: - self.save_snapshots = './snapshots' - elif args.snapshots == -1: - self.save_snapshots = None - else: - self.save_snapshots = args.snapshots - - if self.min_alloc_size is not None and self.max_alloc_size is not None \ - and self.min_alloc_size > self.max_alloc_size: - print("min_size (-z) can't be greater than max_size (-Z)") - exit(1) - - if self.command is None and self.pid is None: - print("Either -p or -c must be specified") - exit(1) - - -def parse(): - parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument("-p", "--pid", type=int, default=-1, - help="the PID to trace; if not specified, trace kernel allocs") - parser.add_argument("interval", nargs="?", default=5, type=int, - help="interval in seconds to print outstanding allocations") - parser.add_argument("-o", "--older", default=500, type=int, - help="prune allocations younger than this age in milliseconds") - parser.add_argument("-c", "--command", - help="execute and trace the specified command") - parser.add_argument("-s", "--alloc-sample-rate", default=1, type=int, - help="sample every N-th allocation to decrease the overhead") - parser.add_argument("-T", "--top", type=int, default=10, - help="display only this many top stats") - parser.add_argument("-z", "--min-alloc-size", type=int, - help="capture only allocations larger than this size") - parser.add_argument("-Z", "--max-alloc-size", type=int, - help="capture only allocations smaller than this size") - parser.add_argument("-S", "--snapshots", default=-1, type=str, nargs='?', - help="save statistics snapshots to the specified directory") - return Arguments(parser.parse_args()) - - -def attach_ebpf(pid: int = -1, - process: Optional[spawn] = None, - command: Optional[str] = None, - top: int = 10, - interval: int = 5, - alloc_sample_every_n: int = 1, - max_alloc_size: Optional[int] = None, - min_age_ns: int = 500, - min_alloc_size: Optional[int] = None, - save_snapshots: Optional[str] = None, - with_memory: bool = False): - (bpf, pid, process) = attach_probes(pid, process, command, alloc_sample_every_n, max_alloc_size, min_alloc_size, - with_memory) - - harvest_ebpf(bpf, pid, process, top, interval, min_age_ns, save_snapshots) - - def attach_probes( pid: int = -1, process: Optional[spawn] = None, @@ -404,7 +266,7 @@ def attach_probes( max_alloc_size: Optional[int] = None, min_alloc_size: Optional[int] = None, with_memory: bool = False, - syscall_stacks: bool = False, + syscall_details: bool = False, communicate_with_signals: bool = False ) -> Optional[Tuple[BPF, int, spawn]]: if pid == -1 and command is None and process is None: @@ -431,7 +293,7 @@ def attach_probes( f"-DPAGE_SIZE={resource.getpagesize()}", f"-DFILTER_BY_SIZE={get_size_filter(min_alloc_size, max_alloc_size)}", "-DWITH_MEMORY" if with_memory else "", - 
"-DCOLLECT_SYSCALL_STACK_INFO" if syscall_stacks else "", + "-DCOLLECT_SYSCALL_STACK_INFO" if syscall_details else "", ]) # Attaching probes @@ -445,7 +307,6 @@ def attach_probes( sleep(wait_interval) wait_time += wait_interval if process.terminated: - print(process.readline().decode('utf-8')) raise Exception(f'Process is already terminated, with status code {process.exitstatus}') if wait_time > timeout: raise Exception('Process is not alive') @@ -503,16 +364,6 @@ def sleep_and_check( break -def print_ebpf_info(bpf, pid, min_age_ns, top): - print_time() - print_memory_statistics(bpf, pid, top) - print_outstanding(bpf, pid, top, min_age_ns) - print_outstanding_kernel_cache(bpf, top) - print_syscalls(bpf, top) - print() - sys.stdout.flush() - - def harvest_ebpf( bpf: BPF, pid: int = -1, @@ -521,7 +372,7 @@ def harvest_ebpf( interval: int = 5, min_age_ns: int = 500, with_memory: bool = False, - with_syscall_stacks: bool = False, + with_syscall_details: bool = False, save_snapshots: Optional[str] = None, snapshot_prefix: Optional[str] = None, communicate_with_signals: bool = False, @@ -538,10 +389,7 @@ def harvest_ebpf( sleep_and_check(interval, pid, process, check_delay=0.2) except KeyboardInterrupt: break - if save_snapshots: - save_snapshot(bpf, pid, min_age_ns, top, save_snapshots, with_memory, with_syscall_stacks, snapshot_prefix) - else: - print_ebpf_info(bpf, pid, min_age_ns, top) + save_snapshot(bpf, pid, min_age_ns, top, save_snapshots, with_memory, with_syscall_details, snapshot_prefix) if is_terminated(pid, process): break @@ -551,18 +399,3 @@ def harvest_ebpf( logging.info("Detaching...") bpf.cleanup() - - -if __name__ == "__main__": - arguments = parse() - attach_ebpf( - pid=arguments.pid, - command=arguments.command, - top=arguments.top, - interval=arguments.interval, - alloc_sample_every_n=arguments.alloc_sample_every_n, - max_alloc_size=arguments.max_alloc_size, - min_age_ns=arguments.min_age_ns, - min_alloc_size=arguments.min_alloc_size, - save_snapshots=arguments.save_snapshots, - ) diff --git a/run.py b/run.py index eaa8cc72..dbe638e8 100755 --- a/run.py +++ b/run.py @@ -58,7 +58,7 @@ run_in_docker_container = False with_ebpf = False with_ebpf_memory = False -with_syscall_stacks = False +with_syscall_details = False main_dir_path = "./db_main/" storage_disk_paths = [ @@ -145,9 +145,8 @@ def run( run_index: int, runs_count: int, with_ebpf: bool, - with_ebpf_memory: bool -, - with_syscall_stacks: bool) -> None: + with_ebpf_memory: bool, + with_syscall_details: bool) -> None: db_config_file_path = get_db_config_file_path(db_name, size) workloads_file_path = get_workloads_file_path(size) db_main_dir_path = get_db_main_dir_path(db_name, size, main_dir_path) @@ -157,7 +156,7 @@ def run( ) transactional_flag = "-t" if transactional else "" - lazy_flag = '-l' if with_ebpf else '' + lazy_flag = "-l" if with_ebpf else "" filter = ",".join(workload_names) db_storage_dir_paths = ",".join(db_storage_dir_paths) @@ -177,19 +176,20 @@ def run( from ebpf.ebpf import attach_probes, harvest_ebpf bpf, pid, process = attach_probes( process=process, + syscall_details=with_syscall_details, with_memory=with_ebpf_memory, communicate_with_signals=True, ) # Send SIGUSR1 to the process to notify it that the probes are attached os.kill(process.pid, signal.SIGUSR1) thread = Thread(target=harvest_ebpf, args=(bpf,), kwargs={ - 'process': process, - 'interval': 5, - 'with_memory': with_ebpf_memory, - 'with_syscall_stacks': with_syscall_stacks, - 'snapshot_prefix': "-".join(workload_names), - 
'save_snapshots': f'./bench/ebpf/snapshots/{db_name}_{size}',
-            'communicate_with_signals': True,
+            "process": process,
+            "interval": 5,
+            "with_memory": with_ebpf_memory,
+            "with_syscall_details": with_syscall_details,
+            "snapshot_prefix": "-".join(workload_names),
+            "save_snapshots": f"./bench/ebpf/snapshots/{db_name}_{size}",
+            "communicate_with_signals": True,
         })
         thread.start()
     if with_ebpf:
@@ -223,7 +223,7 @@ def parse_args():
     global run_in_docker_container
     global with_ebpf
     global with_ebpf_memory
-    global with_syscall_stacks
+    global with_syscall_details

     parser = argparse.ArgumentParser()

@@ -321,10 +321,10 @@ def parse_args():
     )
     parser.add_argument(
         "-es",
-        "--with-ebpf-syscall-stacks",
-        help="Collect eBPF syscall stack traces",
-        default=with_syscall_stacks,
-        dest="with_syscall_stacks",
+        "--with-ebpf-syscall-details",
+        help="Collect detailed eBPF syscall info (per-thread stack traces and timestamps)",
+        default=with_syscall_details,
+        dest="with_syscall_details",
         action=argparse.BooleanOptionalAction
     )

@@ -341,7 +341,7 @@ def parse_args():
     run_in_docker_container = args.run_docker
     with_ebpf = args.with_ebpf
     with_ebpf_memory = args.with_ebpf_memory
-    with_syscall_stacks = args.with_syscall_stacks
+    with_syscall_details = args.with_syscall_details


 def check_args():
@@ -356,9 +356,9 @@ def check_args():
     if not workload_names:
         sys.exit("Workload name(s) not specified")
     if run_in_docker_container and with_ebpf:
-        sys.exit('Running ebpf benchmarks in docker container is not supported')
+        sys.exit("Running ebpf benchmarks in docker container is not supported")
     if with_ebpf_memory and not with_ebpf:
-        sys.exit('Memory related ebpf benchmarks require ebpf benchmarks to be enabled, run with --with-ebpf flag')
+        sys.exit("Memory related ebpf benchmarks require ebpf benchmarks to be enabled, run with --with-ebpf flag")


 def main() -> None:
@@ -432,7 +432,7 @@ def main() -> None:
             len(workload_names),
             with_ebpf,
             with_ebpf_memory,
-            with_syscall_stacks
+            with_syscall_details
         )
     else:
         run(
@@ -449,7 +449,7 @@ def main() -> None:
             1,
             with_ebpf,
             with_ebpf_memory,
-            with_syscall_stacks
+            with_syscall_details
         )


diff --git a/src/leveldb/leveldb.hpp b/src/leveldb/leveldb.hpp
index 4b72918d..e0aca0e3 100644
--- a/src/leveldb/leveldb.hpp
+++ b/src/leveldb/leveldb.hpp
@@ -195,7 +195,7 @@ operation_result_t leveldb_t::remove(key_t key) {

 operation_result_t leveldb_t::read(key_t key, value_span_t value) const {
-    // Unlike RocksDB, we can't read into some form fo a `PinnableSlice`,
+    // Unlike RocksDB, we can't read into some form of a `PinnableSlice`,
     // just `std::string`, causing heap allocations.
     std::string data;
     leveldb::Status status = db_->Get(read_options_, to_slice(key), &data);