From dc30d84d4b4caa969f48fe18507756f5d08f3390 Mon Sep 17 00:00:00 2001
From: Ism Hong <ism.hong@gmail.com>
Date: Fri, 3 Oct 2025 17:51:13 +0800
Subject: [PATCH] libbpf-tools/stackcount: Add libbpf-based stackcount tool

This commit introduces a libbpf version of the stackcount tool, based on
the Python-based BCC stackcount. Its usage is the same as the original.

Change-Id: I1b057cded14f5803ebba9840837d903df25257f9
Signed-off-by: Ism Hong <ism.hong@gmail.com>
---
 libbpf-tools/.gitignore       |   1 +
 libbpf-tools/Makefile         |   1 +
 libbpf-tools/stackcount.bpf.c |  87 +++++
 libbpf-tools/stackcount.c     | 584 ++++++++++++++++++++++++++++++++++
 libbpf-tools/stackcount.h     |  15 +
 5 files changed, 688 insertions(+)
 create mode 100644 libbpf-tools/stackcount.bpf.c
 create mode 100644 libbpf-tools/stackcount.c
 create mode 100644 libbpf-tools/stackcount.h

diff --git a/libbpf-tools/.gitignore b/libbpf-tools/.gitignore
index 14e91ec45ad0..b45dfd85685b 100644
--- a/libbpf-tools/.gitignore
+++ b/libbpf-tools/.gitignore
@@ -54,6 +54,7 @@
 /slabratetop
 /softirqs
 /solisten
+/stackcount
 /statsnoop
 /syncsnoop
 /syscount
diff --git a/libbpf-tools/Makefile b/libbpf-tools/Makefile
index a442da985a6e..0df6ffa197a0 100644
--- a/libbpf-tools/Makefile
+++ b/libbpf-tools/Makefile
@@ -80,6 +80,7 @@ APPS = \
 	slabratetop \
 	softirqs \
 	solisten \
+	stackcount \
 	statsnoop \
 	syncsnoop \
 	syscount \
diff --git a/libbpf-tools/stackcount.bpf.c b/libbpf-tools/stackcount.bpf.c
new file mode 100644
index 000000000000..bdc60f5a83cd
--- /dev/null
+++ b/libbpf-tools/stackcount.bpf.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2025, Realtek
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "stackcount.h"
+#include "core_fixes.bpf.h"
+
+#define MAX_ENTRIES 10240
+
+const volatile int target_pid = 0;	/* tgid to trace; 0 disables the filter */
+const volatile int target_cpu = -1;	/* CPU to trace; a negative value disables the filter */
+const volatile bool kernel_stacks_only = false;	/* when set, the user stack is not captured */
+const volatile bool user_stacks_only = false;	/* when set, the kernel stack is not captured */
+const volatile bool per_pid = false;	/* when set, tgid and comm become part of the key */
+
+struct {	/* event count for each distinct struct key_t */
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, struct key_t);
+	__type(value, u64);
+} counts SEC(".maps");
+
+struct {	/* stacks addressed by the ids bpf_get_stackid() returns */
+	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
+	__type(key, u32);	/* value size and max_entries are set from user space before load */
+} stack_traces SEC(".maps");
+
+/* Count one event under a key built from its stack ids (plus tgid/comm when per_pid). */
+static int trace_count(void *ctx)
+{
+	u32 tgid = bpf_get_current_pid_tgid() >> 32;
+	struct key_t key = {};
+	u64 zero = 0, *count;
+
+	if (target_cpu >= 0 && bpf_get_smp_processor_id() != (u32)target_cpu)
+		return 0;
+	if (target_pid != 0 && tgid != target_pid)
+		return 0;
+
+	if (per_pid) {
+		key.tgid = tgid;
+		bpf_get_current_comm(&key.name, sizeof(key.name));
+	}
+
+	/* A capture that is skipped or fails leaves a negative id in the key. */
+	key.kernel_stack_id = -1;
+	key.user_stack_id = -1;
+	if (!user_stacks_only)
+		key.kernel_stack_id = bpf_get_stackid(ctx, &stack_traces, 0);
+	if (!kernel_stacks_only)
+		key.user_stack_id = bpf_get_stackid(ctx, &stack_traces, BPF_F_USER_STACK);
+
+	count = bpf_map_lookup_elem(&counts, &key);
+	if (!count) {
+		/*
+		 * BPF_NOEXIST keeps a racing first insert by another CPU intact.
+		 */
+		bpf_map_update_elem(&counts, &key, &zero, BPF_NOEXIST);
+		count = bpf_map_lookup_elem(&counts, &key);
+		if (!count)
+			return 0;
+	}
+	__sync_fetch_and_add(count, 1);
+	return 0;
+}
+
+SEC("kprobe/dummy")	/* placeholder target; user space attaches this program by function name */
+int BPF_KPROBE(kprobe_prog)
+{
+	return trace_count(ctx);
+}
+
+SEC("tracepoint/dummy/dummy")	/* placeholder target; user space attaches to category:event */
+int BPF_PROG(tp_prog)
+{
+	return trace_count(ctx);
+}
+
+SEC("uprobe/dummy")	/* placeholder target; user space attaches to library:function */
+int BPF_KPROBE(uprobe_prog)
+{
+	return trace_count(ctx);
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/libbpf-tools/stackcount.c b/libbpf-tools/stackcount.c
new file mode 100644
index 000000000000..9aecc9ebb7d1
--- /dev/null
+++ b/libbpf-tools/stackcount.c
@@ -0,0 +1,584 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2025, Ism Hong
+ *
+ * Based on stackcount(8) from BCC by Brendan Gregg and others.
+ * 2025-10-13   Ism Hong   Created this.
+ *
+ * TODO:
+ * - Add regex support
+ */
+#include <argp.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <regex.h>
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+#include <fnmatch.h>
+#include <fcntl.h>
+#include "stackcount.h"
+#include "stackcount.skel.h"
+#include "trace_helpers.h"
+
+#ifndef USE_BLAZESYM	/* stub usyms API for builds without USE_BLAZESYM */
+struct usyms;
+struct usym {
+	const char *name;
+};
+static struct usyms *usyms__new(pid_t pid, const char *path) { return NULL; }	/* stub: always NULL */
+static void usyms__free(struct usyms *usyms) {}	/* stub: nothing to release */
+static const struct usym *usyms__lookup_addr(struct usyms *usyms, unsigned long long addr) { return NULL; }	/* stub: never resolves */
+#endif
+
+#define warn(...) fprintf(stderr, __VA_ARGS__)	/* printf-style diagnostic on stderr */
+
+#define OPT_PERF_MAX_STACK_DEPTH 1	/* argp key of --perf-max-stack-depth (no short option) */
+#define OPT_STACK_STORAGE_SIZE 2	/* argp key of --stack-storage-size (no short option) */
+
+static struct env {	/* command-line settings, filled in by parse_arg() */
+	pid_t pid;	/* -p: tgid filter, 0 = no filter */
+	int cpu;	/* -c: CPU filter, -1 = no filter */
+	long interval;	/* -i: seconds slept between summaries */
+	long duration;	/* -D: total duration of trace, seconds (per the option help) */
+	bool timestamp;	/* -T: print a HH:MM:SS line before each summary */
+	bool regexp;	/* -r: stored only; NOTE(review): attach_kprobes() matches with fnmatch() regardless */
+	bool offset;	/* -s: print symbol+offset for kernel frames */
+	bool perpid;	/* -P: key and print stacks per process */
+	bool kernel_stacks_only;	/* -K */
+	bool user_stacks_only;	/* -U */
+	bool verbose;	/* -v: raw addresses in output, and libbpf debug messages */
+	bool delimited;	/* -d: separator between kernel and user frames */
+	bool folded;	/* -f: one line per stack */
+	char *pattern;	/* positional argument: function glob, t:category:event or u:library:function */
+	char *cgroup_path;	/* -C: path opened in main() */
+	bool cg;	/* true when -C was given */
+	int stack_storage_size;	/* max_entries of the stack_traces map */
+	int perf_max_stack_depth;	/* frames per stored stack */
+} env = {
+	.interval = 99999999,
+	.duration = 99999999,
+	.cpu = -1,
+	.stack_storage_size = 1024,
+	.perf_max_stack_depth = 127,
+};
+
+static volatile bool exiting;	/* set by sig_handler(); polled by the main loop */
+static struct bpf_link **links = NULL;	/* attached probe links, destroyed at the end of main() */
+static int num_links = 0;	/* number of valid entries in links */
+
+const char *argp_program_version = "stackcount 0.1";
+const char *argp_program_bug_address =
+	"https://github.com/iovisor/bcc/tree/master/libbpf-tools";
+const char argp_program_doc[] =	/* adjacent literals concatenate: each output line needs its own \n */
+"Count events and their stack traces.\n"
+"\n"
+"USAGE: stackcount [-h] [-p PID] [-c CPU] [-i INTERVAL] [-D DURATION] [-T]\n"
+"                  [-r] [-s] [-P] [-K] [-U] [-v] [-d] [-f] pattern\n"
+"\n"
+"EXAMPLES:\n"
+"    ./stackcount submit_bio         # count kernel stack traces for submit_bio\n"
+"    ./stackcount -d ip_output       # include a user/kernel stack delimiter\n"
+"    ./stackcount -s ip_output       # show symbol offsets\n"
+"    ./stackcount -sv ip_output      # show offsets and raw addresses (verbose)\n"
+"    ./stackcount 'tcp_send*'        # count stacks for funcs matching tcp_send*\n"
+"    ./stackcount -r '^tcp_send.*'   # same as above, using regular expressions\n"
+"    ./stackcount -Ti 5 ip_output    # output every 5 seconds, with timestamps\n"
+"    ./stackcount -p 185 ip_output   # count ip_output stacks for PID 185 only\n"
+"    ./stackcount -c 1 put_prev_entity   # count put_prev_entity stacks for CPU 1 only\n"
+"    ./stackcount -p 185 c:malloc    # count stacks for malloc in PID 185\n"
+"    ./stackcount t:sched:sched_fork # count stacks for sched_fork tracepoint\n"
+"    ./stackcount -p 185 u:node:*    # count stacks for all USDT probes in node\n"
+"    ./stackcount -K t:sched:sched_switch   # kernel stacks only\n"
+"    ./stackcount -U t:sched:sched_switch   # user stacks only\n";
+
+static const struct argp_option opts[] = {	/* option table; keys are handled in parse_arg() */
+	{ "pid", 'p', "PID", 0, "Trace this PID only", 0 },
+	{ "cpu", 'c', "CPU", 0, "Trace this CPU only", 0 },
+	{ "interval", 'i', "SECONDS", 0, "Summary interval, seconds", 0 },
+	{ "duration", 'D', "SECONDS", 0, "Total duration of trace, seconds", 0 },
+	{ "timestamp", 'T', NULL, 0, "Include timestamp on output", 0 },
+	{ "regexp", 'r', NULL, 0, "Use regular expressions. "
+		"Default is '*' wildcards only.", 0 },
+	{ "offset", 's', NULL, 0, "Show address offsets", 0 },
+	{ "perpid", 'P', NULL, 0, "Display stacks separately for each process", 0 },
+	{ "kernel-stacks-only", 'K', NULL, 0, "kernel stack only", 0 },
+	{ "user-stacks-only", 'U', NULL, 0, "user stack only", 0 },
+	{ "verbose", 'v', NULL, 0, "Show raw addresses", 0 },
+	{ "delimited", 'd', NULL, 0, "Insert delimiter between kernel/user stacks", 0 },
+	{ "folded", 'f', NULL, 0, "Output folded format", 0 },
+	{ "cgroup", 'C', "/sys/fs/cgroup/unified", 0, "Trace process in cgroup path", 0 },
+	{ "stack-storage-size", OPT_STACK_STORAGE_SIZE, "SIZE", 0,	/* long option only */
+		"The number of unique stack traces that can be stored and displayed "
+		"(default 1024)", 0 },
+	{ "perf-max-stack-depth", OPT_PERF_MAX_STACK_DEPTH, "DEPTH", 0,	/* long option only */
+		"The limit for both kernel and user stack traces (default 127)", 0 },
+	{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help", 0 },
+	{},	/* terminating entry */
+};
+
+/* Parse a decimal option value in [min, INT_MAX]; bad input is reported via argp_usage(). */
+static long parse_num(const char *arg, const char *what, long min, struct argp_state *state)
+{
+	char *end;
+	long val;
+
+	errno = 0;
+	val = strtol(arg, &end, 10);
+	if (errno || end == arg || *end != '\0' || val < min || val != (int)val) {
+		warn("invalid %s: %s\n", what, arg);
+		argp_usage(state);
+	}
+	return val;
+}
+
+/* argp callback: record one option, or the single positional pattern, in env. */
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	static const struct { int key; bool *flag; } flags[] = {
+		{ 'T', &env.timestamp }, { 'r', &env.regexp }, { 's', &env.offset },
+		{ 'P', &env.perpid }, { 'K', &env.kernel_stacks_only }, { 'U', &env.user_stacks_only },
+		{ 'v', &env.verbose }, { 'd', &env.delimited }, { 'f', &env.folded },
+	};
+	static int pos_args;
+
+	for (size_t i = 0; i < sizeof(flags) / sizeof(flags[0]); i++)
+		if (flags[i].key == key) {
+			*flags[i].flag = true;
+			return 0;
+		}
+
+	switch (key) {
+	case 'h':
+		argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
+		break;
+	case 'p':
+		env.pid = parse_num(arg, "PID", 0, state);
+		break;
+	case 'c':
+		env.cpu = parse_num(arg, "CPU", 0, state);
+		break;
+	case 'i':
+		env.interval = parse_num(arg, "interval", 1, state);
+		break;
+	case 'D':
+		env.duration = parse_num(arg, "duration", 1, state);
+		break;
+	case 'C':
+		env.cgroup_path = arg;
+		env.cg = true;
+		break;
+	case OPT_STACK_STORAGE_SIZE:
+		env.stack_storage_size = parse_num(arg, "stack storage size", 1, state);
+		break;
+	case OPT_PERF_MAX_STACK_DEPTH:
+		env.perf_max_stack_depth = parse_num(arg, "stack depth", 1, state);
+		break;
+	case ARGP_KEY_ARG:
+		if (pos_args++) {
+			warn("Unrecognized positional argument: %s\n", arg);
+			argp_usage(state);
+		}
+		env.pattern = arg;
+		break;
+	case ARGP_KEY_END:
+		if (!pos_args) {
+			warn("Pattern required.\n");
+			argp_usage(state);
+		}
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+	return 0;
+}
+
+static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
+{
+	bool wanted = env.verbose || level != LIBBPF_DEBUG;	/* debug messages need -v */
+
+	return wanted ? vfprintf(stderr, format, args) : 0;
+}
+
+static void sig_handler(int sig)	/* installed for SIGINT and SIGTERM in main() */
+{
+	exiting = true;	/* the main loop checks this flag after each sleep */
+}
+
+struct count_info {	/* user-space copy of one counts map entry, used for sorting */
+	struct key_t key;	/* map key: stack ids plus optional tgid/comm */
+	__u64 value;	/* event count read for that key */
+};
+
+static int sort_by_value(const void *a, const void *b)
+{
+	const struct count_info *A = a;
+	const struct count_info *B = b;
+	/* qsort() order: ascending count. Compare, because a __u64 difference truncated to int can flip sign. */
+	return (A->value > B->value) - (A->value < B->value);
+}
+
+static void print_stacks(struct stackcount_bpf *skel, struct ksyms *ksyms, struct usyms *usyms)	/* print all counts in sort_by_value() order, then empty the counts map */
+{
+	int counts_fd = bpf_map__fd(skel->maps.counts);
+	int stack_traces_fd = bpf_map__fd(skel->maps.stack_traces);
+	struct key_t *lookup_key = NULL, next_key;
+	__u64 value;
+	static __u64 *stack;	/* NOTE(review): static looks unnecessary; allocated and freed on every call */
+	size_t i;
+
+	stack = calloc(env.perf_max_stack_depth, sizeof(__u64));
+	if (!stack) {
+		warn("failed to allocate stack array\n");
+		return;
+	}
+
+	if (env.timestamp) {
+		char ts[32];
+		time_t t;
+
+		time(&t);
+		strftime(ts, sizeof(ts), "%H:%M:%S", localtime(&t));
+		printf("%-8s\n", ts);
+	}
+
+	// Copy every (key, count) pair into a growable array, then sort it
+	struct count_info *items = NULL;
+	size_t items_size = 0;
+	size_t items_capacity = 0;
+
+	while (bpf_map_get_next_key(counts_fd, lookup_key, &next_key) == 0) {
+		if (bpf_map_lookup_elem(counts_fd, &next_key, &value) != 0) {
+			warn("bpf_map_lookup_elem failed\n");
+			goto cleanup;
+		}
+
+		if (items_size >= items_capacity) {
+			items_capacity = items_capacity == 0 ? 64 : items_capacity * 2;	// start at 64, then double
+			struct count_info *new_items =
+					realloc(items, items_capacity * sizeof(*items));
+			if (!new_items) {
+				warn("realloc failed\n");
+				goto cleanup;
+			}
+			items = new_items;
+		}
+		items[items_size].key = next_key;
+		items[items_size].value = value;
+		items_size++;
+
+		lookup_key = &next_key;
+	}
+
+	qsort(items, items_size, sizeof(*items), sort_by_value);
+
+	// Print the entries in sorted order
+	for (i = 0; i < items_size; i++) {
+		if (env.folded) {
+			// folded: "name;user frames;[-;]kernel frames; count" on a single line
+			printf("%s;", items[i].key.name);
+			if (usyms && items[i].key.user_stack_id >= 0) {
+				if (bpf_map_lookup_elem(stack_traces_fd, &items[i].key.user_stack_id, stack) != 0) {
+					warn("failed to lookup user stack table\n");
+					continue;	// NOTE(review): the partial line printed so far gets no newline
+				}
+				for (int j = 0; j < env.perf_max_stack_depth && stack[j]; j++) {	// a zero address ends the walk
+					const struct usym *sym = usyms__lookup_addr(usyms, stack[j]);
+					printf("%s;", sym ? sym->name : "[unknown]");
+				}
+			}
+			if (env.delimited)
+				printf("-;");
+
+			if (items[i].key.kernel_stack_id >= 0) {
+				if (bpf_map_lookup_elem(stack_traces_fd, &items[i].key.kernel_stack_id, stack) != 0) {
+					warn("failed to lookup kernel stack table, id = %d: %s\n",
+							items[i].key.kernel_stack_id, strerror(errno));
+					continue;	// NOTE(review): the partial line printed so far gets no newline
+				}
+				for (int j = 0; j < env.perf_max_stack_depth && stack[j]; j++) {
+					const struct ksym *sym = ksyms__map_addr(ksyms, stack[j]);
+					printf("%s;", sym ? sym->name : "[unknown]");
+				}
+			}
+			printf(" %llu\n", items[i].value);
+		} else {
+			// multi-line: kernel frames, optional "--", user frames, optional "name [tgid]", count
+			if (items[i].key.kernel_stack_id >= 0) {
+				if (bpf_map_lookup_elem(stack_traces_fd, &items[i].key.kernel_stack_id, stack) != 0) {
+					warn("failed to lookup kernel stack table\n");
+					continue;
+				}
+				for (int j = 0; j < env.perf_max_stack_depth && stack[j]; j++) {
+					const struct ksym *sym = ksyms__map_addr(ksyms, stack[j]);
+
+					if (env.verbose) {	// verbose prefixes each frame with its raw address
+						if (env.offset && sym)
+							printf("    %p %s+0x%llx\n",
+									(void *)stack[j], sym->name,
+									stack[j] - sym->addr);
+						else
+							printf("    %p %s\n",
+									(void *)stack[j],
+									sym ? sym->name : "[unknown]");
+					} else {
+						if (env.offset && sym)
+							printf("    %s+0x%llx\n",
+									sym->name,
+									stack[j] - sym->addr);
+						else
+							printf("    %s\n",
+									sym ? sym->name : "[unknown]");
+					}
+				}
+			}
+
+			if (env.delimited)
+				printf("    --\n");
+
+			if (usyms && items[i].key.user_stack_id >= 0) {
+				if (bpf_map_lookup_elem(stack_traces_fd, &items[i].key.user_stack_id, stack) != 0) {
+					warn("failed to lookup user stack table\n");
+					continue;
+				}
+				for (int j = 0; j < env.perf_max_stack_depth && stack[j]; j++) {
+					const struct usym *sym = usyms__lookup_addr(usyms, stack[j]);
+					if (env.verbose)
+						printf("    %p %s\n", (void *)stack[j], sym ? sym->name : "[unknown]");
+					else
+						printf("    %s\n", sym ? sym->name : "[unknown]");
+				}
+			}
+			if (env.perpid)
+				printf("    %s [%d]\n", items[i].key.name, items[i].key.tgid);
+
+			printf("    %llu\n\n", items[i].value);
+		}
+	}
+
+cleanup:
+	free(stack);
+	free(items);
+	// Delete every counts entry so the next interval starts from zero; stack_traces is not touched
+	lookup_key = NULL;
+	while (bpf_map_get_next_key(counts_fd, lookup_key, &next_key) == 0) {
+		bpf_map_delete_elem(counts_fd, &next_key);
+		lookup_key = &next_key;	// NOTE(review): key was just deleted; presumably iteration restarts from the first key, confirm
+	}
+}
+
+/* Attach kprobe_prog to each kallsyms text symbol matching the glob env.pattern; 0 or negative error. */
+static int attach_kprobes(struct stackcount_bpf *skel)
+{
+	char line[256], name[256], type;
+	struct bpf_link *link, **grown;
+	unsigned long long addr;
+	int err = 0;
+	FILE *f;
+
+	f = fopen("/proc/kallsyms", "r");
+	if (!f)
+		return -errno;
+
+	while (fgets(line, sizeof(line), f)) {
+		if (sscanf(line, "%llx %c %255s", &addr, &type, name) != 3)
+			continue;
+		if (type != 'T' && type != 't')
+			continue;
+		if (fnmatch(env.pattern, name, 0) != 0)
+			continue;
+		link = bpf_program__attach_kprobe(skel->progs.kprobe_prog, false, name);
+		if (!link) {
+			err = -1;
+			break;
+		}
+		/* Grow through a temporary so a failed realloc() loses neither the array nor the link. */
+		grown = realloc(links, (num_links + 1) * sizeof(*links));
+		if (!grown) {
+			bpf_link__destroy(link);
+			err = -ENOMEM;
+			break;
+		}
+		links = grown;
+		links[num_links++] = link;
+	}
+
+	fclose(f);
+	return err;
+}
+
+/* Entry point: parse options, load and attach the BPF program, print counts until done. */
+int main(int argc, char **argv)
+{
+	static const struct argp argp = {
+		.options = opts,
+		.parser = parse_arg,
+		.doc = argp_program_doc,
+	};
+	char *p, *save = NULL, *probe_type, *arg1, *arg2;
+	struct bpf_link *link = NULL, **grown;
+	struct stackcount_bpf *skel = NULL;
+	struct ksyms *ksyms = NULL;
+	struct usyms *usyms = NULL;
+	bool is_tp, is_uprobe;
+	int cgroup_fd = -1;
+	long left, nap;
+	int err;
+
+	err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
+	if (err)
+		return err;
+
+	if (env.kernel_stacks_only && env.user_stacks_only) {
+		warn("-K and -U are mutually exclusive.\n");
+		return 1;
+	}
+
+#ifndef USE_BLAZESYM
+	if (env.user_stacks_only || !env.kernel_stacks_only) {
+		warn("user stacks not supported without blazesym, kernel stacks will be used\n");
+		env.user_stacks_only = false;
+		env.kernel_stacks_only = true;
+	}
+#endif
+
+	/* Split "type:a:b" once, before any BPF setup; the tokens live inside p until it is freed. */
+	p = strdup(env.pattern);
+	if (!p) {
+		warn("failed to duplicate pattern\n");
+		return 1;
+	}
+	probe_type = strtok_r(p, ":", &save);
+	if (!probe_type) {
+		warn("invalid pattern: %s\n", env.pattern);
+		free(p);
+		return 1;
+	}
+	is_tp = strcmp(probe_type, "t") == 0;
+	is_uprobe = strcmp(probe_type, "u") == 0;
+	arg1 = strtok_r(NULL, ":", &save);
+	arg2 = strtok_r(NULL, ":", &save);
+	if ((is_tp || is_uprobe) && (!arg1 || !arg2)) {
+		warn("invalid %s pattern\n", is_tp ? "tracepoint" : "uprobe");
+		free(p);
+		return 1;
+	}
+
+	libbpf_set_print(libbpf_print_fn);
+
+	skel = stackcount_bpf__open();
+	if (!skel) {
+		warn("failed to open BPF object\n");
+		free(p);
+		return 1;
+	}
+
+	/* Size stack entries in __u64 units, the element type print_stacks() reads them back as. */
+	bpf_map__set_value_size(skel->maps.stack_traces, env.perf_max_stack_depth * sizeof(__u64));
+	bpf_map__set_max_entries(skel->maps.stack_traces, env.stack_storage_size);
+
+	skel->rodata->target_pid = env.pid;
+	skel->rodata->target_cpu = env.cpu;
+	skel->rodata->kernel_stacks_only = env.kernel_stacks_only;
+	skel->rodata->user_stacks_only = env.user_stacks_only;
+	skel->rodata->per_pid = env.perpid;
+
+	/* Load only the program that matches the requested probe type. */
+	bpf_program__set_autoload(skel->progs.kprobe_prog, !is_tp && !is_uprobe);
+	bpf_program__set_autoload(skel->progs.tp_prog, is_tp);
+	bpf_program__set_autoload(skel->progs.uprobe_prog, is_uprobe);
+
+	if (env.cg) {
+		cgroup_fd = open(env.cgroup_path, O_RDONLY);
+		if (cgroup_fd < 0) {
+			err = -errno;
+			warn("failed to open cgroup path: %s\n", env.cgroup_path);
+			goto cleanup;
+		}
+		/* Attaching these unloaded tracing programs to a cgroup cannot filter them, so say so. */
+		warn("cgroup filtering is not implemented, tracing all cgroups\n");
+	}
+
+	err = stackcount_bpf__load(skel);
+	if (err) {
+		warn("failed to load BPF object: %d\n", err);
+		goto cleanup;
+	}
+
+	if (is_tp) {
+		link = bpf_program__attach_tracepoint(skel->progs.tp_prog, arg1, arg2);
+	} else if (is_uprobe) {
+		struct bpf_uprobe_opts uprobe_opts = { .sz = sizeof(uprobe_opts), .func_name = arg2, .retprobe = false };
+
+		link = bpf_program__attach_uprobe_opts(skel->progs.uprobe_prog,
+						       env.pid ?: -1, arg1, 0, &uprobe_opts);
+	} else {
+		err = attach_kprobes(skel);
+		if (err) {
+			warn("failed to attach kprobes\n");
+			goto cleanup;
+		}
+	}
+
+	if (is_tp || is_uprobe) {
+		if (!link) {
+			err = -errno;
+			warn("failed to attach %s: %d\n", is_tp ? "tracepoint" : "uprobe", err);
+			goto cleanup;
+		}
+		/* Grow through a temporary so a failed realloc() cannot leak the link. */
+		grown = realloc(links, (num_links + 1) * sizeof(*links));
+		if (!grown) {
+			err = -ENOMEM;
+			bpf_link__destroy(link);
+			warn("failed to allocate link array\n");
+			goto cleanup;
+		}
+		links = grown;
+		links[num_links++] = link;
+	}
+
+	/* Every failure below sets err; otherwise the process would exit with status 0. */
+	ksyms = ksyms__load();
+	if (!ksyms) {
+		err = -1;
+		warn("failed to load kallsyms\n");
+		goto cleanup;
+	}
+	usyms = usyms__new(env.pid ?: -1, NULL);
+	if (!usyms && (env.user_stacks_only || !env.kernel_stacks_only)) {
+		err = -1;
+		warn("failed to load usyms\n");
+		goto cleanup;
+	}
+
+	printf("Tracing... Hit Ctrl-C to end.\n");
+
+	signal(SIGINT, sig_handler);
+	signal(SIGTERM, sig_handler);
+
+	/* Report once per interval, never sleeping past the total duration (both in seconds). */
+	for (left = env.duration; !exiting && left > 0; left -= nap) {
+		nap = env.interval > 0 && env.interval < left ? env.interval : left;
+		sleep(nap);
+		print_stacks(skel, ksyms, usyms);
+	}
+
+	printf("Detaching...\n");
+
+cleanup:
+	if (cgroup_fd >= 0)
+		close(cgroup_fd);
+	for (int i = 0; i < num_links; i++)
+		bpf_link__destroy(links[i]);
+	free(links);
+	stackcount_bpf__destroy(skel);
+	ksyms__free(ksyms);
+	usyms__free(usyms);
+	free(p);
+
+	return err != 0;
+}
diff --git a/libbpf-tools/stackcount.h b/libbpf-tools/stackcount.h
new file mode 100644
index 000000000000..3d20b19a741d
--- /dev/null
+++ b/libbpf-tools/stackcount.h
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+#ifndef __STACKCOUNT_H
+#define __STACKCOUNT_H
+
+#define TASK_COMM_LEN 16	/* size of key_t.name, the buffer passed to bpf_get_current_comm() */
+#define MAX_STACK_DEPTH 127	/* NOTE(review): not referenced by the sources in this patch; confirm it is needed */
+
+struct key_t {	/* key of the counts map, shared by the BPF program and user space */
+	__u32 tgid;	/* process id; left 0 unless per_pid is enabled */
+	__s32 kernel_stack_id;	/* bpf_get_stackid() result, or -1 when kernel stacks are skipped */
+	__s32 user_stack_id;	/* bpf_get_stackid() result, or -1 when user stacks are skipped */
+	char name[TASK_COMM_LEN];	/* task comm; left empty unless per_pid is enabled */
+};
+
+#endif /* __STACKCOUNT_H */