diff --git a/libbpf-tools/tcppktlat.bpf.c b/libbpf-tools/tcppktlat.bpf.c
index 24b4c8ff3aa7..8180f2618892 100644
--- a/libbpf-tools/tcppktlat.bpf.c
+++ b/libbpf-tools/tcppktlat.bpf.c
@@ -5,18 +5,22 @@
 #include
 #include
 
+#include "bits.bpf.h"
 #include "compat.bpf.h"
 #include "core_fixes.bpf.h"
+#include "maps.bpf.h"
 #include "tcppktlat.h"
 
-#define MAX_ENTRIES	10240
-#define AF_INET		2
+#define MAX_ENTRIES 10240
+#define AF_INET 2
 
 const volatile pid_t targ_pid = 0;
 const volatile pid_t targ_tid = 0;
 const volatile __u16 targ_sport = 0;
 const volatile __u16 targ_dport = 0;
 const volatile __u64 targ_min_us = 0;
+const volatile bool targ_hist = false;
+const volatile bool targ_per_thread = false;
 
 struct {
 	__uint(type, BPF_MAP_TYPE_HASH);
@@ -25,6 +29,15 @@ struct {
 	__type(value, u64);
 } start SEC(".maps");
 
+static struct hist zero;
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, u32);
+	__type(value, struct hist);
+} hists SEC(".maps");
+
 static int handle_tcp_probe(struct sock *sk, struct sk_buff *skb)
 {
 	const struct inet_sock *inet = (struct inet_sock *)(sk);
@@ -33,9 +46,10 @@ static int handle_tcp_probe(struct sock *sk, struct sk_buff *skb)
 
 	if (targ_sport && targ_sport != BPF_CORE_READ(inet, inet_sport))
 		return 0;
-	if (targ_dport && targ_dport != BPF_CORE_READ(sk, __sk_common.skc_dport))
+	if (targ_dport &&
+	    targ_dport != BPF_CORE_READ(sk, __sk_common.skc_dport))
 		return 0;
-	th = (const struct tcphdr*)BPF_CORE_READ(skb, data);
+	th = (const struct tcphdr *)BPF_CORE_READ(skb, data);
 	doff = BPF_CORE_READ_BITFIELD_PROBED(th, doff);
 	len = BPF_CORE_READ(skb, len);
 	/* `doff * 4` means `__tcp_hdrlen` */
@@ -47,32 +61,61 @@ static int handle_tcp_probe(struct sock *sk, struct sk_buff *skb)
 	return 0;
 }
 
-static int handle_tcp_rcv_space_adjust(void *ctx, struct sock *sk)
+/*
+ * Add one latency sample to the log2 histogram keyed by TID (per-thread
+ * mode) or PID/tgid (per-process mode). Returns 0 on success, -1 if the
+ * histogram entry could not be looked up or created.
+ */
+static int handle_hist_event(struct sock *sk, u32 pid, u32 tid, s64 delta_us)
+{
+	struct hist *histp;
+	struct task_struct *current;
+	u32 hkey;
+	u64 slot;
+
+	/* Use TID for per-thread, PID (tgid) for per-process */
+	if (targ_per_thread)
+		hkey = tid;
+	else
+		hkey = pid;
+
+	histp = bpf_map_lookup_or_try_init(&hists, &hkey, &zero);
+	if (!histp)
+		return -1;
+
+	/* Store comm if not already set */
+	if (!histp->comm[0]) {
+		if (targ_per_thread) {
+			/* For per-thread, use current thread comm */
+			bpf_get_current_comm(&histp->comm, TASK_COMM_LEN);
+		} else {
+			/* For per-process, use process group leader comm */
+			current = (struct task_struct *)bpf_get_current_task();
+			BPF_CORE_READ_STR_INTO(&histp->comm, current,
+					       group_leader, comm);
+		}
+	}
+	slot = log2l(delta_us);
+	if (slot >= MAX_SLOTS)
+		slot = MAX_SLOTS - 1;
+	__sync_fetch_and_add(&histp->slots[slot], 1);
+	return 0;
+}
+
+/*
+ * Fill in one per-packet latency event and hand it to submit_buf().
+ * Returns 0 on success, -1 if no event buffer could be reserved.
+ */
+static int handle_event_output(void *ctx, struct sock *sk, u32 pid, u32 tid,
+			       s64 delta_us)
 {
 	const struct inet_sock *inet = (struct inet_sock *)(sk);
-	u64 sock_ident = get_sock_ident(sk);
-	u64 id = bpf_get_current_pid_tgid(), *tsp;
-	u32 pid = id >> 32, tid = id;
 	struct event *eventp;
-	s64 delta_us;
 	u16 family;
 
-	tsp = bpf_map_lookup_elem(&start, &sock_ident);
-	if (!tsp)
-		return 0;
-
-	if (targ_pid && targ_pid != pid)
-		goto cleanup;
-	if (targ_tid && targ_tid != tid)
-		goto cleanup;
-
-	delta_us = (bpf_ktime_get_ns() - *tsp) / 1000;
-	if (delta_us < 0 || delta_us <= targ_min_us)
-		goto cleanup;
-
 	eventp = reserve_buf(sizeof(*eventp));
 	if (!eventp)
-		goto cleanup;
+		return -1;
 
 	eventp->pid = pid;
 	eventp->tid = tid;
@@ -85,11 +128,46 @@ static int handle_tcp_rcv_space_adjust(void *ctx, struct sock *sk)
 		eventp->saddr[0] = BPF_CORE_READ(sk, __sk_common.skc_rcv_saddr);
 		eventp->daddr[0] = BPF_CORE_READ(sk, __sk_common.skc_daddr);
 	} else { /* family == AF_INET6 */
-		BPF_CORE_READ_INTO(eventp->saddr, sk, __sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
-		BPF_CORE_READ_INTO(eventp->daddr, sk, __sk_common.skc_v6_daddr.in6_u.u6_addr32);
+		BPF_CORE_READ_INTO(
+			eventp->saddr, sk,
+			__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
+		BPF_CORE_READ_INTO(eventp->daddr, sk,
+				   __sk_common.skc_v6_daddr.in6_u.u6_addr32);
 	}
 	eventp->family = family;
 	submit_buf(ctx, eventp, sizeof(*eventp));
+	return 0;
+}
+
+/*
+ * Compute the latency since the timestamp stored for this socket, apply the
+ * PID/TID/min-latency filters, and record the sample as a histogram entry or
+ * a per-packet event. The stored timestamp is deleted after the lookup.
+ */
+static int handle_tcp_rcv_space_adjust(void *ctx, struct sock *sk)
+{
+	u64 sock_ident = get_sock_ident(sk);
+	u64 id = bpf_get_current_pid_tgid(), *tsp;
+	u32 pid = id >> 32, tid = id;
+	s64 delta_us;
+
+	tsp = bpf_map_lookup_elem(&start, &sock_ident);
+	if (!tsp)
+		return 0;
+
+	if (targ_pid && targ_pid != pid)
+		goto cleanup;
+	if (targ_tid && targ_tid != tid)
+		goto cleanup;
+
+	delta_us = (bpf_ktime_get_ns() - *tsp) / 1000;
+	if (delta_us < 0 || delta_us <= targ_min_us)
+		goto cleanup;
+
+	if (targ_hist)
+		handle_hist_event(sk, pid, tid, delta_us);
+	else
+		handle_event_output(ctx, sk, pid, tid, delta_us);
 
 cleanup:
 	bpf_map_delete_elem(&start, &sock_ident);
@@ -123,7 +201,8 @@ int BPF_PROG(tcp_destroy_sock_btf, struct sock *sk)
 }
 
 SEC("raw_tp/tcp_probe")
-int BPF_PROG(tcp_probe, struct sock *sk, struct sk_buff *skb) {
+int BPF_PROG(tcp_probe, struct sock *sk, struct sk_buff *skb)
+{
 	return handle_tcp_probe(sk, skb);
 }
 
diff --git a/libbpf-tools/tcppktlat.c b/libbpf-tools/tcppktlat.c
index d0f966f5bb0c..5d46349f6245 100644
--- a/libbpf-tools/tcppktlat.c
+++ b/libbpf-tools/tcppktlat.c
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -16,6 +17,9 @@
 #include "tcppktlat.skel.h"
 #include "compat.h"
 #include "trace_helpers.h"
+#include "map_helpers.h"
+
+#define DEFAULT_INTERVAL 99999999 /* Only print on Ctrl-C by default */
 
 static struct env {
 	pid_t pid;
@@ -25,7 +29,14 @@ static struct env {
 	__u16 rport;
 	bool timestamp;
 	bool verbose;
-} env = {};
+	bool histogram;
+	bool per_thread;
+	__u32 interval;
+	int times;
+} env = {
+	.interval = DEFAULT_INTERVAL,
+	.times = DEFAULT_INTERVAL,
+};
 
 static volatile sig_atomic_t exiting = 0;
 static int column_width = 15;
@@ -34,23 +45,36 @@ const char *argp_program_version = "tcppktlat 0.1";
 const char *argp_program_bug_address =
 	"https://github.com/iovisor/bcc/tree/master/libbpf-tools";
 const char argp_program_doc[] =
-"Trace latency between TCP received pkt and picked up by userspace thread.\n"
-"\n"
-"USAGE: tcppktlat [--help] [-T] [-p PID] [-t TID] [-l LPORT] [-r RPORT] [-w] [-v]\n"
-"\n"
-"EXAMPLES:\n"
-" tcppktlat # Trace all TCP packet picked up latency\n"
-" tcppktlat -T # summarize with timestamps\n"
-" tcppktlat -p # filter for pid\n"
-" tcppktlat -t # filter for tid\n"
-" tcppktlat -l # filter for local port\n"
-" tcppktlat -r # filter for remote port\n"
-" tcppktlat 1000 # filter for latency higher than 1000us";
+	"Trace latency between TCP received pkt and picked up by userspace thread.\n"
+	"\n"
+	"USAGE: tcppktlat [--help] [-T] [-H] [-L] [-p PID] [-t TID] [-l LPORT] [-r RPORT] [-w] [-v]\n"
+	" [min_us | interval [count]]\n"
+	"\n"
+	"Positional args:\n"
+	" min_us Minimum latency filter (microseconds) when not using -H\n"
+	" interval [count] With -H, interval is the histogram print interval (seconds)\n"
+	" and count limits how many times histograms are printed\n"
+	"\n"
+	"EXAMPLES:\n"
+	" tcppktlat # Trace all TCP packet picked up latency\n"
+	" tcppktlat -T # summarize with timestamps\n"
+	" tcppktlat -H # show latency histogram\n"
+	" tcppktlat -H 5 # show latency histogram, print every 5 seconds\n"
+	" tcppktlat -H 1 5 # show latency histogram, print every 1 second, 5 times\n"
+	" tcppktlat -H -L # show latency histogram per thread\n"
+	" tcppktlat -p # filter for pid\n"
+	" tcppktlat -t # filter for tid\n"
+	" tcppktlat -l # filter for local port\n"
+	" tcppktlat -r # filter for remote port\n"
+	" tcppktlat 1000 # filter for latency higher than 1000us";
 
 static const struct argp_option opts[] = {
 	{ "pid", 'p', "PID", 0, "Process PID to trace", 0 },
 	{ "tid", 't', "TID", 0, "Thread TID to trace", 0 },
 	{ "timestamp", 'T', NULL, 0, "include timestamp on output", 0 },
+	{ "histogram", 'H', NULL, 0,
+	  "Show latency histogram. Positional args become interval/count", 0 },
+	{ "threads", 'L', NULL, 0, "Print a histogram per thread ID", 0 },
 	{ "lport", 'l', "LPORT", 0, "filter for local port", 0 },
 	{ "rport", 'r', "RPORT", 0, "filter for remote port", 0 },
 	{ "verbose", 'v', NULL, 0, "Verbose debug output", 0 },
@@ -75,6 +99,13 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 	case 'T':
 		env.timestamp = true;
 		break;
+	case 'H':
+		env.histogram = true;
+		/* Interval/count, if any, are parsed from positional args */
+		break;
+	case 'L':
+		env.per_thread = true;
+		break;
 	case 'p':
 		errno = 0;
 		pid = strtol(arg, NULL, 10);
@@ -115,18 +146,47 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 		column_width = 26;
 		break;
 	case ARGP_KEY_ARG:
-		if (pos_args++) {
-			fprintf(stderr,
-				"Unrecognized positional argument: %s\n", arg);
-			argp_usage(state);
+		if (env.histogram) {
+			/* For histogram mode, positional args are interval and count */
+			errno = 0;
+			if (pos_args == 0) {
+				env.interval = strtoul(arg, NULL, 10);
+				if (errno || env.interval == 0) {
+					fprintf(stderr,
+						"Invalid interval: %s\n", arg);
+					argp_usage(state);
+				}
+			} else if (pos_args == 1) {
+				env.times = strtol(arg, NULL, 10);
+				if (errno || env.times <= 0) {
+					fprintf(stderr, "Invalid count: %s\n",
+						arg);
+					argp_usage(state);
+				}
+			} else {
+				fprintf(stderr,
+					"Unrecognized positional argument: %s\n",
+					arg);
+				argp_usage(state);
+			}
+			pos_args++;
+		} else {
+			/* For non-histogram mode, positional arg is min_us */
+			if (pos_args++) {
+				fprintf(stderr,
+					"Unrecognized positional argument: %s\n",
+					arg);
+				argp_usage(state);
+			}
+			errno = 0;
+			min_us = strtoll(arg, NULL, 10);
+			if (errno || min_us <= 0) {
+				fprintf(stderr, "Invalid delay (in us): %s\n",
+					arg);
+				argp_usage(state);
+			}
+			env.min_us = min_us;
 		}
-		errno = 0;
-		min_us = strtoll(arg, NULL, 10);
-		if (errno || min_us <= 0) {
-			fprintf(stderr, "Invalid delay (in us): %s\n", arg);
-			argp_usage(state);
-		}
-		env.min_us = min_us;
 		break;
 	default:
 		return ARGP_ERR_UNKNOWN;
@@ -134,7 +194,8 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 	return 0;
 }
 
-static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
+static int libbpf_print_fn(enum libbpf_print_level level, const char *format,
+			   va_list args)
 {
 	if (level == LIBBPF_DEBUG && !env.verbose)
 		return 0;
@@ -159,9 +220,9 @@ static int handle_event(void *ctx, void *data, size_t data_sz)
 	inet_ntop(e->family, &e->saddr, saddr, sizeof(saddr));
 	inet_ntop(e->family, &e->daddr, daddr, sizeof(daddr));
 
-	printf("%-7d %-7d %-16s %-*s %-5d %-*s %-5d %-.2f\n",
-		e->pid, e->tid, e->comm, column_width, saddr, ntohs(e->sport), column_width, daddr,
-		ntohs(e->dport), e->delta_us / 1000.0);
+	printf("%-7d %-7d %-16s %-*s %-5d %-*s %-5d %-.2f\n", e->pid, e->tid,
+	       e->comm, column_width, saddr, ntohs(e->sport), column_width,
+	       daddr, ntohs(e->dport), e->delta_us / 1000.0);
 
 	return 0;
 }
@@ -171,6 +232,211 @@ static void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt)
 	fprintf(stderr, "lost %llu events on CPU #%d\n", lost_cnt, cpu);
 }
 
+/*
+ * Derive approximate summary statistics from a log2 histogram.
+ *
+ * Every result is a bucket-level estimate: min is the low bound of the first
+ * non-empty slot, max the high bound of the last one, avg weights each slot
+ * by its midpoint, and median/p95/p99 report the high bound of the slot in
+ * which the cumulative count reaches the respective rank.
+ *
+ * Returns 0 on success, or -1 if every slot is empty (outputs are not
+ * written in that case).
+ */
+static int calc_hist_stats(__u32 *slots, int slots_size, double *min,
+			   double *max, double *avg, double *median, double *p95,
+			   double *p99)
+{
+	unsigned long long total = 0;
+	unsigned long long sum = 0;
+	int i, min_slot = -1, max_slot = -1;
+	unsigned long long low, high, mid;
+
+	/* Calculate total count and find min/max slots */
+	for (i = 0; i < slots_size; i++) {
+		if (slots[i] > 0) {
+			if (min_slot < 0)
+				min_slot = i;
+			max_slot = i;
+			total += slots[i];
+		}
+	}
+
+	if (min_slot < 0) {
+		/* No data available */
+		return -1;
+	}
+
+	/* Calculate min (low bound of first slot) */
+	low = (1ULL << (min_slot + 1)) >> 1;
+	if (low == (1ULL << (min_slot + 1)) - 1)
+		low -= 1;
+	*min = low;
+
+	/* Calculate max (high bound of last slot) */
+	high = (1ULL << (max_slot + 1)) - 1;
+	*max = high;
+
+	/* Calculate avg (average) */
+	for (i = 0; i < slots_size; i++) {
+		if (slots[i] > 0) {
+			low = (1ULL << (i + 1)) >> 1;
+			high = (1ULL << (i + 1)) - 1;
+			if (low == high)
+				low -= 1;
+			/* Use midpoint of the range as representative value */
+			mid = (low + high) / 2;
+			sum += mid * slots[i];
+		}
+	}
+	*avg = total > 0 ? (double)sum / total : 0;
+
+	/* Calculate median (p50) and the p95/p99 percentiles */
+	unsigned long long p50_count = total * 50 / 100;
+	unsigned long long p95_count = total * 95 / 100;
+	unsigned long long p99_count = total * 99 / 100;
+	unsigned long long cumsum = 0;
+
+	*median = *p95 = *p99 = 0;
+	for (i = 0; i < slots_size; i++) {
+		if (slots[i] > 0) {
+			cumsum += slots[i];
+			low = (1ULL << (i + 1)) >> 1;
+			high = (1ULL << (i + 1)) - 1;
+			if (low == high)
+				low -= 1;
+
+			if (*median == 0 && cumsum >= p50_count) {
+				*median = high; /* Use high bound for median */
+			}
+			if (*p95 == 0 && cumsum >= p95_count) {
+				*p95 = high; /* Use high bound for percentile */
+			}
+			if (*p99 == 0 && cumsum >= p99_count) {
+				*p99 = high; /* Use high bound for percentile */
+			}
+		}
+	}
+	return 0;
+}
+
+/*
+ * Dump the hists map with dump_hash() and print one log2 histogram plus
+ * summary statistics per key (TID with -L, PID otherwise).
+ *
+ * Returns 0 on success, -1 if the map could not be dumped.
+ */
+static int print_hist(struct bpf_map *hists_map)
+{
+	const char *units = "usecs";
+	int err, fd = bpf_map__fd(hists_map);
+	/* static: keeps ~1.2 MB of key/value storage off the stack */
+	static __u32 keys[MAX_ENTRIES];
+	static struct hist values[MAX_ENTRIES];
+	__u32 count = MAX_ENTRIES;
+	__u32 invalid_key = -1;
+	double min, max, avg, median, p95, p99;
+	char ts[32];
+	static time_t start_time = 0;
+	time_t now = time(NULL);
+	int i;
+
+	/* Print timestamp header for interval-based output */
+	if (env.interval < DEFAULT_INTERVAL) {
+		if (start_time == 0) {
+			start_time = now;
+		}
+		str_timestamp("%Y-%m-%d %H:%M:%S", ts, sizeof(ts));
+		printf("[%s] (elapsed: %ld seconds)\n", ts, now - start_time);
+	}
+
+	/* Use atomic lookup_and_delete to avoid race conditions */
+	err = dump_hash(fd, keys, sizeof(__u32), values, sizeof(struct hist),
+			&count, &invalid_key, true);
+	if (err) {
+		fprintf(stderr, "failed to dump hist map: %d\n", err);
+		return -1;
+	}
+
+	/* Print all histograms */
+	for (i = 0; i < count; i++) {
+		if (env.timestamp) {
+			str_timestamp("%H:%M:%S", ts, sizeof(ts));
+			printf("%-8s ", ts);
+		}
+
+		if (env.per_thread)
+			printf("\ntid = %u %s\n", keys[i], values[i].comm);
+		else
+			printf("\npid = %u %s\n", keys[i], values[i].comm);
+		print_log2_hist(values[i].slots, MAX_SLOTS, units);
+
+		/* Calculate and print statistics */
+		err = calc_hist_stats(values[i].slots, MAX_SLOTS, &min, &max,
+				      &avg, &median, &p95, &p99);
+		if (err == 0) {
+			printf(" min = %.2f %s, max = %.2f %s, median = %.2f %s, "
+			       "avg = %.2f %s, p95 = %.2f %s, p99 = %.2f %s\n",
+			       min, units, max, units, median, units, avg, units,
+			       p95, units, p99, units);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Histogram main loop. With an interval, print the histograms every
+ * env.interval seconds, at most env.times times; without one, idle until
+ * Ctrl-C and print a single summary on exit.
+ */
+static void run_histogram_mode(struct tcppktlat_bpf *obj)
+{
+	bool is_interval_mode = (env.interval < DEFAULT_INTERVAL);
+
+	while (!exiting && env.times > 0) {
+		if (is_interval_mode) {
+			sleep(env.interval);
+			if (!exiting) {
+				printf("\n");
+				print_hist(obj->maps.hists);
+				env.times--;
+			}
+		} else {
+			/* Default: wait for Ctrl-C */
+			sleep(1);
+		}
+	}
+
+	/* Print histogram on exit only for default mode (no interval specified) */
+	if (!is_interval_mode) {
+		printf("\n");
+		print_hist(obj->maps.hists);
+	}
+}
+
+/*
+ * Event main loop: poll the ring/perf buffer until Ctrl-C. Returns 0 on a
+ * clean exit or the negative error reported by bpf_buffer__poll().
+ */
+static int run_event_mode(struct bpf_buffer *buf)
+{
+	int err = 0;
+
+	while (!exiting) {
+		err = bpf_buffer__poll(buf, POLL_TIMEOUT_MS);
+		if (err < 0 && err != -EINTR) {
+			fprintf(stderr, "error polling ring/perf buffer: %s\n",
+				strerror(-err));
+			return err;
+		}
+		/* reset err to return 0 if exiting */
+		err = 0;
+	}
+
+	return err;
+}
+
 int main(int argc, char **argv)
 {
 	static const struct argp argp = {
@@ -186,6 +452,12 @@ int main(int argc, char **argv)
 	if (err)
 		return err;
 
+	if (env.per_thread && !env.histogram) {
+		fprintf(stderr,
+			"Error: -L option requires -H (histogram mode)\n");
+		return 1;
+	}
+
 	libbpf_set_print(libbpf_print_fn);
 
 	obj = tcppktlat_bpf__open();
@@ -199,6 +471,8 @@ int main(int argc, char **argv)
 	obj->rodata->targ_sport = env.lport;
 	obj->rodata->targ_dport = env.rport;
 	obj->rodata->targ_min_us = env.min_us;
+	obj->rodata->targ_hist = env.histogram;
+	obj->rodata->targ_per_thread = env.per_thread;
 
 	buf = bpf_buffer__new(obj->maps.events, obj->maps.heap);
 	if (!buf) {
@@ -209,17 +483,22 @@ int main(int argc, char **argv)
 
 	if (probe_tp_btf("tcp_probe")) {
 		bpf_program__set_autoload(obj->progs.tcp_probe, false);
-		bpf_program__set_autoload(obj->progs.tcp_rcv_space_adjust, false);
+		bpf_program__set_autoload(obj->progs.tcp_rcv_space_adjust,
+					  false);
 		bpf_program__set_autoload(obj->progs.tcp_destroy_sock, false);
 	} else {
 		bpf_program__set_autoload(obj->progs.tcp_probe_btf, false);
-		bpf_program__set_autoload(obj->progs.tcp_rcv_space_adjust_btf, false);
-		bpf_program__set_autoload(obj->progs.tcp_destroy_sock_btf, false);
+		bpf_program__set_autoload(obj->progs.tcp_rcv_space_adjust_btf,
+					  false);
+		bpf_program__set_autoload(obj->progs.tcp_destroy_sock_btf,
+					  false);
 	}
 
 	err = tcppktlat_bpf__load(obj);
 	if (err) {
-		fprintf(stderr, "failed to load BPF object: %d, maybe your kernel doesn't support `bpf_get_socket_cookie`\n", err);
+		fprintf(stderr,
+			"failed to load BPF object: %d, maybe your kernel doesn't support `bpf_get_socket_cookie`\n",
+			err);
 		goto cleanup;
 	}
 
@@ -236,24 +515,28 @@ int main(int argc, char **argv)
 	}
 
 	if (signal(SIGINT, sig_int) == SIG_ERR) {
-		fprintf(stderr, "can't set signal handler: %s\n", strerror(errno));
+		fprintf(stderr, "can't set signal handler: %s\n",
+			strerror(errno));
 		err = 1;
 		goto cleanup;
 	}
 
-	if (env.timestamp)
-		printf("%-8s ", "TIME(s)");
-	printf("%-7s %-7s %-16s %-*s %-5s %-*s %-5s %-s\n",
-		"PID", "TID", "COMM", column_width, "LADDR", "LPORT", column_width, "RADDR", "RPORT", "MS");
+	if (env.histogram) {
+		printf("Summarize TCP packet latency as a histogram. Hit Ctrl-C to end.\n");
+	} else {
+		if (env.timestamp)
+			printf("%-8s ", "TIME(s)");
+		printf("%-7s %-7s %-16s %-*s %-5s %-*s %-5s %-s\n", "PID",
+		       "TID", "COMM", column_width, "LADDR", "LPORT",
+		       column_width, "RADDR", "RPORT", "MS");
+	}
 
-	while (!exiting) {
-		err = bpf_buffer__poll(buf, POLL_TIMEOUT_MS);
-		if (err < 0 && err != -EINTR) {
-			fprintf(stderr, "error polling ring/perf buffer: %s\n", strerror(-err));
+	if (env.histogram) {
+		run_histogram_mode(obj);
+	} else {
+		err = run_event_mode(buf);
+		if (err)
 			goto cleanup;
-		}
-		/* reset err to return 0 if exiting */
-		err = 0;
 	}
 cleanup:
 	bpf_buffer__free(buf);
diff --git a/libbpf-tools/tcppktlat.h b/libbpf-tools/tcppktlat.h
index 00a93aab7536..44f31a5cb155 100644
--- a/libbpf-tools/tcppktlat.h
+++ b/libbpf-tools/tcppktlat.h
@@ -2,7 +2,9 @@
 #ifndef __TCPPKGLAT_H
 #define __TCPPKGLAT_H
 
-#define TASK_COMM_LEN	16
+#define TASK_COMM_LEN 16
+#define MAX_SLOTS 26
+#define MAX_ENTRIES 10240
 
 struct event {
 	__u32 saddr[4];
@@ -16,4 +18,9 @@ struct event {
 	char comm[TASK_COMM_LEN];
 };
 
+struct hist {
+	__u32 slots[MAX_SLOTS];
+	char comm[TASK_COMM_LEN];
+};
+
 #endif /* __TCPPKGLAT_H_ */
diff --git a/libbpf-tools/tcppktlat_example.txt b/libbpf-tools/tcppktlat_example.txt
index 07c0a0186f63..4a0c8b387c68 100644
--- a/libbpf-tools/tcppktlat_example.txt
+++ b/libbpf-tools/tcppktlat_example.txt
@@ -39,27 +39,118 @@ PID COMM LADDR LPORT RADDR RPORT MS
 ^C
 
+The -H option can be used to show latency as a histogram, grouped by PID.
+
+# tcppktlat -H
+Summarize TCP packet latency as a histogram. Hit Ctrl-C to end.
+^C + +pid = 2333671 sshd-session + usecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 1 |* | + 4 -> 7 : 0 | | + 8 -> 15 : 10 |*********** | + 16 -> 31 : 15 |**************** | + 32 -> 63 : 36 |****************************************| + 64 -> 127 : 2 |** | + +pid = 1357503 verge-mihomo + usecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 0 | | + 8 -> 15 : 2 |************* | + 16 -> 31 : 2 |************* | + 32 -> 63 : 1 |****** | + 64 -> 127 : 6 |****************************************| + 128 -> 255 : 1 |****** | +^C + + +The -H option can also take an interval argument to print histograms periodically. + +# tcppktlat -H 5 +Summarize TCP packet latency as a histogram. Hit Ctrl-C to end. + +pid = 1357503 verge-mihomo + usecs : count distribution + 0 -> 1 : 1 |*** | + 2 -> 3 : 1 |*** | + 4 -> 7 : 4 |************* | + 8 -> 15 : 4 |************* | + 16 -> 31 : 7 |*********************** | + 32 -> 63 : 12 |****************************************| + 64 -> 127 : 9 |****************************** | + 128 -> 255 : 1 |*** | + +pid = 1357503 verge-mihomo + usecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 0 | | + 8 -> 15 : 2 |************* | + 16 -> 31 : 2 |************* | + 32 -> 63 : 1 |****** | + 64 -> 127 : 6 |****************************************| + 128 -> 255 : 1 |****** | +^C + + +The -p option can be combined with -H to show histogram for a specific PID only. + +# tcppktlat -p 1357503 -H +Summarize TCP packet latency as a histogram. Hit Ctrl-C to end. +^C + +pid = 1357503 verge-mihomo + usecs : count distribution + 0 -> 1 : 1 |*** | + 2 -> 3 : 1 |*** | + 4 -> 7 : 4 |************* | + 8 -> 15 : 4 |************* | + 16 -> 31 : 7 |*********************** | + 32 -> 63 : 12 |****************************************| + 64 -> 127 : 9 |****************************** | + 128 -> 255 : 1 |*** | +^C + + # tcppktlat --help Usage: tcppktlat [OPTION...] Trace latency between TCP received pkt and picked up by userspace thread. 
-USAGE: tcppktlat [--help] [-T] [-p PID] [-t TID] [-l LPORT] [-r RPORT] [-v] +USAGE: tcppktlat [--help] [-T] [-H] [-L] [-p PID] [-t TID] [-l LPORT] [-r RPORT] [-w] [-v] + [min_us | interval [count]] + +Positional args: + min_us Minimum latency filter (microseconds) when not using -H + interval [count] With -H, interval is the histogram print interval (seconds) + and count limits how many times histograms are printed EXAMPLES: tcppktlat # Trace all TCP packet picked up latency tcppktlat -T # summarize with timestamps + tcppktlat -H # show latency histogram + tcppktlat -H 5 # show latency histogram, print every 5 seconds + tcppktlat -H 1 5 # show latency histogram, print every 1 second, 5 times + tcppktlat -H -L # show latency histogram per thread tcppktlat -p # filter for pid tcppktlat -t # filter for tid tcppktlat -l # filter for local port tcppktlat -r # filter for remote port tcppktlat 1000 # filter for latency higher than 1000us + -H, --histogram Show latency histogram. Positional args + become interval/count + -L, --threads Print a histogram per thread ID -l, --lport=LPORT filter for local port -p, --pid=PID Process PID to trace -r, --rport=RPORT filter for remote port -t, --tid=TID Thread TID to trace -T, --timestamp include timestamp on output -v, --verbose Verbose debug output + -w, --wide Wide column output (fits IPv6 addresses) -?, --help Give this help list --usage Give a short usage message -V, --version Print program version