diff --git a/README.md b/README.md index 6fc2abb1e2a2..f75df5255502 100644 --- a/README.md +++ b/README.md @@ -187,6 +187,7 @@ pair of .c and .py files, and some are directories of files. - tools/[biotop](tools/biotop.py): Top for disks: Summarize block device I/O by process. [Examples](tools/biotop_example.txt). - tools/[biopattern](tools/biopattern.py): Identify random/sequential disk access patterns. [Examples](tools/biopattern_example.txt). - tools/[biosnoop](tools/biosnoop.py): Trace block device I/O with PID and latency. [Examples](tools/biosnoop_example.txt). +- tools/[biohint](tools/biohint.py): Summarize block I/O write hint usage as a histogram. [Examples](tools/biohint_example.txt). - tools/[dirtop](tools/dirtop.py): File reads and writes by directory. Top for directories. [Examples](tools/dirtop_example.txt). - tools/[filelife](tools/filelife.py): Trace the lifespan of short-lived files. [Examples](tools/filelife_example.txt). - tools/[filegone](tools/filegone.py): Trace why file gone (deleted or renamed). [Examples](tools/filegone_example.txt). diff --git a/man/man8/biohint.8 b/man/man8/biohint.8 new file mode 100644 index 000000000000..a6db2b076ee6 --- /dev/null +++ b/man/man8/biohint.8 @@ -0,0 +1,81 @@ +.TH biohint 8 "2025-11-26" "USER COMMANDS" +.SH NAME +biohint \- Summarize the write hints of block I/O on FDP SSDs. +.SH SYNOPSIS +.B biohint [\-h] [\-t] [\-s] [\-d dir] [interval [count]] +.SH DESCRIPTION +biohint is used to monitor the bi_write_hint field in each block I/O, which +is only relevant for FDP SSDs. It prints how often each hint is used, for every +disk or for the specified disk, either on Ctrl-C or after a given interval in +seconds. + +This works by tracing the kernel tracepoint block:block_bio_queue. + +Since this uses BPF, only the root user can use this tool. +.SH REQUIREMENTS +CONFIG_BPF and bcc. +.SH OPTIONS +\-h +Print usage message. +.TP +\-t +Include timestamps on output.
# biohint   Summarize the write hints carried by block I/O as a histogram.
#           For Linux, uses BCC, eBPF.
#
# USAGE: biohint [-h] [-t] [-s] [-d <dir>] [interval] [count]
#
# Traces the block:block_bio_queue raw tracepoint and counts write bios by
# their bi_write_hint value, optionally per device or for one mounted device.
#
# Copyright (c) 2025 Samsung Electronics Co., Ltd.
# Licensed under the Apache License, Version 2.0 (the "License")
from pathlib import Path
from time import sleep, strftime
import argparse
import os
import re
import signal
import stat
import sys

# Mount point -> mount source (device path), filled by disk_look().
disklookup = {}
# "major,minor" -> kernel disk name (e.g. "nvme0n1"), filled by disk_names().
disknames = {}
mounts = "/proc/mounts"
diskstats = "/proc/diskstats"
epilog = """examples:
    ./biohint               # Summarize block I/O hint in histogram.
    ./biohint 1 10          # Print 10 reports at 1 second intervals.
    ./biohint -t 1          # Print summary with timestamp at 1 second intervals.
    ./biohint -s            # Show histograms of every device separately.
    ./biohint -d /mnt/data  # Trace the device which has been mounted on '/mnt/data'.
"""
hints = """
    0: NOT_SET
    1: NONE
    2: SHORT
    3: MEDIUM
    4: LONG
    5: EXTREME
"""
# BPF program template. STORAGE, STORE, DISK_FILTER and HINT_GET are
# placeholders substituted by render_bpf_text().
# NOTE(review): the four #include targets were not legible in the reviewed
# copy of this file; the headers below are the ones the program needs
# (struct bio, REQ_OP_*, struct block_device) -- confirm against the original.
BPF_TEMPLATE = """
#include <uapi/linux/ptrace.h>
#include <linux/blk_types.h>
#include <linux/blkdev.h>
#include <linux/bio.h>

typedef struct disk_key {
    u64 dev;
    u64 slot;
} disk_key_t;

STORAGE

RAW_TRACEPOINT_PROBE(block_bio_queue)
{
    struct bio *b = (void *)ctx->args[0];
    unsigned int op = b->bi_opf & REQ_OP_MASK;
    dev_t dev = b->bi_bdev->bd_dev;
    HINT_GET

    DISK_FILTER

    if (op == REQ_OP_WRITE) {
        STORE
    }
    return 0;
}
"""
# The rendered program; bpf_text_config() replaces it with the final text.
bpf_text = BPF_TEMPLATE

# /proc/mounts writes space, tab, newline and backslash as \ooo octal escapes.
_OCTAL_ESCAPE = re.compile(r"\\([0-7]{3})")


class EqualSignHelpFormatter(argparse.RawDescriptionHelpFormatter):
    """Help formatter printing one aligned "invocation  help" line per action.

    The metavar of a value-taking option is shown after its first option
    string only (e.g. "-d <dir>, --dir").
    """

    # Column width reserved for the invocation part of each help line.
    INVOCATION_WIDTH = 18

    def _format_action_invocation(self, action):
        if not action.option_strings:
            # Positional argument: show its metavar (defaults to the dest).
            metavar = self._metavar_formatter(action, action.dest)(1)
            if isinstance(metavar, tuple):
                metavar = ' '.join(metavar)
            return metavar
        parts = list(action.option_strings)
        # nargs == 0 marks flag-like actions (store_true, help): no value.
        if action.nargs != 0 and action.metavar:
            parts[0] = "%s %s" % (parts[0], action.metavar)
        return ", ".join(parts)

    def _format_action(self, action):
        invocation = self._format_action_invocation(action)
        help_text = action.help or ""
        # Pad by width instead of tabs so the help column lines up
        # regardless of terminal tab stops or the exact help wording.
        line = "  %-*s  %s" % (self.INVOCATION_WIDTH, invocation, help_text)
        return line.rstrip() + "\n"


def args_config(argv=None):
    """Parse the command line.

    argv: optional list of arguments; None (the default) parses sys.argv[1:].
    Returns the argparse namespace with ts, devices, interval, count and dir.
    """
    parser = argparse.ArgumentParser(
        description="Summarize write hint in block of FDP SSDs",
        formatter_class=EqualSignHelpFormatter,
        epilog=epilog,
        add_help=False)
    # Declared explicitly (instead of patching argparse's private action
    # list) so the help text of -h can be customised.
    parser.add_argument("-h", "--help", action="help",
                        help="Show this help")
    parser.add_argument("-t", "--ts", action="store_true",
                        help="Print histogram with timestamp")
    parser.add_argument("-s", "--devices", action="store_true",
                        help="Print histograms of every device separately")
    parser.add_argument("interval", nargs="?", default=99999999, type=int,
                        help="Specify the amount of time in seconds between each report")
    parser.add_argument("count", nargs="?", default=99999999, type=int,
                        help="Limit the number of reports (default: unlimited)")
    # NOTE(review): the metavar text was not legible in the reviewed copy;
    # '<dir>' is assumed -- confirm.
    parser.add_argument("-d", "--dir", type=str, metavar='<dir>',
                        help="Trace the device which has been mounted on specific directory")
    return parser.parse_args(argv)


def kernel_dev(st_rdev):
    """Convert a userspace st_rdev into the kernel dev_t encoding.

    The kernel packs a device number as (major << 20) | minor, which is what
    bd_dev holds in the BPF program.
    """
    return (os.major(st_rdev) << 20) | os.minor(st_rdev)


def render_bpf_text(args, is_hint):
    """Build the BPF program for the given options.

    args: namespace from args_config(); args.dir is resolved in place.
    is_hint: truthy when struct bio has the bi_write_hint field.
    Returns the program text, or None (after printing the reason to stderr)
    when the options cannot be honoured.
    """
    if not is_hint:
        # Without the field the probe has nothing to read, and referencing
        # it would only produce an opaque BPF compile error.
        print("error: this kernel's struct bio has no bi_write_hint field",
              file=sys.stderr)
        return None

    if args.devices:
        storage_str = "BPF_HISTOGRAM(dist, disk_key_t);"
        store_str = """
        disk_key_t dkey = {};
        dkey.dev = dev;
        dkey.slot = hint;
        dist.atomic_increment(dkey);
        """
    else:
        storage_str = "BPF_HISTOGRAM(dist);"
        store_str = "dist.atomic_increment(hint);"

    disk_filter_str = ""
    if args.dir is not None:
        if args.dir:
            args.dir = str(Path(args.dir).resolve())
        if args.dir not in disklookup:
            print("error: invalid mount point!", file=sys.stderr)
            return None
        disk_path = disklookup[args.dir]
        try:
            stat_info = os.stat(disk_path)
        except OSError:
            print("no such dev '%s'" % disk_path, file=sys.stderr)
            return None
        # Pseudo file systems have no backing block device to filter on.
        if not stat.S_ISBLK(stat_info.st_mode):
            print("'%s' is not a block device" % disk_path, file=sys.stderr)
            return None
        disk_filter_str = """
        if (dev != %d) {
            return 0;
        }
        """ % kernel_dev(stat_info.st_rdev)

    # Always start from the pristine template so repeated calls are safe.
    # "STORAGE" must be substituted before "STORE".
    text = BPF_TEMPLATE.replace("STORAGE", storage_str)
    text = text.replace("STORE", store_str)
    text = text.replace("DISK_FILTER", disk_filter_str)
    text = text.replace("HINT_GET", "u32 hint = b->bi_write_hint;")
    return text


def bpf_text_config(args, is_hint):
    """Render the BPF program into the module-level bpf_text.

    Returns True on success and False when the program cannot be built.
    """
    global bpf_text
    text = render_bpf_text(args, is_hint)
    if text is None:
        return False
    bpf_text = text
    return True


def disk_names(path=None):
    """Return a {"major,minor": name} map read from /proc/diskstats.

    path: file to read instead of /proc/diskstats.
    An unreadable file yields an empty map, so names degrade to "?".
    """
    names = {}
    try:
        with open(path or diskstats) as stats:
            for line in stats:
                fields = line.split()
                if len(fields) >= 3:
                    names[fields[0] + "," + fields[1]] = fields[2]
    except OSError:
        pass
    return names


def disk_print(d):
    """Return the disk name for kernel dev_t d, or "?" when unknown."""
    major = d >> 20
    minor = d & ((1 << 20) - 1)
    return disknames.get("%d,%d" % (major, minor), "?")


def disk_look(path=None):
    """Fill and return disklookup: mount point -> mount source.

    path: file to read instead of /proc/mounts.
    Octal escapes in the mount point are decoded so that it compares equal
    to a resolved file system path. For a mount point listed several times
    the last entry wins.
    """
    with open(path or mounts) as stats:
        for line in stats:
            fields = line.split()
            if len(fields) < 2:
                continue
            mount_point = _OCTAL_ESCAPE.sub(
                lambda m: chr(int(m.group(1), 8)), fields[1])
            disklookup[mount_point] = fields[0]
    return disklookup


def format_linear_hist(dic_hint, max):
    """Return the lines of a linear histogram, header first.

    dic_hint: {hint: count}; rows are emitted in ascending hint order.
    max: the count that maps to a full 40-column bar.
    """
    lines = [" ".join(("{:>10}:".format("hint"),
                       "{:<10}".format("count"),
                       " {:<40}".format("distribution")))]
    for key, value in sorted(dic_hint.items()):
        # A non-positive maximum would divide by zero; draw empty bars.
        num = min(40, int(value / max * 40)) if max > 0 else 0
        lines.append(" ".join(("{:>10}:".format(key),
                               "{:<10}".format(value),
                               "|{:<40}|".format("*" * num))))
    return lines


def print_linear_hist(dic_hint, max):
    """Print the histogram built by format_linear_hist()."""
    for line in format_linear_hist(dic_hint, max):
        print(line)


def main():
    args = args_config()
    # Imported after argument parsing so that --help and usage errors work
    # even where bcc is not installed.
    from bcc import BPF

    print("the program is being configured!")
    print(hints)

    disk_look()
    disknames.update(disk_names())
    # kernel_struct_has_field() returns 1 only when the field is known to
    # exist; the original compared the result against True, i.e. 1.
    is_hint = BPF.kernel_struct_has_field(b'bio', b'bi_write_hint') == 1
    if not bpf_text_config(args, is_hint):
        sys.exit(1)
    b = BPF(text=bpf_text)

    countdown = int(args.count)
    exiting = 0 if args.interval else 1
    dist = b.get_table("dist")
    print("configure complete! the program is running!")

    while True:
        try:
            sleep(int(args.interval))
        except KeyboardInterrupt:
            exiting = 1
            # A second Ctrl-C must not abort the final report.
            signal.signal(signal.SIGINT, signal.SIG_IGN)

        if args.ts:
            print(strftime("%H:%M:%S"))

        if args.devices:
            # Group by device number, not by name, so devices without a
            # known name are not merged into a single "?" histogram.
            per_disk = {}
            for key, value in dist.items():
                if value.value:
                    per_disk.setdefault(key.dev, {})[key.slot] = value.value
            for dev in sorted(per_disk):
                counts = per_disk[dev]
                print("disk = ", disk_print(dev))
                print_linear_hist(counts, max(counts.values()))
        else:
            counts = {key.value: value.value
                      for key, value in dist.items() if value.value}
            print_linear_hist(counts, max(counts.values(), default=0))
        # Start the next interval from zero so each report is a
        # per-interval summary rather than a running total.
        dist.clear()
        print()
        print()

        countdown -= 1
        if exiting or countdown == 0:
            return


if __name__ == '__main__':
    main()
You can find the definition +about the hint in include/linux/rw_hint.h + +In the following example, the option -t is used to print timestamps with the +output, and to print 1 second summaries 5 times: + +#python3 biohint.py -t 1 5 +the program is being configured! + + 0: NOT_SET + 1: NONE + 2: SHORT + 3: MEDIUM + 4: LONG + 5: EXTREME + +configure complete! the program is running! +15:14:04 + hint : count distribution + 2 : 112977 |*************************************** | + 3 : 115540 |****************************************| + 4 : 114868 |*************************************** | + 5 : 115111 |*************************************** | +15:14:05 + hint : count distribution + 2 : 114007 |*************************************** | + 3 : 114716 |*************************************** | + 4 : 114291 |*************************************** | + 5 : 115320 |****************************************| +15:14:06 + hint : count distribution + 2 : 114235 |*************************************** | + 3 : 115257 |*************************************** | + 4 : 114750 |*************************************** | + 5 : 115523 |****************************************| +15:14:07 + hint : count distribution + 2 : 114082 |*************************************** | + 3 : 115068 |*************************************** | + 4 : 114739 |*************************************** | + 5 : 115638 |****************************************| +15:14:08 + hint : count distribution + 2 : 114122 |*************************************** | + 3 : 115307 |*************************************** | + 4 : 114738 |*************************************** | + 5 : 115752 |****************************************| + +How the hint count distribution changes over time can be seen. + +The -s option will print a histogram per disk. Eg: + +#python3 biohint.py -s +the program is being configured! + + 0: NOT_SET + 1: NONE + 2: SHORT + 3: MEDIUM + 4: LONG + 5: EXTREME + +configure complete! the program is running! 
+^C +disk = nvme0n1 + hint : count distribution + 0 : 10 | | + 2 : 1557181 |*************************************** | + 3 : 1562215 |****************************************| + 4 : 1547968 |*************************************** | + 5 : 1560136 |*************************************** | + +disk = sda3 + hint : count distribution + 0 : 16 | | + 1 : 921 | | + 2 : 285812 |****************************************| + 3 : 285691 |*************************************** | + 4 : 285417 |*************************************** | + 5 : 285790 |*************************************** | + +disk = dm-0 + hint : count distribution + 0 : 13 | | + 1 : 921 | | + 2 : 285812 |****************************************| + 3 : 285691 |*************************************** | + 4 : 285417 |*************************************** | + 5 : 285790 |*************************************** | + + +The -d option traces only the disk that is mounted on the given directory. +For example: +#python3 biohint.py -d /mnt/fdp +the program is being configured! + + 0: NOT_SET + 1: NONE + 2: SHORT + 3: MEDIUM + 4: LONG + 5: EXTREME + +configure complete! the program is running! +^C hint : count distribution + 0 : 308 | | + 2 : 684855 |*************************************** | + 3 : 682569 |*************************************** | + 4 : 689318 |*************************************** | + 5 : 690660 |****************************************| + + +#python3 biohint.py -h +usage: biohint.py [-h] [-t] [-s] [-d <dir>] [interval] [count] + +Summarize write hint in block of FDP SSDs + +positional arguments: + interval Specify the amount of time in seconds between each report + count Limit the number of report, the default is .... + +options: + -h, --help Show this help + -t, --ts Print histogram with timestamp + -s, --devices Print histograms of every device separately + -d <dir>, --dir Trace the device which has been mounted on specific directory + +examples: + ./biohint # Summarize block I/O hint in histogram.
+ ./biohint 1 10 # Print 10 reports at 1 second intervals. + ./biohint -t 1 # Print summary with timestamp at 1 second intervals. + ./biohint -s # Show histograms of every device separately. + ./biohint -d /mnt/data # Trace the device which has been mounted on '/mnt/data'.