#!/usr/bin/env python3
import gzip
import argparse
from os import stat_result
from stat import S_ISREG, S_ISLNK, S_ISDIR, S_ISBLK, S_ISCHR, S_ISFIFO, S_ISSOCK
from typing import TextIO
from pathlib import Path
from urllib.parse import quote
# Shown by argparse as the --help description of this script.
DESCRIPTION = """
QDirStat can read information about disk usage from cache files instead of
looking at a live file system. This allows you to easily browse disk usage
on servers and other systems where QDirStat and a GUI environment cannot be
installed on directly. This script generates cache files which you can open
in QDirStat -> "File" -> "Read Cache File...".
This is a Python rewrite of a similar Perl script, qdirstat-cache-writer.
It depends only on a standard Python 3.6+ installation, available on most
modern Linux distributions out-of-the-box, no extra deps needed. It also
does gzip compression on-the-fly instead of compressing the final file,
but can't generate the long format.
"""
# Written verbatim (stripped) at the top of every generated cache file;
# the bracketed magic line is what QDirStat uses to recognize the format.
CACHE_HEADER = """
[qdirstat 2.0 cache file]
# Automatically generated by qdirstat-generate-cache.py - do not edit
# type path size uid gid mode mtime <optional fields>
"""
def parse_args() -> argparse.Namespace:
    """Build the command-line parser and return the parsed arguments."""
    arg_parser = argparse.ArgumentParser(description=DESCRIPTION)
    arg_parser.add_argument(
        "path", help="Path from which to start collecting disk usage data."
    )
    arg_parser.add_argument(
        "--cross-mounts",
        action="store_true",
        help="Collect data about files under other mountpoints.",
    )
    arg_parser.add_argument(
        "-o",
        "--output",
        default="qdirstat.cache.gz",
        help="Name of the file to write the cache to.",
    )
    return arg_parser.parse_args()
def get_prefix_from_mode(mode: int) -> str:
    """Map a raw ``st_mode`` value to the entry-type tag QDirStat expects."""
    # Checked in the same order as the original if/elif chain; the S_IS*
    # predicates are mutually exclusive, so order only matters cosmetically.
    type_tags = (
        (S_ISREG, "F"),
        (S_ISLNK, "L"),
        (S_ISDIR, "D"),
        (S_ISBLK, "BlockDev"),
        (S_ISCHR, "CharDev"),
        (S_ISFIFO, "FIFO"),
        (S_ISSOCK, "Socket"),
    )
    for predicate, tag in type_tags:
        if predicate(mode):
            return tag
    # Fall-through: emit a comment line so QDirStat ignores the entry.
    return "# UNKNOWN!"
def generate_path_info(path: str, stat: stat_result) -> str:
    """Generate a single text info line on a specific entry."""
    optional = ""
    # Report allocated blocks only for sparse files (fewer 512-byte blocks
    # allocated than the nominal size would need).
    if stat.st_blocks > 0 and stat.st_blocks * 512 < stat.st_size:
        optional += "\tblocks:%d" % stat.st_blocks
    if stat.st_nlink > 1:
        optional += "\tlinks:%d" % stat.st_nlink
    # Long time no see, %-format! This is ~10% faster when CPU-bound
    # than str.format/f-strings and works on all Python 3 versions.
    return "%s\t%s\t%d\t%d\t%d\t%04o\t%d%s\n" % (
        get_prefix_from_mode(stat.st_mode),
        quote(path),
        stat.st_size,
        stat.st_uid,
        stat.st_gid,
        stat.st_mode & 0o7777,
        int(stat.st_mtime),
        optional,
    )
def process_dir(dir: Path, output: TextIO, cross_mounts: bool) -> None:
    """Recursively generate the cache for a given directory.

    File entries are written first and subdirectories are queued, so each
    directory's section stays contiguous in the cache file. Unreadable
    entries are recorded as comment lines instead of aborting the walk.
    """
    dirs = []
    try:
        for child in dir.iterdir():
            try:
                # Why not pathlib instead? It does not always cache
                # the lstat call and makes the program ~25% slower.
                stat = child.lstat()
            except OSError:
                # Trailing newline is essential: without it the next entry
                # would be fused onto this comment line and silently lost.
                output.write("# lstat failed: " + str(child.absolute()) + "\n")
                continue
            if S_ISDIR(stat.st_mode) and not S_ISLNK(stat.st_mode):
                dirs.append((child, stat))  # Handle dirs only after all the files
            else:
                output.write(generate_path_info(child.name, stat))
    except OSError:
        output.write("# iterdir failed: " + str(dir.resolve()) + "\n")
    for subdir, stat in dirs:
        # The leading "\n" separates directory sections for readability.
        output.write("\n" + generate_path_info(str(subdir.absolute()), stat))
        if subdir.is_mount() and not cross_mounts:
            output.write("# Not crossing mountpoint: " + str(subdir.resolve()) + "\n")
        else:
            process_dir(subdir, output, cross_mounts)
def process_tree(output_filename: str, path: str, cross_mounts: bool) -> None:
    """Write a complete QDirStat cache for *path* to *output_filename*.

    A ``.gz`` suffix on the filename selects on-the-fly gzip compression.
    """
    opener = gzip.open if output_filename.endswith(".gz") else open
    # A context manager guarantees the stream is flushed and closed; for
    # gzip output this matters most, since an unclosed stream can leave
    # the compressed trailer unwritten and the file unreadable.
    with opener(output_filename, "wt") as output:
        output.write(CACHE_HEADER.strip())
        root = Path(path).resolve()
        output.write("\n" + generate_path_info(str(root), root.lstat()))
        process_dir(root, output, cross_mounts)
if __name__ == "__main__":
    # Script entry point: parse CLI options, then walk the tree and
    # write the cache file.
    args = parse_args()
    process_tree(args.output, args.path, args.cross_mounts)