-
Notifications
You must be signed in to change notification settings - Fork 9
/
docker_dedup.sh
executable file
·90 lines (79 loc) · 3.88 KB
/
docker_dedup.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/bin/bash
#
# docker_dedup.sh - replace duplicate regular files with hard links.
#
# Usage: [min_size=BYTES] docker_dedup.sh [LIST_FILE...]
#   Arguments are handed to `cat`; every line read is used as a search root
#   for `find ... -type f`. Because the script changes into its own directory
#   first, relative paths (arguments and list entries alike) are resolved from
#   the script's directory, not from the caller's working directory.

# Abort as soon as any simple command fails.
set -o errexit

# Work from the directory that holds this script, so that helpers referenced
# by relative path (./inside_container.sh) can be found.
cd "$(dirname "$0")"
# Verify that the external tools this script relies on are available on PATH.
# The probe uses the `command -v` shell builtin instead of the external
# `which` binary: minimal container images frequently do not ship `which`,
# and when it is absent a `which`-based probe fails for every tool and
# misreports the first one ("find") as missing.
for cmd in find grep sed xargs; do
  if ! command -v "$cmd" >/dev/null 2>&1; then
    printf '\033[31m[ERROR] Missing command "%s".\033[0m\n' "$cmd" >&2
    exit 1
  fi
done
# Safety guard: this script rewrites files in place and is dangerous, so it
# refuses to run on any kernel other than Linux.
kernel_name="$(uname -s)"
case "_$kernel_name" in
  _Linux) ;;
  *)
    printf '\033[31m[ERROR] Unexpected OS "%s".\033[0m\n' "$kernel_name" >&2
    exit 1
    ;;
esac
# Safety guard: stop unless the sibling helper ./inside_container.sh exits
# successfully. (Presumably it detects whether we are inside a container --
# its implementation is not part of this file; confirm there.)
./inside_container.sh || {
  printf '\033[31m[ERROR] Only run within container or it will damage the system.\033[0m\n' >&2
  exit 1
}
# Empty/tiny files are more likely to be false positives, so they are never
# deduplicated. The size threshold (compared against `stat -c %s`) can be
# supplied by the caller through the `min_size` variable; when it is unset or
# empty it falls back to 4096.
: "${min_size:=4096}"
# Scratch directory that will hold one entry per content checksum.
chksum_dir="$(mktemp -dt 'inode-dedup-XXXXXXXX')"

# Abort handler, installed for SIGINT, SIGTERM and EXIT. The directory path and
# the shell PID are baked into the handler text now, at definition time. When
# it fires it restores default SIGTERM handling, deletes the scratch directory
# and then signals the process group whose id equals this shell's PID.
abort_handler="trap - SIGTERM; rm -rf \"${chksum_dir}\"; kill -- -'$$'"
trap "$abort_handler" SIGINT SIGTERM EXIT
# Main dedup pipeline (a single statement; comments cannot be placed between
# its continuation lines, so every stage is described here).
#
# Stage 1 - enumerate candidates:
#   `cat "$@"` reads the files named on the command line (stdin when there are
#   none). Each unique line is substituted for `{}` in `find {} -type f`, run
#   through GNU parallel when it is installed and through `xargs -rI{}`
#   otherwise. The resulting list of regular files is de-duplicated again.
#
# Stage 2 - compute a key per file:
#   For every path, a `bash -c` one-liner (built from the first quoted template
#   by stripping leading whitespace, dropping blank lines and joining the rest
#   with spaces) checks `stat -c %s` against $min_size. Files that are large
#   enough produce one line of the form
#       <chksum_dir>/<sha512>  VT  <path>
#   where VT is a vertical tab and the SHA-512 covers the device number
#   (`stat -c %D`) followed by the file content. Including the device number
#   keeps files on different devices from sharing a key (presumably because
#   hard links cannot span devices).
#
# Stage 3 - generate and run the link commands:
#   `sed -n 's/KEY VT PATH/<template>/p'` turns each key/path line into a
#   one-line shell snippet. The template is prepared like the one above and
#   additionally has sed-special characters backslash-escaped, after which the
#   \1..\9 back-references are restored. `set -e` is prepended and the whole
#   stream is executed by one `bash`, so snippets run in order:
#     * key path does not resolve to an existing file: create the symlink
#       <key> -> <path> (`ln -fns`), recording the first file seen with that
#       content;
#     * key resolves, but device / uid:gid / mode (`stat -Lc "%D %u:%g %04a"`)
#       differ between the recorded file and this one: print a warning plus
#       both `stat` reports and leave the file untouched;
#     * key resolves, attributes match, inode differs: log the action and
#       replace <path> with a hard link to the symlink's target (`ln -Lfn`);
#     * same inode already: nothing to do.
#
# NOTE(review): paths are spliced verbatim between single quotes into generated
# shell code (src='{}' and src='\2' below). A path containing a single quote,
# a newline or a vertical tab looks like it would corrupt the generated
# script -- confirm that inputs are trusted.
# NOTE(review): only `set -e` is visible in this file (no pipefail), so a
# failure in an early stage would apparently not abort the run; only the
# status of the final `bash` counts -- confirm this is intended.
cat "$@" \
| sort -u \
| $(which parallel >/dev/null 2>&1 && echo "parallel -j$(nproc) -kmq" || echo 'xargs -rI{}') \
find {} -type f \
| sort -u \
| $(which parallel >/dev/null 2>&1 && echo "parallel -j$(nproc) -kq" || echo 'xargs -rI{}') \
bash -c "$(printf '%s' '
set -e;
src='"'"'{}'"'"';
chksum_dir='"'$chksum_dir'"';
min_size='"'$min_size'"';
if [ "$(stat -c "%s" "$src")" -ge "$min_size" ]; then
printf '"'"'%s/%s\v%s\n'"'"'
"$chksum_dir"
"$(stat -c "%D" "$src"
| cat - "$src"
| sha512sum
| cut -d" " -f1
)"
"$src";
fi;
' \
| sed 's/^[[:space:]]*//' \
| grep '.' \
| paste -sd' ' -
)" \
| grep '.' \
| sed -n 's/\(.*\)'"$(printf '\v')"'\(.*\)/'"$(
printf '%s' '
chksum='"'"'\1'"'"';
src='"'"'\2'"'"';
if [ -e "$chksum" ]; then
if [ "_$(stat -Lc "%D %u:%g %04a" "$chksum")" != "_$(stat -Lc "%D %u:%g %04a" "$src")" ]; then
printf '"'"'\033[33m[WARNING] Dedup of "%s" skipped due to mismatched permission.\033[0m\n'"'"' "$src";
stat "$(realpath -e "$chksum")" "$src"
| sed "s/^/$(printf "\033[33m[WARNING] ")/"
| sed "s/\$/$(printf "\033[0m")/";
elif [ "_$(stat -Lc "%i" "$chksum")" != "_$(stat -Lc "%i" "$src")" ]; then
printf '"'"'\033[36m[INFO] Dedup file "%s" -> "%s".\033[0m\n'"'"' "$src" "$(realpath -e "$chksum")";
ln -Lfn "$chksum" "$src";
fi;
else
ln -fns "$src" "$chksum";
fi;
' \
| sed 's/\([]\[\\\/\&\|\^\$\.\-]\)/\\\1/g' \
| sed 's/\\\(\\[1-9]\)/\1/g' \
| sed 's/^[[:space:]]*//' \
| grep '.' \
| paste -sd' ' -
)"'/p' \
| cat <(printf 'set -e\n') - \
| bash
# Normal completion path: delete the scratch directory first, then clear the
# handlers on all three conditions (so leaving the script no longer runs the
# abort handler), and finally report success on stderr.
rm -rf "${chksum_dir}"
trap - EXIT SIGINT SIGTERM
printf '\033[32m[INFO] File dedup completed.\033[0m\n' >&2