-
Notifications
You must be signed in to change notification settings - Fork 1
/
dir_checksum.sh
executable file
·220 lines (183 loc) · 5.78 KB
/
dir_checksum.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
#!/bin/bash
#
# Directory checksum script
#
# It computes the md5sum of every file in a directory
# (recursively) and saves the result to a checksum file
# inside that directory. Subsequent runs on the same
# directory compare the previous checksum file with the
# current one and report missed/added/modified files.
#
# =======================================
#
# Author: Mark Kuo ([email protected])
# Date: 2013.3.20
#
#
#
# Original Requirements:
#
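# 可以把所有的hash data都放在一個文字檔案裡面
# 在檔案名稱永遠相同的前提之下
# 程式提示的訊息基本上只需要
# (English: all hash data may be kept in a single text file;
# assuming file names never change, the program only needs
# to report the following states:)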
# 1 match
# 2 modified
# 3 disappeared
#
#
# Dependent commands:
# md5sum/md5, diff, comm, xargs, find, sort
# grep, sed, rm, mv, wc, cut, uniq
#
# Optional commands:
# pv
#
# Notes:
# only tested under Ubuntu 12.04
#
# GLOBAL CONFIG
# ==============
# checksum filename (old checksum will have same filename with .old suffix)
CHECKSUM_NAME=".dir_checksum"
# number of parallel process
PARALLEL_COUNT=2
# determine OS
PLATFORM=`uname -s` #'Darwin' for mac, 'Linux' for linux
# md5 program
MD5SUM="md5sum"
# sort program
SORT="sort"
CUT_FIELD=3
# platform specific
if [[ $PLATFORM == 'Linux' ]]; then
PARALLEL_COUNT=`grep -c ^processor /proc/cpuinfo`
MD5SUM="md5sum"
SORT="sort --parallel=$PARALLEL_COUNT"
elif [[ $PLATFORM == 'Darwin' ]]; then
PARALLEL_COUNT=`sysctl hw.ncpu | cut -d: -f2`
MD5SUM="md5 -r"
CUT_FIELD=2
fi
# === create checksum ===
# Hash every regular file under a directory (symbolic links are followed)
# and write the resulting list, sorted by file name, to a checksum file.
#
# Globals (read):
#   CHECKSUM_NAME   checksum file name; it and its .old copy are not hashed
#   PARALLEL_COUNT  number of hashing processes xargs runs at once
#   MD5SUM          hashing command, possibly with options
#   SORT            sort command, possibly with options
# Arguments:
#   $1 target dir
#   $2 target checksum filename
# Outputs:
#   progress messages on stdout, a warning on stderr for partial failures
# Returns:
#   0 when the checksum file was written; exits the script with status 1
#   when the hash program cannot be run or the list cannot be sorted/saved
function create_checksum()
{
  local path=$1
  local checksum=$2
  local count workers
  local -a find_args pv_cmd stage_status

  # One definition of "files to hash", shared by the count and by the real
  # run so that the progress total matches what is actually processed.
  find_args=(
    -L "$path"
    ! -name "$CHECKSUM_NAME" ! -name "$CHECKSUM_NAME.old" ! -name .DS_Store
    -type f
  )

  echo "Count files..."
  count=$(find "${find_args[@]}" | wc -l)
  count=${count//[[:space:]]/} # some wc implementations pad the number
  echo "$count files found"

  # check pv existence
  pv_cmd=(pv -cN MD5SUM --line-mode -s "$count")
  if ! type pv > /dev/null 2>&1; then
    echo "'pv' not installed. Progress bar disabled"
    pv_cmd=(cat) #bypassing
  fi

  # Tolerate padding in the configured count; fall back to one process
  # when it is not a number at all.
  workers=${PARALLEL_COUNT//[[:space:]]/}
  [[ $workers =~ ^[0-9]+$ ]] || workers=1

  echo "Computing checksum..."
  # the long pipeline of 'find | xargs md5sum | pv | sort'
  # MD5SUM and SORT are left unquoted on purpose: they hold command + options.
  # shellcheck disable=SC2086
  find "${find_args[@]}" -print0 |                #find every file under $path (follow symbolic links)
    xargs -0 -n 1 -P "$workers" $MD5SUM |         #create md5sum in parallel
    "${pv_cmd[@]}" |                              #showing nice progress bar using pv
    $SORT -k 2 > "$checksum"                      #should sort or diff will fail badly
  # $? would only describe the last stage; keep every stage's status instead.
  # Index: 0=find 1=xargs 2=pv/cat 3=sort (including the redirection).
  stage_status=("${PIPESTATUS[@]}")

  # xargs uses 126/127 when the hash program cannot be executed/found.
  if (( stage_status[1] == 126 || stage_status[1] == 127 || stage_status[2] != 0 || stage_status[3] != 0 )); then
    echo "Checksum creation failed. Exiting.."
    exit 1
  fi
  # Other find/xargs failures concern individual entries (e.g. an unreadable
  # file); the remaining list is still written, so only warn.
  if (( stage_status[0] != 0 || stage_status[1] != 0 )); then
    echo "Warning: some files could not be processed; checksum list may be incomplete" >&2
  fi
  echo "Done. Checksum file written to $checksum"
  echo
}
# === compare checksum ===
# Diff two checksum lists and print which files were modified, missed
# (present only in the old list) or added (present only in the new list).
#
# Globals (read):
#   CHECKSUM_NAME  prefix of the temporary work files
#   CUT_FIELD      first space-separated field that belongs to the file name
#   SORT           sort command, possibly with options
# Arguments:
#   $1 target dir (temporary work files are created and removed here)
#   $2 old checksum file
#   $3 new checksum file
# Outputs:
#   the report on stdout
# Returns:
#   0 after printing the report; exits the script with status 1 when diff
#   or the filtering step fails
function compare_checksum()
{
  local path=$1
  local old=$2
  local new=$3
  # temporary work files
  local diff_file="$path/${CHECKSUM_NAME}.diff"
  local miss_file="$diff_file.miss"
  local new_file="$diff_file.new"
  local changes
  local -a stage_status

  #echo "comparing $old and $new..."
  diff --suppress-common-lines --unified=0 "$old" "$new" |  #diff
    sed '/^@/d;/^---/d;/^+++/d' > "$diff_file"              #remove other info
  # diff exits 1 when the inputs differ (expected here) and >1 on trouble;
  # $? alone would only report sed, so look at each stage.
  stage_status=("${PIPESTATUS[@]}")
  if (( stage_status[0] > 1 || stage_status[1] != 0 )); then
    rm -f -- "$diff_file"
    echo "Error running diff. Exiting.."
    exit 1
  fi

  # example output here:
  # -0dea76f1d4581b591409bffe8fe6f722 ../tmp/test_enum/main.c
  # +330a71bf82c38415860d19490cec2648 ../tmp/test_enum/main.c
  # -d41d8cd98f00b204e9800998ecf8427e ../tmp/test_enum/test1
  # +d41d8cd98f00b204e9800998ecf8427e ../tmp/test_enum/test3
  # example result:
  # modified: main.c
  # missed: test1
  # added: test3

  # SORT is left unquoted on purpose: it holds command + options.
  # shellcheck disable=SC2086
  changes=$(cut -d' ' -f"$CUT_FIELD"- "$diff_file" | $SORT | uniq | wc -l)
  changes=${changes//[[:space:]]/} # some wc implementations pad the number

  # grep - and + respectively into 2 sets (miss and new)
  # shellcheck disable=SC2086
  sed -n '/^-/p' "$diff_file" | cut -d' ' -f"$CUT_FIELD"- | $SORT > "$miss_file"
  # shellcheck disable=SC2086
  sed -n '/^+/p' "$diff_file" | cut -d' ' -f"$CUT_FIELD"- | $SORT > "$new_file"

  echo "=== Report ==="
  echo "File changed: $changes"
  echo
  echo "Modified:" # the intersection
  comm -12 "$miss_file" "$new_file" | sed '/^$/d'
  echo "--------------"
  # Let comm select the column itself: cutting at the first tab afterwards
  # would truncate any file name that contains a tab.
  echo "Missed:" #in miss but not in new
  comm -23 "$miss_file" "$new_file" | sed '/^$/d'
  echo "--------------"
  echo "Added:" #in new but not in miss
  comm -13 "$miss_file" "$new_file" | sed '/^$/d'
  echo "--------------"

  # clean up tmp files
  rm -f -- "$diff_file" "$miss_file" "$new_file"
}
# === usage ===
# Print the command-line synopsis on stdout and terminate the script.
# Arguments: none
# Returns:   never returns; exits with status 65 (bad arguments)
function usage()
{
  local -r bad_args_status=65
  printf '%s\n' \
    "Usage: $0 [directory]" \
    " directory: the directory to check (default: current directory)"
  exit "$bad_args_status"
}
# === main ===
# Flow: validate arguments -> set aside an existing checksum file as .old
# -> create a fresh checksum -> compare with the .old one when present.

# check arguments: at most one (the directory to check)
if [ $# -gt 1 ]; then
  echo "Wrong arguments"
  usage
fi

# default: current working directory
dir=${1:-$(pwd)}
# The checksum file is stored inside the target, so the target has to be a
# directory; a plain existence test would also accept a regular file.
if [ ! -d "$dir" ]; then
  echo "$dir doesn't exist or is not a directory. Exiting.."
  exit 1
fi

echo "Platform: $PLATFORM"
echo "Target directory: $dir"
echo "Parallel process: $PARALLEL_COUNT"

# check if checksum already exist
checksum_path="$dir/$CHECKSUM_NAME"
if [ -e "$checksum_path" ]; then
  # Without a successful rename the previous checksum would be overwritten
  # and the comparison below would use a stale (or no) .old file.
  if ! mv "$checksum_path" "$checksum_path.old"; then
    echo "Cannot rename old checksum to $checksum_path.old. Exiting.."
    exit 1
  fi
  echo "Old checksum exists. Renamed: $checksum_path.old"
fi

# create_checksum
create_checksum "$dir" "$checksum_path"

# see if we need to compare
if [ -e "$checksum_path.old" ]; then
  compare_checksum "$dir" "$checksum_path.old" "$checksum_path"
  # keep old copy for reference?
  #rm $checksum_path.old
fi

# progress example: (may work only in linux)
#echo -ne '##### (33%)\r'; sleep 1
#echo -ne '############# (66%)\r'; sleep 1
#echo -ne '####################### (100%)\r'
#echo -ne '\n'
exit