-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathdedupe.py
More file actions
52 lines (42 loc) · 1.67 KB
/
dedupe.py
File metadata and controls
52 lines (42 loc) · 1.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import os
import pickle
import shlex
import shutil
import subprocess
# LICENSE: WTFPL http://sam.zoy.org/wtfpl/
# DESCRIPTION
# dedupe.py lets you dedupe files (usually images) that are exactly identical.
# I wrote this script to dedupe my WhatsApp images (since I send the same pictures
# to many people in WhatsApp, it stores duplicate copies of the images). Running
# this script deletes the duplicates, leaving one copy of each image.
#### configure the following parameters ####
# Path to the adb executable shipped in the Android SDK platform-tools.
adb = "/Users/vigneshv/Downloads/android-sdk-macosx/platform-tools/adb"
# Local scratch directory that files pulled from the phone are stored in.
tmpdir = "/tmp/whatsapp_pics"
# Directory on the phone that holds the image files. The space is
# backslash-escaped, shell style. The backslash is spelled "\\" so the literal
# contains no invalid "\ " escape sequence (a SyntaxWarning on Python 3.12+);
# the resulting string value is exactly the same as before.
imgdir = "/sdcard/Whatsapp/Media/Whatsapp\\ Images"
# Command used to checksum a pulled file; it is invoked with the local file
# path appended.
checksumcmd = "shasum5.12"
#### end of configuration parameters ####
def shell_quote(text):
    """Return *text* wrapped in single quotes for a POSIX shell.

    Used to build the command strings handed to ``adb shell``, which are
    parsed by the shell on the device, so that names containing spaces or
    shell metacharacters are treated as a single literal argument.
    """
    return "'" + text.replace("'", "'\\''") + "'"


def unescape_shell_path(path):
    """Return *path* with shell-style escaping (backslash-space etc.) removed.

    The configured ``imgdir`` is written with shell escaping. Commands are
    run here without a local shell, so the escaping is undone once and the
    path is re-quoted only where a shell is really involved.

    A value that does not parse as exactly one shell word (an unescaped
    space, an unbalanced quote) is returned unchanged.
    """
    try:
        words = shlex.split(path)
    except ValueError:
        return path
    if len(words) == 1:
        return words[0]
    return path


def parse_listing(output):
    """Return the non-empty, whitespace-stripped lines of an ``ls`` output."""
    names = (line.strip() for line in output.splitlines())
    return [name for name in names if name]


def parse_checksum(output):
    """Return the first field of the checksum tool's output, or None.

    None is returned when the tool printed nothing, so that a failed
    checksum can never be mistaken for a real digest.
    """
    fields = output.split()
    return fields[0] if fields else None


def find_duplicates(named_checksums):
    """Return the names whose checksum was already seen on an earlier name.

    *named_checksums* is an iterable of ``(name, checksum)`` pairs. The first
    name carrying each checksum is kept; later ones are reported, in input
    order. Pairs whose checksum is None (it could not be computed) are
    ignored: an unknown checksum is never treated as a duplicate.
    """
    seen = set()
    duplicates = []
    for name, checksum in named_checksums:
        if checksum is None:
            continue
        if checksum in seen:
            duplicates.append(name)
        else:
            seen.add(checksum)
    return duplicates


def capture(args):
    """Run the command *args* (a list) and return ``(exit status, stdout text)``."""
    proc = subprocess.Popen(args, stdout=subprocess.PIPE, universal_newlines=True)
    output = proc.communicate()[0]
    return proc.returncode, output


def checksum_of(local_path):
    """Return the checksum ``checksumcmd`` reports for *local_path*, or None.

    A missing checksum executable raises OSError instead of being ignored:
    continuing without checksums could only lead to wrong deletions.
    """
    status, output = capture(shlex.split(checksumcmd) + [local_path])
    if status != 0:
        return None
    return parse_checksum(output)


def main():
    """Pull every file in ``imgdir``, checksum it, and delete duplicates on the phone.

    Returns 0 on success and 1 when the remote directory could not be listed.
    Files that cannot be pulled or checksummed are skipped and never deleted.
    """
    remote_dir = unescape_shell_path(imgdir)

    # create temp directory; one that already exists (e.g. left over from an
    # interrupted run) is reused and left in place afterwards
    created_tmpdir = not os.path.isdir(tmpdir)
    if created_tmpdir:
        os.makedirs(tmpdir)

    try:
        # get files list
        status, listing = capture([adb, "shell", "ls " + shell_quote(remote_dir)])
        if status != 0:
            print("Could not list %s (adb exit status %s)" % (remote_dir, status))
            return 1

        # pull all files and compute checksum
        print("Pulling files!")
        named_checksums = []
        for name in parse_listing(listing):
            print("Pulling file %s" % name)
            local_path = os.path.join(tmpdir, name)
            pulled = subprocess.call([adb, "pull", remote_dir + "/" + name, local_path]) == 0
            # Sub-directories and failed pulls have no meaningful checksum.
            if not (pulled and os.path.isfile(local_path)):
                print("skipping %s (could not pull a regular file)" % name)
                continue
            named_checksums.append((name, checksum_of(local_path)))

        # remove duplicate files having same checksum leaving only one copy
        for name in find_duplicates(named_checksums):
            print("removing %s" % name)
            subprocess.call([adb, "shell", "rm " + shell_quote(remote_dir + "/" + name)])
    finally:
        # remove temp directory, even when something above failed
        if created_tmpdir:
            shutil.rmtree(tmpdir, ignore_errors=True)

    print("done!")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())