-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathdup_del.py
More file actions
27 lines (25 loc) · 933 Bytes
/
dup_del.py
File metadata and controls
27 lines (25 loc) · 933 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import sys
import os
import hashlib
def chunk_reader(fobj, chunk_size=1024):
    """Yield successive pieces of *fobj* until a read returns nothing.

    Each piece is whatever ``fobj.read(chunk_size)`` returns; iteration
    stops at the first falsy (empty) result, which is not yielded.
    """
    piece = fobj.read(chunk_size)
    while piece:
        yield piece
        piece = fobj.read(chunk_size)
def check_for_duplicates(paths, hash=hashlib.sha1):
    """Delete files under *paths* whose content duplicates an earlier file.

    Every entry of *paths* is walked recursively with ``os.walk``.  A file
    is identified by the pair (digest of its content, size in bytes).  The
    first file seen with a given identity is kept; each later file with the
    same identity is reported on stdout and removed with ``os.remove``.

    Args:
        paths: Iterable of directory paths to scan.
        hash: Zero-argument callable returning an object with ``update``
            and ``digest`` methods; defaults to ``hashlib.sha1``.

    Errors raised while opening, reading, sizing or removing a file are
    not caught here and propagate to the caller.
    """
    hashes = {}
    # Resolved paths that have already been examined.  Without this, a file
    # reached twice (repeated or overlapping roots in *paths*, or a symlink
    # pointing at an already-seen file) would match its own hash and be
    # deleted even though it is the only copy.
    visited = set()
    for path in paths:
        for dirpath, _dirnames, filenames in os.walk(path):
            for filename in filenames:
                full_path = os.path.join(dirpath, filename)
                real_path = os.path.realpath(full_path)
                if real_path in visited:
                    continue
                visited.add(real_path)

                hashobj = hash()
                # Close the handle before any os.remove below; the original
                # left it to the garbage collector.
                with open(full_path, 'rb') as fobj:
                    for chunk in chunk_reader(fobj):
                        hashobj.update(chunk)

                file_id = (hashobj.digest(), os.path.getsize(full_path))
                duplicate = hashes.get(file_id)
                if duplicate is not None:
                    print("Duplicate found: %s and %s" % (full_path, duplicate))
                    os.remove(full_path)
                else:
                    hashes[file_id] = full_path