forked from jokkebk/fileson
-
Notifications
You must be signed in to change notification settings - Fork 1
/
fileson_backup.py
253 lines (224 loc) · 10.5 KB
/
fileson_backup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
#!/usr/bin/env python3
from collections import defaultdict, namedtuple
from fileson import Fileson, gmt_str, gmt_epoch
from logdict import LogDict
from crypt import keygen as kg, AESFile, sha1, calc_etag
import argparse, os, sys, json, signal, time, hashlib, inspect, shutil, re
import boto3, threading
class BotoProgress(object):
    """Thread-safe boto3 transfer callback printing cumulative progress.

    An instance is passed as ``Callback`` to boto3 transfer methods, which
    invoke it from worker threads with the number of bytes just moved.
    """
    def __init__(self, ptype):
        self._seen = 0   # total bytes transferred so far
        self._last = 0   # byte count at the time of the previous print
        self._type = ptype  # label for the message, e.g. 'upload'/'download'
        self._lock = threading.Lock()
    def __call__(self, bytes_amount):
        """Accumulate bytes_amount; report at most once per MiB of progress."""
        with self._lock:
            self._seen += bytes_amount
            # Throttle output: stay silent until a full MiB has accumulated.
            if self._seen < self._last + 2**20:
                return
            sys.stdout.write("\r%.2f MB %sed" % (self._seen / 2**20, self._type))
            sys.stdout.flush()
            self._last = self._seen
class S3Action(argparse.Action):
    """argparse action parsing 's3://bucket/objpath' into (bucket, objpath).

    Fixes: use a raw-string regex (the original non-raw '\\w' triggers an
    invalid-escape DeprecationWarning) and accept any character except '/'
    in the bucket name — legal S3 bucket names may contain hyphens and
    dots, which '\\w+' rejected.

    Raises:
        ValueError: if the value is not of the form s3://bucket/objpath.
    """
    def __call__(self, parser, namespace, values, option_string=None):
        # Bucket is everything up to the first '/', object path is the rest.
        m = re.match(r's3://([^/]+)/(.+)', values)
        if not m: raise ValueError('S3 address in format s3://bucket/objpath')
        setattr(namespace, self.dest, (m.group(1), m.group(2)))
def key_or_file(key):
    """Resolve a key argument to raw bytes.

    bytes pass through unchanged; a path to an existing file is read and
    all whitespace stripped; any other string is treated as hex directly.
    """
    if isinstance(key, bytes):
        return key
    hexstr = key
    if os.path.exists(key):
        # Keyfile contents may be wrapped/spaced; strip all whitespace.
        with open(key, 'r') as f:
            hexstr = ''.join(f.read().split())
    return bytes.fromhex(hexstr)
# Registry of reusable argparse argument definitions, keyed by name.
# Each command function declares which of these it needs via its .args
# attribute (see the __main__ block), and the matching adders are applied
# to that command's subparser. Every value is a callable taking a
# parser/subparser and adding exactly one argument to it.
arg_adders = {
    'password': lambda p: p.add_argument('password', type=str, nargs='?', help='Password', default=None),
    'salt': lambda p: p.add_argument('salt', type=str, nargs='?', help='Salt', default=None),
    'input': lambda p: p.add_argument('input', type=str, help='Input file'),
    'output': lambda p: p.add_argument('output', type=str, help='Output file'),
    # S3Action parses 's3://bucket/objpath' into a (bucket, objpath) tuple.
    's3path': lambda p: p.add_argument('s3path', type=str, action=S3Action,
        help='S3 path in form s3://bucket/objpath'),
    'deep_archive': lambda p: p.add_argument('-d', '--deep-archive', action='store_true',
        help='Upload to S3 DEEP_ARCHIVE storage class'),
    'in_obj': lambda p: p.add_argument('in_obj', type=str, help='Input file or S3 object name'),
    'out_obj': lambda p: p.add_argument('out_obj', type=str, help='Output file or S3 object name'),
    # 'key' is positional; 'keyfile' is the optional -k flag variant.
    'key': lambda p: p.add_argument('key', type=str,
        help='Key in hex format or filename of the keyfile'),
    'keyfile': lambda p: p.add_argument('-k', '--keyfile', type=str,
        help='Key in hex format or filename of the keyfile'),
    'partsize': lambda p: p.add_argument('-p', '--partsize', type=int,
        default=8, help='Multipart upload partsize (default 8 matching boto3)'),
    'iterations': lambda p: p.add_argument('-i', '--iterations', type=str,
        default='1M', help='PBKDF2 iterations (default 1M)'),
    'dbfile': lambda p: p.add_argument('dbfile', type=str,
        help='Database file (JSON format)'),
    'logfile': lambda p: p.add_argument('logfile', type=str,
        help='Logfile to append all operations to'),
    'source': lambda p: p.add_argument('source', type=str,
        help='Source directory'),
    'destination': lambda p: p.add_argument('destination', type=str,
        help='Destination directory'),
    'dir': lambda p: p.add_argument('dir', nargs='?', type=str, default=None,
        help='Directory to scan'),
    # action='count' lets -v be repeated for increasing verbosity.
    'verbose': lambda p: p.add_argument('-v', '--verbose', action='count',
        default=0, help='Print verbose status. Repeat for even more.'),
    'force': lambda p: p.add_argument('-f', '--force', action='store_true',
        help='Force action without additional prompts'),
}
# Log file objects to be closed on exit or interrupt.
logfiles = []

def close_logs(*args):
    """Close (and drop) every registered log file.

    Accepts and ignores positional arguments so it can be installed
    directly as a signal handler: Python invokes handlers with
    (signum, frame), and the original zero-argument signature raised
    TypeError when SIGINT actually fired.
    """
    while logfiles:
        logfiles.pop().close()
# Function per command
def keygen(args):
    """Create a 32 byte key for AES256 encryption with a password and salt."""
    if not args.password:
        # No password given: emit a purely random 256-bit key instead.
        if args.verbose: print('No password specified, generating random key')
        print(os.urandom(32).hex())
        return
    if not args.salt:
        print('Specify password AND salt or neither!')
        return
    # Expand suffixes: '1M' -> '1000k' -> '1000000'.
    iterations = int(args.iterations.replace('M', '000k').replace('k', '000'))
    start = time.time()
    print(kg(args.password, args.salt, iterations).hex())
    if args.verbose:
        print('Generating that took %.3f seconds' % (time.time() - start))
keygen.args = 'password salt iterations verbose'.split()
def cryptfile(infile, outfile, verbose=False):
    """Copy infile to outfile in 64 KiB chunks, optionally printing throughput.

    Any encryption/decryption happens inside the file objects themselves
    (e.g. an AESFile wrapper); this function only pumps bytes.

    Fix: guard the throughput division — the original raised
    ZeroDivisionError when the copy finished within the timer's resolution
    (secs == 0) and verbose was set.
    """
    start, total = time.time(), 0
    while True:
        data = infile.read(65536)
        if not data:
            break
        outfile.write(data)
        total += len(data)
    secs = time.time() - start
    if verbose:
        rate = total / 2**30 / secs if secs > 0 else float('inf')
        print('%d b in %.1f s, %.2f GiB/s' % (total, secs, rate))
def encrypt(args):
    """Encrypt args.input into args.output with the given key.

    Prompts before overwriting an existing output unless --force is set.
    """
    if not args.force and os.path.exists(args.output) and 'y' not in \
            input('Output exists! Do you wish to overwrite? [y/n] '):
        return
    # AESFile presents the plaintext file as an encrypting reader.
    with AESFile(args.input, 'rb', key_or_file(args.key)) as fin, \
            open(args.output, 'wb') as fout:
        cryptfile(fin, fout, verbose=args.verbose)
encrypt.args = 'input output key verbose force'.split()
def decrypt(args):
    """Decrypt args.input into args.output with the given key.

    Prompts before overwriting an existing output unless --force is set.
    """
    if not args.force and os.path.exists(args.output) and 'y' not in \
            input('Output exists! Do you wish to overwrite? [y/n] '):
        return
    # AESFile wraps the output file as a decrypting writer.
    with open(args.input, 'rb') as fin, \
            AESFile(args.output, 'wb', key_or_file(args.key)) as fout:
        cryptfile(fin, fout, verbose=args.verbose)
decrypt.args = 'input output key verbose force'.split()
def etag(args):
    """Print the S3-style ETag of args.input for the given multipart size."""
    with open(args.input, 'rb') as stream:
        print(calc_etag(stream, args.partsize))
etag.args = 'input partsize'.split()
def upload(args):
    """Upload a local file to S3, optionally AES-encrypting it on the fly.

    args.s3path is a (bucket, objpath) tuple (see S3Action). With
    args.keyfile the source is wrapped in an AESFile encrypting reader;
    args.deep_archive selects the DEEP_ARCHIVE storage class.

    Fix: close the source file object even when the transfer raises
    (the original leaked the handle on any boto3 error).
    """
    bucket, objpath = args.s3path
    s3 = boto3.client('s3')
    # Wrap the source in an encrypting reader when a key was provided.
    if args.keyfile: fp = AESFile(args.input, 'rb', key_or_file(args.keyfile))
    else: fp = open(args.input, 'rb')
    try:
        if args.verbose: print('Upload', args.input, 'to', bucket, objpath)
        extra = {'Callback': BotoProgress('upload')}
        if args.deep_archive: extra['ExtraArgs'] = {'StorageClass': 'DEEP_ARCHIVE'}
        s3.upload_fileobj(fp, bucket, objpath, **extra)
    finally:
        fp.close()
upload.args = 'input s3path keyfile deep_archive verbose'.split()
def download(args):
    """Download an S3 object to a local file, optionally decrypting it.

    args.s3path is a (bucket, objpath) tuple (see S3Action). With
    args.keyfile the destination is wrapped in an AESFile decrypting writer.

    Fix: close the destination file object even when the transfer raises,
    so a failed download does not leak an open handle.
    """
    bucket, objpath = args.s3path
    s3 = boto3.client('s3')
    # Wrap the destination in a decrypting writer when a key was provided.
    if args.keyfile: fp = AESFile(args.output, 'wb', key_or_file(args.keyfile))
    else: fp = open(args.output, 'wb')
    try:
        if args.verbose: print('Download', bucket, objpath, 'to', args.output)
        s3.download_fileobj(bucket, objpath, fp, Callback=BotoProgress('download'))
    finally:
        fp.close()
download.args = 's3path output keyfile verbose'.split()
def backup(args):
    """Perform backup based on latest Fileson DB state.

    Each file is stored under a deterministic pseudorandom name derived
    from the run date and the file's SHA1, and recorded in the backup log
    so identical content is stored only once across runs.

    Fixes: (1) use the same raw-string S3 regex as S3Action, accepting
    bucket names with hyphens/dots (the non-raw '\\w+' pattern rejected
    them and warned about an invalid escape); (2) record newly backed-up
    SHA1s in `uploaded` so duplicate content within a single run is no
    longer uploaded twice.
    """
    fs = Fileson.load_or_scan(args.dbfile, checksum='sha1')
    if fs.get(':checksum:', None) != 'sha1':
        print('Backup only works with full SHA1 hash. Safety first.')
        return
    log = Fileson.load(args.logfile)
    log.startLogging(args.logfile)
    # Record run metadata in the backup log.
    log[':backup:'] = log.get(':backup:', 0) + 1
    log[':dbfile:'] = args.dbfile
    log[':date_gmt:'] = gmt_str()
    log[':destination:'] = args.destination
    if args.keyfile:
        key = key_or_file(args.keyfile)
        log[':keyhash:'] = sha1(key).hex()  # lets restore verify the key
    m = re.match(r's3://([^/]+)/(.+)', args.destination)
    if m:
        # S3 destination: delegate to upload(), encrypting when a key is set.
        bucket, folder = m.group(1), m.group(2)
        myargs = namedtuple('myargs', 'input s3path keyfile deep_archive verbose')
        make_backup = lambda a, b: upload(myargs(a, (bucket, folder + '/' + b),
            key if args.keyfile else None, args.deep_archive, True))
    else:
        # Local destination: encrypt into place, or plain copy without a key.
        if args.keyfile:
            myargs = namedtuple('myargs', 'input output key verbose force')
            make_backup = lambda a, b: encrypt(myargs(a,
                os.path.join(args.destination, b), key, False, True))
        else:
            make_backup = lambda a, b: shutil.copyfile(a,
                os.path.join(args.destination, b))
    # Content already backed up: sha1 -> backup object name.
    uploaded = { log[p]['sha1']: p for p in log.files() }
    seed = log[':date_gmt:'] # for backup filename generation
    for p in fs.files():
        o = fs[p]
        if o['sha1'] in uploaded:
            if args.verbose: print('Already uploaded', p)
            continue
        name = sha1(seed + o['sha1']).hex() # deterministic random name
        print('Backup', p.split(os.sep)[-1], o['sha1'], 'to', name)
        make_backup(os.path.join(fs[':directory:'], p), name)
        log[name] = { 'sha1': o['sha1'], 'size': o['size'] }
        uploaded[o['sha1']] = name  # dedupe identical content within this run
    log.endLogging()
backup.args = 'dbfile logfile destination keyfile deep_archive verbose'.split() # args to add
def restore(args):
    """Restore backup based on Fileson DB and backup log.

    Recreates the directory tree (with modification times) under
    args.destination, then restores each file by looking up its SHA1 in
    the backup log and copying/decrypting the corresponding object from
    the local directory args.source. S3-hosted objects must be downloaded
    to a local directory first.
    """
    fs = Fileson.load(args.dbfile)
    if fs.get(':checksum:', None) != 'sha1':
        print('Cannot restore without SHA1 hash.')
        return
    log = Fileson.load(args.logfile)
    if args.keyfile:
        key = key_or_file(args.keyfile)
        myargs = namedtuple('myargs', 'input output key verbose force')
        # force=True: decrypt() must not prompt for each restored file.
        make_restore = lambda a,b: decrypt(myargs(a, b, key, False, True))
        keyhash = sha1(key).hex()
        # NOTE(review): assumes the log contains ':keyhash:' (i.e. the backup
        # was made with a key); a keyless backup log would raise KeyError here.
        if keyhash != log[':keyhash:']:
            print(f'Provided key hash {keyhash} does not match backup file!')
            return
    else: make_restore = lambda a,b: shutil.copyfile(a, b)
    # Invert the backup log: content sha1 -> backup object name.
    uploaded = { log[p]['sha1']: p for p in log.files() }
    # Pass 1: recreate the directory tree, restoring modification times.
    for p in sorted(fs.dirs()):
        fp = args.destination
        if p != '.': fp = os.path.join(fp, p)
        print('mkdir', fp)
        os.makedirs(fp, exist_ok=True)
        mtime = gmt_epoch(fs[p]['modified_gmt'])
        os.utime(fp, (mtime, mtime))
    # Pass 2: restore files, looking each one up by SHA1; report any
    # content missing from the backup log instead of failing.
    for p in sorted(fs.files()):
        b = uploaded.get(fs[p]['sha1'], None)
        if not b:
            print('Missing', p, fs[p])
            continue
        fp = os.path.join(args.destination, p)
        bp = os.path.join(args.source, b)
        print('get', fp, 'from', bp)
        make_restore(bp, fp)
        mtime = gmt_epoch(fs[p]['modified_gmt'])
        os.utime(fp, (mtime, mtime))
restore.args = 'dbfile logfile source destination keyfile verbose'.split() # args to add
if __name__ == "__main__":
    # Register signal handler to close any open log files on interrupt.
    signal.signal(signal.SIGINT, close_logs)
    # Create the top-level parser.
    parser = argparse.ArgumentParser(description='Fileson backup utilities')
    subparsers = parser.add_subparsers(help='sub-command help')
    # Register one subcommand per module function that declares an .args
    # attribute, wiring in its argument definitions from arg_adders.
    for name, cmd in inspect.getmembers(sys.modules[__name__]):
        if inspect.isfunction(cmd) and hasattr(cmd, 'args'):
            cmd.parser = subparsers.add_parser(cmd.__name__, description=cmd.__doc__)
            for argname in cmd.args: arg_adders[argname](cmd.parser)
            cmd.parser.set_defaults(func=cmd)
    # Parse and dispatch. Fix: checking for 'func' (set by set_defaults)
    # also covers invocations with options but no subcommand, which left
    # args.func unset and crashed with AttributeError under the original
    # len(sys.argv)==1 test.
    args = parser.parse_args()
    if hasattr(args, 'func'): args.func(args)
    else: parser.print_help(sys.stderr)