-
Notifications
You must be signed in to change notification settings - Fork 149
/
Copy pathhindsight.py
executable file
·338 lines (278 loc) · 15.3 KB
/
hindsight.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
#!/usr/bin/env python3
"""Hindsight - Internet history forensics for Google Chrome/Chromium.
This script parses the files in the Chrome data folder, runs various plugins
against the data, and then outputs the results in a spreadsheet.
"""
import argparse
import datetime
import importlib
import logging
import os
import re
import shutil
import sys
import time
import pyhindsight
import pyhindsight.plugins
from pyhindsight.analysis import AnalysisSession
from pyhindsight.utils import banner, format_meta_output, format_plugin_output
# Try to import module for timezone support. pytz is optional: without it the
# script still runs, but timestamps cannot be converted to a chosen timezone.
try:
    import pytz
except ImportError:
    # time.daylight only reports whether a DST zone is *defined* for the locale,
    # not whether DST is currently in effect; tm_isdst reflects the current state
    # (and may be -1 when unknown, which falls back to the standard zone name).
    print(f'Could not import module \'pytz\'; all timestamps in XLSX output '
          f'will be in examiner local time ({time.tzname[time.localtime().tm_isdst > 0]}).')
def parse_arguments(analysis_session):
    """Parse the command-line options and apply the decrypt choice to the session.

    Args:
        analysis_session: The AnalysisSession being configured. Its
            ``available_output_formats`` supplies the ``--format`` choices
            (the last entry is the default). Its ``available_decrypts`` dict is
            updated in place: a platform stays enabled (1) only if it was both
            requested with ``--decrypt`` and already marked available;
            otherwise it is set to 0.

    Returns:
        The parsed ``argparse.Namespace``. ``timezone`` is replaced with a pytz
        timezone object (UTC if the name is not recognized), or with None when
        pytz cannot be imported.
    """
    description = f'''
Hindsight v{pyhindsight.__version__} - Internet history forensics for Google Chrome/Chromium.
This script parses the files in the Chrome/Chromium/Brave data folder, runs various plugins
against the data, and then outputs the results in a spreadsheet. '''

    # Raw string: every backslash below is shown literally in the --help output
    epi = r'''
Example: C:\hindsight.py -i "C:\Users\Ryan\AppData\Local\Google\Chrome\User Data\Default" -o test_case
The Chrome Profile folder default locations are:
Vista - 11: <userdir>\AppData\Local\Google\Chrome\User Data\Default\
WinXP: <userdir>\Local Settings\Application Data\Google\Chrome
\User Data\Default\
Linux: <userdir>/.config/google-chrome/Default/
MacOS: <userdir>/Library/Application Support/Google/Chrome/Default/
iOS: \Applications\com.google.chrome.ios\Library\Application Support
\Google\Chrome\Default\
Chromium OS: \home\user\<GUID>\
'''

    class MyParser(argparse.ArgumentParser):
        """ArgumentParser that prints the full help text after a usage error."""

        def error(self, message):
            sys.stderr.write('error: %s\n' % message)
            self.print_help()
            sys.exit(2)

    parser = MyParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        epilog=epi)

    parser.add_argument('-i', '--input', required=True,
                        help='Path to the Chrome(ium) profile directory (typically "Default"). If a higher-level '
                             'directory is specified instead, Hindsight will recursively search for profiles.', )
    parser.add_argument('-o', '--output', help='Name of the output file (without extension)')
    parser.add_argument('-b', '--browser_type', help='Type of input files', default='Chrome',
                        choices=['Chrome', 'Brave'])
    parser.add_argument('-f', '--format', choices=analysis_session.available_output_formats,
                        default=analysis_session.available_output_formats[-1], help='Output format')
    parser.add_argument('-l', '--log', help='Location Hindsight should log to (will append if exists)',
                        default=os.path.join(os.getcwd(), 'hindsight.log'))
    parser.add_argument('-t', '--timezone', help='Display timezone for the timestamps in XLSX output', default='UTC')
    parser.add_argument('-d', '--decrypt', choices=['mac', 'linux'], default=None,
                        help='Try to decrypt Chrome data from a Linux or Mac system; support for both is currently '
                             'buggy and enabling this may cause problems. Only use "--decrypt linux" on data from a '
                             'Linux system, and only use "--decrypt mac" when running Hindsight on the same Mac the '
                             'Chrome data is from.')
    parser.add_argument('-c', '--cache',
                        help='Path to the cache directory; only needed if the directory is outside the given "input" '
                             'directory. Mac systems are set up this way by default. On a Mac, the default cache '
                             'directory location for Chrome is <userdir>/Library/Caches/Google/Chrome/Default/Cache/')
    parser.add_argument('--nocopy', '--no_copy', help='Don\'t copy files before opening them; this might run faster, '
                                                      'but some locked files may be inaccessible', action='store_true')
    parser.add_argument('--temp_dir', default='hindsight-temp',
                        help='If files are copied before being opened, use this directory as the copy destination')

    args = parser.parse_args()

    if args.timezone:
        # pytz is optional; import it here so this function does not depend on
        # whether the module-level import succeeded.
        try:
            import pytz
        except ImportError:
            args.timezone = None
        else:
            try:
                args.timezone = pytz.timezone(args.timezone)
            except pytz.exceptions.UnknownTimeZoneError:
                print("Couldn't understand timezone; using UTC.")
                args.timezone = pytz.timezone('UTC')

    # Disable decryption on each platform unless explicitly enabled and supported
    for platform in ('linux', 'mac'):
        requested_and_supported = (
            args.decrypt == platform and analysis_session.available_decrypts[platform] == 1)
        analysis_session.available_decrypts[platform] = 1 if requested_and_supported else 0

    return args
def _write_excel(analysis_session):
    """Build the XLSX report in memory, then copy it to '<output_name>.<format>' on disk."""
    import io

    # Generate the workbook into an in-memory bytes buffer before touching the disk
    xlsx_buffer = io.BytesIO()
    analysis_session.generate_excel(xlsx_buffer)

    # Go back to the beginning (be kind, rewind)
    xlsx_buffer.seek(0)

    # Write the buffer to a file on disk named what the user specified
    output_file = f'{analysis_session.output_name}.{analysis_session.selected_output_format}'
    with open(output_file, 'wb') as file_output:
        shutil.copyfileobj(xlsx_buffer, file_output)


def _write_sqlite(analysis_session):
    """Write the SQLite report, asking the user what to do if a non-empty output file exists."""
    output_file = analysis_session.output_name + '.sqlite'

    if os.path.exists(output_file) and os.path.getsize(output_file) > 0:
        print('\nDatabase file "{}" already exists.\n'.format(output_file))
        user_input = input('Would you like to (O)verwrite it, (R)ename output file, or (E)xit? ')

        # "exit" is tested first, so it wins if the answer matches several options
        if re.search(r'(^e$|exit)', user_input, re.IGNORECASE):
            print("Exiting... ")
            sys.exit()
        elif re.search(r'(^o$|overwrite)', user_input, re.IGNORECASE):
            os.remove(output_file)
            print("Deleted old \"%s\"" % output_file)
        elif re.search(r'(^r$|rename)', user_input, re.IGNORECASE):
            # Drop the trailing '.sqlite' (7 characters) and add a '_1' suffix
            output_file = "{}_1.sqlite".format(output_file[:-7])
            print("Renaming new output to {}".format(output_file))
        else:
            print("Did not understand response. Exiting... ")
            sys.exit()

    analysis_session.generate_sqlite(output_file)


def _write_jsonl(analysis_session):
    """Write the JSONL report to '<output_name>.jsonl'."""
    output_file = analysis_session.output_name + '.jsonl'
    analysis_session.generate_jsonl(output_file)


def _import_plugin(module_name, plugin, log):
    """Import a plugin module by name; log/print the failure and return None if it cannot be imported."""
    log.debug(f" - Loading '{plugin}'")
    try:
        return importlib.import_module(module_name)
    except ImportError as e:
        log.error(f' - Error: {e}')
        print(format_plugin_output(plugin, "-unknown", 'import failed (see log)'))
    except Exception as e:
        log.error(f' - Exception in {plugin} plugin: {e}')
    return None


def _run_plugin(module, plugin, analysis_session, completed_plugins, log):
    """Run one imported plugin, report its result, and record it in completed_plugins on success."""
    # Read the display metadata defensively: a module missing these attributes must
    # be reported as a failed plugin, not raise again from inside the error handler.
    friendly_name = getattr(module, 'friendlyName', plugin)
    version = getattr(module, 'version', '-unknown')
    try:
        log.info(f" - Running '{friendly_name}' plugin")
        parsed_items = module.plugin(analysis_session)
        print(format_plugin_output(friendly_name, version, parsed_items))
        log.info(f' - Completed; {parsed_items}')
        completed_plugins.append(plugin)
    except Exception as e:
        print(format_plugin_output(friendly_name, version, 'failed'))
        log.info(f' - Failed; {e}')


def _run_builtin_plugins(analysis_session, completed_plugins, log):
    """Run every plugin listed in pyhindsight.plugins.__all__ that has not already run."""
    log.info(" Built-in Plugins:")
    for plugin in pyhindsight.plugins.__all__:
        # Check to see if we've already run this plugin (likely from a different path)
        if plugin in completed_plugins:
            continue

        module = _import_plugin(f'pyhindsight.plugins.{plugin}', plugin, log)
        if module is not None:
            _run_plugin(module, plugin, analysis_session, completed_plugins, log)


def _run_custom_plugins(analysis_session, completed_plugins, real_path, log):
    """Find 'plugins' / 'pyhindsight/plugins' directories under each sys.path entry and run their plugins."""
    log.info(" Custom Plugins:")
    if real_path not in sys.path:
        sys.path.insert(0, real_path)

    # Loop through all paths, to pick up all potential locations for custom plugins.
    # Iterate over a snapshot because sys.path is modified inside the loop.
    for potential_path in list(sys.path):
        candidate_dirs = (os.path.join(potential_path, 'plugins'),
                          os.path.join(potential_path, 'pyhindsight', 'plugins'))
        for potential_plugin_path in candidate_dirs:
            if not os.path.isdir(potential_plugin_path):
                continue

            log.info(" Found custom plugin directory {}:".format(potential_plugin_path))

            # Insert the current plugin location to the system path, so we can import plugin modules by name
            sys.path.insert(0, potential_plugin_path)
            try:
                # Get list of available plugins and run them
                plugin_listing = os.listdir(potential_plugin_path)
                log.debug(" - Contents of plugin folder: " + str(plugin_listing))

                for filename in plugin_listing:
                    # Only non-private Python source files are treated as plugins
                    if not filename.endswith('.py') or filename.startswith('_'):
                        continue
                    plugin = filename[:-3]

                    # Check to see if we've already run this plugin (likely from a different path)
                    if plugin in completed_plugins:
                        log.debug(" - Skipping '{}'; a plugin with that name has run already".format(plugin))
                        continue

                    # A plugin that fails to import for any reason is skipped, so a stale
                    # module from a previous iteration is never run in its place.
                    module = _import_plugin(plugin, plugin, log)
                    if module is not None:
                        _run_plugin(module, plugin, analysis_session, completed_plugins, log)
            except Exception as e:
                log.debug(' - Error loading plugins ({})'.format(e))
                print(' - Error loading plugins')
            finally:
                # Remove the current plugin location from the system path, so we don't loop over it again
                sys.path.remove(potential_plugin_path)


def _write_output(analysis_session, log):
    """Create the output directory if needed and write the report in the selected format."""
    # Check if output directory exists; attempt to create if it doesn't
    output_dir = os.path.dirname(analysis_session.output_name)
    if output_dir != "" and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Get desired output type from the session and call the correct output creation function
    selected_format = analysis_session.selected_output_format
    if selected_format == 'xlsx':
        log.info("Writing output; XLSX format selected")
        try:
            print("\n Writing {}.xlsx".format(analysis_session.output_name))
            _write_excel(analysis_session)
        except IOError as e:
            print(f"{e} - is the file open? If so, please close it and try again.")
            log.error(f"Error writing XLSX file; type: {type(e)}, value: {e}", exc_info=True)
    elif selected_format == 'jsonl':
        log.info("Writing output; JSONL format selected")
        print("\n Writing {}.jsonl".format(analysis_session.output_name))
        _write_jsonl(analysis_session)
    elif selected_format == 'sqlite':
        log.info("Writing output; SQLite format selected")
        print("\n Writing {}.sqlite".format(analysis_session.output_name))
        _write_sqlite(analysis_session)


def main():
    """Command-line entry point: configure a session, run it, run plugins, and write the report.

    Exits with status 1 if the analysis run reports failure with a fatal error set.
    """
    print(banner)

    # Useful when Hindsight is run from a different directory than where the file is located
    real_path = os.path.dirname(os.path.realpath(sys.argv[0]))

    # Set up the AnalysisSession object, and transfer the relevant input arguments to it
    analysis_session = AnalysisSession()

    # parse_arguments needs the analysis_session as an input to set things like available decrypts
    args = parse_arguments(analysis_session)

    # Only override the session's existing output name / cache path when one was supplied
    if args.output:
        analysis_session.output_name = args.output
    if args.cache:
        analysis_session.cache_path = args.cache

    analysis_session.selected_output_format = args.format
    analysis_session.browser_type = args.browser_type
    analysis_session.timezone = args.timezone
    analysis_session.no_copy = args.nocopy
    analysis_session.temp_dir = args.temp_dir
    analysis_session.log_path = args.log

    # Set up logging
    logging.basicConfig(filename=analysis_session.log_path, level=logging.DEBUG,
                        format='%(asctime)s.%(msecs).03d | %(levelname).01s | %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S')
    log = logging.getLogger(__name__)

    # Hindsight version info
    log.info(
        '\n' + '#' * 80 +
        f'\n## Hindsight v{pyhindsight.__version__} (https://github.com/obsidianforensics/hindsight) ##\n' +
        '#' * 80)

    # Analysis start time (trimmed from microseconds to milliseconds)
    print(format_meta_output("Start time", str(datetime.datetime.now())[:-3]))

    # Print input & output directories
    analysis_session.input_path = args.input
    print(format_meta_output('Input directory', args.input))
    print(format_meta_output(
        'Output name', f'{analysis_session.output_name}.{analysis_session.selected_output_format}'))

    # Run the AnalysisSession
    print("\n Processing:")
    run_status = analysis_session.run()
    if not run_status and analysis_session.fatal_error:
        print(f"\n Fatal Error '{analysis_session.fatal_error}'")
        sys.exit(1)

    print("\n Running plugins:")
    log.info("Plugins:")
    completed_plugins = []

    # First run built-in plugins that ship with Hindsight
    _run_builtin_plugins(analysis_session, completed_plugins, log)

    # Then look for any custom user-provided plugins in a 'plugins' directory
    _run_custom_plugins(analysis_session, completed_plugins, real_path, log)

    _write_output(analysis_session, log)

    # Display and log finish time
    finish_time = str(datetime.datetime.now())[:-3]
    print(f'\n Finish time: {finish_time}')
    log.info(f'Finish time: {finish_time}\n\n')
# Run the command-line workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()