-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathDOM-Auditor.py
184 lines (150 loc) · 6.64 KB
/
DOM-Auditor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
import os
import glob
import re
import json
import subprocess
from bs4 import BeautifulSoup
# Parse HTML and find id/name attributes
def parse_html(html_content):
    """Collect id values, name values and inline event handlers from HTML.

    Args:
        html_content: HTML markup as text.

    Returns:
        A tuple ``(ids, names, inline_events)`` where ``ids`` lists the ``id``
        attribute of every tag that has one, ``names`` lists every non-empty
        ``name`` attribute, and ``inline_events`` lists
        ``(tag_name, attribute, value)`` for each attribute whose name starts
        with ``on``. All three lists are in document order.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    ids = []
    names = []
    inline_events = []
    # One pass over every tag. Note that find_all(name=True) would NOT filter
    # on the "name" attribute: in BeautifulSoup the `name` argument selects by
    # tag name, so attributes are inspected explicitly here instead.
    for tag in soup.find_all(True):
        if tag.has_attr('id'):
            ids.append(tag.get('id'))
        name_value = tag.get('name')
        if name_value:
            names.append(name_value)
        for attr, value in tag.attrs.items():
            # Attributes such as onclick/onload carry inline script.
            if attr.startswith('on'):
                inline_events.append((tag.name, attr, value))
    return ids, names, inline_events
# Invoke Esprima
def parse_js_ast(js_content):
    """Parse JavaScript source into an AST by running Esprima under Node.js.

    The source is piped to the Node process on stdin, so no scratch file is
    written to (or left behind in) the current working directory.

    Args:
        js_content: JavaScript source text.

    Returns:
        The AST decoded from Esprima's JSON output (nodes carry ``range``
        information), or ``None`` when Node cannot be started, the Node
        script exits with a non-zero status (for example Esprima is missing
        or the source does not parse), or the output is not valid JSON.
        A diagnostic is printed in each failure case.
    """
    node_script = (
        "const esprima = require('esprima');"
        "const fs = require('fs');"
        # File descriptor 0 is stdin: the source arrives through the pipe.
        "const code = fs.readFileSync(0, 'utf-8');"
        "const ast = esprima.parseScript(code, {range: true});"
        "console.log(JSON.stringify(ast));"
    )
    try:
        result = subprocess.run(
            ['node', '-e', node_script],
            input=js_content,
            capture_output=True,
            text=True,
            # Be explicit so non-ASCII source does not depend on the locale.
            encoding='utf-8',
        )
    except (OSError, subprocess.SubprocessError, ValueError) as e:
        # OSError covers a missing `node` binary; ValueError covers
        # encoding/decoding failures on the pipes.
        print(f"Error parsing JavaScript via AST: {e}")
        return None

    if result.returncode != 0:
        detail = result.stderr.strip() or f"node exited with status {result.returncode}"
        print(f"Error parsing JavaScript via AST: {detail}")
        return None

    try:
        return json.loads(result.stdout)
    except ValueError as e:  # json.JSONDecodeError is a ValueError
        print(f"Error parsing JavaScript via AST: {e}")
        return None
# Track the flow of user input data
def track_user_input_flow(ast, html_ids):
    """Flag top-level ``x.innerHTML = v`` where ``v`` came from document/location.

    Two passes are made over the top-level statements in ``ast['body']``:

    1. Every variable declared with an initializer of the form
       ``document.<something>(...)`` or ``location.<something>(...)`` is
       recorded as holding user-controlled input.
    2. Every assignment statement whose target is a member expression with
       property ``innerHTML`` and whose right-hand side is one of those
       variables is reported.

    Nodes whose shape does not match (for example ``a.b.c()``, destructuring
    declarations, or computed properties) are skipped individually instead of
    aborting the whole traversal.

    Args:
        ast: Program node as a dict (the structure produced by Esprima).
        html_ids: Accepted for interface compatibility; currently unused.

    Returns:
        A list of human-readable finding strings; empty when nothing matched
        or when ``ast`` is not a dict.
    """
    safe_functions = {'encodeURIComponent', 'escapeHTML'}
    input_roots = {'document', 'location'}

    def as_node(value):
        # Normalise missing/None/non-dict children so .get() is always safe.
        return value if isinstance(value, dict) else {}

    def is_sanitized(expr):
        callee = as_node(expr.get('callee'))
        return (expr.get('type') == 'CallExpression' and
                callee.get('type') == 'Identifier' and
                callee.get('name') in safe_functions)

    if not isinstance(ast, dict):
        return []
    body = [as_node(node) for node in (ast.get('body') or [])]

    # Pass 1: collect variables initialised from document.*() / location.*().
    user_input_vars = set()
    for node in body:
        if node.get('type') != 'VariableDeclaration':
            continue
        for decl in node.get('declarations') or []:
            decl = as_node(decl)
            init = as_node(decl.get('init'))  # init is None for `var x;`
            if init.get('type') != 'CallExpression':
                continue
            callee = as_node(init.get('callee'))
            if callee.get('type') != 'MemberExpression':
                continue
            root = as_node(callee.get('object')).get('name')
            target = as_node(decl.get('id')).get('name')
            if root in input_roots and target is not None:
                user_input_vars.add(target)

    # Pass 2: report innerHTML assignments fed by one of those variables.
    vulnerabilities = []
    for node in body:
        if node.get('type') != 'ExpressionStatement':
            continue
        expression = as_node(node.get('expression'))
        if expression.get('type') != 'AssignmentExpression':
            continue
        left = as_node(expression.get('left'))
        right = as_node(expression.get('right'))
        if left.get('type') != 'MemberExpression':
            continue
        if as_node(left.get('property')).get('name') != 'innerHTML':
            continue
        # `right` must be a bare identifier here, so is_sanitized() (which
        # only accepts call expressions) cannot currently return True; the
        # check is kept so the reporting condition matches the original.
        if (right.get('type') == 'Identifier' and
                right.get('name') in user_input_vars and
                not is_sanitized(right)):
            vulnerabilities.append(
                "Potential XSS: unsanitized user input assigned to innerHTML "
                f"at position {node.get('range')}."
            )
    return vulnerabilities
# Analyze JS using AST for dangerous patterns
def analyze_js_ast(js_file, html_ids):
    """Run the AST-based input-flow check on one JavaScript file and print the result.

    Args:
        js_file: Path of the JavaScript file to read (decoded as UTF-8).
        html_ids: Passed through unchanged to ``track_user_input_flow``.

    Returns:
        None. Prints a "Could not analyze" line when no AST is available;
        otherwise prints a header followed by each finding or a
        nothing-detected line.
    """
    with open(js_file, 'r', encoding='utf-8') as handle:
        source = handle.read()

    tree = parse_js_ast(source)
    if not tree:
        print(f"Could not analyze {js_file}")
        return

    # The flow check runs before the header is printed, so any diagnostics it
    # emits appear above the header.
    findings = track_user_input_flow(tree, html_ids)
    print(f"----- Analyzing JS file (AST): {js_file} -----")
    if not findings:
        print("No unsafe input-to-DOM patterns detected.")
        return
    for finding in findings:
        print(finding)
# Parse JavaScript code to detect vulnerabilities via regex
def parse_javascript(js_content, html_ids):
    """Scan JavaScript text with regexes for risky sinks and DOM clobbering.

    Args:
        js_content: JavaScript source text.
        html_ids: Iterable of element ids collected from the HTML files.

    Returns:
        A tuple ``(vulnerabilities, dom_clobbering)``. ``vulnerabilities``
        maps an issue name (``innerHTML``, ``eval``, ``setTimeout``,
        ``setInterval``) to the list of matched snippets and only contains
        issues that occurred at least once. ``dom_clobbering`` is the set of
        names that are both declared with ``var ... =`` and present in
        ``html_ids``.
    """
    patterns = {
        # Assignment (= or +=) to innerHTML; the lookahead rejects == / ===
        # comparisons, which are reads rather than writes.
        'innerHTML': r'\.innerHTML\s*\+?=(?!=)\s*',
        # \b keeps identifiers that merely end with the name (medieval(,
        # resetTimeout() from being reported.
        'eval': r'\beval\s*\(',
        'setTimeout': r'\bsetTimeout\s*\(',
        'setInterval': r'\bsetInterval\s*\(',
    }
    vulnerabilities = {}
    for issue, pattern in patterns.items():
        matches = re.findall(pattern, js_content)
        if matches:
            vulnerabilities[issue] = matches
    # Look for potential DOM clobbering: a `var` sharing its name with an id.
    variables = re.findall(r'\bvar\s+(\w+)\s*=', js_content)
    dom_clobbering = set(variables) & set(html_ids)
    return vulnerabilities, dom_clobbering
# Scan a single HTML file
def analyze_html_file(html_file):
    """Print the ids, names and inline event handlers found in one HTML file.

    Args:
        html_file: Path of the HTML file to read (decoded as UTF-8).

    Returns:
        The list of ids reported by ``parse_html`` for this file.
    """
    with open(html_file, 'r', encoding='utf-8') as handle:
        markup = handle.read()

    found_ids, found_names, handlers = parse_html(markup)

    print(f"----- Analyzing HTML file: {html_file} -----")
    print(f"Found IDs: {found_ids}")
    print(f"Found Names: {found_names}")

    if not handlers:
        print("No inline event handlers found.")
        return found_ids

    print("Found inline event handlers:")
    for tag_name, attr_name, attr_value in handlers:
        print(f" Tag: <{tag_name}> Attribute: {attr_name} -> {attr_value}")
    return found_ids
# A function to analyze JavaScript files (AST and regex)
def analyze_js_file(js_file, html_ids):
    """Analyze one JavaScript file with the AST check, then the regex scan.

    Args:
        js_file: Path of the JavaScript file to read (decoded as UTF-8).
        html_ids: Element ids gathered from the HTML files; forwarded to both
            checks.

    Returns:
        None. All results are printed.
    """
    with open(js_file, 'r', encoding='utf-8') as handle:
        source = handle.read()

    # AST report first, so its output precedes the regex section.
    analyze_js_ast(js_file, html_ids)

    findings, clobbered = parse_javascript(source, html_ids)

    print(f"----- Analyzing JS file (Regex): {js_file} -----")
    if not findings:
        print("No unsafe JavaScript patterns detected.")
    else:
        for issue in findings:
            hits = findings[issue]
            print(f"Possible {issue} usage found {len(hits)} time(s).")

    if not clobbered:
        print("No DOM clobbering issues detected.")
    else:
        print(f"Potential DOM clobbering detected: {clobbered}")
# Scan all files in a folder
def analyze_folder(folder):
    """Recursively analyze every ``.html`` and ``.js`` file under ``folder``.

    All HTML files are processed first so that the ids they contain can be
    handed to the analysis of every JavaScript file.

    Args:
        folder: Root directory to search.

    Returns:
        None. All results are printed.
    """
    def find_by_extension(extension):
        # '**' together with recursive=True descends into sub-directories.
        return glob.glob(os.path.join(folder, '**', '*.' + extension), recursive=True)

    html_paths = find_by_extension('html')
    js_paths = find_by_extension('js')
    print(f"\nFound {len(html_paths)} HTML files and {len(js_paths)} JS files in '{folder}'\n")

    collected_ids = []
    for page in html_paths:
        collected_ids += analyze_html_file(page)

    for script in js_paths:
        analyze_js_file(script, collected_ids)
# Directory to analyze.
# NOTE(review): the path is hard-coded and the scan below runs whenever this
# module is executed or imported (there is no `if __name__ == "__main__":`
# guard) — confirm this is intended.
folder_to_scan = '/home/b0llull0s/Documents/Projects/The_Odin_Project/odin-recipes'
analyze_folder(folder_to_scan)