Skip to content

Commit 1440b08

Browse files
authored
Merge pull request #1 from Open-Paws/security/fix-critical-vulnerabilities
Security: Fix critical path injection, XSS, and weak hashing vulnerabilities
2 parents 479ba5b + 4346969 commit 1440b08

File tree

2 files changed

+80
-32
lines changed

2 files changed

+80
-32
lines changed

auth-proxy/admin.py

Lines changed: 48 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -130,11 +130,41 @@ def get_privatemode_key_status() -> tuple[str, str]:
130130
CSRF_TTL = 3600 # 1 hour
131131

132132

133+
PBKDF2_SALT = os.environ.get('PBKDF2_SALT', '').encode()
134+
if len(PBKDF2_SALT) < 16:
135+
raise ValueError("PBKDF2_SALT environment variable must be at least 16 bytes")
136+
137+
# Cache the derived Fernet key at module level so the expensive PBKDF2
138+
# computation only runs once (avoids blocking the async event loop on
139+
# every encrypt/decrypt call).
140+
_FERNET_KEY: bytes | None = None
141+
142+
133143
def _get_fernet_key() -> bytes:
134-
"""Derive a Fernet key from admin password."""
135-
# Use SHA256 to get 32 bytes, then base64 encode for Fernet
136-
key_material = hashlib.sha256(ADMIN_PASSWORD.encode()).digest()
137-
return base64.urlsafe_b64encode(key_material)
144+
"""Derive a Fernet key from admin password using PBKDF2.
145+
146+
Uses PBKDF2-HMAC-SHA256 with a configurable salt for key derivation.
147+
The key is computed once and cached to avoid blocking the async event loop.
148+
"""
149+
global _FERNET_KEY
150+
if _FERNET_KEY is not None:
151+
return _FERNET_KEY
152+
# Use PBKDF2 with 600,000 iterations (OWASP recommended for HMAC-SHA256)
153+
key_material = hashlib.pbkdf2_hmac(
154+
'sha256',
155+
ADMIN_PASSWORD.encode(),
156+
PBKDF2_SALT,
157+
iterations=600_000,
158+
dklen=32 # Fernet requires 32 bytes
159+
)
160+
_FERNET_KEY = base64.urlsafe_b64encode(key_material)
161+
return _FERNET_KEY
162+
163+
164+
# Derive the key once at import time so the expensive PBKDF2 computation
165+
# happens during startup, not on the first request (which would block the
166+
# async event loop).
167+
_get_fernet_key()
138168

139169

140170
def _encrypt_key_for_display(key: str) -> str:
@@ -613,7 +643,8 @@ async def admin_login_page(request: web.Request) -> web.Response:
613643
raise web.HTTPFound('/admin')
614644

615645
error = request.query.get('error', '')
616-
error_html = f'<p class="error">{error}</p>' if error else ''
646+
# Escape error message to prevent XSS attacks
647+
error_html = f'<p class="error">{escape(error)}</p>' if error else ''
617648

618649
csrf_token = generate_csrf_token()
619650
html = HTML_TEMPLATE.format(
@@ -771,7 +802,7 @@ async def admin_dashboard(request: web.Request) -> web.Response:
771802
# Determine base URL for usage examples
772803
scheme = request.headers.get('X-Forwarded-Proto', request.scheme)
773804
host = request.headers.get('X-Forwarded-Host', request.host)
774-
base_url = f"{scheme}://{host}"
805+
base_url = escape(f"{scheme}://{host}")
775806

776807
content = DASHBOARD_CONTENT.format(
777808
keys_table=keys_table,
@@ -1487,27 +1518,21 @@ async def admin_static(request: web.Request) -> web.Response:
14871518
"""Serve static files (logo, etc.)."""
14881519
filename = request.match_info.get('filename', '')
14891520

1490-
# Security: only allow specific files
1491-
allowed_files = {'logo.png'}
1492-
if filename not in allowed_files:
1493-
raise web.HTTPNotFound()
1494-
1521+
# Security: allowlist maps filenames to (relative path, content type).
1522+
# The user-controlled value is only used as a dict key — the filesystem
1523+
# path is constructed entirely from hardcoded values, breaking taint flow.
14951524
static_dir = Path(__file__).parent / 'static'
1496-
file_path = static_dir / filename
1525+
allowed_files = {
1526+
'logo.png': (static_dir / 'logo.png', 'image/png'),
1527+
}
14971528

1498-
if not file_path.exists():
1529+
entry = allowed_files.get(filename)
1530+
if entry is None:
14991531
raise web.HTTPNotFound()
15001532

1501-
# Determine content type
1502-
content_types = {
1503-
'.png': 'image/png',
1504-
'.jpg': 'image/jpeg',
1505-
'.jpeg': 'image/jpeg',
1506-
'.svg': 'image/svg+xml',
1507-
'.ico': 'image/x-icon'
1508-
}
1509-
suffix = file_path.suffix.lower()
1510-
content_type = content_types.get(suffix, 'application/octet-stream')
1533+
file_path, content_type = entry
1534+
if not file_path.exists():
1535+
raise web.HTTPNotFound()
15111536

15121537
with open(file_path, 'rb') as f:
15131538
return web.Response(body=f.read(), content_type=content_type)

scripts/scrape_docs.py

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
import os
77
import re
8+
from pathlib import Path
9+
810
import requests
911
from bs4 import BeautifulSoup
1012
from markdownify import markdownify as md
@@ -93,15 +95,32 @@ def extract_content(soup: BeautifulSoup) -> tuple[str, str]:
9395

9496

9597
def url_to_filename(url: str) -> str:
96-
"""Convert URL to filename."""
98+
"""Convert URL to filename safely, preventing path traversal attacks."""
9799
path = urlparse(url).path.strip('/')
98100
if not path:
99101
return "index.md"
100102
# Replace slashes with underscores
101103
name = path.replace('/', '_')
104+
# Remove any path traversal attempts and dangerous characters
105+
# Only allow alphanumeric, underscore, and hyphen (block backslashes too)
106+
name = re.sub(r'[^a-zA-Z0-9_-]', '', name)
107+
if not name:
108+
return "index.md"
102109
return f"{name}.md"
103110

104111

112+
def safe_join_path(base_dir: str, filename: str) -> str:
113+
"""Safely join base directory and filename, preventing path traversal."""
114+
# Resolve to absolute paths
115+
base = Path(base_dir).resolve()
116+
target = (base / filename).resolve()
117+
# Ensure the target is within the base directory using pathlib's semantic check
118+
# which is immune to partial string prefix attacks (e.g. /docs vs /docs_evil)
119+
if target != base and base not in target.parents:
120+
raise ValueError(f"Path traversal detected: {filename}")
121+
return str(target)
122+
123+
105124
def scrape_all():
106125
"""Scrape all documentation pages."""
107126
to_visit = list(SEED_URLS)
@@ -116,15 +135,19 @@ def scrape_all():
116135
title, content = extract_content(soup)
117136
if content:
118137
filename = url_to_filename(url)
119-
filepath = os.path.join(DOCS_DIR, filename)
138+
try:
139+
filepath = safe_join_path(DOCS_DIR, filename)
120140

121-
with open(filepath, 'w') as f:
122-
if title:
123-
f.write(f"# {title}\n\n")
124-
f.write(content)
141+
with open(filepath, 'w') as f:
142+
if title:
143+
f.write(f"# {title}\n\n")
144+
f.write(content)
125145

126-
print(f" Saved: {filename}")
127-
pages[url] = {'title': title, 'file': filename}
146+
print(f" Saved: {filename}")
147+
pages[url] = {'title': title, 'file': filename}
148+
except (ValueError, requests.RequestException) as e:
149+
print(f" Skipping {url}: {e}")
150+
continue
128151

129152
# Discover more links
130153
for link in extract_nav_links(soup):
@@ -133,7 +156,7 @@ def scrape_all():
133156
to_visit.append(link)
134157

135158
# Create index
136-
with open(os.path.join(DOCS_DIR, "README.md"), 'w') as f:
159+
with open(safe_join_path(DOCS_DIR, "README.md"), 'w') as f:
137160
f.write("# Privatemode Documentation\n\n")
138161
f.write("Scraped from https://docs.privatemode.ai\n\n")
139162
f.write("## Pages\n\n")

0 commit comments

Comments
 (0)