diff --git a/migrations/0004_add_canonical_url_and_composite_uniqueness.sql b/migrations/0004_add_canonical_url_and_composite_uniqueness.sql
new file mode 100644
index 00000000..a0708688
--- /dev/null
+++ b/migrations/0004_add_canonical_url_and_composite_uniqueness.sql
@@ -0,0 +1,34 @@
+-- Migration: Add canonical_url metadata and composite uniqueness for PR identity
+-- Created: 2026-03-25
+-- Description:
+-- 1) Adds canonical_url column for normalized PR URL storage
+-- 2) Backfills canonical_url for existing rows
+-- 3) Removes duplicate rows by case-insensitive (repo_owner, repo_name, pr_number), keeping the most recently refreshed/updated row (ties broken by highest id)
+-- 4) Enforces uniqueness on (repo_owner, repo_name, pr_number)
+
+ALTER TABLE prs ADD COLUMN canonical_url TEXT;
+
+UPDATE prs
+SET canonical_url = 'https://github.com/' || repo_owner || '/' || repo_name || '/pull/' || pr_number
+WHERE canonical_url IS NULL OR canonical_url = '';
+
+WITH ranked_prs AS (
+ SELECT
+ id,
+ ROW_NUMBER() OVER (
+ PARTITION BY LOWER(repo_owner), LOWER(repo_name), pr_number
+ ORDER BY readiness_computed_at DESC NULLS LAST,
+ updated_at DESC NULLS LAST,
+ id DESC
+ ) AS row_num
+ FROM prs
+)
+DELETE FROM prs
+WHERE id IN (
+ SELECT id
+ FROM ranked_prs
+ WHERE row_num > 1
+);
+
+CREATE UNIQUE INDEX IF NOT EXISTS idx_prs_identity_unique
+ON prs(repo_owner COLLATE NOCASE, repo_name COLLATE NOCASE, pr_number);
diff --git a/package.json b/package.json
index cb87ef1a..011705e4 100644
--- a/package.json
+++ b/package.json
@@ -11,8 +11,9 @@
"db:migrations:list": "wrangler d1 migrations list pr_tracker",
"lint": "echo 'Linting not configured yet' && exit 0",
"format:check": "echo 'Format checking not configured yet' && exit 0",
- "test": "node test-data-display.js",
- "test:data-display": "node test-data-display.js"
+ "test": "node test-data-display.js && node test-url-validation.js",
+ "test:data-display": "node test-data-display.js",
+ "test:url-validation": "node test-url-validation.js"
},
"keywords": [
"github",
diff --git a/public/index.html b/public/index.html
index ff0b2152..d7c8a977 100644
--- a/public/index.html
+++ b/public/index.html
@@ -568,6 +568,7 @@
Error<
}, FEEDBACK_DISMISS_DELAY);
}
+ const PR_PATH_RE = /^\/([^/]+)\/([^/]+)\/pull\/(\d+)\/?$/i;
+ const REPO_PATH_RE = /^\/([^/]+)\/([^/]+)\/?$/i;
+ const ORGS_PATH_RE = /^\/orgs\/([^/]+)\/?$/i;
+ const ORG_PATH_RE = /^\/([^/]+)\/?$/i;
+ const RESERVED_OWNER_NAMES = new Set([
+ 'about', 'codespaces', 'collections', 'discussions', 'enterprise',
+ 'explore', 'features', 'issues', 'login', 'marketplace', 'new',
+ 'notifications', 'organizations', 'orgs', 'pricing', 'pulls',
+ 'security', 'settings', 'signup', 'sponsors', 'topics', 'trending'
+ ]);
+
+ function setUrlError(message) {
+ const el = document.getElementById('url-error');
+ const input = document.getElementById('prUrlInput');
+ if (!el) return;
+ if (message) {
+ el.textContent = message;
+ el.classList.remove('hidden');
+ if (input) {
+ input.setAttribute('aria-invalid', 'true');
+ input.setAttribute('aria-describedby', 'url-error');
+ }
+ } else {
+ el.textContent = '';
+ el.classList.add('hidden');
+ if (input) {
+ input.removeAttribute('aria-invalid');
+ input.removeAttribute('aria-describedby');
+ }
+ }
+ }
+
+ function normalizeRepoKey(owner, repo) {
+ return `${String(owner || '').toLowerCase()}/${String(repo || '').toLowerCase()}`;
+ }
+
+ function findRepoByIdentity(owner, repo) {
+ const targetKey = normalizeRepoKey(owner, repo);
+ return repos.find(r => normalizeRepoKey(r.repo_owner, r.repo_name) === targetKey) || null;
+ }
+
+ function findPrByIdentity(owner, repo, prNumber) {
+ const targetKey = normalizeRepoKey(owner, repo);
+ const targetPrNumber = String(prNumber || '');
+ return allPrs.find(pr => {
+ const prKey = normalizeRepoKey(pr.repo_owner, pr.repo_name);
+ if (prKey !== targetKey) {
+ return false;
+ }
+ if (!targetPrNumber) {
+ return true;
+ }
+ return String(pr.pr_number || '') === targetPrNumber;
+ }) || null;
+ }
+
+ function parseGitHubTrackingUrl(rawUrl) {
+ const value = typeof rawUrl === 'string' ? rawUrl.trim() : '';
+ if (!value) {
+ return { valid: false, reason: 'Please enter a PR, repository, or organization URL' };
+ }
+ if (value.length > 2048) {
+ return { valid: false, reason: 'URL is too long' };
+ }
+
+ let url;
+ try {
+ url = new URL(value);
+ } catch {
+ return { valid: false, reason: 'Please enter a valid URL' };
+ }
+
+ const protocol = url.protocol.toLowerCase();
+ if (protocol !== 'http:' && protocol !== 'https:') {
+ return { valid: false, reason: 'URL must start with http:// or https://' };
+ }
+
+ const hostname = url.hostname.toLowerCase();
+ if (hostname !== 'github.com' && hostname !== 'www.github.com') {
+ return { valid: false, reason: 'Only github.com URLs are allowed' };
+ }
+
+ const cleanPath = url.pathname.replace(/\/+$/, '') || '/';
+ let match = cleanPath.match(PR_PATH_RE);
+ if (match) {
+ const owner = match[1];
+ const repo = match[2];
+ const prNumber = parseInt(match[3], 10);
+ return {
+ valid: true,
+ type: 'pr',
+ owner,
+ repo,
+ prNumber,
+ canonicalUrl: `https://github.com/${owner}/${repo}/pull/${prNumber}`
+ };
+ }
+
+ match = cleanPath.match(REPO_PATH_RE);
+ if (match) {
+ const owner = match[1];
+ const repo = match[2];
+ if (!RESERVED_OWNER_NAMES.has(owner.toLowerCase()) && repo && repo.trim()) {
+ return {
+ valid: true,
+ type: 'repo',
+ owner,
+ repo,
+ canonicalUrl: `https://github.com/${owner}/${repo}`
+ };
+ }
+ }
+
+ match = cleanPath.match(ORGS_PATH_RE);
+ if (match) {
+ const owner = match[1];
+ return {
+ valid: true,
+ type: 'org',
+ owner,
+ canonicalUrl: `https://github.com/${owner}`
+ };
+ }
+
+ match = cleanPath.match(ORG_PATH_RE);
+ if (match) {
+ const owner = match[1];
+ if (!RESERVED_OWNER_NAMES.has(owner.toLowerCase())) {
+ return {
+ valid: true,
+ type: 'org',
+ owner,
+ canonicalUrl: `https://github.com/${owner}`
+ };
+ }
+ }
+
+ return {
+ valid: false,
+ reason: 'Use a GitHub PR, repository, or organization URL'
+ };
+ }
+
function showSectionMessage(message, type) {
// Show an inline feedback message in the Add PR section on the page
const section = document.getElementById('prUrlInput')?.closest('section');
@@ -3164,19 +3308,32 @@
${emptyTitl
const checkbox = document.getElementById('addAllPrsCheckbox');
const prUrl = input.value.trim();
const addAll = checkbox.checked;
+ const parsedUrl = parseGitHubTrackingUrl(prUrl);
- if (!prUrl) {
- showInputError('prUrlInput', 'Please enter a PR URL');
+ if (!parsedUrl.valid) {
+ setUrlError(parsedUrl.reason);
+ input.focus();
return;
}
- // Auto-detect if URL is an org or repo URL (not a PR URL)
- const isOrgOrRepoUrl = prUrl.match(/^https?:\/\/github\.com\/[^/]+\/?$/) ||
- (prUrl.match(/^https?:\/\/github\.com\/[^/]+\/[^/]+\/?$/) && !prUrl.includes('/pull/'));
- const effectiveAddAll = addAll || !!isOrgOrRepoUrl;
+ // Enforce compatibility between import mode (single vs bulk) and URL type
+ if (!addAll && parsedUrl.type !== 'pr') {
+ setUrlError('Use a pull request URL when bulk import is not enabled');
+ input.focus();
+ return;
+ }
+ if (addAll && parsedUrl.type === 'pr') {
+ setUrlError('For bulk import, enter a repository or organization URL');
+ input.focus();
+ return;
+ }
+
+ setUrlError('');
+ const canonicalInputUrl = parsedUrl.canonicalUrl;
+ const isBulkImport = addAll || parsedUrl.type !== 'pr';
btn.disabled = true;
- btn.textContent = effectiveAddAll ? 'Importing...' : 'Adding...';
+ btn.textContent = isBulkImport ? 'Importing...' : 'Adding...';
try {
// If importing all, handle logic to send repo_url appropriately
@@ -3187,16 +3344,18 @@ ${emptyTitl
'Content-Type': 'application/json'
},
body: JSON.stringify({
- pr_url: prUrl,
- add_all: addAll
+ pr_url: canonicalInputUrl,
+ add_all: isBulkImport
})
});
const data = await response.json();
if (data.error) {
- showInputError('prUrlInput', data.error);
+ setUrlError(data.error);
+ input.focus();
} else {
input.value = '';
+ setUrlError('');
// Show success message for bulk imports
if (data.imported_count !== undefined) {
if (data.truncated) {
@@ -3207,27 +3366,35 @@ ${emptyTitl
}
invalidateApiCache();
// For single PR additions, select the org/repo and highlight the added PR
- const prUrlPattern = /^https?:\/\/github\.com\/([a-zA-Z0-9._-]+)\/([a-zA-Z0-9._-]+)\/pull\/\d+/;
- const prMatch = !effectiveAddAll && prUrl.match(prUrlPattern);
- if (prMatch) {
- const prOwner = prMatch[1];
- const prRepoKey = `${prMatch[1]}/${prMatch[2]}`;
- // Update org filter to highlight the PR's owner/org
- currentOrg = prOwner;
+ if (!isBulkImport && parsedUrl.type === 'pr') {
+ const targetOwner = parsedUrl.owner;
+ const targetRepo = parsedUrl.repo;
+ const targetPrNumber = parsedUrl.prNumber;
+
+ // Reset to owner scope first, then resolve exact casing/key from loaded repo list.
+ currentOrg = targetOwner;
+ currentPage = 1;
+ await loadRepos(true);
+
+ const matchedRepo = findRepoByIdentity(targetOwner, targetRepo);
+ const resolvedOwner = matchedRepo ? matchedRepo.repo_owner : targetOwner;
+ const resolvedRepo = matchedRepo ? matchedRepo.repo_name : targetRepo;
+
+ currentOrg = resolvedOwner;
localStorage.setItem('currentOrg', currentOrg);
const orgFilterInput = document.getElementById('orgFilterInput');
const orgFilterClearBtn = document.getElementById('orgFilterClearBtn');
- if (orgFilterInput) orgFilterInput.value = prOwner;
+ if (orgFilterInput) orgFilterInput.value = currentOrg;
if (orgFilterClearBtn) orgFilterClearBtn.classList.remove('hidden');
- // Select the specific repo in the sidebar
- currentRepo = prRepoKey;
+
+ currentRepo = `${resolvedOwner}/${resolvedRepo}`;
localStorage.setItem('currentRepo', currentRepo);
- currentPage = 1;
- await loadRepos(true);
+
await loadPrs(true);
await loadRateLimit(true);
+
// Highlight the newly added PR row
- const added = allPrs.find(pr => pr.pr_url === prUrl);
+ const added = findPrByIdentity(targetOwner, targetRepo, targetPrNumber);
if (added) highlightPrRow(added.id);
} else {
// Force bypass service worker cache after import
@@ -3238,7 +3405,8 @@ ${emptyTitl
}
} catch (error) {
console.error('Error adding PR:', error);
- showInputError('prUrlInput', 'Failed to add PR');
+ setUrlError('Failed to add PR');
+ input.focus();
} finally {
btn.disabled = false;
btn.textContent = 'Add PR';
@@ -3339,6 +3507,9 @@ ${emptyTitl
});
document.getElementById('addPrBtn').addEventListener('click', addPr);
+ document.getElementById('prUrlInput').addEventListener('input', () => {
+ setUrlError('');
+ });
document.getElementById('prUrlInput').addEventListener('keypress', (e) => {
if (e.key === 'Enter') addPr();
});
@@ -3649,42 +3820,50 @@ ${emptyTitl
if (ownerValid && repoValid && prValid) {
// New format: construct the full PR URL from parts
prUrl = `https://github.com/${ownerParam}/${repoParam}/pull/${prParam}`;
- repoKey = `${ownerParam}/${repoParam}`;
+ repoKey = normalizeRepoKey(ownerParam, repoParam);
} else if (prParam) {
// Old format: only accept prParam if it looks like a GitHub PR URL
const legacyPrUrlPattern = /^https?:\/\/github\.com\/([a-zA-Z0-9._-]+)\/([a-zA-Z0-9._-]+)\/pull\/\d+$/;
const match = prParam.match(legacyPrUrlPattern);
if (match) {
prUrl = prParam;
- repoKey = `${match[1]}/${match[2]}`;
+ repoKey = normalizeRepoKey(match[1], match[2]);
}
}
if (!prUrl) return;
+ const parsedParamUrl = parseGitHubTrackingUrl(prUrl);
+ if (!parsedParamUrl.valid || parsedParamUrl.type !== 'pr') return;
+ const canonicalInputUrl = parsedParamUrl.canonicalUrl;
+
// Pre-fill the input with the PR URL
const input = document.getElementById('prUrlInput');
- if (input) input.value = prUrl;
+ if (input) input.value = canonicalInputUrl;
// Select the correct repo in the sidebar if needed
- if (repoKey && repoKey !== currentRepo) {
- currentRepo = repoKey;
+ const matchedRepo = findRepoByIdentity(parsedParamUrl.owner, parsedParamUrl.repo);
+ const resolvedOwner = matchedRepo ? matchedRepo.repo_owner : parsedParamUrl.owner;
+ const resolvedRepo = matchedRepo ? matchedRepo.repo_name : parsedParamUrl.repo;
+ const resolvedRepoKey = `${resolvedOwner}/${resolvedRepo}`;
+ if (repoKey && normalizeRepoKey(...String(currentRepo || '').split('/')) !== normalizeRepoKey(resolvedOwner, resolvedRepo)) {
+ currentRepo = resolvedRepoKey;
localStorage.setItem('currentRepo', currentRepo);
updateMobileRepoDisplay();
document.querySelectorAll('.repo-item').forEach(item => {
- setRepoItemActiveState(item, item.dataset.repo === currentRepo);
+ setRepoItemActiveState(item, normalizeRepoKey(...String(item.dataset.repo || '').split('/')) === normalizeRepoKey(resolvedOwner, resolvedRepo));
});
await loadPrs();
}
// Check if the PR is already tracked
- const existing = allPrs.find(pr => pr.pr_url === prUrl);
+ const existing = findPrByIdentity(parsedParamUrl.owner, parsedParamUrl.repo, parsedParamUrl.prNumber);
if (existing) {
highlightPrRow(existing.id);
} else {
// Auto-add the PR, then highlight it
await addPr();
- const added = allPrs.find(pr => pr.pr_url === prUrl);
+ const added = findPrByIdentity(parsedParamUrl.owner, parsedParamUrl.repo, parsedParamUrl.prNumber);
if (added) highlightPrRow(added.id);
}
}
diff --git a/src/database.py b/src/database.py
index ad946a5f..b5a0f4ed 100644
--- a/src/database.py
+++ b/src/database.py
@@ -261,14 +261,17 @@ async def delete_readiness_from_db(env, pr_id):
async def upsert_pr(db, pr_url, owner, repo, pr_number, pr_data):
"""Helper to insert or update PR in database (Deduplicates logic)"""
current_timestamp = datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z')
+ canonical_url = f'https://github.com/{owner}/{repo}/pull/{pr_number}'
stmt = db.prepare('''
- INSERT INTO prs (pr_url, repo_owner, repo_name, pr_number, title, state,
+ INSERT INTO prs (pr_url, canonical_url, repo_owner, repo_name, pr_number, title, state,
is_merged, mergeable_state, files_changed, author_login, author_avatar, repo_owner_avatar, checks_passed, checks_failed, checks_skipped,
commits_count, behind_by, review_status, last_updated_at,
last_refreshed_at, updated_at, is_draft, open_conversations_count, reviewers_json, etag)
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
- ON CONFLICT(pr_url) DO UPDATE SET
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+ ON CONFLICT(repo_owner COLLATE NOCASE, repo_name COLLATE NOCASE, pr_number) DO UPDATE SET
+ pr_url = excluded.pr_url,
+ canonical_url = excluded.canonical_url,
title = excluded.title,
state = excluded.state,
is_merged = excluded.is_merged,
@@ -289,7 +292,7 @@ async def upsert_pr(db, pr_url, owner, repo, pr_number, pr_data):
reviewers_json = excluded.reviewers_json,
etag = excluded.etag
''').bind(
- pr_url, owner, repo, pr_number,
+ pr_url, canonical_url, owner, repo, pr_number,
pr_data.get('title') or '',
pr_data.get('state') or '',
1 if pr_data.get('is_merged') else 0,
diff --git a/src/handlers.py b/src/handlers.py
index 372bfadb..92e55e5d 100644
--- a/src/handlers.py
+++ b/src/handlers.py
@@ -85,24 +85,32 @@ async def handle_add_pr(request, env):
db = get_db(env)
+ normalized_pr_url = pr_url.strip()
+
# Auto-detect URL type: if it's an org or repo URL (not a PR URL),
# automatically treat as bulk import regardless of checkbox state
if not add_all:
- # Check if the URL is an org URL or a repo URL (not a PR URL)
- if parse_org_url(pr_url) or (parse_repo_url(pr_url) and '/pull/' not in pr_url):
+ is_pr_url = False
+ try:
+ parse_pr_url(normalized_pr_url)
+ is_pr_url = True
+ except ValueError:
+ is_pr_url = False
+
+ if not is_pr_url and (parse_org_url(normalized_pr_url) or parse_repo_url(normalized_pr_url)):
add_all = True
if add_all:
org_owner = ''
# Try parsing as a repo URL first (e.g. https://github.com/owner/repo)
- parsed = parse_repo_url(pr_url)
+ parsed = parse_repo_url(normalized_pr_url)
if parsed:
# Single repo import
repos_to_import = [{'owner': parsed['owner'], 'name': parsed['repo']}]
is_org_import = False
else:
# Parse org url
- org_parsed = parse_org_url(pr_url)
+ org_parsed = parse_org_url(normalized_pr_url)
if not org_parsed:
return Response.new(json.dumps({'error': 'Invalid GitHub Repository or Organization URL'}),
{'status': 400, 'headers': {'Content-Type': 'application/json'}})
@@ -211,7 +219,8 @@ async def handle_add_pr(request, env):
'reviewers_json': '[]'
}
- await upsert_pr(db, item['html_url'], owner, repo, item['number'], pr_data)
+ canonical_pr_url = f"https://github.com/{owner}/{repo}/pull/{item['number']}"
+ await upsert_pr(db, canonical_pr_url, owner, repo, item['number'], pr_data)
added_count += 1
# Build response message
@@ -236,7 +245,7 @@ async def handle_add_pr(request, env):
# Add single pr
# Catch ValueError from parse_pr_url
try:
- parsed = parse_pr_url(pr_url)
+ parsed = parse_pr_url(normalized_pr_url)
except ValueError as e:
return Response.new(
json.dumps({'error': str(e)}),
@@ -259,19 +268,20 @@ async def handle_add_pr(request, env):
return Response.new(json.dumps({'error': 'Cannot add merged/closed PRs'}),
{'status': 400, 'headers': {'Content-Type': 'application/json'}})
- await upsert_pr(db, pr_url, parsed['owner'], parsed['repo'], parsed['pr_number'], pr_data)
+ canonical_pr_url = parsed['canonical_url']
+ await upsert_pr(db, canonical_pr_url, parsed['owner'], parsed['repo'], parsed['pr_number'], pr_data)
# Auto-run readiness analysis for the newly added PR
readiness_data = None
try:
pr_result = await db.prepare(
- 'SELECT * FROM prs WHERE pr_url = ?'
- ).bind(pr_url).first()
+ 'SELECT * FROM prs WHERE repo_owner = ? COLLATE NOCASE AND repo_name = ? COLLATE NOCASE AND pr_number = ?'
+ ).bind(parsed['owner'], parsed['repo'], parsed['pr_number']).first()
if pr_result:
pr_row = pr_result.to_py()
readiness_data = await _run_readiness_analysis(env, pr_row, pr_row['id'], user_token)
except Exception as analysis_err:
- print(f"Auto-analysis failed for PR {pr_url}: {type(analysis_err).__name__}: {str(analysis_err)}")
+ print(f"Auto-analysis failed for PR {canonical_pr_url}: {type(analysis_err).__name__}: {str(analysis_err)}")
# Include repo_owner, repo_name, pr_number, and pr_url in the response for frontend display
response_data = {
@@ -279,7 +289,7 @@ async def handle_add_pr(request, env):
'repo_owner': parsed['owner'],
'repo_name': parsed['repo'],
'pr_number': parsed['pr_number'],
- 'pr_url': pr_url
+ 'pr_url': canonical_pr_url
}
result_obj = {'success': True, 'data': response_data}
@@ -1110,8 +1120,8 @@ async def handle_github_webhook(request, env):
db = get_db(env)
pr_url = f"https://github.com/{repo_owner}/{repo_name}/pull/{pr_number}"
result = await db.prepare(
- 'SELECT id FROM prs WHERE pr_url = ?'
- ).bind(pr_url).first()
+ 'SELECT id FROM prs WHERE repo_owner = ? COLLATE NOCASE AND repo_name = ? COLLATE NOCASE AND pr_number = ?'
+ ).bind(repo_owner, repo_name, pr_number).first()
# Handle opened PRs - add to tracking automatically
if action == 'opened':
@@ -1136,8 +1146,8 @@ async def handle_github_webhook(request, env):
# Get the newly created PR ID
new_result = await db.prepare(
- 'SELECT id FROM prs WHERE pr_url = ?'
- ).bind(pr_url).first()
+ 'SELECT id FROM prs WHERE repo_owner = ? COLLATE NOCASE AND repo_name = ? COLLATE NOCASE AND pr_number = ?'
+ ).bind(repo_owner, repo_name, pr_number).first()
new_pr_id = new_result.to_py()['id'] if new_result else None
return Response.new(
@@ -1295,8 +1305,8 @@ async def handle_github_webhook(request, env):
for pr_number, repo_owner, repo_name in prs_to_update:
pr_url = f"https://github.com/{repo_owner}/{repo_name}/pull/{pr_number}"
result = await db.prepare(
- 'SELECT id FROM prs WHERE pr_url = ?'
- ).bind(pr_url).first()
+ 'SELECT id FROM prs WHERE repo_owner = ? COLLATE NOCASE AND repo_name = ? COLLATE NOCASE AND pr_number = ?'
+ ).bind(repo_owner, repo_name, pr_number).first()
if not result:
# PR not being tracked - skip it
diff --git a/src/utils.py b/src/utils.py
index 92957473..c4b25c7b 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -11,50 +11,73 @@
# Reduces overall readiness score by 33% when mergeable state is 'dirty' (conflicts)
_MERGE_CONFLICTS_SCORE_MULTIPLIER = 0.67
+_GITHUB_PR_RE = re.compile(
+    r'^https?://(?:www\.)?github\.com/([^/?#\s]+)/([^/?#\s]+)/pull/(\d+)(?:[/?#].*)?$',
+    re.IGNORECASE,
+)
+_GITHUB_REPO_RE = re.compile(
+    r'^https?://(?:www\.)?github\.com/(?!orgs(?:/|$))([^/?#\s]+)/([^/?#\s]+)/?(?:[?#].*)?$',
+    re.IGNORECASE,
+)
+_GITHUB_ORG_RE = re.compile(
+    r'^https?://(?:www\.)?github\.com/(?:orgs/)?([A-Za-z0-9_.-]+)/?(?:[?#].*)?$',
+    re.IGNORECASE,
+)
-def parse_pr_url(pr_url):
+
+def parse_pr_url(pr_url: str):
"""
- Parse GitHub PR URL to extract owner, repo, and PR number.
-
- Security Hardening (Issue #45):
- - Type validation to prevent type confusion attacks
- - Anchored regex pattern to block malformed URLs with trailing junk
- - Raises ValueError instead of returning None for better error handling
+ Validates and parses a GitHub PR URL.
+ Returns a dict with owner, repo, and pr_number or raises ValueError.
"""
- # FIX Issue #45: Type validation
if not isinstance(pr_url, str):
raise ValueError("PR URL must be a string")
if not pr_url:
raise ValueError("PR URL is required")
- pr_url = pr_url.strip().rstrip('/')
-
- # FIX Issue #45: Anchored regex - must match EXACTLY, no trailing junk allowed
- pattern = r'^https?://github\.com/([^/]+)/([^/]+)/pull/(\d+)$'
- match = re.match(pattern, pr_url)
-
+ normalized_url = pr_url.strip()
+ match = _GITHUB_PR_RE.match(normalized_url)
+
if not match:
- # FIX Issue #45: Raise error instead of returning None
- raise ValueError("Invalid GitHub PR URL. Format: https://github.com/OWNER/REPO/pull/NUMBER")
-
+ raise ValueError(
+ "Invalid GitHub PR URL. Expected format: "
+ "https://github.com/owner/repo/pull/123"
+ )
+
+ owner, repo, pr_num_str = match.group(1), match.group(2), match.group(3)
+ pr_number = int(pr_num_str)
+
+ if pr_number < 1:
+ raise ValueError("PR number must be a positive integer.")
+
+ # Return as a dictionary to match the original function's signature
return {
- 'owner': match.group(1),
- 'repo': match.group(2),
- 'pr_number': int(match.group(3))
+ 'owner': owner,
+ 'repo': repo,
+ 'pr_number': pr_number,
+ 'canonical_url': f"https://github.com/{owner}/{repo}/pull/{pr_number}"
}
def parse_repo_url(url):
"""Parse GitHub Repo URL to extract owner and repo name"""
- if not url: return None
- url = url.strip().rstrip('/')
- pattern = r'https?://github\.com/([^/]+)/([^/]+)(?:/.*)?$'
- match = re.match(pattern, url)
+ if not url:
+ return None
+ normalized_url = url.strip()
+ match = _GITHUB_REPO_RE.match(normalized_url)
if match:
+ owner = match.group(1)
+ # Security guard: don't misroute /orgs/ bulk URLs to parse_repo_url()
+ # GitHub uses /orgs/ for organization homepages/dashboards.
+ if owner.lower() == 'orgs':
+ return None
+
+ repo = match.group(2)
return {
- 'owner': match.group(1),
- 'repo': match.group(2)
+ 'owner': owner,
+ 'repo': repo,
+ 'canonical_url': f"https://github.com/{owner}/{repo}"
}
return None
@@ -65,21 +88,22 @@ def parse_org_url(url):
"""
if not url:
return None
- url = url.strip().rstrip('/')
- # Match org/user URL: github.com/ with no further path segments
- pattern = r'^https?://github\.com/([A-Za-z0-9_.-]+)$'
- match = re.match(pattern, url)
+ normalized_url = url.strip()
+ match = _GITHUB_ORG_RE.match(normalized_url)
if match:
owner = match.group(1)
# Exclude GitHub reserved paths that aren't orgs/users
reserved = {'settings', 'organizations', 'explore', 'marketplace',
'notifications', 'new', 'login', 'signup', 'features',
'enterprise', 'pricing', 'topics', 'collections',
- 'trending', 'sponsors', 'about', 'security', 'pulls',
+ 'trending', 'sponsors', 'about', 'security', 'pulls', 'orgs',
'issues', 'codespaces', 'discussions'}
if owner.lower() in reserved:
return None
- return {'owner': owner}
+ return {
+ 'owner': owner,
+ 'canonical_url': f"https://github.com/{owner}"
+ }
return None
diff --git a/test-data-display.js b/test-data-display.js
index 96bce05b..c5c21e8e 100755
--- a/test-data-display.js
+++ b/test-data-display.js
@@ -7,7 +7,7 @@
const fs = require('fs');
const path = require('path');
-const { spawn } = require('child_process');
+const { spawn, spawnSync } = require('child_process');
// ANSI color codes for output
const colors = {
@@ -20,16 +20,26 @@ const colors = {
let testsPassed = 0;
let testsFailed = 0;
+let testsSkipped = 0;
function log(message, color = colors.reset) {
console.log(`${color}${message}${colors.reset}`);
}
-function testResult(testName, passed, message = '') {
- if (passed) {
+function testResult(testName, statusOrPassed, message = '') {
+ const isExplicitStatus = statusOrPassed === 'passed' || statusOrPassed === 'failed' || statusOrPassed === 'skipped';
+ const status = isExplicitStatus
+ ? statusOrPassed
+ : (statusOrPassed ? 'passed' : 'failed');
+
+ if (status === 'passed') {
testsPassed++;
log(`✓ ${testName}`, colors.green);
if (message) log(` ${message}`, colors.blue);
+ } else if (status === 'skipped') {
+ testsSkipped++;
+ log(`- ${testName} (skipped)`, colors.yellow);
+ if (message) log(` ${message}`, colors.yellow);
} else {
testsFailed++;
log(`✗ ${testName}`, colors.red);
@@ -41,6 +51,27 @@ function sleep(ms) {
return new Promise((resolve) => setTimeout(resolve, ms));
}
+async function waitForChildExit(child, timeoutMs = 800) {
+ if (!child) return true;
+ if (child.exitCode !== null) return true;
+
+ return await new Promise((resolve) => {
+ let settled = false;
+ const finish = (exited) => {
+ if (settled) return;
+ settled = true;
+ clearTimeout(timer);
+ child.removeListener('exit', onExit);
+ child.removeListener('close', onExit);
+ resolve(exited);
+ };
+ const onExit = () => finish(true);
+ const timer = setTimeout(() => finish(child.exitCode !== null), timeoutMs);
+ child.once('exit', onExit);
+ child.once('close', onExit);
+ });
+}
+
function getSetCookieHeader(response) {
if (response?.headers?.getSetCookie) {
return response.headers.getSetCookie().join('\n');
@@ -64,17 +95,79 @@ async function waitForEndpoint(url, timeoutMs = 25000) {
return false;
}
+async function terminateChildProcessTree(child) {
+ if (!child) return;
+
+ if (process.platform === 'win32' && child.pid) {
+ // Ensure cmd.exe wrapper and all descendant processes (wrangler, etc.) are terminated.
+ const result = spawnSync('taskkill', ['/PID', String(child.pid), '/T', '/F'], {
+ windowsHide: true,
+ stdio: 'ignore',
+ });
+
+ if (result.status !== 0) {
+ try {
+ child.kill('SIGTERM');
+ } catch (_) {
+ // Ignore cleanup failures while shutting down test runtime processes.
+ }
+ }
+ return;
+ }
+
+ try {
+ child.kill('SIGTERM');
+ } catch (_) {
+ return;
+ }
+
+ const exited = await waitForChildExit(child, 800);
+ if (!exited && child.exitCode === null) {
+ try {
+ child.kill('SIGKILL');
+ } catch (_) {
+ // Ignore cleanup failures while shutting down test runtime processes.
+ }
+ }
+}
+
async function startRuntimeServer() {
const port = 8788;
const baseUrl = `http://127.0.0.1:${port}`;
- const cmd = process.platform === 'win32' ? 'npx.cmd' : 'npx';
- const args = ['wrangler', 'dev', '--local', '--ip', '127.0.0.1', '--port', String(port), '--log-level', 'error'];
+ const wranglerArgs = ['wrangler', 'dev', '--local', '--ip', '127.0.0.1', '--port', String(port), '--log-level', 'error'];
- const child = spawn(cmd, args, {
- cwd: __dirname,
- env: { ...process.env, BROWSER: 'none' },
- stdio: ['ignore', 'pipe', 'pipe'],
- });
+ if (process.platform === 'win32') {
+ const bashCheck = spawnSync('cmd.exe', ['/d', '/s', '/c', 'where bash'], {
+ stdio: 'ignore',
+ windowsHide: true,
+ });
+ if (bashCheck.status !== 0) {
+ throw new Error('SKIP_RUNTIME_AUTH_TEST: bash not found (required by wrangler [build] command).');
+ }
+ }
+
+ let child;
+ if (process.platform === 'win32') {
+ // Spawning npx.cmd directly can fail with EINVAL on some Windows/Node setups.
+ const quoteForCmd = (arg) => {
+ const value = String(arg);
+ const escaped = value.replace(/"/g, '""');
+ return /[\s"&|<>^()%!]/.test(value) ? `"${escaped}"` : escaped;
+ };
+ const cmdLine = ['npx', ...wranglerArgs].map(quoteForCmd).join(' ');
+ child = spawn('cmd.exe', ['/d', '/s', '/c', cmdLine], {
+ cwd: __dirname,
+ env: { ...process.env, BROWSER: 'none' },
+ stdio: ['ignore', 'pipe', 'pipe'],
+ windowsHide: true,
+ });
+ } else {
+ child = spawn('npx', wranglerArgs, {
+ cwd: __dirname,
+ env: { ...process.env, BROWSER: 'none' },
+ stdio: ['ignore', 'pipe', 'pipe'],
+ });
+ }
let output = '';
child.stdout.on('data', (chunk) => {
@@ -86,7 +179,7 @@ async function startRuntimeServer() {
const ready = await waitForEndpoint(`${baseUrl}/api/auth/user`);
if (!ready) {
- child.kill('SIGTERM');
+ await terminateChildProcessTree(child);
throw new Error(`wrangler dev did not become ready within timeout. Output:\n${output}`);
}
@@ -94,12 +187,8 @@ async function startRuntimeServer() {
}
async function stopRuntimeServer(runtime) {
- if (!runtime || !runtime.child || runtime.child.killed) return;
- runtime.child.kill('SIGTERM');
- await sleep(800);
- if (!runtime.child.killed) {
- runtime.child.kill('SIGKILL');
- }
+ if (!runtime || !runtime.child) return;
+ await terminateChildProcessTree(runtime.child);
}
// Test 1: Verify HTML file exists and contains required elements
@@ -691,7 +780,11 @@ async function testAuthenticationRuntimeBehavior() {
logoutCookies ? 'Found state clear cookie header' : 'Missing Set-Cookie header'
);
} catch (error) {
- testResult('Authentication runtime behavior test setup', false, error.message);
+ if (String(error.message || '').includes('SKIP_RUNTIME_AUTH_TEST:')) {
+ testResult('Authentication runtime behavior test setup', 'skipped', `Skipped: ${error.message}`);
+ } else {
+ testResult('Authentication runtime behavior test setup', false, error.message);
+ }
} finally {
await stopRuntimeServer(runtime);
}
@@ -717,12 +810,14 @@ async function runTests() {
log(' Test Summary', colors.blue);
log('='.repeat(60), colors.blue);
- const total = testsPassed + testsFailed;
+ const total = testsPassed + testsFailed + testsSkipped;
+ const executed = testsPassed + testsFailed;
log(`\nTotal Tests: ${total}`);
log(`Passed: ${testsPassed}`, colors.green);
+ log(`Skipped: ${testsSkipped}`, colors.yellow);
log(`Failed: ${testsFailed}`, testsFailed > 0 ? colors.red : colors.green);
- const successRate = total > 0 ? ((testsPassed / total) * 100).toFixed(1) : 0;
+ const successRate = executed > 0 ? Number(((testsPassed / executed) * 100).toFixed(1)) : 100;
log(`\nSuccess Rate: ${successRate}%\n`, successRate >= 90 ? colors.green : colors.yellow);
// Exit with appropriate code
diff --git a/test-url-validation.js b/test-url-validation.js
new file mode 100644
index 00000000..9fac1f6a
--- /dev/null
+++ b/test-url-validation.js
@@ -0,0 +1,140 @@
+#!/usr/bin/env node
+
+/**
+ * Focused tests for frontend URL validation logic in public/index.html.
+ *
+ * This test loads and evaluates the parser constants/function directly from
+ * the page script so behavior stays aligned with production code.
+ */
+
+const fs = require('fs');
+const path = require('path');
+const vm = require('vm');
+
+const indexPath = path.join(__dirname, 'public', 'index.html');
+const html = fs.readFileSync(indexPath, 'utf8');
+
+function extractBlock(source, startToken, endToken) {
+ const start = source.indexOf(startToken);
+ const end = source.indexOf(endToken, start);
+ if (start === -1 || end === -1) {
+ throw new Error(`Could not extract block between ${startToken} and ${endToken}`);
+ }
+ return source.slice(start, end);
+}
+
+const parserBlock = extractBlock(
+ html,
+ 'const PR_PATH_RE = /^\\/([^/]+)\\/([^/]+)\\/pull\\/(\\d+)\\/?$/i;',
+ 'function showSectionMessage('
+);
+
+const sandbox = {
+ URL,
+ Set,
+ result: null,
+};
+
+vm.createContext(sandbox);
+vm.runInContext(`${parserBlock}\nresult = parseGitHubTrackingUrl;`, sandbox);
+
+const parseGitHubTrackingUrl = sandbox.result;
+if (typeof parseGitHubTrackingUrl !== 'function') {
+ throw new Error('parseGitHubTrackingUrl function not found in page script');
+}
+
+let passed = 0;
+let failed = 0;
+
+function check(name, fn) {
+ try {
+ fn();
+ console.log(`PASS: ${name}`);
+ passed += 1;
+ } catch (err) {
+ console.error(`FAIL: ${name}`);
+ console.error(` ${err.message}`);
+ failed += 1;
+ }
+}
+
+function expectValid(url, expectedType, expectedCanonical) {
+ const out = parseGitHubTrackingUrl(url);
+ if (!out.valid) {
+ throw new Error(`Expected valid URL but got invalid (${out.reason})`);
+ }
+ if (out.type !== expectedType) {
+ throw new Error(`Expected type ${expectedType}, got ${out.type}`);
+ }
+ if (out.canonicalUrl !== expectedCanonical) {
+ throw new Error(`Expected canonical ${expectedCanonical}, got ${out.canonicalUrl}`);
+ }
+}
+
+function expectInvalid(url) {
+ const out = parseGitHubTrackingUrl(url);
+ if (out.valid) {
+ throw new Error(`Expected invalid URL but got valid (${out.canonicalUrl})`);
+ }
+}
+
+check('Accepts PR URL with query suffix', () => {
+ expectValid(
+ 'https://github.com/OWASP-BLT/BLT-Leaf/pull/262?diff=split',
+ 'pr',
+ 'https://github.com/OWASP-BLT/BLT-Leaf/pull/262'
+ );
+});
+
+check('Accepts PR URL with fragment suffix', () => {
+ expectValid(
+ 'https://github.com/OWASP-BLT/BLT-Leaf/pull/262#discussion_r1',
+ 'pr',
+ 'https://github.com/OWASP-BLT/BLT-Leaf/pull/262'
+ );
+});
+
+check('Accepts repository URL for bulk flow', () => {
+ expectValid(
+ 'https://github.com/OWASP-BLT/BLT-Leaf',
+ 'repo',
+ 'https://github.com/OWASP-BLT/BLT-Leaf'
+ );
+});
+
+check('Accepts organization URL', () => {
+ expectValid(
+ 'https://github.com/OWASP-BLT',
+ 'org',
+ 'https://github.com/OWASP-BLT'
+ );
+});
+
+check('Accepts /orgs/owner URL', () => {
+ expectValid(
+ 'https://github.com/orgs/OWASP-BLT',
+ 'org',
+ 'https://github.com/OWASP-BLT'
+ );
+});
+
+check('Rejects non-GitHub host', () => {
+ expectInvalid('https://example.com/OWASP-BLT/BLT-Leaf/pull/262');
+});
+
+check('Rejects GitHub issue URL', () => {
+ expectInvalid('https://github.com/OWASP-BLT/BLT-Leaf/issues/1');
+});
+
+check('Rejects GitHub commit URL', () => {
+ expectInvalid('https://github.com/OWASP-BLT/BLT-Leaf/commit/abcdef');
+});
+
+check('Rejects reserved owner route', () => {
+ expectInvalid('https://github.com/settings');
+});
+
+console.log(`\nSummary: ${passed} passed, ${failed} failed`);
+if (failed > 0) {
+ process.exit(1);
+}