diff --git a/migrations/0004_add_canonical_url_and_composite_uniqueness.sql b/migrations/0004_add_canonical_url_and_composite_uniqueness.sql new file mode 100644 index 00000000..a0708688 --- /dev/null +++ b/migrations/0004_add_canonical_url_and_composite_uniqueness.sql @@ -0,0 +1,34 @@ +-- Migration: Add canonical_url metadata and composite uniqueness for PR identity +-- Created: 2026-03-25 +-- Description: +-- 1) Adds canonical_url column for normalized PR URL storage +-- 2) Backfills canonical_url for existing rows +-- 3) Removes duplicate rows by (repo_owner, repo_name, pr_number), keeping latest id +-- 4) Enforces uniqueness on (repo_owner, repo_name, pr_number) + +ALTER TABLE prs ADD COLUMN canonical_url TEXT; + +UPDATE prs +SET canonical_url = 'https://github.com/' || repo_owner || '/' || repo_name || '/pull/' || pr_number +WHERE canonical_url IS NULL OR canonical_url = ''; + +WITH ranked_prs AS ( + SELECT + id, + ROW_NUMBER() OVER ( + PARTITION BY LOWER(repo_owner), LOWER(repo_name), pr_number + ORDER BY readiness_computed_at DESC NULLS LAST, + updated_at DESC NULLS LAST, + id DESC + ) AS row_num + FROM prs +) +DELETE FROM prs +WHERE id IN ( + SELECT id + FROM ranked_prs + WHERE row_num > 1 +); + +CREATE UNIQUE INDEX IF NOT EXISTS idx_prs_identity_unique +ON prs(repo_owner COLLATE NOCASE, repo_name COLLATE NOCASE, pr_number); diff --git a/package.json b/package.json index cb87ef1a..011705e4 100644 --- a/package.json +++ b/package.json @@ -11,8 +11,9 @@ "db:migrations:list": "wrangler d1 migrations list pr_tracker", "lint": "echo 'Linting not configured yet' && exit 0", "format:check": "echo 'Format checking not configured yet' && exit 0", - "test": "node test-data-display.js", - "test:data-display": "node test-data-display.js" + "test": "node test-data-display.js && node test-url-validation.js", + "test:data-display": "node test-data-display.js", + "test:url-validation": "node test-url-validation.js" }, "keywords": [ "github", diff --git a/public/index.html b/public/index.html index ff0b2152..d7c8a977 100644 --- a/public/index.html +++ b/public/index.html @@ -568,6 +568,7 @@

Error< }, FEEDBACK_DISMISS_DELAY); } + const PR_PATH_RE = /^\/([^/]+)\/([^/]+)\/pull\/(\d+)\/?$/i; + const REPO_PATH_RE = /^\/([^/]+)\/([^/]+)\/?$/i; + const ORGS_PATH_RE = /^\/orgs\/([^/]+)\/?$/i; + const ORG_PATH_RE = /^\/([^/]+)\/?$/i; + const RESERVED_OWNER_NAMES = new Set([ + 'about', 'codespaces', 'collections', 'discussions', 'enterprise', + 'explore', 'features', 'issues', 'login', 'marketplace', 'new', + 'notifications', 'organizations', 'orgs', 'pricing', 'pulls', + 'security', 'settings', 'signup', 'sponsors', 'topics', 'trending' + ]); + + function setUrlError(message) { + const el = document.getElementById('url-error'); + const input = document.getElementById('prUrlInput'); + if (!el) return; + if (message) { + el.textContent = message; + el.classList.remove('hidden'); + if (input) { + input.setAttribute('aria-invalid', 'true'); + input.setAttribute('aria-describedby', 'url-error'); + } + } else { + el.textContent = ''; + el.classList.add('hidden'); + if (input) { + input.removeAttribute('aria-invalid'); + input.removeAttribute('aria-describedby'); + } + } + } + + function normalizeRepoKey(owner, repo) { + return `${String(owner || '').toLowerCase()}/${String(repo || '').toLowerCase()}`; + } + + function findRepoByIdentity(owner, repo) { + const targetKey = normalizeRepoKey(owner, repo); + return repos.find(r => normalizeRepoKey(r.repo_owner, r.repo_name) === targetKey) || null; + } + + function findPrByIdentity(owner, repo, prNumber) { + const targetKey = normalizeRepoKey(owner, repo); + const targetPrNumber = String(prNumber || ''); + return allPrs.find(pr => { + const prKey = normalizeRepoKey(pr.repo_owner, pr.repo_name); + if (prKey !== targetKey) { + return false; + } + if (!targetPrNumber) { + return true; + } + return String(pr.pr_number || '') === targetPrNumber; + }) || null; + } + + function parseGitHubTrackingUrl(rawUrl) { + const value = typeof rawUrl === 'string' ? rawUrl.trim() : ''; + if (!value) { + return { valid: false, reason: 'Please enter a PR, repository, or organization URL' }; + } + if (value.length > 2048) { + return { valid: false, reason: 'URL is too long' }; + } + + let url; + try { + url = new URL(value); + } catch { + return { valid: false, reason: 'Please enter a valid URL' }; + } + + const protocol = url.protocol.toLowerCase(); + if (protocol !== 'http:' && protocol !== 'https:') { + return { valid: false, reason: 'URL must start with http:// or https://' }; + } + + const hostname = url.hostname.toLowerCase(); + if (hostname !== 'github.com' && hostname !== 'www.github.com') { + return { valid: false, reason: 'Only github.com URLs are allowed' }; + } + + const cleanPath = url.pathname.replace(/\/+$/, '') || '/'; + let match = cleanPath.match(PR_PATH_RE); + if (match) { + const owner = match[1]; + const repo = match[2]; + const prNumber = parseInt(match[3], 10); + return { + valid: true, + type: 'pr', + owner, + repo, + prNumber, + canonicalUrl: `https://github.com/${owner}/${repo}/pull/${prNumber}` + }; + } + + match = cleanPath.match(REPO_PATH_RE); + if (match) { + const owner = match[1]; + const repo = match[2]; + if (!RESERVED_OWNER_NAMES.has(owner.toLowerCase()) && repo && repo.trim()) { + return { + valid: true, + type: 'repo', + owner, + repo, + canonicalUrl: `https://github.com/${owner}/${repo}` + }; + } + } + + match = cleanPath.match(ORGS_PATH_RE); + if (match) { + const owner = match[1]; + return { + valid: true, + type: 'org', + owner, + canonicalUrl: `https://github.com/${owner}` + }; + } + + match = cleanPath.match(ORG_PATH_RE); + if (match) { + const owner = match[1]; + if (!RESERVED_OWNER_NAMES.has(owner.toLowerCase())) { + return { + valid: true, + type: 'org', + owner, + canonicalUrl: `https://github.com/${owner}` + }; + } + } + + return { + valid: false, + reason: 'Use a GitHub PR, repository, or organization URL' + }; + } + function showSectionMessage(message, type) { // Show an inline feedback message in the Add PR section on the page const section = document.getElementById('prUrlInput')?.closest('section'); @@ -3164,19 +3308,32 @@

${emptyTitl const checkbox = document.getElementById('addAllPrsCheckbox'); const prUrl = input.value.trim(); const addAll = checkbox.checked; + const parsedUrl = parseGitHubTrackingUrl(prUrl); - if (!prUrl) { - showInputError('prUrlInput', 'Please enter a PR URL'); + if (!parsedUrl.valid) { + setUrlError(parsedUrl.reason); + input.focus(); return; } - // Auto-detect if URL is an org or repo URL (not a PR URL) - const isOrgOrRepoUrl = prUrl.match(/^https?:\/\/github\.com\/[^/]+\/?$/) || - (prUrl.match(/^https?:\/\/github\.com\/[^/]+\/[^/]+\/?$/) && !prUrl.includes('/pull/')); - const effectiveAddAll = addAll || !!isOrgOrRepoUrl; + // Enforce compatibility between import mode (single vs bulk) and URL type + if (!addAll && parsedUrl.type !== 'pr') { + setUrlError('Use a pull request URL when bulk import is not enabled'); + input.focus(); + return; + } + if (addAll && parsedUrl.type === 'pr') { + setUrlError('For bulk import, enter a repository or organization URL'); + input.focus(); + return; + } + + setUrlError(''); + const canonicalInputUrl = parsedUrl.canonicalUrl; + const isBulkImport = addAll || parsedUrl.type !== 'pr'; btn.disabled = true; - btn.textContent = effectiveAddAll ? 'Importing...' : 'Adding...'; + btn.textContent = isBulkImport ? 'Importing...' : 'Adding...'; try { // If importing all, handle logic to send repo_url appropriately @@ -3187,16 +3344,18 @@

${emptyTitl 'Content-Type': 'application/json' }, body: JSON.stringify({ - pr_url: prUrl, - add_all: addAll + pr_url: canonicalInputUrl, + add_all: isBulkImport }) }); const data = await response.json(); if (data.error) { - showInputError('prUrlInput', data.error); + setUrlError(data.error); + input.focus(); } else { input.value = ''; + setUrlError(''); // Show success message for bulk imports if (data.imported_count !== undefined) { if (data.truncated) { @@ -3207,27 +3366,35 @@

${emptyTitl } invalidateApiCache(); // For single PR additions, select the org/repo and highlight the added PR - const prUrlPattern = /^https?:\/\/github\.com\/([a-zA-Z0-9._-]+)\/([a-zA-Z0-9._-]+)\/pull\/\d+/; - const prMatch = !effectiveAddAll && prUrl.match(prUrlPattern); - if (prMatch) { - const prOwner = prMatch[1]; - const prRepoKey = `${prMatch[1]}/${prMatch[2]}`; - // Update org filter to highlight the PR's owner/org - currentOrg = prOwner; + if (!isBulkImport && parsedUrl.type === 'pr') { + const targetOwner = parsedUrl.owner; + const targetRepo = parsedUrl.repo; + const targetPrNumber = parsedUrl.prNumber; + + // Reset to owner scope first, then resolve exact casing/key from loaded repo list. + currentOrg = targetOwner; + currentPage = 1; + await loadRepos(true); + + const matchedRepo = findRepoByIdentity(targetOwner, targetRepo); + const resolvedOwner = matchedRepo ? matchedRepo.repo_owner : targetOwner; + const resolvedRepo = matchedRepo ? matchedRepo.repo_name : targetRepo; + + currentOrg = resolvedOwner; localStorage.setItem('currentOrg', currentOrg); const orgFilterInput = document.getElementById('orgFilterInput'); const orgFilterClearBtn = document.getElementById('orgFilterClearBtn'); - if (orgFilterInput) orgFilterInput.value = prOwner; + if (orgFilterInput) orgFilterInput.value = currentOrg; if (orgFilterClearBtn) orgFilterClearBtn.classList.remove('hidden'); - // Select the specific repo in the sidebar - currentRepo = prRepoKey; + + currentRepo = `${resolvedOwner}/${resolvedRepo}`; localStorage.setItem('currentRepo', currentRepo); - currentPage = 1; - await loadRepos(true); + await loadPrs(true); await loadRateLimit(true); + // Highlight the newly added PR row - const added = allPrs.find(pr => pr.pr_url === prUrl); + const added = findPrByIdentity(targetOwner, targetRepo, targetPrNumber); if (added) highlightPrRow(added.id); } else { // Force bypass service worker cache after import @@ -3238,7 +3405,8 @@

${emptyTitl } } catch (error) { console.error('Error adding PR:', error); - showInputError('prUrlInput', 'Failed to add PR'); + setUrlError('Failed to add PR'); + input.focus(); } finally { btn.disabled = false; btn.textContent = 'Add PR'; @@ -3339,6 +3507,9 @@

${emptyTitl }); document.getElementById('addPrBtn').addEventListener('click', addPr); + document.getElementById('prUrlInput').addEventListener('input', () => { + setUrlError(''); + }); document.getElementById('prUrlInput').addEventListener('keypress', (e) => { if (e.key === 'Enter') addPr(); }); @@ -3649,42 +3820,50 @@

${emptyTitl if (ownerValid && repoValid && prValid) { // New format: construct the full PR URL from parts prUrl = `https://github.com/${ownerParam}/${repoParam}/pull/${prParam}`; - repoKey = `${ownerParam}/${repoParam}`; + repoKey = normalizeRepoKey(ownerParam, repoParam); } else if (prParam) { // Old format: only accept prParam if it looks like a GitHub PR URL const legacyPrUrlPattern = /^https?:\/\/github\.com\/([a-zA-Z0-9._-]+)\/([a-zA-Z0-9._-]+)\/pull\/\d+$/; const match = prParam.match(legacyPrUrlPattern); if (match) { prUrl = prParam; - repoKey = `${match[1]}/${match[2]}`; + repoKey = normalizeRepoKey(match[1], match[2]); } } if (!prUrl) return; + const parsedParamUrl = parseGitHubTrackingUrl(prUrl); + if (!parsedParamUrl.valid || parsedParamUrl.type !== 'pr') return; + const canonicalInputUrl = parsedParamUrl.canonicalUrl; + // Pre-fill the input with the PR URL const input = document.getElementById('prUrlInput'); - if (input) input.value = prUrl; + if (input) input.value = canonicalInputUrl; // Select the correct repo in the sidebar if needed - if (repoKey && repoKey !== currentRepo) { - currentRepo = repoKey; + const matchedRepo = findRepoByIdentity(parsedParamUrl.owner, parsedParamUrl.repo); + const resolvedOwner = matchedRepo ? matchedRepo.repo_owner : parsedParamUrl.owner; + const resolvedRepo = matchedRepo ? matchedRepo.repo_name : parsedParamUrl.repo; + const resolvedRepoKey = `${resolvedOwner}/${resolvedRepo}`; + if (repoKey && normalizeRepoKey(...currentRepo.split('/')) !== normalizeRepoKey(resolvedOwner, resolvedRepo)) { + currentRepo = resolvedRepoKey; localStorage.setItem('currentRepo', currentRepo); updateMobileRepoDisplay(); document.querySelectorAll('.repo-item').forEach(item => { - setRepoItemActiveState(item, item.dataset.repo === currentRepo); + setRepoItemActiveState(item, normalizeRepoKey(...item.dataset.repo.split('/')) === normalizeRepoKey(resolvedOwner, resolvedRepo)); }); await loadPrs(); } // Check if the PR is already tracked - const existing = allPrs.find(pr => pr.pr_url === prUrl); + const existing = findPrByIdentity(parsedParamUrl.owner, parsedParamUrl.repo, parsedParamUrl.prNumber); if (existing) { highlightPrRow(existing.id); } else { // Auto-add the PR, then highlight it await addPr(); - const added = allPrs.find(pr => pr.pr_url === prUrl); + const added = findPrByIdentity(parsedParamUrl.owner, parsedParamUrl.repo, parsedParamUrl.prNumber); if (added) highlightPrRow(added.id); } } diff --git a/src/database.py b/src/database.py index ad946a5f..b5a0f4ed 100644 --- a/src/database.py +++ b/src/database.py @@ -261,14 +261,17 @@ async def delete_readiness_from_db(env, pr_id): async def upsert_pr(db, pr_url, owner, repo, pr_number, pr_data): """Helper to insert or update PR in database (Deduplicates logic)""" current_timestamp = datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') + canonical_url = f'https://github.com/{owner}/{repo}/pull/{pr_number}' stmt = db.prepare(''' - INSERT INTO prs (pr_url, repo_owner, repo_name, pr_number, title, state, + INSERT INTO prs (pr_url, canonical_url, repo_owner, repo_name, pr_number, title, state, is_merged, mergeable_state, files_changed, author_login, author_avatar, repo_owner_avatar, checks_passed, checks_failed, checks_skipped, commits_count, behind_by, review_status, last_updated_at, last_refreshed_at, updated_at, is_draft, open_conversations_count, reviewers_json, etag) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) - ON CONFLICT(pr_url) DO UPDATE SET + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT(repo_owner, repo_name, pr_number) DO UPDATE SET + pr_url = excluded.pr_url, + canonical_url = excluded.canonical_url, title = excluded.title, state = excluded.state, is_merged = excluded.is_merged, @@ -289,7 +292,7 @@ async def upsert_pr(db, pr_url, owner, repo, pr_number, pr_data): reviewers_json = excluded.reviewers_json, etag = excluded.etag ''').bind( - pr_url, owner, repo, pr_number, + pr_url, canonical_url, owner, repo, pr_number, pr_data.get('title') or '', pr_data.get('state') or '', 1 if pr_data.get('is_merged') else 0, diff --git a/src/handlers.py b/src/handlers.py index 372bfadb..92e55e5d 100644 --- a/src/handlers.py +++ b/src/handlers.py @@ -85,24 +85,32 @@ async def handle_add_pr(request, env): db = get_db(env) + normalized_pr_url = pr_url.strip() + # Auto-detect URL type: if it's an org or repo URL (not a PR URL), # automatically treat as bulk import regardless of checkbox state if not add_all: - # Check if the URL is an org URL or a repo URL (not a PR URL) - if parse_org_url(pr_url) or (parse_repo_url(pr_url) and '/pull/' not in pr_url): + is_pr_url = False + try: + parse_pr_url(normalized_pr_url) + is_pr_url = True + except ValueError: + is_pr_url = False + + if not is_pr_url and (parse_org_url(normalized_pr_url) or parse_repo_url(normalized_pr_url)): add_all = True if add_all: org_owner = '' # Try parsing as a repo URL first (e.g. https://github.com/owner/repo) - parsed = parse_repo_url(pr_url) + parsed = parse_repo_url(normalized_pr_url) if parsed: # Single repo import repos_to_import = [{'owner': parsed['owner'], 'name': parsed['repo']}] is_org_import = False else: # Parse org url - org_parsed = parse_org_url(pr_url) + org_parsed = parse_org_url(normalized_pr_url) if not org_parsed: return Response.new(json.dumps({'error': 'Invalid GitHub Repository or Organization URL'}), {'status': 400, 'headers': {'Content-Type': 'application/json'}}) @@ -211,7 +219,8 @@ async def handle_add_pr(request, env): 'reviewers_json': '[]' } - await upsert_pr(db, item['html_url'], owner, repo, item['number'], pr_data) + canonical_pr_url = f"https://github.com/{owner}/{repo}/pull/{item['number']}" + await upsert_pr(db, canonical_pr_url, owner, repo, item['number'], pr_data) added_count += 1 # Build response message @@ -236,7 +245,7 @@ async def handle_add_pr(request, env): # Add single pr # Catch ValueError from parse_pr_url try: - parsed = parse_pr_url(pr_url) + parsed = parse_pr_url(normalized_pr_url) except ValueError as e: return Response.new( json.dumps({'error': str(e)}), @@ -259,19 +268,20 @@ async def handle_add_pr(request, env): return Response.new(json.dumps({'error': 'Cannot add merged/closed PRs'}), {'status': 400, 'headers': {'Content-Type': 'application/json'}}) - await upsert_pr(db, pr_url, parsed['owner'], parsed['repo'], parsed['pr_number'], pr_data) + canonical_pr_url = parsed['canonical_url'] + await upsert_pr(db, canonical_pr_url, parsed['owner'], parsed['repo'], parsed['pr_number'], pr_data) # Auto-run readiness analysis for the newly added PR readiness_data = None try: pr_result = await db.prepare( - 'SELECT * FROM prs WHERE pr_url = ?' - ).bind(pr_url).first() + 'SELECT * FROM prs WHERE repo_owner = ? COLLATE NOCASE AND repo_name = ? COLLATE NOCASE AND pr_number = ?' + ).bind(parsed['owner'], parsed['repo'], parsed['pr_number']).first() if pr_result: pr_row = pr_result.to_py() readiness_data = await _run_readiness_analysis(env, pr_row, pr_row['id'], user_token) except Exception as analysis_err: - print(f"Auto-analysis failed for PR {pr_url}: {type(analysis_err).__name__}: {str(analysis_err)}") + print(f"Auto-analysis failed for PR {canonical_pr_url}: {type(analysis_err).__name__}: {str(analysis_err)}") # Include repo_owner, repo_name, pr_number, and pr_url in the response for frontend display response_data = { @@ -279,7 +289,7 @@ async def handle_add_pr(request, env): 'repo_owner': parsed['owner'], 'repo_name': parsed['repo'], 'pr_number': parsed['pr_number'], - 'pr_url': pr_url + 'pr_url': canonical_pr_url } result_obj = {'success': True, 'data': response_data} @@ -1110,8 +1120,8 @@ async def handle_github_webhook(request, env): db = get_db(env) pr_url = f"https://github.com/{repo_owner}/{repo_name}/pull/{pr_number}" result = await db.prepare( - 'SELECT id FROM prs WHERE pr_url = ?' - ).bind(pr_url).first() + 'SELECT id FROM prs WHERE repo_owner = ? COLLATE NOCASE AND repo_name = ? COLLATE NOCASE AND pr_number = ?' + ).bind(repo_owner, repo_name, pr_number).first() # Handle opened PRs - add to tracking automatically if action == 'opened': @@ -1136,8 +1146,8 @@ async def handle_github_webhook(request, env): # Get the newly created PR ID new_result = await db.prepare( - 'SELECT id FROM prs WHERE pr_url = ?' - ).bind(pr_url).first() + 'SELECT id FROM prs WHERE repo_owner = ? COLLATE NOCASE AND repo_name = ? COLLATE NOCASE AND pr_number = ?' + ).bind(repo_owner, repo_name, pr_number).first() new_pr_id = new_result.to_py()['id'] if new_result else None return Response.new( @@ -1295,8 +1305,8 @@ async def handle_github_webhook(request, env): for pr_number, repo_owner, repo_name in prs_to_update: pr_url = f"https://github.com/{repo_owner}/{repo_name}/pull/{pr_number}" result = await db.prepare( - 'SELECT id FROM prs WHERE pr_url = ?' - ).bind(pr_url).first() + 'SELECT id FROM prs WHERE repo_owner = ? COLLATE NOCASE AND repo_name = ? COLLATE NOCASE AND pr_number = ?' + ).bind(repo_owner, repo_name, pr_number).first() if not result: # PR not being tracked - skip it diff --git a/src/utils.py b/src/utils.py index 92957473..c4b25c7b 100644 --- a/src/utils.py +++ b/src/utils.py @@ -11,50 +11,73 @@ # Reduces overall readiness score by 33% when mergeable state is 'dirty' (conflicts) _MERGE_CONFLICTS_SCORE_MULTIPLIER = 0.67 +_GITHUB_PR_RE = re.compile( + r'^https?://github\.com/([^/?#\s]+)/([^/?#\s]+)/pull/(\d+)(?:[/?#].*)?$', + re.IGNORECASE, +) +_GITHUB_REPO_RE = re.compile( + r'^https?://github\.com/(?!orgs(?:/|$))([^/?#\s]+)/([^/?#\s]+)/?(?:[?#].*)?$', + re.IGNORECASE, +) +_GITHUB_ORG_RE = re.compile( + r'^https?://github\.com/(?:orgs/)?([A-Za-z0-9_.-]+)/?(?:[?#].*)?$', + re.IGNORECASE, +) -def parse_pr_url(pr_url): + +def parse_pr_url(pr_url: str): """ - Parse GitHub PR URL to extract owner, repo, and PR number. - - Security Hardening (Issue #45): - - Type validation to prevent type confusion attacks - - Anchored regex pattern to block malformed URLs with trailing junk - - Raises ValueError instead of returning None for better error handling + Validates and parses a GitHub PR URL. + Returns a dict with owner, repo, and pr_number or raises ValueError. """ - # FIX Issue #45: Type validation if not isinstance(pr_url, str): raise ValueError("PR URL must be a string") if not pr_url: raise ValueError("PR URL is required") - pr_url = pr_url.strip().rstrip('/') - - # FIX Issue #45: Anchored regex - must match EXACTLY, no trailing junk allowed - pattern = r'^https?://github\.com/([^/]+)/([^/]+)/pull/(\d+)$' - match = re.match(pattern, pr_url) - + normalized_url = pr_url.strip() + match = _GITHUB_PR_RE.match(normalized_url) + if not match: - # FIX Issue #45: Raise error instead of returning None - raise ValueError("Invalid GitHub PR URL. Format: https://github.com/OWNER/REPO/pull/NUMBER") - + raise ValueError( + "Invalid GitHub PR URL. Expected format: " + "https://github.com/owner/repo/pull/123" + ) + + owner, repo, pr_num_str = match.group(1), match.group(2), match.group(3) + pr_number = int(pr_num_str) + + if pr_number < 1: + raise ValueError("PR number must be a positive integer.") + + # Return as a dictionary to match the original function's signature return { - 'owner': match.group(1), - 'repo': match.group(2), - 'pr_number': int(match.group(3)) + 'owner': owner, + 'repo': repo, + 'pr_number': pr_number, + 'canonical_url': f"https://github.com/{owner}/{repo}/pull/{pr_number}" } def parse_repo_url(url): """Parse GitHub Repo URL to extract owner and repo name""" - if not url: return None - url = url.strip().rstrip('/') - pattern = r'https?://github\.com/([^/]+)/([^/]+)(?:/.*)?$' - match = re.match(pattern, url) + if not url: + return None + normalized_url = url.strip() + match = _GITHUB_REPO_RE.match(normalized_url) if match: + owner = match.group(1) + # Security guard: don't misroute /orgs/ bulk URLs to parse_repo_url() + # GitHub uses /orgs/ for organization homepages/dashboards. + if owner.lower() == 'orgs': + return None + + repo = match.group(2) return { - 'owner': match.group(1), - 'repo': match.group(2) + 'owner': owner, + 'repo': repo, + 'canonical_url': f"https://github.com/{owner}/{repo}" } return None @@ -65,21 +88,22 @@ def parse_org_url(url): """ if not url: return None - url = url.strip().rstrip('/') - # Match org/user URL: github.com/ with no further path segments - pattern = r'^https?://github\.com/([A-Za-z0-9_.-]+)$' - match = re.match(pattern, url) + normalized_url = url.strip() + match = _GITHUB_ORG_RE.match(normalized_url) if match: owner = match.group(1) # Exclude GitHub reserved paths that aren't orgs/users reserved = {'settings', 'organizations', 'explore', 'marketplace', 'notifications', 'new', 'login', 'signup', 'features', 'enterprise', 'pricing', 'topics', 'collections', - 'trending', 'sponsors', 'about', 'security', 'pulls', + 'trending', 'sponsors', 'about', 'security', 'pulls', 'orgs', 'issues', 'codespaces', 'discussions'} if owner.lower() in reserved: return None - return {'owner': owner} + return { + 'owner': owner, + 'canonical_url': f"https://github.com/{owner}" + } return None diff --git a/test-data-display.js b/test-data-display.js index 96bce05b..c5c21e8e 100755 --- a/test-data-display.js +++ b/test-data-display.js @@ -7,7 +7,7 @@ const fs = require('fs'); const path = require('path'); -const { spawn } = require('child_process'); +const { spawn, spawnSync } = require('child_process'); // ANSI color codes for output const colors = { @@ -20,16 +20,26 @@ const colors = { let testsPassed = 0; let testsFailed = 0; +let testsSkipped = 0; function log(message, color = colors.reset) { console.log(`${color}${message}${colors.reset}`); } -function testResult(testName, passed, message = '') { - if (passed) { +function testResult(testName, statusOrPassed, message = '') { + const isExplicitStatus = statusOrPassed === 'passed' || statusOrPassed === 'failed' || statusOrPassed === 'skipped'; + const status = isExplicitStatus + ? statusOrPassed + : (statusOrPassed ? 'passed' : 'failed'); + + if (status === 'passed') { testsPassed++; log(`✓ ${testName}`, colors.green); if (message) log(` ${message}`, colors.blue); + } else if (status === 'skipped') { + testsSkipped++; + log(`- ${testName} (skipped)`, colors.yellow); + if (message) log(` ${message}`, colors.yellow); } else { testsFailed++; log(`✗ ${testName}`, colors.red); @@ -41,6 +51,27 @@ function sleep(ms) { return new Promise((resolve) => setTimeout(resolve, ms)); } +async function waitForChildExit(child, timeoutMs = 800) { + if (!child) return true; + if (child.exitCode !== null) return true; + + return await new Promise((resolve) => { + let settled = false; + const finish = (exited) => { + if (settled) return; + settled = true; + clearTimeout(timer); + child.removeListener('exit', onExit); + child.removeListener('close', onExit); + resolve(exited); + }; + const onExit = () => finish(true); + const timer = setTimeout(() => finish(child.exitCode !== null), timeoutMs); + child.once('exit', onExit); + child.once('close', onExit); + }); +} + function getSetCookieHeader(response) { if (response?.headers?.getSetCookie) { return response.headers.getSetCookie().join('\n'); @@ -64,17 +95,79 @@ async function waitForEndpoint(url, timeoutMs = 25000) { return false; } +async function terminateChildProcessTree(child) { + if (!child) return; + + if (process.platform === 'win32' && child.pid) { + // Ensure cmd.exe wrapper and all descendant processes (wrangler, etc.) are terminated. + const result = spawnSync('taskkill', ['/PID', String(child.pid), '/T', '/F'], { + windowsHide: true, + stdio: 'ignore', + }); + + if (result.status !== 0) { + try { + child.kill('SIGTERM'); + } catch (_) { + // Ignore cleanup failures while shutting down test runtime processes. + } + } + return; + } + + try { + child.kill('SIGTERM'); + } catch (_) { + return; + } + + const exited = await waitForChildExit(child, 800); + if (!exited && child.exitCode === null) { + try { + child.kill('SIGKILL'); + } catch (_) { + // Ignore cleanup failures while shutting down test runtime processes. + } + } +} + async function startRuntimeServer() { const port = 8788; const baseUrl = `http://127.0.0.1:${port}`; - const cmd = process.platform === 'win32' ? 'npx.cmd' : 'npx'; - const args = ['wrangler', 'dev', '--local', '--ip', '127.0.0.1', '--port', String(port), '--log-level', 'error']; + const wranglerArgs = ['wrangler', 'dev', '--local', '--ip', '127.0.0.1', '--port', String(port), '--log-level', 'error']; - const child = spawn(cmd, args, { - cwd: __dirname, - env: { ...process.env, BROWSER: 'none' }, - stdio: ['ignore', 'pipe', 'pipe'], - }); + if (process.platform === 'win32') { + const bashCheck = spawnSync('cmd.exe', ['/d', '/s', '/c', 'where bash'], { + stdio: 'ignore', + windowsHide: true, + }); + if (bashCheck.status !== 0) { + throw new Error('SKIP_RUNTIME_AUTH_TEST: bash not found (required by wrangler [build] command).'); + } + } + + let child; + if (process.platform === 'win32') { + // Spawning npx.cmd directly can fail with EINVAL on some Windows/Node setups. + const quoteForCmd = (arg) => { + const value = String(arg); + const escaped = value.replace(/"/g, '""'); + return /[\s"&|<>^()%!]/.test(value) ? `"${escaped}"` : escaped; + }; + const cmdLine = ['npx', ...wranglerArgs].map(quoteForCmd).join(' '); + child = spawn('cmd.exe', ['/d', '/s', '/c', cmdLine], { + cwd: __dirname, + env: { ...process.env, BROWSER: 'none' }, + stdio: ['ignore', 'pipe', 'pipe'], + windowsHide: true, + }); + } else { + child = spawn('npx', wranglerArgs, { + cwd: __dirname, + env: { ...process.env, BROWSER: 'none' }, + stdio: ['ignore', 'pipe', 'pipe'], + }); + } let output = ''; child.stdout.on('data', (chunk) => { @@ -86,7 +179,7 @@ async function startRuntimeServer() { const ready = await waitForEndpoint(`${baseUrl}/api/auth/user`); if (!ready) { - child.kill('SIGTERM'); + await terminateChildProcessTree(child); throw new Error(`wrangler dev did not become ready within timeout. Output:\n${output}`); } @@ -94,12 +187,8 @@ async function startRuntimeServer() { } async function stopRuntimeServer(runtime) { - if (!runtime || !runtime.child || runtime.child.killed) return; - runtime.child.kill('SIGTERM'); - await sleep(800); - if (!runtime.child.killed) { - runtime.child.kill('SIGKILL'); - } + if (!runtime || !runtime.child) return; + await terminateChildProcessTree(runtime.child); } // Test 1: Verify HTML file exists and contains required elements @@ -691,7 +780,11 @@ async function testAuthenticationRuntimeBehavior() { logoutCookies ? 'Found state clear cookie header' : 'Missing Set-Cookie header' ); } catch (error) { - testResult('Authentication runtime behavior test setup', false, error.message); + if (String(error.message || '').includes('SKIP_RUNTIME_AUTH_TEST:')) { + testResult('Authentication runtime behavior test setup', 'skipped', `Skipped: ${error.message}`); + } else { + testResult('Authentication runtime behavior test setup', false, error.message); + } } finally { await stopRuntimeServer(runtime); } @@ -717,12 +810,14 @@ async function runTests() { log(' Test Summary', colors.blue); log('='.repeat(60), colors.blue); - const total = testsPassed + testsFailed; + const total = testsPassed + testsFailed + testsSkipped; + const executed = testsPassed + testsFailed; log(`\nTotal Tests: ${total}`); log(`Passed: ${testsPassed}`, colors.green); + log(`Skipped: ${testsSkipped}`, colors.yellow); log(`Failed: ${testsFailed}`, testsFailed > 0 ? colors.red : colors.green); - const successRate = total > 0 ? ((testsPassed / total) * 100).toFixed(1) : 0; + const successRate = executed > 0 ? ((testsPassed / executed) * 100).toFixed(1) : 100; log(`\nSuccess Rate: ${successRate}%\n`, successRate >= 90 ? colors.green : colors.yellow); // Exit with appropriate code diff --git a/test-url-validation.js b/test-url-validation.js new file mode 100644 index 00000000..9fac1f6a --- /dev/null +++ b/test-url-validation.js @@ -0,0 +1,140 @@ +#!/usr/bin/env node + +/** + * Focused tests for frontend URL validation logic in public/index.html. + * + * This test loads and evaluates the parser constants/function directly from + * the page script so behavior stays aligned with production code. + */ + +const fs = require('fs'); +const path = require('path'); +const vm = require('vm'); + +const indexPath = path.join(__dirname, 'public', 'index.html'); +const html = fs.readFileSync(indexPath, 'utf8'); + +function extractBlock(source, startToken, endToken) { + const start = source.indexOf(startToken); + const end = source.indexOf(endToken, start); + if (start === -1 || end === -1) { + throw new Error(`Could not extract block between ${startToken} and ${endToken}`); + } + return source.slice(start, end); +} + +const parserBlock = extractBlock( + html, + 'const PR_PATH_RE = /^\\/([^/]+)\\/([^/]+)\\/pull\\/(\\d+)\\/?$/i;', + 'function showSectionMessage(' +); + +const sandbox = { + URL, + Set, + result: null, +}; + +vm.createContext(sandbox); +vm.runInContext(`${parserBlock}\nresult = parseGitHubTrackingUrl;`, sandbox); + +const parseGitHubTrackingUrl = sandbox.result; +if (typeof parseGitHubTrackingUrl !== 'function') { + throw new Error('parseGitHubTrackingUrl function not found in page script'); +} + +let passed = 0; +let failed = 0; + +function check(name, fn) { + try { + fn(); + console.log(`PASS: ${name}`); + passed += 1; + } catch (err) { + console.error(`FAIL: ${name}`); + console.error(` ${err.message}`); + failed += 1; + } +} + +function expectValid(url, expectedType, expectedCanonical) { + const out = parseGitHubTrackingUrl(url); + if (!out.valid) { + throw new Error(`Expected valid URL but got invalid (${out.reason})`); + } + if (out.type !== expectedType) { + throw new Error(`Expected type ${expectedType}, got ${out.type}`); + } + if (out.canonicalUrl !== expectedCanonical) { + throw new Error(`Expected canonical ${expectedCanonical}, got ${out.canonicalUrl}`); + } +} + +function expectInvalid(url) { + const out = parseGitHubTrackingUrl(url); + if (out.valid) { + throw new Error(`Expected invalid URL but got valid (${out.canonicalUrl})`); + } +} + +check('Accepts PR URL with query suffix', () => { + expectValid( + 'https://github.com/OWASP-BLT/BLT-Leaf/pull/262?diff=split', + 'pr', + 'https://github.com/OWASP-BLT/BLT-Leaf/pull/262' + ); +}); + +check('Accepts PR URL with fragment suffix', () => { + expectValid( + 'https://github.com/OWASP-BLT/BLT-Leaf/pull/262#discussion_r1', + 'pr', + 'https://github.com/OWASP-BLT/BLT-Leaf/pull/262' + ); +}); + +check('Accepts repository URL for bulk flow', () => { + expectValid( + 'https://github.com/OWASP-BLT/BLT-Leaf', + 'repo', + 'https://github.com/OWASP-BLT/BLT-Leaf' + ); +}); + +check('Accepts organization URL', () => { + expectValid( + 'https://github.com/OWASP-BLT', + 'org', + 'https://github.com/OWASP-BLT' + ); +}); + +check('Accepts /orgs/owner URL', () => { + expectValid( + 'https://github.com/orgs/OWASP-BLT', + 'org', + 'https://github.com/OWASP-BLT' + ); +}); + +check('Rejects non-GitHub host', () => { + expectInvalid('https://example.com/OWASP-BLT/BLT-Leaf/pull/262'); +}); + +check('Rejects GitHub issue URL', () => { + expectInvalid('https://github.com/OWASP-BLT/BLT-Leaf/issues/1'); +}); + +check('Rejects GitHub commit URL', () => { + expectInvalid('https://github.com/OWASP-BLT/BLT-Leaf/commit/abcdef'); +}); + +check('Rejects reserved owner route', () => { + expectInvalid('https://github.com/settings'); +}); + +console.log(`\nSummary: ${passed} passed, ${failed} failed`); +if (failed > 0) { + process.exit(1); +}