- name: Detect repository host
  id: detect_host
  env:
    REPO_URL: ${{ steps.extract.outputs.repo_url }}
  run: |
    # Parse REPO_URL with anchored patterns instead of substring matching.
    # A substring test accepts URLs such as https://evil.example/github.com/x
    # and lets arbitrary text flow into $GITHUB_OUTPUT and later git commands.
    # Only two shapes are accepted:
    #   git@<host>:<path>[.git]      https://<host>/<path>[.git]
    # where <host> is github.com or gitlab.com.
    SSH_RE='^git@(github\.com|gitlab\.com):/?([A-Za-z0-9_][A-Za-z0-9._/-]*)$'
    HTTPS_RE='^https://(github\.com|gitlab\.com)/([A-Za-z0-9_][A-Za-z0-9._/-]*)$'

    if [[ "$REPO_URL" =~ $SSH_RE ]] || [[ "$REPO_URL" =~ $HTTPS_RE ]]; then
      HOST_NAME="${BASH_REMATCH[1]}"
      REPO_PATH="${BASH_REMATCH[2]}"
    else
      echo "::error::Unsupported repository host in URL: $REPO_URL"
      exit 1
    fi

    # Normalise: drop one trailing slash, then one trailing ".git".
    REPO_PATH="${REPO_PATH%/}"
    REPO_PATH="${REPO_PATH%.git}"

    # Reject empty, "." and ".." segments so the path cannot escape the
    # namespace once it is interpolated into API routes or clone URLs.
    case "/$REPO_PATH/" in
      *//*|*/./*|*/../*)
        echo "::error::Invalid repository path in URL: $REPO_URL"
        exit 1
        ;;
    esac

    # A repository path needs at least <namespace>/<project>.
    case "$REPO_PATH" in
      */*) ;;
      *)
        echo "::error::Invalid repository path in URL: $REPO_URL"
        exit 1
        ;;
    esac

    BROWSE_URL="https://${HOST_NAME}/${REPO_PATH}"
    {
      # host is "github" or "gitlab" (the matched host without ".com")
      echo "host=${HOST_NAME%.com}"
      echo "repo_path=${REPO_PATH}"
      echo "clone_url=${BROWSE_URL}.git"
      echo "browse_url=${BROWSE_URL}"
    } >> "$GITHUB_OUTPUT"

- name: Verify repository exists and is public
  env:
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    REPO_URL: ${{ steps.extract.outputs.repo_url }}
    HOST: ${{ steps.detect_host.outputs.host }}
    CLONE_URL: ${{ steps.detect_host.outputs.clone_url }}
    # Normalised <org>/<repo> path produced by the detect_host step.
    ORG_REPO: ${{ steps.detect_host.outputs.repo_path }}
    # Never wait for credentials: a non-public repository must fail the check.
    GIT_TERMINAL_PROMPT: "0"
  run: |
    if [ "$HOST" = "github" ]; then
      # GitHub: use gh CLI for verification
      IS_PRIVATE=$(gh repo view "$ORG_REPO" --json isPrivate -q '.isPrivate')
      if [ "$IS_PRIVATE" == "true" ]; then
        echo "::error::Repository $ORG_REPO is private."
        exit 1
      fi
      echo "✅ Repository $ORG_REPO is public"
    else
      # GitLab/other: verify repo is publicly accessible via git ls-remote.
      # CLONE_URL always starts with https:// (built by detect_host), so it
      # cannot be interpreted as a git command-line option.
      if git ls-remote --exit-code "$CLONE_URL" HEAD > /dev/null 2>&1; then
        echo "✅ Repository is publicly accessible: $CLONE_URL"
      else
        echo "::error::Repository is not publicly accessible: $CLONE_URL"
        exit 1
      fi
    fi

- name: Verify submitter has access
  env:
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    REPO_URL: ${{ steps.extract.outputs.repo_url }}
    SUBMITTER: ${{ github.event.pull_request.user.login }}
    HOST: ${{ steps.detect_host.outputs.host }}
    # Normalised <org>/<repo> path produced by the detect_host step.
    ORG_REPO: ${{ steps.detect_host.outputs.repo_path }}
  run: |
    if [ "$HOST" = "github" ]; then
      # GitHub: verify via API that the PR author is a collaborator or owner
      if gh api "/repos/$ORG_REPO/collaborators/$SUBMITTER" 2>/dev/null; then
        echo "✅ $SUBMITTER is a collaborator on $ORG_REPO"
      elif [ "$(gh api "/repos/$ORG_REPO" -q '.owner.login')" == "$SUBMITTER" ]; then
        echo "✅ $SUBMITTER is the owner of $ORG_REPO"
      else
        echo "::error::$SUBMITTER does not have commit access to $ORG_REPO"
        exit 1
      fi
    else
      # Non-GitHub: cannot verify cross-platform access automatically
      echo "::warning::Cannot verify submitter access for non-GitHub repos. Manual review required."
      echo "⚠️ Submitter access for non-GitHub repos must be verified manually by maintainers."
    fi
fi - name: Re-run assessment env: - REPO_URL: ${{ steps.extract.outputs.repo_url }} + CLONE_URL: ${{ steps.detect_host.outputs.clone_url }} run: | source .venv/bin/activate - # SAFE: REPO_URL in environment variable - echo "Cloning $REPO_URL..." - git clone "$REPO_URL" /tmp/repo-to-assess + echo "Cloning $CLONE_URL..." + git clone "$CLONE_URL" /tmp/repo-to-assess echo "Running assessment..." agentready assess /tmp/repo-to-assess --output-dir /tmp/validation diff --git a/scripts/generate-leaderboard-data.py b/scripts/generate-leaderboard-data.py index f025a088..f714faca 100755 --- a/scripts/generate-leaderboard-data.py +++ b/scripts/generate-leaderboard-data.py @@ -6,6 +6,7 @@ """ import json +import re import sys from collections import defaultdict from datetime import datetime @@ -13,6 +14,43 @@ from typing import Any +def git_url_to_https(url: str) -> str: + """Convert a git remote URL (SSH or HTTPS) to an HTTPS browse URL. + + Handles GitHub and GitLab SSH/HTTPS formats: + git@github.com:org/repo.git -> https://github.com/org/repo + git@gitlab.com:group/sub/project.git -> https://gitlab.com/group/sub/project + https://github.com/org/repo.git -> https://github.com/org/repo + """ + url = url.strip() + # SSH format: git@:.git + ssh_match = re.match(r"^git@([^:]+):(.+?)(?:\.git)?$", url) + if ssh_match: + host, path = ssh_match.groups() + return f"https://{host}/{path}" + # HTTPS format: strip trailing .git + if url.startswith("https://") or url.startswith("http://"): + return re.sub(r"\.git$", "", url) + return url + + +def repo_display_name_from_url(url: str) -> str | None: + """Extract the full repository path from a git URL for display purposes. 
+ + Returns the path portion without the host, e.g.: + git@gitlab.com:redhat/rhel-ai/wheels/builder.git -> redhat/rhel-ai/wheels/builder + https://github.com/org/repo -> org/repo + """ + url = url.strip() + ssh_match = re.match(r"^git@[^:]+:(.+?)(?:\.git)?$", url) + if ssh_match: + return ssh_match.group(1) + https_match = re.match(r"^https?://[^/]+/(.+?)(?:\.git)?$", url) + if https_match: + return https_match.group(1) + return None + + def scan_submissions(submissions_dir: Path) -> dict[str, list[dict[str, Any]]]: """Scan submissions directory and group assessments by repository. @@ -86,16 +124,29 @@ def generate_leaderboard_data(repos: dict[str, list[dict[str, Any]]]) -> dict[st agentready_version = metadata.get("agentready_version", "unknown") research_version = metadata.get("research_version", "unknown") + # Derive display name and URL from the assessment JSON's + # repository.url when available, falling back to the directory- + # derived repo_name for backwards compatibility with GitHub repos. 
+ raw_url = latest["repository"].get("url", "") + display_name = repo_display_name_from_url(raw_url) if raw_url else None + browse_url = git_url_to_https(raw_url) if raw_url else None + + # Fall back to directory-derived values (GitHub assumption) + if not display_name: + display_name = repo_name + if not browse_url: + browse_url = f"https://github.com/{repo_name}" + entry = { - "repo": repo_name, - "org": repo_name.split("/")[0], - "name": repo_name.split("/")[1], + "repo": display_name, + "org": display_name.split("/")[0], + "name": display_name.rsplit("/", 1)[-1], "score": float(latest["overall_score"]), "tier": latest["certification_level"], "language": latest["repository"].get("primary_language", "Unknown"), "size": latest["repository"].get("size_category", "Unknown"), "last_updated": submissions[0]["timestamp"][:10], # YYYY-MM-DD - "url": f"https://github.com/{repo_name}", + "url": browse_url, "agentready_version": agentready_version, "research_version": research_version, "history": [ diff --git a/src/agentready/cli/submit.py b/src/agentready/cli/submit.py index 0f100220..ab229216 100644 --- a/src/agentready/cli/submit.py +++ b/src/agentready/cli/submit.py @@ -17,8 +17,10 @@ SUBPROCESS_TIMEOUT = 60 # seconds MAX_ASSESSMENT_SIZE = 10 * 1024 * 1024 # 10 MB -# Valid GitHub org/repo name pattern: alphanumeric, hyphens, underscores, dots -GITHUB_NAME_PATTERN = re.compile(r"^[a-zA-Z0-9._-]+$") +# Valid GitHub/GitLab org/repo name pattern: alphanumeric, hyphens, underscores, dots +REPO_NAME_PATTERN = re.compile(r"^[a-zA-Z0-9._-]+$") + +SUPPORTED_HOSTS = ("github.com", "gitlab.com") def find_assessment_file(repository: str, assessment_file: str | None) -> Path: @@ -47,8 +49,39 @@ def load_assessment(assessment_path: Path) -> dict: sys.exit(1) -def extract_repo_info(assessment_data: dict) -> tuple[str, str, float, str]: - """Extract org, repo, score, and tier from assessment data.""" +def _parse_repo_url(repo_url: str) -> tuple[str, str]: + """Parse a GitHub or GitLab 
repo URL into (host, path). + + Supports SSH (git@host:path.git) and HTTPS (https://host/path) formats. + Returns e.g. ("github.com", "org/repo") or ("gitlab.com", "group/sub/project"). + """ + # SSH format: git@:.git + ssh_match = re.match(r"^git@([^:]+):(.+?)(?:\.git)?$", repo_url) + if ssh_match: + return ssh_match.group(1), ssh_match.group(2) + + # HTTPS format + for host in SUPPORTED_HOSTS: + if host in repo_url: + path = repo_url.split(f"{host}/", 1)[-1].strip("/").removesuffix(".git") + return host, path + + return "", "" + + +def _repo_browse_url(host: str, path: str) -> str: + """Build a browsable HTTPS URL from host and path.""" + return f"https://{host}/{path}" + + +def extract_repo_info(assessment_data: dict) -> tuple[str, str, float, str, str, str]: + """Extract org, repo, score, tier, host, and full_path from assessment data. + + For GitHub repos, org/repo is a two-level split. + For GitLab repos with deep paths (e.g. redhat/rhel-ai/wheels/builder), + org is the top-level group and repo is the project name (last segment). + The full_path is preserved for display and URL purposes. + """ try: repo_url = assessment_data["repository"]["url"] score = assessment_data["overall_score"] @@ -57,31 +90,35 @@ def extract_repo_info(assessment_data: dict) -> tuple[str, str, float, str]: click.echo(f"Error: Invalid assessment JSON (missing {e})", err=True) sys.exit(1) - if not repo_url or "github.com" not in repo_url: + if not repo_url: + click.echo("Error: Assessment JSON has no repository URL", err=True) + sys.exit(1) + + host, full_path = _parse_repo_url(repo_url) + + if host not in SUPPORTED_HOSTS: click.echo( - "Error: Only GitHub repositories are supported for the leaderboard", + "Error: Unsupported repository host. 
Only GitHub and GitLab are supported.", err=True, ) click.echo(f"Repository URL: {repo_url}", err=True) sys.exit(1) - try: - # Handle SSH format: git@github.com:org/repo.git - if repo_url.startswith("git@github.com:"): - org_repo = repo_url.split("git@github.com:")[1].removesuffix(".git") - # Handle HTTPS format: https://github.com/org/repo.git - else: - org_repo = repo_url.split("github.com/")[1].strip("/").removesuffix(".git") - - org, repo = org_repo.split("/") - except (IndexError, ValueError): - click.echo(f"Error: Could not parse GitHub repository from URL: {repo_url}") + if not full_path: + click.echo( + f"Error: Could not parse repository path from URL: {repo_url}", err=True + ) sys.exit(1) - # Validate org/repo names to prevent injection - if not GITHUB_NAME_PATTERN.match(org) or not GITHUB_NAME_PATTERN.match(repo): + # For directory structure: use top-level group as org, project name as repo + path_parts = full_path.split("/") + org = path_parts[0] + repo = path_parts[-1] + + # Validate org/repo names to prevent path injection + if not REPO_NAME_PATTERN.match(org) or not REPO_NAME_PATTERN.match(repo): click.echo( - f"Error: Invalid GitHub org/repo name: {org}/{repo}", + f"Error: Invalid org/repo name: {org}/{repo}", err=True, ) click.echo( @@ -90,14 +127,27 @@ def extract_repo_info(assessment_data: dict) -> tuple[str, str, float, str]: ) sys.exit(1) - return org, repo, score, tier + return org, repo, score, tier, host, full_path -def generate_pr_body(org: str, repo: str, score: float, tier: str, user: str) -> str: +def generate_pr_body( + org: str, + repo: str, + score: float, + tier: str, + user: str, + host: str = "github.com", + full_path: str = "", +) -> str: """Generate the PR body for leaderboard submission.""" + display_path = full_path or f"{org}/{repo}" + browse_url = _repo_browse_url(host, full_path or f"{org}/{repo}") + host_label = "GitLab" if "gitlab" in host else "GitHub" + return f"""## Leaderboard Submission -**Repository**: 
[{org}/{repo}](https://github.com/{org}/{repo}) +**Repository**: [{display_path}]({browse_url}) +**Host**: {host_label} **Score**: {score:.1f}/100 **Tier**: {tier} **Submitted by**: @{user} @@ -150,6 +200,8 @@ def submit_with_gh_cli( tier: str, assessment_path: Path, timestamp: str, + host: str = "github.com", + full_path: str = "", ) -> None: """Submit assessment using gh CLI.""" # 1. Check gh CLI is available @@ -175,42 +227,70 @@ def submit_with_gh_cli( click.echo(f"Authenticated as: {user}\n") # 4. Verify user has access to submitted repo - org_repo = f"{org}/{repo}" - result = run_gh_command( - [ - "api", - f"repos/{org_repo}", - "--jq", - "{private: .private, permissions: .permissions}", - ] - ) - if result.returncode != 0: - click.echo( - f"Error: Repository {org_repo} not found or not accessible", err=True + browse_url = _repo_browse_url(host, full_path or f"{org}/{repo}") + + if host == "github.com": + # GitHub: use gh API for verification + gh_org_repo = full_path or f"{org}/{repo}" + result = run_gh_command( + [ + "api", + f"repos/{gh_org_repo}", + "--jq", + "{private: .private, permissions: .permissions}", + ] ) - sys.exit(1) + if result.returncode != 0: + click.echo( + f"Error: Repository {gh_org_repo} not found or not accessible", err=True + ) + sys.exit(1) - try: - repo_info = json.loads(result.stdout) - except json.JSONDecodeError as e: - click.echo(f"Error: Failed to parse GitHub API response: {e}", err=True) - sys.exit(1) - if repo_info.get("private"): - click.echo( - f"Error: Repository {org_repo} is private. Only public repositories can be submitted.", - err=True, - ) - sys.exit(1) + try: + repo_info = json.loads(result.stdout) + except json.JSONDecodeError as e: + click.echo(f"Error: Failed to parse GitHub API response: {e}", err=True) + sys.exit(1) + if repo_info.get("private"): + click.echo( + f"Error: Repository {gh_org_repo} is private. 
Only public repositories can be submitted.", + err=True, + ) + sys.exit(1) - permissions = repo_info.get("permissions", {}) - if not (permissions.get("push") or permissions.get("admin")): - click.echo(f"Error: You must have commit access to {org_repo}", err=True) - click.echo("\nYou can only submit repositories where you are:", err=True) - click.echo(" - Repository owner", err=True) - click.echo(" - Collaborator with push access", err=True) - sys.exit(1) + permissions = repo_info.get("permissions", {}) + if not (permissions.get("push") or permissions.get("admin")): + click.echo(f"Error: You must have commit access to {gh_org_repo}", err=True) + click.echo("\nYou can only submit repositories where you are:", err=True) + click.echo(" - Repository owner", err=True) + click.echo(" - Collaborator with push access", err=True) + sys.exit(1) - click.echo(f"Verified access to {org_repo}") + click.echo(f"Verified access to {gh_org_repo}") + else: + # GitLab/other: verify repo is publicly accessible via git ls-remote + clone_url = f"https://{host}/{full_path}.git" + try: + ls_result = subprocess.run( + ["git", "ls-remote", "--exit-code", clone_url, "HEAD"], + capture_output=True, + text=True, + timeout=SUBPROCESS_TIMEOUT, + ) + if ls_result.returncode != 0: + click.echo( + f"Error: Repository {browse_url} is not publicly accessible", + err=True, + ) + sys.exit(1) + click.echo(f"Verified {browse_url} is publicly accessible") + except subprocess.TimeoutExpired: + click.echo(f"Error: Timed out verifying {browse_url}", err=True) + sys.exit(1) + click.echo( + "Note: Submitter access cannot be verified for non-GitHub repos. " + "Maintainers will verify manually.", + ) # 5. 
Fork upstream repo (if not already forked) click.echo(f"Found upstream: {UPSTREAM_REPO}") @@ -275,11 +355,12 @@ def submit_with_gh_cli( # Base64 encode the content content_b64 = base64.b64encode(content.encode()).decode() + display_path = full_path or f"{org}/{repo}" submission_path = f"submissions/{org}/{repo}/{timestamp}-assessment.json" commit_message = ( - f"feat: add {org}/{repo} to leaderboard\n\n" + f"feat: add {display_path} to leaderboard\n\n" f"Score: {score:.1f}/100 ({tier})\n" - f"Repository: https://github.com/{org}/{repo}" + f"Repository: {browse_url}" ) result = run_gh_command( @@ -302,8 +383,8 @@ def submit_with_gh_cli( click.echo(f"Committed assessment to {submission_path}") # 8. Create PR - pr_title = f"Leaderboard: {org}/{repo} ({score:.1f}/100 - {tier})" - pr_body = generate_pr_body(org, repo, score, tier, user) + pr_title = f"Leaderboard: {display_path} ({score:.1f}/100 - {tier})" + pr_body = generate_pr_body(org, repo, score, tier, user, host, full_path) result = run_gh_command( [ @@ -350,6 +431,8 @@ def submit_with_token( tier: str, assessment_path: Path, timestamp: str, + host: str = "github.com", + full_path: str = "", ) -> None: """Submit assessment using GITHUB_TOKEN.""" # 1. Validate GitHub token @@ -366,7 +449,8 @@ def submit_with_token( click.echo("\nAlternatively, use --gh flag to submit via gh CLI.", err=True) sys.exit(1) - org_repo = f"{org}/{repo}" + display_path = full_path or f"{org}/{repo}" + browse_url = _repo_browse_url(host, display_path) submission_path = f"submissions/{org}/{repo}/{timestamp}-assessment.json" # 2. Initialize GitHub client @@ -380,36 +464,66 @@ def submit_with_token( sys.exit(1) # 3. 
Verify user has access to submitted repo - try: - submitted_repo = gh.get_repo(org_repo) - - # Check if user is collaborator or owner - is_collaborator = submitted_repo.has_in_collaborators(user.login) - is_owner = submitted_repo.owner.login == user.login - - if not (is_collaborator or is_owner): - click.echo(f"Error: You must have commit access to {org_repo}", err=True) - click.echo("\nYou can only submit repositories where you are:", err=True) - click.echo(" - Repository owner", err=True) - click.echo(" - Collaborator with push access", err=True) + if host == "github.com": + gh_org_repo = full_path or f"{org}/{repo}" + try: + submitted_repo = gh.get_repo(gh_org_repo) + + is_collaborator = submitted_repo.has_in_collaborators(user.login) + is_owner = submitted_repo.owner.login == user.login + + if not (is_collaborator or is_owner): + click.echo( + f"Error: You must have commit access to {gh_org_repo}", err=True + ) + click.echo( + "\nYou can only submit repositories where you are:", err=True + ) + click.echo(" - Repository owner", err=True) + click.echo(" - Collaborator with push access", err=True) + sys.exit(1) + + if submitted_repo.private: + click.echo( + f"Error: Repository {gh_org_repo} is private. Only public repositories can be submitted to the leaderboard.", + err=True, + ) + sys.exit(1) + + click.echo(f"Verified access to {gh_org_repo}") + + except GithubException as e: + if e.status == 404: + click.echo(f"Error: Repository {gh_org_repo} not found", err=True) + else: + click.echo( + f"Error: Cannot access repository {gh_org_repo}: {e}", err=True + ) sys.exit(1) - - # Verify repository is public - if submitted_repo.private: - click.echo( - f"Error: Repository {org_repo} is private. 
Only public repositories can be submitted to the leaderboard.", - err=True, + else: + # GitLab/other: verify repo is publicly accessible via git ls-remote + clone_url = f"https://{host}/{full_path}.git" + try: + ls_result = subprocess.run( + ["git", "ls-remote", "--exit-code", clone_url, "HEAD"], + capture_output=True, + text=True, + timeout=SUBPROCESS_TIMEOUT, ) + if ls_result.returncode != 0: + click.echo( + f"Error: Repository {browse_url} is not publicly accessible", + err=True, + ) + sys.exit(1) + click.echo(f"Verified {browse_url} is publicly accessible") + except subprocess.TimeoutExpired: + click.echo(f"Error: Timed out verifying {browse_url}", err=True) sys.exit(1) - - click.echo(f"Verified access to {org_repo}") - - except GithubException as e: - if e.status == 404: - click.echo(f"Error: Repository {org_repo} not found", err=True) - else: - click.echo(f"Error: Cannot access repository {org_repo}: {e}", err=True) - sys.exit(1) + click.echo( + "Note: Submitter access cannot be verified for non-GitHub repos. " + "Maintainers will verify manually.", + ) # 4. Fork ambient-code/agentready (if not already forked) try: @@ -451,9 +565,9 @@ def submit_with_token( content = f.read() commit_message = ( - f"feat: add {org}/{repo} to leaderboard\n\n" + f"feat: add {display_path} to leaderboard\n\n" f"Score: {score:.1f}/100 ({tier})\n" - f"Repository: https://github.com/{org}/{repo}" + f"Repository: {browse_url}" ) fork.create_file( @@ -470,8 +584,8 @@ def submit_with_token( # 7. 
Create PR try: - pr_title = f"Leaderboard: {org}/{repo} ({score:.1f}/100 - {tier})" - pr_body = generate_pr_body(org, repo, score, tier, user.login) + pr_title = f"Leaderboard: {display_path} ({score:.1f}/100 - {tier})" + pr_body = generate_pr_body(org, repo, score, tier, user.login, host, full_path) pr = upstream.create_pull( title=pr_title, @@ -543,24 +657,31 @@ def submit(repository, assessment_file, dry_run, use_gh_cli): assessment_path = find_assessment_file(repository, assessment_file) assessment_data = load_assessment(assessment_path) - # 2. Extract repo info - org, repo, score, tier = extract_repo_info(assessment_data) + # 2. Extract repo info (now includes host and full_path for GitLab support) + org, repo, score, tier, host, full_path = extract_repo_info(assessment_data) # 3. Generate timestamp timestamp = datetime.utcnow().strftime("%Y-%m-%dT%H-%M-%S") submission_path = f"submissions/{org}/{repo}/{timestamp}-assessment.json" + display_path = full_path or f"{org}/{repo}" + browse_url = _repo_browse_url(host, display_path) # 4. Handle dry-run if dry_run: click.echo("Dry-run mode - no PR will be created\n") click.echo(f"Submission path: {submission_path}") - click.echo(f"Repository: {org}/{repo}") + click.echo(f"Repository: {display_path}") + click.echo(f"URL: {browse_url}") click.echo(f"Score: {score:.1f}/100 ({tier})") click.echo(f"Assessment file: {assessment_path}") return # 5. 
Submit using appropriate method if use_gh_cli: - submit_with_gh_cli(org, repo, score, tier, assessment_path, timestamp) + submit_with_gh_cli( + org, repo, score, tier, assessment_path, timestamp, host, full_path + ) else: - submit_with_token(org, repo, score, tier, assessment_path, timestamp) + submit_with_token( + org, repo, score, tier, assessment_path, timestamp, host, full_path + ) diff --git a/tests/unit/test_cli_submit.py b/tests/unit/test_cli_submit.py index 64c23fd6..834af317 100644 --- a/tests/unit/test_cli_submit.py +++ b/tests/unit/test_cli_submit.py @@ -14,11 +14,13 @@ def test_extract_repo_info_https_with_git_suffix(self): "certification_level": "Gold", } - org, repo, score, tier = extract_repo_info(assessment_data) + org, repo, score, tier, host, full_path = extract_repo_info(assessment_data) assert org == "feast-dev" assert repo == "feast" # Not "feas" assert score == 85.0 assert tier == "Gold" + assert host == "github.com" + assert full_path == "feast-dev/feast" def test_extract_repo_info_https_without_git_suffix(self): """Test extract_repo_info handles HTTPS URL without .git suffix.""" @@ -28,9 +30,11 @@ def test_extract_repo_info_https_without_git_suffix(self): "certification_level": "Silver", } - org, repo, score, tier = extract_repo_info(assessment_data) + org, repo, score, tier, host, full_path = extract_repo_info(assessment_data) assert org == "org" assert repo == "my-repo" + assert host == "github.com" + assert full_path == "org/my-repo" def test_extract_repo_info_ssh_with_git_suffix(self): """Test extract_repo_info handles SSH .git suffix correctly.""" @@ -40,11 +44,13 @@ def test_extract_repo_info_ssh_with_git_suffix(self): "certification_level": "Silver", } - org, repo, score, tier = extract_repo_info(assessment_data) + org, repo, score, tier, host, full_path = extract_repo_info(assessment_data) assert org == "feast-dev" assert repo == "feast" # Not "feas" assert score == 60.5 assert tier == "Silver" + assert host == "github.com" + assert 
full_path == "feast-dev/feast" def test_extract_repo_info_ssh_without_git_suffix(self): """Test extract_repo_info handles SSH URL without .git suffix.""" @@ -54,9 +60,43 @@ def test_extract_repo_info_ssh_without_git_suffix(self): "certification_level": "Gold", } - org, repo, score, tier = extract_repo_info(assessment_data) + org, repo, score, tier, host, full_path = extract_repo_info(assessment_data) assert org == "org" assert repo == "my-repo" + assert host == "github.com" + assert full_path == "org/my-repo" + + def test_extract_repo_info_gitlab_ssh(self): + """Test extract_repo_info handles GitLab SSH URLs with deep paths.""" + assessment_data = { + "repository": {"url": "git@gitlab.com:redhat/rhel-ai/wheels/builder.git"}, + "overall_score": 78.6, + "certification_level": "Gold", + } + + org, repo, score, tier, host, full_path = extract_repo_info(assessment_data) + assert org == "redhat" + assert repo == "builder" + assert score == 78.6 + assert tier == "Gold" + assert host == "gitlab.com" + assert full_path == "redhat/rhel-ai/wheels/builder" + + def test_extract_repo_info_gitlab_https(self): + """Test extract_repo_info handles GitLab HTTPS URLs.""" + assessment_data = { + "repository": { + "url": "https://gitlab.com/redhat/rhel-ai/rhai/pipeline.git" + }, + "overall_score": 53.8, + "certification_level": "Bronze", + } + + org, repo, score, tier, host, full_path = extract_repo_info(assessment_data) + assert org == "redhat" + assert repo == "pipeline" + assert host == "gitlab.com" + assert full_path == "redhat/rhel-ai/rhai/pipeline" def test_extract_repo_info_preserves_names_ending_in_git_chars(self): """Regression test: repo names ending in .git characters are preserved. 
"""Unit tests for leaderboard data generation script.

Regression tests for GitLab support — verifies that repository URLs
and display names are correctly derived from assessment JSON data.
"""

import sys
from pathlib import Path

# Add scripts directory to path so we can import the generation script
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "scripts"))
from importlib import import_module

# Import the module with hyphens in name
gen = import_module("generate-leaderboard-data")


class TestGitUrlToHttps:
    """Checks for git_url_to_https: remote URL -> browsable URL."""

    def test_github_ssh_url(self):
        converted = gen.git_url_to_https("git@github.com:org/repo.git")
        assert converted == "https://github.com/org/repo"

    def test_github_ssh_no_suffix(self):
        converted = gen.git_url_to_https("git@github.com:org/repo")
        assert converted == "https://github.com/org/repo"

    def test_github_https_url(self):
        converted = gen.git_url_to_https("https://github.com/org/repo.git")
        assert converted == "https://github.com/org/repo"

    def test_github_https_no_suffix(self):
        converted = gen.git_url_to_https("https://github.com/org/repo")
        assert converted == "https://github.com/org/repo"

    def test_gitlab_ssh_deep_path(self):
        """Regression: GitLab SSH URLs with deep paths must convert correctly."""
        remote = "git@gitlab.com:redhat/rhel-ai/wheels/builder.git"
        expected = "https://gitlab.com/redhat/rhel-ai/wheels/builder"
        assert gen.git_url_to_https(remote) == expected

    def test_gitlab_https_deep_path(self):
        remote = "https://gitlab.com/redhat/rhel-ai/rhai/pipeline.git"
        expected = "https://gitlab.com/redhat/rhel-ai/rhai/pipeline"
        assert gen.git_url_to_https(remote) == expected

    def test_preserves_unknown_format(self):
        unknown = "some-other-url"
        assert gen.git_url_to_https(unknown) == unknown


class TestRepoDisplayNameFromUrl:
    """Checks for repo_display_name_from_url: remote URL -> repository path."""

    def test_github_ssh(self):
        name = gen.repo_display_name_from_url("git@github.com:org/repo.git")
        assert name == "org/repo"

    def test_github_https(self):
        name = gen.repo_display_name_from_url("https://github.com/org/repo")
        assert name == "org/repo"

    def test_gitlab_ssh_deep_path(self):
        """Regression: Full GitLab path must be preserved for display."""
        remote = "git@gitlab.com:redhat/rhel-ai/wheels/builder.git"
        name = gen.repo_display_name_from_url(remote)
        assert name == "redhat/rhel-ai/wheels/builder"

    def test_gitlab_https_deep_path(self):
        remote = "https://gitlab.com/redhat/rhel-ai/rhai/pipeline.git"
        name = gen.repo_display_name_from_url(remote)
        assert name == "redhat/rhel-ai/rhai/pipeline"

    def test_returns_none_for_unparseable(self):
        name = gen.repo_display_name_from_url("not-a-url")
        assert name is None


class TestGenerateLeaderboardData:
    """End-to-end checks of generate_leaderboard_data, including GitLab repos."""

    def _make_assessment(self, url, score=75.0, tier="Silver"):
        # Minimal assessment payload with the fields these tests populate.
        repository = {
            "url": url,
            "primary_language": "Python",
            "size_category": "Medium",
        }
        metadata = {
            "agentready_version": "2.30.1",
            "research_version": "1.0.1",
        }
        return {
            "repository": repository,
            "overall_score": score,
            "certification_level": tier,
            "metadata": metadata,
        }

    @staticmethod
    def _single_submission(repo_dir, timestamp, assessment):
        # Build the {repo_dir: [submission]} mapping passed to the generator.
        filename = f"{timestamp}-assessment.json"
        submission = {
            "assessment": assessment,
            "timestamp": timestamp,
            "path": Path(f"submissions/{repo_dir}/{filename}"),
        }
        return {repo_dir: [submission]}

    def test_gitlab_repo_gets_correct_url(self):
        """Regression: GitLab repos must link to GitLab, not GitHub."""
        assessment = self._make_assessment(
            "git@gitlab.com:redhat/rhel-ai/wheels/builder.git",
            score=78.6,
            tier="Gold",
        )
        repos = self._single_submission(
            "redhat/builder", "2026-03-25T12-00-00", assessment
        )

        entry = gen.generate_leaderboard_data(repos)["overall"][0]

        assert entry["url"] == "https://gitlab.com/redhat/rhel-ai/wheels/builder"
        assert entry["repo"] == "redhat/rhel-ai/wheels/builder"
        assert entry["org"] == "redhat"
        assert entry["name"] == "builder"

    def test_github_repo_still_works(self):
        """Existing GitHub repos must continue to work correctly."""
        assessment = self._make_assessment(
            "https://github.com/org/repo",
            score=80.0,
            tier="Gold",
        )
        repos = self._single_submission("org/repo", "2026-01-15T00-00-00", assessment)

        entry = gen.generate_leaderboard_data(repos)["overall"][0]

        assert entry["url"] == "https://github.com/org/repo"
        assert entry["repo"] == "org/repo"

    def test_fallback_when_no_url(self):
        """Repos without repository.url fall back to GitHub directory-derived URL."""
        assessment = {
            "repository": {},
            "overall_score": 60.0,
            "certification_level": "Silver",
            "metadata": {},
        }
        repos = self._single_submission("org/repo", "2026-01-15T00-00-00", assessment)

        entry = gen.generate_leaderboard_data(repos)["overall"][0]

        # Falls back to GitHub URL constructed from directory path
        assert entry["url"] == "https://github.com/org/repo"
        assert entry["repo"] == "org/repo"