From 855b34daca05d07c2d1886f194df3ba08ace63b8 Mon Sep 17 00:00:00 2001 From: Hamza Tahir Date: Wed, 22 Jan 2025 12:37:36 +0100 Subject: [PATCH 1/9] Add workflow to check markdown links and comment on PRs --- .github/workflows/check-markdown-links.yml | 18 ++++++ scripts/check_and_comment.py | 69 ++++++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 .github/workflows/check-markdown-links.yml create mode 100644 scripts/check_and_comment.py diff --git a/.github/workflows/check-markdown-links.yml b/.github/workflows/check-markdown-links.yml new file mode 100644 index 00000000000..b97be8665cc --- /dev/null +++ b/.github/workflows/check-markdown-links.yml @@ -0,0 +1,18 @@ +--- +name: Check Markdown Links +on: + pull_request: + types: [opened, synchronize, reopened] +jobs: + check-links: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: 3.x + - name: Run markdown link checker + run: python scripts/check_and_comment.py + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/scripts/check_and_comment.py b/scripts/check_and_comment.py new file mode 100644 index 00000000000..55eec03b545 --- /dev/null +++ b/scripts/check_and_comment.py @@ -0,0 +1,69 @@ +import json +import os +import sys +from pathlib import Path + +from github import Github + +# Add the docs/book directory to the Python path so we can import brokelinks +sys.path.append(str(Path(__file__).parent.parent / "docs" / "book")) +from brokelinks import check_markdown_links + + +def create_comment_body(broken_links): + if not broken_links: + return "āœ… No broken markdown links found!" 
+ + body = "## šŸ” Broken Markdown Links Found\n\n" + for link in broken_links: + body += f"### In file: `{link['source_file']}`\n" + body += f"- Link text: \"{link['link_text']}\"\n" + body += f"- Broken path: `{link['broken_path']}`\n\n" + + return body + + +def main(): + # Get GitHub token and context + token = os.environ.get("GITHUB_TOKEN") + if not token: + print("Error: GITHUB_TOKEN not set") + sys.exit(1) + + # Parse GitHub context + with open(os.environ["GITHUB_EVENT_PATH"]) as f: + event = json.load(f) + + repo_name = event["repository"]["full_name"] + pr_number = event["pull_request"]["number"] + + # Initialize GitHub client + g = Github(token) + repo = g.get_repo(repo_name) + pr = repo.get_pull(pr_number) + + # Check for broken links in the docs directory + docs_dir = Path(__file__).parent.parent / "docs" + broken_links = check_markdown_links(str(docs_dir)) + + # Create and post comment + comment_body = create_comment_body(broken_links) + + # Check if we already commented + for comment in pr.get_issue_comments(): + if ( + "Broken Markdown Links Found" in comment.body + or "No broken markdown links found!" 
in comment.body + ): + comment.edit(comment_body) + break + else: + pr.create_issue_comment(comment_body) + + # Exit with error if broken links were found + if broken_links: + sys.exit(1) + + +if __name__ == "__main__": + main() From 5f4fa9dfd6f978cb239c6d6159cb78080b9a5cc0 Mon Sep 17 00:00:00 2001 From: Hamza Tahir Date: Wed, 22 Jan 2025 12:38:56 +0100 Subject: [PATCH 2/9] Add PyGithub dependency for GitHub Actions workflow --- .github/workflows/check-markdown-links.yml | 4 +- scripts/check_and_comment.py | 159 ++++++++++++++------- 2 files changed, 114 insertions(+), 49 deletions(-) diff --git a/.github/workflows/check-markdown-links.yml b/.github/workflows/check-markdown-links.yml index b97be8665cc..f1f68eb517b 100644 --- a/.github/workflows/check-markdown-links.yml +++ b/.github/workflows/check-markdown-links.yml @@ -11,7 +11,9 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: 3.x + python-version: '3.x' + - name: Install dependencies + run: pip install PyGithub - name: Run markdown link checker run: python scripts/check_and_comment.py env: diff --git a/scripts/check_and_comment.py b/scripts/check_and_comment.py index 55eec03b545..1396cd07ff1 100644 --- a/scripts/check_and_comment.py +++ b/scripts/check_and_comment.py @@ -1,69 +1,132 @@ -import json import os +import re import sys +import json from pathlib import Path - from github import Github -# Add the docs/book directory to the Python path so we can import brokelinks -sys.path.append(str(Path(__file__).parent.parent / "docs" / "book")) -from brokelinks import check_markdown_links - +def find_markdown_files(directory): + """Recursively find all markdown files in a directory.""" + return list(Path(directory).rglob("*.md")) + +def extract_relative_links(content): + """Extract all relative markdown links from content.""" + # Match [text](path.md) or [text](../path.md) patterns + # Excluding URLs (http:// or https://) + pattern = r'\[([^\]]+)\]\((?!http[s]?://)(.[^\)]+\.md)\)' 
+ matches = re.finditer(pattern, content) + return [(m.group(1), m.group(2)) for m in matches] + +def validate_link(source_file, target_path): + """Validate if a relative link is valid.""" + try: + # Convert source file and target path to Path objects + source_dir = Path(source_file).parent + # Resolve the target path relative to the source file's directory + full_path = (source_dir / target_path).resolve() + return full_path.exists() + except Exception: + return False + +def check_markdown_links(directory): + """Check all markdown files in directory for broken relative links.""" + broken_links = [] + markdown_files = find_markdown_files(directory) + + for file_path in markdown_files: + try: + with open(file_path, 'r', encoding='utf-8') as f: + content = f.read() + + relative_links = extract_relative_links(content) + + for link_text, link_path in relative_links: + if not validate_link(file_path, link_path): + broken_links.append({ + 'source_file': str(file_path), + 'link_text': link_text, + 'broken_path': link_path + }) + except Exception as e: + print(f"Error processing {file_path}: {str(e)}") + + return broken_links def create_comment_body(broken_links): if not broken_links: return "āœ… No broken markdown links found!" 
- + body = "## šŸ” Broken Markdown Links Found\n\n" for link in broken_links: body += f"### In file: `{link['source_file']}`\n" body += f"- Link text: \"{link['link_text']}\"\n" body += f"- Broken path: `{link['broken_path']}`\n\n" - + return body - def main(): - # Get GitHub token and context - token = os.environ.get("GITHUB_TOKEN") - if not token: - print("Error: GITHUB_TOKEN not set") - sys.exit(1) - - # Parse GitHub context - with open(os.environ["GITHUB_EVENT_PATH"]) as f: - event = json.load(f) - - repo_name = event["repository"]["full_name"] - pr_number = event["pull_request"]["number"] - - # Initialize GitHub client - g = Github(token) - repo = g.get_repo(repo_name) - pr = repo.get_pull(pr_number) - - # Check for broken links in the docs directory - docs_dir = Path(__file__).parent.parent / "docs" - broken_links = check_markdown_links(str(docs_dir)) - - # Create and post comment - comment_body = create_comment_body(broken_links) - - # Check if we already commented - for comment in pr.get_issue_comments(): - if ( - "Broken Markdown Links Found" in comment.body - or "No broken markdown links found!" 
in comment.body - ): - comment.edit(comment_body) - break + # Check if running from GitHub Actions + if 'GITHUB_TOKEN' in os.environ: + # Get GitHub token and context + token = os.environ.get('GITHUB_TOKEN') + if not token: + print("Error: GITHUB_TOKEN not set") + sys.exit(1) + + # Parse GitHub context + with open(os.environ['GITHUB_EVENT_PATH']) as f: + event = json.load(f) + + repo_name = event['repository']['full_name'] + pr_number = event['pull_request']['number'] + + # Initialize GitHub client + g = Github(token) + repo = g.get_repo(repo_name) + pr = repo.get_pull(pr_number) + + # Check for broken links in the docs directory + docs_dir = Path(__file__).parent.parent / "docs" + broken_links = check_markdown_links(str(docs_dir)) + + # Create and post comment + comment_body = create_comment_body(broken_links) + + # Check if we already commented + for comment in pr.get_issue_comments(): + if "Broken Markdown Links Found" in comment.body or "No broken markdown links found!" in comment.body: + comment.edit(comment_body) + break + else: + pr.create_issue_comment(comment_body) + + # Exit with error if broken links were found + if broken_links: + sys.exit(1) + else: - pr.create_issue_comment(comment_body) - - # Exit with error if broken links were found - if broken_links: - sys.exit(1) - + # Running locally + if len(sys.argv) != 2: + print("Usage: python check_and_comment.py ") + sys.exit(1) + + directory = sys.argv[1] + if not os.path.isdir(directory): + print(f"Error: {directory} is not a valid directory") + sys.exit(1) + + print(f"Checking markdown links in {directory}...") + broken_links = check_markdown_links(directory) + + if not broken_links: + print("No broken links found!") + return + + print("\nBroken links found:") + for link in broken_links: + print(f"\nSource file: {link['source_file']}") + print(f"Link text: {link['link_text']}") + print(f"Broken path: {link['broken_path']}") if __name__ == "__main__": main() From 5f0d9bb1da0f5eca1f3614e3fb104e5b9b508d20 
Mon Sep 17 00:00:00 2001 From: Hamza Tahir Date: Wed, 22 Jan 2025 12:39:05 +0100 Subject: [PATCH 3/9] Fix Python version and update code formatting.- Update Python version to 3.x- Improve code formatting --- .github/workflows/check-markdown-links.yml | 2 +- scripts/check_and_comment.py | 67 +++++++++++++--------- 2 files changed, 41 insertions(+), 28 deletions(-) diff --git a/.github/workflows/check-markdown-links.yml b/.github/workflows/check-markdown-links.yml index f1f68eb517b..b1972d41e6e 100644 --- a/.github/workflows/check-markdown-links.yml +++ b/.github/workflows/check-markdown-links.yml @@ -11,7 +11,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.x' + python-version: 3.x - name: Install dependencies run: pip install PyGithub - name: Run markdown link checker diff --git a/scripts/check_and_comment.py b/scripts/check_and_comment.py index 1396cd07ff1..fc3fa649da9 100644 --- a/scripts/check_and_comment.py +++ b/scripts/check_and_comment.py @@ -1,22 +1,26 @@ +import json import os import re import sys -import json from pathlib import Path + from github import Github + def find_markdown_files(directory): """Recursively find all markdown files in a directory.""" return list(Path(directory).rglob("*.md")) + def extract_relative_links(content): """Extract all relative markdown links from content.""" # Match [text](path.md) or [text](../path.md) patterns # Excluding URLs (http:// or https://) - pattern = r'\[([^\]]+)\]\((?!http[s]?://)(.[^\)]+\.md)\)' + pattern = r"\[([^\]]+)\]\((?!http[s]?://)(.[^\)]+\.md)\)" matches = re.finditer(pattern, content) return [(m.group(1), m.group(2)) for m in matches] + def validate_link(source_file, target_path): """Validate if a relative link is valid.""" try: @@ -28,57 +32,62 @@ def validate_link(source_file, target_path): except Exception: return False + def check_markdown_links(directory): """Check all markdown files in directory for broken relative links.""" broken_links = [] 
markdown_files = find_markdown_files(directory) - + for file_path in markdown_files: try: - with open(file_path, 'r', encoding='utf-8') as f: + with open(file_path, "r", encoding="utf-8") as f: content = f.read() - + relative_links = extract_relative_links(content) - + for link_text, link_path in relative_links: if not validate_link(file_path, link_path): - broken_links.append({ - 'source_file': str(file_path), - 'link_text': link_text, - 'broken_path': link_path - }) + broken_links.append( + { + "source_file": str(file_path), + "link_text": link_text, + "broken_path": link_path, + } + ) except Exception as e: print(f"Error processing {file_path}: {str(e)}") - + return broken_links + def create_comment_body(broken_links): if not broken_links: return "āœ… No broken markdown links found!" - + body = "## šŸ” Broken Markdown Links Found\n\n" for link in broken_links: body += f"### In file: `{link['source_file']}`\n" body += f"- Link text: \"{link['link_text']}\"\n" body += f"- Broken path: `{link['broken_path']}`\n\n" - + return body + def main(): # Check if running from GitHub Actions - if 'GITHUB_TOKEN' in os.environ: + if "GITHUB_TOKEN" in os.environ: # Get GitHub token and context - token = os.environ.get('GITHUB_TOKEN') + token = os.environ.get("GITHUB_TOKEN") if not token: print("Error: GITHUB_TOKEN not set") sys.exit(1) # Parse GitHub context - with open(os.environ['GITHUB_EVENT_PATH']) as f: + with open(os.environ["GITHUB_EVENT_PATH"]) as f: event = json.load(f) - - repo_name = event['repository']['full_name'] - pr_number = event['pull_request']['number'] + + repo_name = event["repository"]["full_name"] + pr_number = event["pull_request"]["number"] # Initialize GitHub client g = Github(token) @@ -91,10 +100,13 @@ def main(): # Create and post comment comment_body = create_comment_body(broken_links) - + # Check if we already commented for comment in pr.get_issue_comments(): - if "Broken Markdown Links Found" in comment.body or "No broken markdown links found!" 
in comment.body: + if ( + "Broken Markdown Links Found" in comment.body + or "No broken markdown links found!" in comment.body + ): comment.edit(comment_body) break else: @@ -103,30 +115,31 @@ def main(): # Exit with error if broken links were found if broken_links: sys.exit(1) - + else: # Running locally if len(sys.argv) != 2: print("Usage: python check_and_comment.py ") sys.exit(1) - + directory = sys.argv[1] if not os.path.isdir(directory): print(f"Error: {directory} is not a valid directory") sys.exit(1) - + print(f"Checking markdown links in {directory}...") broken_links = check_markdown_links(directory) - + if not broken_links: print("No broken links found!") return - + print("\nBroken links found:") for link in broken_links: print(f"\nSource file: {link['source_file']}") print(f"Link text: {link['link_text']}") print(f"Broken path: {link['broken_path']}") + if __name__ == "__main__": main() From 7b9fe3362a42299fee46994fa91b139319410af6 Mon Sep 17 00:00:00 2001 From: Hamza Tahir Date: Wed, 22 Jan 2025 12:50:09 +0100 Subject: [PATCH 4/9] Update markdown link checker script to use shell script --- .github/workflows/check-markdown-links.yml | 2 +- scripts/check_and_comment.py | 137 ++++++++++++--------- 2 files changed, 82 insertions(+), 57 deletions(-) diff --git a/.github/workflows/check-markdown-links.yml b/.github/workflows/check-markdown-links.yml index b1972d41e6e..bdfdb7a5795 100644 --- a/.github/workflows/check-markdown-links.yml +++ b/.github/workflows/check-markdown-links.yml @@ -15,6 +15,6 @@ jobs: - name: Install dependencies run: pip install PyGithub - name: Run markdown link checker - run: python scripts/check_and_comment.py + run: ./scripts/check_and_comment.sh docs env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/scripts/check_and_comment.py b/scripts/check_and_comment.py index fc3fa649da9..6a7497c7a40 100644 --- a/scripts/check_and_comment.py +++ b/scripts/check_and_comment.py @@ -4,8 +4,17 @@ import sys from pathlib import Path -from 
github import Github - +def format_path_for_display(path): + """Convert absolute path to relative path from repo root.""" + try: + # Get the repo root (parent of scripts directory) + repo_root = Path(__file__).parent.parent + # First resolve the path to remove any ../ components + full_path = Path(path).resolve() + return str(full_path.relative_to(repo_root)) + except ValueError: + # If path is not relative to repo root, return as is + return str(path) def find_markdown_files(directory): """Recursively find all markdown files in a directory.""" @@ -64,81 +73,97 @@ def create_comment_body(broken_links): if not broken_links: return "āœ… No broken markdown links found!" - body = "## šŸ” Broken Markdown Links Found\n\n" + # Calculate statistics + total_files = len({link["source_file"] for link in broken_links}) + total_broken = len(broken_links) + + body = [ + "# šŸ” Broken Links Report", + "", + "### Summary", + f"- šŸ“ Files with broken links: **{total_files}**", + f"- šŸ”— Total broken links: **{total_broken}**", + "", + "### Details", + "| File | Link Text | Broken Path |", + "|------|-----------|-------------|", + ] + + # Add each broken link as a table row + for link in broken_links: + file_name = Path(link["source_file"]).name + full_path = link["source_file"] + body.append( + f"| `{file_name}` | \"{link['link_text']}\" | `{link['broken_path']}` |" + ) + + body.append("") + body.append("
<details><summary>📂 Full file paths</summary>") + body.append("") + for link in broken_links: - body += f"### In file: `{link['source_file']}`\n" - body += f"- Link text: \"{link['link_text']}\"\n" - body += f"- Broken path: `{link['broken_path']}`\n\n" + body.append(f"- `{link['source_file']}`") + body.append("") + body.append("</details>
") - return body + return "\n".join(body) def main(): - # Check if running from GitHub Actions - if "GITHUB_TOKEN" in os.environ: - # Get GitHub token and context - token = os.environ.get("GITHUB_TOKEN") + # Get the directory to check from command line argument + if len(sys.argv) != 2: + print("Usage: python check_and_comment.py ") + sys.exit(1) + + directory = sys.argv[1] + if not os.path.isdir(directory): + print(f"Error: {directory} is not a valid directory") + sys.exit(1) + + print(f"Checking markdown links in {directory}...") + broken_links = check_markdown_links(directory) + + # If running in GitHub Actions, handle PR comment + if 'GITHUB_TOKEN' in os.environ: + # Only import github when needed + from github import Github + + token = os.environ.get('GITHUB_TOKEN') if not token: print("Error: GITHUB_TOKEN not set") sys.exit(1) - # Parse GitHub context - with open(os.environ["GITHUB_EVENT_PATH"]) as f: + with open(os.environ['GITHUB_EVENT_PATH']) as f: event = json.load(f) + + repo_name = event['repository']['full_name'] + pr_number = event['pull_request']['number'] - repo_name = event["repository"]["full_name"] - pr_number = event["pull_request"]["number"] - - # Initialize GitHub client g = Github(token) repo = g.get_repo(repo_name) pr = repo.get_pull(pr_number) - # Check for broken links in the docs directory - docs_dir = Path(__file__).parent.parent / "docs" - broken_links = check_markdown_links(str(docs_dir)) - - # Create and post comment comment_body = create_comment_body(broken_links) - - # Check if we already commented + for comment in pr.get_issue_comments(): - if ( - "Broken Markdown Links Found" in comment.body - or "No broken markdown links found!" in comment.body - ): + if "Broken Links Found" in comment.body or "No broken markdown links found!" 
in comment.body: comment.edit(comment_body) break else: pr.create_issue_comment(comment_body) - - # Exit with error if broken links were found - if broken_links: - sys.exit(1) - - else: - # Running locally - if len(sys.argv) != 2: - print("Usage: python check_and_comment.py ") - sys.exit(1) - - directory = sys.argv[1] - if not os.path.isdir(directory): - print(f"Error: {directory} is not a valid directory") - sys.exit(1) - - print(f"Checking markdown links in {directory}...") - broken_links = check_markdown_links(directory) - - if not broken_links: - print("No broken links found!") - return - - print("\nBroken links found:") - for link in broken_links: - print(f"\nSource file: {link['source_file']}") - print(f"Link text: {link['link_text']}") - print(f"Broken path: {link['broken_path']}") + + # Always print results locally + if not broken_links: + print("āœ… No broken links found!") + sys.exit(0) + + print("\nšŸ” Broken links found:") + for link in broken_links: + relative_path = format_path_for_display(link['source_file']) + print(f"\nšŸ“„ File: {relative_path}") + print(f"šŸ“ Link text: \"{link['link_text']}\"") + print(f"āŒ Broken path: {link['broken_path']}") + + sys.exit(1) if __name__ == "__main__": From 5b50bd6a84204e18cc3c652b2d73c2d187dbe969 Mon Sep 17 00:00:00 2001 From: Hamza Tahir Date: Wed, 22 Jan 2025 12:51:14 +0100 Subject: [PATCH 5/9] Add script for checking and commenting on GitHub --- scripts/check_and_comment.sh | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100755 scripts/check_and_comment.sh diff --git a/scripts/check_and_comment.sh b/scripts/check_and_comment.sh new file mode 100755 index 00000000000..95826942057 --- /dev/null +++ b/scripts/check_and_comment.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +# Exit on error +set -e + +# Get the directory containing this script +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +# Default to docs directory if no argument provided 
+CHECK_DIR="${1:-docs}" + +# Convert to absolute path if relative path provided +if [[ ! "$CHECK_DIR" = /* ]]; then + CHECK_DIR="$SCRIPT_DIR/../$CHECK_DIR" +fi + +# Ensure the directory exists +if [ ! -d "$CHECK_DIR" ]; then + echo "Error: Directory '$CHECK_DIR' does not exist" + exit 1 +fi + +# Only install PyGithub if we're running in GitHub Actions +if [ -n "$GITHUB_TOKEN" ]; then + pip install PyGithub +fi + +# Run the Python script +python "$SCRIPT_DIR/check_and_comment.py" "$CHECK_DIR" \ No newline at end of file From 857f04c8aae91c1f53c471407c66075bcc8854f0 Mon Sep 17 00:00:00 2001 From: Hamza Tahir Date: Wed, 22 Jan 2025 12:53:32 +0100 Subject: [PATCH 6/9] Update default directory to "docs/book" in check_and_comment.sh --- scripts/check_and_comment.py | 37 ++++++++++++++++++++---------------- scripts/check_and_comment.sh | 2 +- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/scripts/check_and_comment.py b/scripts/check_and_comment.py index 6a7497c7a40..3b3477761c9 100644 --- a/scripts/check_and_comment.py +++ b/scripts/check_and_comment.py @@ -4,6 +4,7 @@ import sys from pathlib import Path + def format_path_for_display(path): """Convert absolute path to relative path from repo root.""" try: @@ -16,6 +17,7 @@ def format_path_for_display(path): # If path is not relative to repo root, return as is return str(path) + def find_markdown_files(directory): """Recursively find all markdown files in a directory.""" return list(Path(directory).rglob("*.md")) @@ -113,56 +115,59 @@ def main(): if len(sys.argv) != 2: print("Usage: python check_and_comment.py ") sys.exit(1) - + directory = sys.argv[1] if not os.path.isdir(directory): print(f"Error: {directory} is not a valid directory") sys.exit(1) - + print(f"Checking markdown links in {directory}...") broken_links = check_markdown_links(directory) - + # If running in GitHub Actions, handle PR comment - if 'GITHUB_TOKEN' in os.environ: + if "GITHUB_TOKEN" in os.environ: # Only import github when needed 
from github import Github - - token = os.environ.get('GITHUB_TOKEN') + + token = os.environ.get("GITHUB_TOKEN") if not token: print("Error: GITHUB_TOKEN not set") sys.exit(1) - with open(os.environ['GITHUB_EVENT_PATH']) as f: + with open(os.environ["GITHUB_EVENT_PATH"]) as f: event = json.load(f) - - repo_name = event['repository']['full_name'] - pr_number = event['pull_request']['number'] + + repo_name = event["repository"]["full_name"] + pr_number = event["pull_request"]["number"] g = Github(token) repo = g.get_repo(repo_name) pr = repo.get_pull(pr_number) comment_body = create_comment_body(broken_links) - + for comment in pr.get_issue_comments(): - if "Broken Links Found" in comment.body or "No broken markdown links found!" in comment.body: + if ( + "Broken Links Found" in comment.body + or "No broken markdown links found!" in comment.body + ): comment.edit(comment_body) break else: pr.create_issue_comment(comment_body) - + # Always print results locally if not broken_links: print("āœ… No broken links found!") sys.exit(0) - + print("\nšŸ” Broken links found:") for link in broken_links: - relative_path = format_path_for_display(link['source_file']) + relative_path = format_path_for_display(link["source_file"]) print(f"\nšŸ“„ File: {relative_path}") print(f"šŸ“ Link text: \"{link['link_text']}\"") print(f"āŒ Broken path: {link['broken_path']}") - + sys.exit(1) diff --git a/scripts/check_and_comment.sh b/scripts/check_and_comment.sh index 95826942057..ca3b5123532 100755 --- a/scripts/check_and_comment.sh +++ b/scripts/check_and_comment.sh @@ -7,7 +7,7 @@ set -e SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" # Default to docs directory if no argument provided -CHECK_DIR="${1:-docs}" +CHECK_DIR="${1:-docs/book}" # Convert to absolute path if relative path provided if [[ ! 
"$CHECK_DIR" = /* ]]; then From 70c846789a79cb2e007c45937deee11aeaf8d99a Mon Sep 17 00:00:00 2001 From: Hamza Tahir Date: Wed, 22 Jan 2025 12:58:44 +0100 Subject: [PATCH 7/9] Update broken links comment creation logic --- scripts/check_and_comment.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/scripts/check_and_comment.py b/scripts/check_and_comment.py index 3b3477761c9..e2c53fd5dbe 100644 --- a/scripts/check_and_comment.py +++ b/scripts/check_and_comment.py @@ -93,10 +93,14 @@ def create_comment_body(broken_links): # Add each broken link as a table row for link in broken_links: - file_name = Path(link["source_file"]).name - full_path = link["source_file"] + # Get parent folder and file name + path = Path(link["source_file"]) + parent = path.parent.name + file_name = path.name + display_name = f"{parent}/{file_name}" # Combine parent folder and filename + body.append( - f"| `{file_name}` | \"{link['link_text']}\" | `{link['broken_path']}` |" + f"| `{display_name}` | \"{link['link_text']}\" | `{link['broken_path']}` |" ) body.append("") @@ -145,16 +149,21 @@ def main(): pr = repo.get_pull(pr_number) comment_body = create_comment_body(broken_links) - + + # Find existing comment by looking for our specific header + existing_comment = None for comment in pr.get_issue_comments(): - if ( - "Broken Links Found" in comment.body - or "No broken markdown links found!" in comment.body - ): - comment.edit(comment_body) + if "# šŸ” Broken Links Report" in comment.body or "āœ… No broken markdown links found!" 
in comment.body: + existing_comment = comment break + + # Update existing comment or create new one + if existing_comment: + existing_comment.edit(comment_body) + print("Updated existing broken links report comment") else: pr.create_issue_comment(comment_body) + print("Created new broken links report comment") # Always print results locally if not broken_links: From ff89a3053f18ca2dcf628647ed168349c9520c33 Mon Sep 17 00:00:00 2001 From: Hamza Tahir Date: Wed, 22 Jan 2025 13:02:34 +0100 Subject: [PATCH 8/9] Ensure proper exit codes for GitHub Actions --- scripts/check_and_comment.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/check_and_comment.py b/scripts/check_and_comment.py index e2c53fd5dbe..cd5c40c6efd 100644 --- a/scripts/check_and_comment.py +++ b/scripts/check_and_comment.py @@ -165,7 +165,10 @@ def main(): pr.create_issue_comment(comment_body) print("Created new broken links report comment") - # Always print results locally + # In GitHub Actions, always exit with 0 after commenting + sys.exit(0) + + # For local runs, print results and exit with appropriate code if not broken_links: print("āœ… No broken links found!") sys.exit(0) @@ -177,6 +180,7 @@ def main(): print(f"šŸ“ Link text: \"{link['link_text']}\"") print(f"āŒ Broken path: {link['broken_path']}") + # Only exit with error code in local mode sys.exit(1) From 10d6584fed22e0fb0b22a4c4119b4cdd26b9782a Mon Sep 17 00:00:00 2001 From: Hamza Tahir Date: Thu, 23 Jan 2025 14:08:10 +0100 Subject: [PATCH 9/9] Add functionality to check and comment on broken markdown links --- scripts/check_and_comment.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/scripts/check_and_comment.py b/scripts/check_and_comment.py index cd5c40c6efd..b96a7ddeb7e 100644 --- a/scripts/check_and_comment.py +++ b/scripts/check_and_comment.py @@ -1,3 +1,18 @@ +# Copyright (c) ZenML GmbH 2025. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. +"""Checks for broken markdown links in a directory and comments on a PR if found.""" + import json import os import re @@ -97,8 +112,10 @@ def create_comment_body(broken_links): path = Path(link["source_file"]) parent = path.parent.name file_name = path.name - display_name = f"{parent}/{file_name}" # Combine parent folder and filename - + display_name = ( + f"{parent}/{file_name}" # Combine parent folder and filename + ) + body.append( f"| `{display_name}` | \"{link['link_text']}\" | `{link['broken_path']}` |" ) @@ -149,14 +166,17 @@ def main(): pr = repo.get_pull(pr_number) comment_body = create_comment_body(broken_links) - + # Find existing comment by looking for our specific header existing_comment = None for comment in pr.get_issue_comments(): - if "# šŸ” Broken Links Report" in comment.body or "āœ… No broken markdown links found!" in comment.body: + if ( + "# šŸ” Broken Links Report" in comment.body + or "āœ… No broken markdown links found!" in comment.body + ): existing_comment = comment break - + # Update existing comment or create new one if existing_comment: existing_comment.edit(comment_body)