Skip to content

Commit

Permalink
Update markdown link checker script to use shell script
Browse files (browse the repository at this point in the history)
  • Loading branch information
htahir1 committed Jan 22, 2025
1 parent 5f0d9bb commit 7b9fe33
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 57 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/check-markdown-links.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@ jobs:
- name: Install dependencies
run: pip install PyGithub
- name: Run markdown link checker
run: python scripts/check_and_comment.py
run: ./scripts/check_and_comment.sh docs
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
137 changes: 81 additions & 56 deletions scripts/check_and_comment.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,17 @@
import sys
from pathlib import Path

from github import Github

def format_path_for_display(path):
    """Return ``path`` relative to the repository root, for display.

    The repository root is taken to be the parent of the directory that
    contains this script.

    Args:
        path: File path (string or path-like) to shorten.

    Returns:
        The path relative to the repo root as a string, or ``str(path)``
        unchanged when the path does not lie under the repo root.
    """
    try:
        # Resolve the root as well as the target: relative_to() is a purely
        # lexical comparison, so an unresolved root (relative __file__, or a
        # checkout reached through a symlink) would never match the resolved
        # target and every path would fall through to the fallback below.
        repo_root = Path(__file__).resolve().parent.parent
        # Resolve the target to drop any ../ components.
        full_path = Path(path).resolve()
        return str(full_path.relative_to(repo_root))
    except ValueError:
        # Path is outside the repo root: show it exactly as given.
        return str(path)

def find_markdown_files(directory):
"""Recursively find all markdown files in a directory."""
Expand Down Expand Up @@ -64,81 +73,97 @@ def create_comment_body(broken_links):
if not broken_links:
return "✅ No broken markdown links found!"

body = "## 🔍 Broken Markdown Links Found\n\n"
# Calculate statistics
total_files = len({link["source_file"] for link in broken_links})
total_broken = len(broken_links)

body = [
"# 🔍 Broken Links Report",
"",
"### Summary",
f"- 📁 Files with broken links: **{total_files}**",
f"- 🔗 Total broken links: **{total_broken}**",
"",
"### Details",
"| File | Link Text | Broken Path |",
"|------|-----------|-------------|",
]

# Add each broken link as a table row
for link in broken_links:
file_name = Path(link["source_file"]).name
full_path = link["source_file"]
body.append(
f"| `{file_name}` | \"{link['link_text']}\" | `{link['broken_path']}` |"
)

body.append("")
body.append("<details><summary>📂 Full file paths</summary>")
body.append("")
for link in broken_links:
body += f"### In file: `{link['source_file']}`\n"
body += f"- Link text: \"{link['link_text']}\"\n"
body += f"- Broken path: `{link['broken_path']}`\n\n"
body.append(f"- `{link['source_file']}`")
body.append("")
body.append("</details>")

return body
return "\n".join(body)


def main():
# Check if running from GitHub Actions
if "GITHUB_TOKEN" in os.environ:
# Get GitHub token and context
token = os.environ.get("GITHUB_TOKEN")
# Get the directory to check from command line argument
if len(sys.argv) != 2:
print("Usage: python check_and_comment.py <directory>")
sys.exit(1)

directory = sys.argv[1]
if not os.path.isdir(directory):
print(f"Error: {directory} is not a valid directory")
sys.exit(1)

print(f"Checking markdown links in {directory}...")
broken_links = check_markdown_links(directory)

# If running in GitHub Actions, handle PR comment
if 'GITHUB_TOKEN' in os.environ:
# Only import github when needed
from github import Github

token = os.environ.get('GITHUB_TOKEN')
if not token:
print("Error: GITHUB_TOKEN not set")
sys.exit(1)

# Parse GitHub context
with open(os.environ["GITHUB_EVENT_PATH"]) as f:
with open(os.environ['GITHUB_EVENT_PATH']) as f:
event = json.load(f)

repo_name = event['repository']['full_name']
pr_number = event['pull_request']['number']

repo_name = event["repository"]["full_name"]
pr_number = event["pull_request"]["number"]

# Initialize GitHub client
g = Github(token)
repo = g.get_repo(repo_name)
pr = repo.get_pull(pr_number)

# Check for broken links in the docs directory
docs_dir = Path(__file__).parent.parent / "docs"
broken_links = check_markdown_links(str(docs_dir))

# Create and post comment
comment_body = create_comment_body(broken_links)

# Check if we already commented

for comment in pr.get_issue_comments():
if (
"Broken Markdown Links Found" in comment.body
or "No broken markdown links found!" in comment.body
):
if "Broken Links Found" in comment.body or "No broken markdown links found!" in comment.body:
comment.edit(comment_body)
break
else:
pr.create_issue_comment(comment_body)

# Exit with error if broken links were found
if broken_links:
sys.exit(1)

else:
# Running locally
if len(sys.argv) != 2:
print("Usage: python check_and_comment.py <directory>")
sys.exit(1)

directory = sys.argv[1]
if not os.path.isdir(directory):
print(f"Error: {directory} is not a valid directory")
sys.exit(1)

print(f"Checking markdown links in {directory}...")
broken_links = check_markdown_links(directory)

if not broken_links:
print("No broken links found!")
return

print("\nBroken links found:")
for link in broken_links:
print(f"\nSource file: {link['source_file']}")
print(f"Link text: {link['link_text']}")
print(f"Broken path: {link['broken_path']}")

# Always print results locally
if not broken_links:
print("✅ No broken links found!")
sys.exit(0)

print("\n🔍 Broken links found:")
for link in broken_links:
relative_path = format_path_for_display(link['source_file'])
print(f"\n📄 File: {relative_path}")
print(f"📝 Link text: \"{link['link_text']}\"")
print(f"❌ Broken path: {link['broken_path']}")

sys.exit(1)


if __name__ == "__main__":
Expand Down

0 comments on commit 7b9fe33

Please sign in to comment.