Skip to content

Scrape & Update Docs #18

Scrape & Update Docs

Scrape & Update Docs #18

Workflow file for this run

# Scheduled and on-demand run of the documentation scrapers.
name: 'Scrape & Update Docs'

on:
  # Daily at 03:00 (GitHub evaluates cron schedules in UTC).
  schedule:
    - cron: '0 3 * * *'
  # Manual trigger; the optional input narrows the run to one scraper.
  workflow_dispatch:
    inputs:
      scraper:
        description: Which scraper to run
        type: choice
        required: false
        default: all
        options:
          - all
          - paper_docs
          - paper_javadoc
          - bukkit
          - minecraft_wiki
          - plugins
          - index_only
jobs:
  # Runs the scraper scripts, rebuilds the index, and opens a pull request
  # against the docs repository when the docs checkout has changed.
  scrape:
    runs-on: ubuntu-latest
    # NOTE(review): every write visible in this job goes to the docs repository
    # through DOCS_REPO_TOKEN, so these GITHUB_TOKEN scopes look broader than
    # needed. Confirm nothing else relies on them before tightening.
    permissions:
      contents: write
      pull-requests: write
    steps:
      # The two repositories are checked out into sibling directories.
      - name: Checkout scraper repo
        uses: actions/checkout@v4
        with:
          path: minecraft-mcp-scraper

      - name: Checkout docs repo
        uses: actions/checkout@v4
        with:
          repository: Vortex-SMP/minecraft-mcp-docs
          path: minecraft-mcp-docs
          # Presumably also what authenticates the later `git push` from this
          # checkout — verify if persist-credentials is ever disabled.
          token: ${{ secrets.DOCS_REPO_TOKEN }}

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
          cache: "pip"
          cache-dependency-path: minecraft-mcp-scraper/requirements.txt

      - name: Install dependencies
        working-directory: minecraft-mcp-scraper
        run: pip install -r requirements.txt

      - name: Run scrapers
        # Skipped when only the index rebuild was requested.
        if: ${{ github.event.inputs.scraper != 'index_only' }}
        working-directory: minecraft-mcp-scraper
        env:
          # Handed over through the environment instead of being interpolated
          # into the script, so the value is never parsed as shell code.
          # Scheduled runs carry no inputs, hence the 'all' fallback.
          SCRAPER: ${{ github.event.inputs.scraper || 'all' }}
        run: |
          if [ "$SCRAPER" = "plugins" ]; then
            python run_plugins.py
          else
            python run.py --scraper "$SCRAPER" --no-index
          fi

      - name: Run plugin scrapers (on full run)
        # Full runs: an explicit 'all', or a scheduled run with no input.
        if: ${{ github.event.inputs.scraper == 'all' || github.event.inputs.scraper == '' }}
        working-directory: minecraft-mcp-scraper
        run: python run_plugins.py

      - name: Build SQLite index
        working-directory: minecraft-mcp-scraper
        run: python run.py --scraper index_only

      - name: Check for changes
        id: changes
        working-directory: minecraft-mcp-docs
        run: |
          # Stage everything so added and deleted files count as changes; the
          # staged index is what the pull-request step commits.
          git add -A
          if git diff --staged --quiet; then
            echo "has_changes=false" >> "$GITHUB_OUTPUT"
            echo "No changes detected."
          else
            echo "has_changes=true" >> "$GITHUB_OUTPUT"
            # Build changelog from git diff stats (last line is the summary)
            STATS=$(git diff --staged --stat | tail -1)
            echo "stats=$STATS" >> "$GITHUB_OUTPUT"
            # List changed sources: distinct sources/<name> prefixes joined
            # with ", " (empty when no changed path is under sources/).
            CHANGED=$(
              git diff --staged --name-only \
                | grep -oP 'sources/[^/]+' \
                | sort -u \
                | awk 'NR > 1 { printf ", " } { printf "%s", $0 }'
            )
            echo "changed_sources=$CHANGED" >> "$GITHUB_OUTPUT"
          fi

      - name: Create Pull Request
        if: steps.changes.outputs.has_changes == 'true'
        working-directory: minecraft-mcp-docs
        env:
          GH_TOKEN: ${{ secrets.DOCS_REPO_TOKEN }}
          # Step outputs are derived from file names in the docs checkout, so
          # they are passed as environment variables rather than pasted into
          # the script text where quotes or $(...) would be executed.
          SCRAPER: ${{ github.event.inputs.scraper || 'all' }}
          STATS: ${{ steps.changes.outputs.stats }}
          SOURCES: ${{ steps.changes.outputs.changed_sources }}
        run: |
          BRANCH="scraper/update-$(date -u '+%Y%m%d-%H%M')"
          DATE=$(date -u '+%Y-%m-%d %H:%M UTC')
          git config user.name "VortexSMP-bot"
          git config user.email "bot@vortex-smp.fr"
          git checkout -b "$BRANCH"
          # Commits the changes staged by the "Check for changes" step.
          git commit -m "chore: update scraped docs [$DATE]"
          git push origin "$BRANCH"
          # Create PR with detailed body. Blank lines separate the blocks so
          # the '---' renders as a rule instead of turning the line above it
          # into a heading.
          gh pr create \
            --repo Vortex-SMP/minecraft-mcp-docs \
            --title "📚 Docs update — $DATE" \
            --body "## Automated documentation update

          **Triggered by:** \`$SCRAPER\` scraper
          **Date:** $DATE
          **Changes:** $STATS

          ### Updated sources

          $SOURCES

          ### What changed

          $(git diff HEAD~1 --stat | head -30)

          ---

          *This PR was automatically created by the [minecraft-mcp-scraper](https://github.com/Vortex-SMP/minecraft-mcp-scraper) workflow.*
          *Review and merge if the changes look correct.*" \
            --base main \
            --head "$BRANCH"

      - name: No changes
        if: steps.changes.outputs.has_changes == 'false'
        run: echo "✓ Documentation is already up to date."

      - name: Notify Discord on failure
        if: failure()
        run: |
          # Uncomment and set DISCORD_WEBHOOK in repo secrets to enable
          # NOTE(review): Actions expands the expressions below even though the
          # shell treats these lines as comments.
          # curl -X POST "${{ secrets.DISCORD_WEBHOOK }}" \
          # -H "Content-Type: application/json" \
          # -d "{\"content\": \"⚠️ minecraft-mcp scraper failed! See: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}\"}"
          echo "::error::Scraper failed."