Skip to content

Commit

Permalink
wip: check links
Browse files Browse the repository at this point in the history
  • Loading branch information
Ned Batchelder committed Oct 13, 2023
1 parent 41582e1 commit 2554e59
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 2 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ Added
Thanks `Javier Sagredo <pull 91_>`_.

- Use the git config ``scriv.user_nick`` for the user nick part
of the fragment file. Thanks to `Ronny Pfannschmidt <pull 106_>`_,
of the fragment file. Thanks to `Ronny Pfannschmidt <pull 106>`_,
fixing `issue 103`_.

- Settings can now be prefixed with ``command:`` to execute the rest of the
Expand Down
3 changes: 2 additions & 1 deletion docs/commands.rst
Original file line number Diff line number Diff line change
Expand Up @@ -188,12 +188,13 @@ scriv github-release
Options:
--all Use all of the changelog entries.
--check-links Check that links are valid (EXPERIMENTAL).
--dry-run Don't post to GitHub, just show what would be done.
--repo TEXT The GitHub repo (owner/reponame) to create the
release in.
-v, --verbosity LVL Either CRITICAL, ERROR, WARNING, INFO or DEBUG
--help Show this message and exit.
.. [[[end]]] (checksum: eaf0f9e06575bf06499354b22928696b)
.. [[[end]]] (checksum: 8c9e189a0429de474313b13afc36612f)
The ``github-release`` command reads the changelog file, parses it into
entries, and then creates or updates GitHub releases to match. Only the most
Expand Down
10 changes: 10 additions & 0 deletions src/scriv/ghrel.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from .github import create_release, get_releases, update_release
from .gitinfo import get_github_repos
from .linkcheck import check_markdown_links
from .scriv import Scriv
from .shell import run_simple_command
from .util import Version
Expand All @@ -25,6 +26,11 @@
is_flag=True,
help="Use all of the changelog entries.",
)
@click.option(
"--check-links",
is_flag=True,
help="Check that links are valid (EXPERIMENTAL).",
)
@click.option(
"--dry-run",
is_flag=True,
Expand All @@ -37,6 +43,7 @@
@click_log.simple_verbosity_option()
def github_release(
all_entries: bool,
check_links: bool,
dry_run: bool,
repo: Optional[str] = None,
) -> None:
Expand Down Expand Up @@ -100,6 +107,9 @@ def github_release(
)
release_data["body"] = md

if check_links:
check_markdown_links(md)

if version in releases:
release = releases[version]
if release["body"] != md:
Expand Down
49 changes: 49 additions & 0 deletions src/scriv/linkcheck.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""Extracting and checking links."""

import concurrent.futures
import logging
import time
from typing import Iterable

import markdown_it
import requests


logger = logging.getLogger(__name__)


def find_links(markdown_text: str) -> Iterable[str]:
    """Yield the ``href`` of every link in `markdown_text`.

    The text is parsed with markdown-it, and the resulting token tree is
    walked depth-first: a token is examined first, then its children, then
    its following siblings.  Links are yielded in that order, and a URL that
    appears more than once is yielded each time.
    """
    top_level = markdown_it.MarkdownIt().parse(markdown_text)

    # A stack of iterators replaces recursion: the innermost token list being
    # walked is always on top.
    pending = [iter(top_level)]
    while pending:
        node = next(pending[-1], None)
        if node is None:
            # This level is exhausted; resume the enclosing one.
            pending.pop()
            continue
        if node.type == "link_open":
            yield node.attrs["href"]
        if node.children:
            pending.append(iter(node.children))


def check_markdown_links(markdown_text: str) -> None:
    """Check every distinct link in `markdown_text`, logging the outcome.

    Links are extracted with `find_links`, de-duplicated, and each one is
    passed to `check_one_link` on a pool of up to five worker threads.  This
    function returns once all checks have finished.  Nothing is returned or
    raised for a bad link: results are reported through the module logger.
    """
    links = set(find_links(markdown_text))
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        # Remember which URL each future is checking so that an unexpected
        # failure can be reported against it.
        futures = {executor.submit(check_one_link, url): url for url in links}
        for future in concurrent.futures.as_completed(futures):
            # An exception raised inside a worker is stored on its future and
            # would otherwise vanish silently; surface it as a warning.
            exc = future.exception()
            if exc is not None:
                logger.warning(f"Failed check for {futures[future]!r}: {exc}")


def check_one_link(url: str) -> None:
    """Check a single URL with an HTTP HEAD request and log the result.

    Redirects are followed and each request times out after 60 seconds.  A
    429 (Too Many Requests) response is retried after waiting for the number
    of seconds given in its ``Retry-After`` header plus one, up to a fixed
    number of attempts.

    A final status of 200 is logged at DEBUG level.  Any other status, or a
    `requests.RequestException` while making the request, is logged as a
    warning.  Nothing is returned or raised for a bad link.
    """
    default_wait = 10  # seconds, when Retry-After is missing or unusable
    max_attempts = 5  # bound the retries so a stuck 429 cannot hang forever

    for attempt in range(1, max_attempts + 1):
        try:
            resp = requests.head(url, timeout=60, allow_redirects=True)
        except requests.RequestException as exc:
            logger.warning(f"Failed check for {url!r}: {exc}")
            return
        if resp.status_code != 429 or attempt == max_attempts:
            break
        # Retry-After may be an HTTP-date rather than a number of seconds;
        # fall back to the default delay instead of crashing on it.
        try:
            wait = int(resp.headers.get("Retry-After", default_wait))
        except ValueError:
            wait = default_wait
        # Guard against a negative value, which time.sleep() rejects.
        time.sleep(max(wait, 0) + 1)

    if resp.status_code == 200:
        logger.debug(f"OK link: {url!r}")
    else:
        logger.warning(f"Failed check for {url!r}: status code {resp.status_code}")

0 comments on commit 2554e59

Please sign in to comment.