From 2554e59832cbb77dc69975ad9675866de4015807 Mon Sep 17 00:00:00 2001 From: Ned Batchelder Date: Fri, 13 Oct 2023 10:55:01 -0400 Subject: [PATCH] wip: check links --- CHANGELOG.rst | 2 +- docs/commands.rst | 3 ++- src/scriv/ghrel.py | 10 +++++++++ src/scriv/linkcheck.py | 49 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 62 insertions(+), 2 deletions(-) create mode 100644 src/scriv/linkcheck.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index ae5d302..46dfa83 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -33,7 +33,7 @@ Added Thanks `Javier Sagredo `_. - Use the git config ``scriv.user_nick`` for the user nick part - of the fragment file. Thanks to `Ronny Pfannschmidt `_, + of the fragment file. Thanks to `Ronny Pfannschmidt `_, fixing `issue 103`_. - Settings can now be prefixed with ``command:`` to execute the rest of the diff --git a/docs/commands.rst b/docs/commands.rst index 627722a..c18e7d5 100644 --- a/docs/commands.rst +++ b/docs/commands.rst @@ -188,12 +188,13 @@ scriv github-release Options: --all Use all of the changelog entries. + --check-links Check that links are valid (EXPERIMENTAL). --dry-run Don't post to GitHub, just show what would be done. --repo TEXT The GitHub repo (owner/reponame) to create the release in. -v, --verbosity LVL Either CRITICAL, ERROR, WARNING, INFO or DEBUG --help Show this message and exit. -.. [[[end]]] (checksum: eaf0f9e06575bf06499354b22928696b) +.. [[[end]]] (checksum: 8c9e189a0429de474313b13afc36612f) The ``github-release`` command reads the changelog file, parses it into entries, and then creates or updates GitHub releases to match. 
Only the most diff --git a/src/scriv/ghrel.py b/src/scriv/ghrel.py index dfb79ee..a8e861b 100644 --- a/src/scriv/ghrel.py +++ b/src/scriv/ghrel.py @@ -11,6 +11,7 @@ from .github import create_release, get_releases, update_release from .gitinfo import get_github_repos +from .linkcheck import check_markdown_links from .scriv import Scriv from .shell import run_simple_command from .util import Version @@ -25,6 +26,11 @@ is_flag=True, help="Use all of the changelog entries.", ) +@click.option( + "--check-links", + is_flag=True, + help="Check that links are valid (EXPERIMENTAL).", +) @click.option( "--dry-run", is_flag=True, @@ -37,6 +43,7 @@ @click_log.simple_verbosity_option() def github_release( all_entries: bool, + check_links: bool, dry_run: bool, repo: Optional[str] = None, ) -> None: @@ -100,6 +107,9 @@ def github_release( ) release_data["body"] = md + if check_links: + check_markdown_links(md) + if version in releases: release = releases[version] if release["body"] != md: diff --git a/src/scriv/linkcheck.py b/src/scriv/linkcheck.py new file mode 100644 index 0000000..87ef489 --- /dev/null +++ b/src/scriv/linkcheck.py @@ -0,0 +1,49 @@ +"""Extracting and checking links.""" + +import concurrent.futures +import logging +from typing import Iterable + +import markdown_it +import requests + + +logger = logging.getLogger(__name__) + + +def find_links(markdown_text: str) -> Iterable[str]: + def walk_tokens(tokens): + for token in tokens: + if token.type == "link_open": + yield token.attrs["href"] + if token.children: + yield from walk_tokens(token.children) + + yield from walk_tokens(markdown_it.MarkdownIt().parse(markdown_text)) + + +def check_markdown_links(markdown_text: str) -> None: + links = set(find_links(markdown_text)) + with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor: + # Start the load operations and mark each future with its URL + futures = [executor.submit(check_one_link, url) for url in links] + concurrent.futures.wait(futures) + + 
def check_one_link(url, max_attempts=5):
    """Check that one URL responds successfully, logging the outcome.

    Sends a HEAD request (following redirects, 60-second timeout).  A 429
    response is retried after a pause; any other response ends the check.

    Args:
        url: The URL to check.
        max_attempts: Upper bound on the number of requests made when the
            server keeps answering 429.  Values below 1 are treated as 1.

    Returns:
        None.  The result is reported through the module logger: a debug
        message for a 200 response, a warning for a request failure or any
        other final status code (including a 429 that never cleared).
    """
    # Imported locally: the module's import block does not bring in `time`,
    # so the original `time.sleep` call raised NameError on the first 429.
    import time

    default_wait = 10
    attempts = max(1, max_attempts)

    for attempts_left in range(attempts - 1, -1, -1):
        try:
            resp = requests.head(url, timeout=60, allow_redirects=True)
        except requests.RequestException as exc:
            logger.warning(f"Failed check for {url!r}: {exc}")
            return

        if resp.status_code != 429 or attempts_left == 0:
            break

        # Retry-After may be a number of seconds or an HTTP-date.  Only the
        # numeric form is understood here; anything else uses the default.
        try:
            wait = int(resp.headers.get("Retry-After", default_wait))
        except (TypeError, ValueError):
            wait = default_wait
        # Clamp so a negative header value cannot make time.sleep raise.
        time.sleep(max(0, wait) + 1)

    if resp.status_code == 200:
        logger.debug(f"OK link: {url!r}")
    else:
        logger.warning(f"Failed check for {url!r}: status code {resp.status_code}")