From 268b21fc6c4eca78447f16d1a1ab0b561337426c Mon Sep 17 00:00:00 2001
From: Julian Berman <Julian@GrayVines.com>
Date: Tue, 16 Aug 2022 11:47:57 +0300
Subject: [PATCH] Update the documentation interlinks to point to 2020-12.

---
 sphinx_json_schema_spec/__init__.py | 77 ++++++++++++++++-------------
 1 file changed, 42 insertions(+), 35 deletions(-)

diff --git a/sphinx_json_schema_spec/__init__.py b/sphinx_json_schema_spec/__init__.py
index 72c6447..c749fb4 100644
--- a/sphinx_json_schema_spec/__init__.py
+++ b/sphinx_json_schema_spec/__init__.py
@@ -1,20 +1,26 @@
 from contextlib import suppress
 from datetime import datetime
 from importlib import metadata
+from pathlib import Path
 from urllib.parse import urljoin
-import os
 import urllib.request
 
 from docutils import nodes
 from lxml import html
 import certifi
 
-__version__ = "1.2.0"
+__version__ = "2.0.0"
 
-BASE_URL = "https://json-schema.org/draft-07/"
-VALIDATION_SPEC = urljoin(BASE_URL, "json-schema-validation.html")
-REF_URL = urljoin(BASE_URL, "json-schema-core.html#rfc.section.8.3")
-SCHEMA_URL = urljoin(BASE_URL, "json-schema-core.html#rfc.section.7")
+BASE_URL = "https://json-schema.org/draft/2020-12/"
+VOCABULARIES = {
+    "core": urljoin(BASE_URL, "json-schema-core.html"),
+    "validation": urljoin(BASE_URL, "json-schema-validation.html"),
+}
+HARDCODED = {
+    "$ref": "https://json-schema.org/draft/2020-12/json-schema-core.html#ref",
+    "$schema": "https://json-schema.org/draft/2020-12/json-schema-core.html#name-the-schema-keyword",                # noqa: E501
+    "format": "https://json-schema.org/draft/2020-12/json-schema-validation.html#name-implementation-requirements",  # noqa: E501
+}
 
 
 def setup(app):
@@ -30,24 +36,31 @@ def setup(app):
 
     app.add_config_value("cache_path", "_cache", "")
 
-    os.makedirs(app.config.cache_path, exist_ok=True)
+    CACHE = Path(app.config.cache_path)
+    CACHE.mkdir(exist_ok=True)
 
-    path = os.path.join(app.config.cache_path, "spec.html")
-    spec = fetch_or_load(path)
-    app.add_role("kw", docutils_does_not_allow_using_classes(spec))
+    documents = {
+        url: fetch_or_load(vocabulary_path=CACHE / f"{name}.html", url=url)
+        for name, url in VOCABULARIES.items()
+    }
+    app.add_role("kw", docutils_does_not_allow_using_classes(documents))
 
     return dict(version=__version__, parallel_read_safe=True)
 
 
-def fetch_or_load(spec_path):
+def fetch_or_load(vocabulary_path, url):
     """
     Fetch a new specification or use the cache if it's current.
 
     Arguments:
 
-        cache_path:
+        vocabulary_path:
+
+            the local path to a cached vocabulary document
+
+        url:
 
-            the path to a cached specification
+            the URL of the vocabulary document
     """
 
     headers = {
@@ -58,24 +71,23 @@ def fetch_or_load(spec_path):
     }
 
     with suppress(FileNotFoundError):
-        modified = datetime.utcfromtimestamp(os.path.getmtime(spec_path))
+        modified = datetime.utcfromtimestamp(vocabulary_path.stat().st_mtime)
         date = modified.strftime("%a, %d %b %Y %I:%M:%S UTC")
         headers["If-Modified-Since"] = date
 
-    request = urllib.request.Request(VALIDATION_SPEC, headers=headers)
+    request = urllib.request.Request(url, headers=headers)
     response = urllib.request.urlopen(request, cafile=certifi.where())
 
     if response.code == 200:
-        with open(spec_path, "w+b") as spec:
+        with vocabulary_path.open("w+b") as spec:
             spec.writelines(response)
             spec.seek(0)
-            return html.parse(spec)
+            return html.parse(spec).getroot()
 
-    with open(spec_path) as spec:
-        return html.parse(spec)
+    return html.parse(vocabulary_path.read_bytes()).getroot()
 
 
-def docutils_does_not_allow_using_classes(spec):
+def docutils_does_not_allow_using_classes(vocabularies):
     """
     Yeah.
 
@@ -118,27 +130,22 @@ def keyword(name, raw_text, text, lineno, inliner):
                 iterable of system messages, both possibly empty
         """
 
-        if text == "$ref":
-            return [nodes.reference(raw_text, text, refuri=REF_URL)], []
-        elif text == "$schema":
-            return [nodes.reference(raw_text, text, refuri=SCHEMA_URL)], []
+        hardcoded = HARDCODED.get(text)
+        if hardcoded is not None:
+            return [nodes.reference(raw_text, text, refuri=hardcoded)], []
 
         # find the header in the validation spec containing matching text
-        header = spec.xpath("//h1[contains(text(), '{0}')]".format(text))
+        for vocabulary_url, spec in vocabularies.items():
+            header = spec.get_element_by_id(f"name-{text.lower()}", None)
 
-        if len(header) == 0:
+            if header is not None:
+                uri = urljoin(vocabulary_url, header.find("a").attrib["href"])
+                break
+        else:
             inliner.reporter.warning(
                 "Didn't find a target for {0}".format(text),
             )
-            uri = VALIDATION_SPEC
-        else:
-            if len(header) > 1:
-                inliner.reporter.info(
-                    "Found multiple targets for {0}".format(text),
-                )
-
-            # get the href from link in the header
-            uri = urljoin(VALIDATION_SPEC, header[0].find("a").attrib["href"])
+            uri = BASE_URL
 
         reference = nodes.reference(raw_text, text, refuri=uri)
         return [reference], []