diff --git a/docs/source/clean_autodocs.py b/docs/source/clean_autodocs.py index 7225253..d9147d8 100644 --- a/docs/source/clean_autodocs.py +++ b/docs/source/clean_autodocs.py @@ -6,6 +6,12 @@ 2. Remove hidden modules (configured in conf.py) 3. Update toctree entries to use short display names in the sidebar while keeping full module paths as page titles +4. Replace automodule directives with autosummary tables (optional) +5. Rename page headings by stripping "package"/"module" suffixes (optional) +6. Inject :titlesonly: into toctree directives (optional) + +All steps are individually controlled via flags in conf.py — see the +feature-flag block there for the full list and defaults. """ import importlib @@ -14,18 +20,30 @@ import types from pathlib import Path -# Ensure the project source tree takes precedence over any installed version of -# the package so that __all__ reflects the current source code. -_SRC_DIR = str(Path(__file__).resolve().parent.parent.parent / "src") -if _SRC_DIR not in sys.path: - sys.path.insert(0, _SRC_DIR) +# Ensure conf.py's directory is on sys.path so it can be imported when this +# script is run directly from any working directory. +_DOCS_SOURCE_DIR = str(Path(__file__).resolve().parent) +if _DOCS_SOURCE_DIR not in sys.path: + sys.path.insert(0, _DOCS_SOURCE_DIR) -# Import configuration from conf.py +# Import configuration from conf.py. +# conf.py owns the sys.path setup for the project src directory. import conf DOCS_DIR = Path(__file__).parent -AUTODOCS_DIR = DOCS_DIR / "autodocs" -MODULE_PREFIX = "earthkit.transforms" +AUTODOCS_DIR = DOCS_DIR / getattr(conf, "autodocs_dir", "autodocs") +MODULE_PREFIX: str = conf.module_prefix # e.g., 'earthkit.transforms' + +# --------------------------------------------------------------------------- +# Feature flags — read from conf.py with backward-compatible defaults +# --------------------------------------------------------------------------- +_delete_hidden: bool = getattr(conf, "autodocs_delete_hidden", True) +_replace_automodule: bool = getattr(conf, "autodocs_replace_automodule", True) +_short_display_names: bool = getattr(conf, "autodocs_short_display_names", True) +_top_level_maxdepth: int | None = getattr(conf, "autodocs_top_level_maxdepth", 1) +_rename_titles: bool = getattr(conf, "autodocs_rename_titles", False) +_top_level_title: str = getattr(conf, "autodocs_top_level_title", "API Reference") +_titlesonly: bool = getattr(conf, "autodocs_titlesonly", False) def get_hidden_modules() -> list[str]: @@ -108,6 +126,63 @@ def get_module_api(module_name: str) -> list[str]: return [] +def _is_rst_underline(line: str) -> bool: + """Return True if *line* looks like an RST heading underline.""" + s = line.strip() + return len(s) >= 2 and len(set(s)) == 1 and s[0] in "=-~^*+#" + + +def rename_titles(content: str) -> str: + """Rename RST page headings generated by sphinx-apidoc. + + - Replaces the top-level package heading (``MODULE_PREFIX package``) with + ``_top_level_title``. + - Strips the " package" / " module" suffix from all other headings. + + The RST underline is resized to match the new title length. + + Args: + content: RST file content. + + Returns: + Updated RST content. + + """ + lines = content.split("\n") + result: list[str] = [] + i = 0 + + while i < len(lines): + line = lines[i] + stripped = line.strip() + + # Only attempt title processing when the next line is an RST underline. + if i + 1 < len(lines) and _is_rst_underline(lines[i + 1]): + underline_char = lines[i + 1].strip()[0] + + # Top-level package/module heading → custom title. + if re.match(rf"^{re.escape(MODULE_PREFIX)}\s+(package|module)$", stripped): + new_title = _top_level_title + result.append(new_title) + result.append(underline_char * len(new_title)) + i += 2 + continue + + # Submodule/subpackage heading → strip " package"/" module" suffix. + match = re.match(r"^([\w\.\\]+)\s+(package|module)$", stripped) + if match: + new_title = match.group(1) + result.append(new_title) + result.append(underline_char * len(new_title)) + i += 2 + continue + + result.append(line) + i += 1 + + return "\n".join(result) + + def replace_automodule_with_autosummary(content: str, module_name: str) -> str: """Replace the automodule :members: block with an autosummary table. @@ -164,87 +239,119 @@ def replace_automodule_with_autosummary(content: str, module_name: str) -> str: return content -def clean_toctree(content: str, hidden_modules: list[str], max_depth: int | None = None) -> str: +def clean_toctree( + content: str, + hidden_modules: list[str], + max_depth: int | None = None, + short_names: bool = True, + titlesonly: bool = False, +) -> str: """Clean up toctree entries in RST content. - Removes entries for hidden/private modules - - Updates entries to use short display names: 'temporal ' - - Optionally overrides :maxdepth: to control how deep the TOC is rendered on the page + - Optionally rewrites entries to use short display names: + ``temporal `` + - Optionally overrides :maxdepth: to control how deep the TOC renders + - Optionally injects :titlesonly: when absent Args: content: RST file content hidden_modules: List of module names to hide max_depth: If set, replaces :maxdepth: in every toctree with this number. - This only affects the TOC rendering depth on the page; all linked pages - are still fully built and reachable. + This only affects rendering depth; all linked pages are still built. + short_names: If True, rewrite toctree entries to use the short module + name as the display label. + titlesonly: If True, inject ``:titlesonly:`` into toctrees that lack it. Returns: Updated RST content """ lines = content.split("\n") - result_lines = [] + result_lines: list[str] = [] in_toctree = False + in_options_block = False # True while we are still in the options section + toctree_has_titlesonly = False for line in lines: # Detect start of toctree directive if ".. toctree::" in line: in_toctree = True + in_options_block = True + toctree_has_titlesonly = False result_lines.append(line) continue - if in_toctree: - # Check if we're still in the toctree (indented content or empty line) - stripped = line.lstrip() - current_indent = line[: len(line) - len(stripped)] + if not in_toctree: + result_lines.append(line) + continue - # If line is not empty and not indented, we've left the toctree - if stripped and not line.startswith(" ") and not line.startswith("\t"): - in_toctree = False - result_lines.append(line) - continue + # ---- Inside a toctree ---- + stripped = line.lstrip() + current_indent = line[: len(line) - len(stripped)] if stripped else " " + + # Non-indented, non-empty line → we have left the toctree + if stripped and not line.startswith((" ", "\t")): + in_toctree = False + in_options_block = False + result_lines.append(line) + continue - # Skip empty lines within toctree - if not stripped: + # Blank line — marks the end of the options block + if not stripped: + if in_options_block: + in_options_block = False + if titlesonly and not toctree_has_titlesonly: + result_lines.append(" :titlesonly:") + result_lines.append(line) + continue + + # Option line (starts with ':') + if stripped.startswith(":"): + if stripped.startswith(":titlesonly:"): + toctree_has_titlesonly = True + if max_depth is not None and stripped.startswith(":maxdepth:"): + result_lines.append(f"{current_indent}:maxdepth: {max_depth}") + else: result_lines.append(line) - continue + continue - # Check if this is a toctree option (starts with :) - if stripped.startswith(":"): - if max_depth is not None and stripped.startswith(":maxdepth:"): - result_lines.append(f"{current_indent}:maxdepth: {max_depth}") - else: - result_lines.append(line) - continue + # First entry line with no preceding blank separator — close options block + if in_options_block: + in_options_block = False + if titlesonly and not toctree_has_titlesonly: + result_lines.append(" :titlesonly:") - # This is a toctree entry - extract the module name - # Entries can be in format: "module.name" or "display " - match = re.match(r"^\s*(?:.*\s+<)?([^<>\s]+)>?\s*$", line) - if match: - module_name = match.group(1) + # Toctree entry — may be "module.name" or "display " + match = re.match(r"^\s*(?:.*\s+<)?([^<>\s]+)>?\s*$", line) + if match: + module_name = match.group(1) - # Skip hidden modules - if should_hide_module(module_name, hidden_modules): - continue + # Skip hidden modules + if should_hide_module(module_name, hidden_modules): + continue - # Create short display name entry + # Rewrite to short display name + if short_names: short_name = get_short_name(module_name) if short_name != module_name: result_lines.append(f"{current_indent}{short_name} <{module_name}>") - else: - result_lines.append(line) - else: - result_lines.append(line) - else: - result_lines.append(line) + continue + + result_lines.append(line) return "\n".join(result_lines) -def clean_autodocs(): +def clean_autodocs() -> None: """Clean up autodocs RST files.""" hidden_modules = get_hidden_modules() print(f"Hidden modules: {hidden_modules}") + print( + f"Flags: delete_hidden={_delete_hidden}, replace_automodule={_replace_automodule}, " + f"short_display_names={_short_display_names}, top_level_maxdepth={_top_level_maxdepth}, " + f"rename_titles={_rename_titles}, titlesonly={_titlesonly}" + ) if not AUTODOCS_DIR.exists(): print(f"Autodocs directory not found: {AUTODOCS_DIR}") @@ -260,20 +367,35 @@ def clean_autodocs(): for rst_file in rst_files: module_name = rst_file.stem # e.g., 'earthkit.transforms.temporal' - # Delete files for hidden/private modules - if should_hide_module(module_name, hidden_modules): + # --- Step 1: delete hidden/private modules ------------------------- + if _delete_hidden and should_hide_module(module_name, hidden_modules): print(f"Deleting: {rst_file.name}") rst_file.unlink() files_deleted += 1 continue - # Clean up toctree entries and replace automodule with autosummary content = rst_file.read_text() - # Limit the toctree depth on the top-level package page to 1 so it only - # lists subpackage names. All child pages are still fully built. - toctree_depth = 1 if module_name == MODULE_PREFIX else None - new_content = clean_toctree(content, hidden_modules, max_depth=toctree_depth) - new_content = replace_automodule_with_autosummary(new_content, module_name) + new_content = content + + # --- Step 2: clean toctree entries --------------------------------- + # Limit the toctree depth on the top-level package page to + # _top_level_maxdepth so it only lists subpackage names. + toctree_depth = _top_level_maxdepth if module_name == MODULE_PREFIX else None + new_content = clean_toctree( + new_content, + hidden_modules, + max_depth=toctree_depth, + short_names=_short_display_names, + titlesonly=_titlesonly, + ) + + # --- Step 3: replace automodule with autosummary ------------------ + if _replace_automodule: + new_content = replace_automodule_with_autosummary(new_content, module_name) + + # --- Step 4: rename page headings --------------------------------- + if _rename_titles: + new_content = rename_titles(new_content) if new_content != content: rst_file.write_text(new_content) diff --git a/docs/source/conf.py b/docs/source/conf.py index 932f3bd..fd19e3a 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -30,9 +30,14 @@ else: source_branch = "main" -sys.path.insert(0, os.path.abspath("../../src")) +src_path = os.path.normpath( + os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "src") +) +sys.path.insert(0, src_path) project = "earthkit-transforms" +module_prefix = project.replace("-", ".") +autodocs_dir = "autodocs" copyright = f"{datetime.datetime.now().year}, European Centre for Medium-Range Weather Forecasts (ECMWF)" author = "European Centre for Medium-Range Weather Forecasts (ECMWF)" @@ -118,6 +123,16 @@ "version" ] +# clean_autodocs.py feature flags +# Set to False/None to disable or soften the corresponding processing step. +autodocs_delete_hidden = True # delete RST files for private/hidden modules +autodocs_replace_automodule = True # replace automodule directives with autosummary tables +autodocs_short_display_names = True # shorten toctree labels to the last module component +autodocs_top_level_maxdepth = 1 # :maxdepth: on top-level page (None = keep sphinx-apidoc value) +autodocs_rename_titles = False # strip " package"/" module" from RST page headings +autodocs_top_level_title = "API Reference" # top-level page heading (used when rename_titles=True) +autodocs_titlesonly = False # inject :titlesonly: into toctree directives + # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path.