Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
236 changes: 179 additions & 57 deletions docs/source/clean_autodocs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@
2. Remove hidden modules (configured in conf.py)
3. Update toctree entries to use short display names in the sidebar
while keeping full module paths as page titles
4. Replace automodule directives with autosummary tables (optional)
5. Rename page headings by stripping "package"/"module" suffixes (optional)
6. Inject :titlesonly: into toctree directives (optional)

All steps are individually controlled via flags in conf.py — see the
feature-flag block there for the full list and defaults.
"""

import importlib
Expand All @@ -14,18 +20,30 @@
import types
from pathlib import Path

# Ensure conf.py's directory is on sys.path so it can be imported when this
# script is run directly from any working directory.
# The project ``src`` directory is deliberately not inserted here: conf.py
# inserts it into sys.path itself when it is imported, so a second insertion
# from this script would only duplicate that entry.
_DOCS_SOURCE_DIR = str(Path(__file__).resolve().parent)
if _DOCS_SOURCE_DIR not in sys.path:
    sys.path.insert(0, _DOCS_SOURCE_DIR)

# Import configuration from conf.py.
# conf.py owns the sys.path setup for the project src directory.
import conf

# Directory containing this script (and conf.py).
DOCS_DIR = Path(__file__).parent
# Location of the generated RST files; conf.py may override the folder name
# via ``autodocs_dir``.
AUTODOCS_DIR = DOCS_DIR / getattr(conf, "autodocs_dir", "autodocs")
# Dotted name of the top-level documented package.  Falls back to the value
# that used to be hard-coded here so that a conf.py without ``module_prefix``
# keeps working, consistent with the defaults used for the other settings.
MODULE_PREFIX: str = getattr(conf, "module_prefix", "earthkit.transforms")

# ---------------------------------------------------------------------------
# Feature flags — read from conf.py with backward-compatible defaults
# ---------------------------------------------------------------------------
# Each flag is looked up on the imported ``conf`` module; when conf.py does not
# define it, the default given as the third ``getattr`` argument is used.

# Gate for deleting the RST files of hidden/private modules.
_delete_hidden: bool = getattr(conf, "autodocs_delete_hidden", True)
# Gate for replacing automodule directives with autosummary tables.
_replace_automodule: bool = getattr(conf, "autodocs_replace_automodule", True)
# Passed to clean_toctree() as ``short_names`` (short toctree display labels).
_short_display_names: bool = getattr(conf, "autodocs_short_display_names", True)
# ``:maxdepth:`` forced on the page whose name equals MODULE_PREFIX;
# None leaves the existing ``:maxdepth:`` value untouched.
_top_level_maxdepth: int | None = getattr(conf, "autodocs_top_level_maxdepth", 1)
# Gate for rewriting page headings via rename_titles().
_rename_titles: bool = getattr(conf, "autodocs_rename_titles", False)
# Heading text rename_titles() substitutes for the top-level package heading.
_top_level_title: str = getattr(conf, "autodocs_top_level_title", "API Reference")
# Passed to clean_toctree() as ``titlesonly`` (inject ``:titlesonly:``).
_titlesonly: bool = getattr(conf, "autodocs_titlesonly", False)


def get_hidden_modules() -> list[str]:
Expand Down Expand Up @@ -108,6 +126,63 @@ def get_module_api(module_name: str) -> list[str]:
return []


def _is_rst_underline(line: str) -> bool:
"""Return True if *line* looks like an RST heading underline."""
s = line.strip()
return len(s) >= 2 and len(set(s)) == 1 and s[0] in "=-~^*+#"


def rename_titles(content: str) -> str:
    """Rewrite the page headings of an RST document.

    A line counts as a heading only when the line after it is an RST
    underline (see ``_is_rst_underline``).  Two kinds of heading are changed:

    - ``<MODULE_PREFIX> package`` / ``<MODULE_PREFIX> module`` is replaced by
      the configured ``_top_level_title``.
    - Any other ``<dotted.name> package`` / ``<dotted.name> module`` heading
      keeps only the dotted name.

    In both cases the underline is regenerated with the original underline
    character, repeated to the length of the new heading text.  Every other
    line is passed through unchanged.

    Args:
        content: RST file content.

    Returns:
        The RST content with the headings rewritten.

    """
    top_level_heading = re.compile(rf"^{re.escape(MODULE_PREFIX)}\s+(package|module)$")
    # The name part may contain word characters, dots and backslashes.
    suffixed_heading = re.compile(r"^([\w\.\\]+)\s+(package|module)$")

    source_lines = content.split("\n")
    last_index = len(source_lines) - 1
    rewritten: list[str] = []
    skip_next = False  # set when the following line is an underline already replaced

    for index, current in enumerate(source_lines):
        if skip_next:
            skip_next = False
            continue

        # Only a line directly followed by an underline is treated as a heading.
        if index < last_index and _is_rst_underline(source_lines[index + 1]):
            marker = source_lines[index + 1].strip()[0]
            text = current.strip()

            if top_level_heading.match(text):
                heading = _top_level_title
                replaced = True
            else:
                found = suffixed_heading.match(text)
                replaced = found is not None
                heading = found.group(1) if found else ""

            if replaced:
                rewritten.append(heading)
                rewritten.append(marker * len(heading))
                skip_next = True
                continue

        rewritten.append(current)

    return "\n".join(rewritten)


def replace_automodule_with_autosummary(content: str, module_name: str) -> str:
"""Replace the automodule :members: block with an autosummary table.

Expand Down Expand Up @@ -164,87 +239,119 @@ def replace_automodule_with_autosummary(content: str, module_name: str) -> str:
return content


def clean_toctree(content: str, hidden_modules: list[str], max_depth: int | None = None) -> str:
def clean_toctree(
content: str,
hidden_modules: list[str],
max_depth: int | None = None,
short_names: bool = True,
titlesonly: bool = False,
) -> str:
"""Clean up toctree entries in RST content.

- Removes entries for hidden/private modules
- Updates entries to use short display names: 'temporal <earthkit.transforms.temporal>'
- Optionally overrides :maxdepth: to control how deep the TOC is rendered on the page
- Optionally rewrites entries to use short display names:
``temporal <earthkit.transforms.temporal>``
- Optionally overrides :maxdepth: to control how deep the TOC renders
- Optionally injects :titlesonly: when absent

Args:
content: RST file content
hidden_modules: List of module names to hide
max_depth: If set, replaces :maxdepth: in every toctree with this number.
This only affects the TOC rendering depth on the page; all linked pages
are still fully built and reachable.
This only affects rendering depth; all linked pages are still built.
short_names: If True, rewrite toctree entries to use the short module
name as the display label.
titlesonly: If True, inject ``:titlesonly:`` into toctrees that lack it.

Returns:
Updated RST content

"""
lines = content.split("\n")
result_lines = []
result_lines: list[str] = []
in_toctree = False
in_options_block = False # True while we are still in the options section
toctree_has_titlesonly = False

for line in lines:
# Detect start of toctree directive
if ".. toctree::" in line:
in_toctree = True
in_options_block = True
toctree_has_titlesonly = False
result_lines.append(line)
continue

if in_toctree:
# Check if we're still in the toctree (indented content or empty line)
stripped = line.lstrip()
current_indent = line[: len(line) - len(stripped)]
if not in_toctree:
result_lines.append(line)
continue

# If line is not empty and not indented, we've left the toctree
if stripped and not line.startswith(" ") and not line.startswith("\t"):
in_toctree = False
result_lines.append(line)
continue
# ---- Inside a toctree ----
stripped = line.lstrip()
current_indent = line[: len(line) - len(stripped)] if stripped else " "

# Non-indented, non-empty line → we have left the toctree
if stripped and not line.startswith((" ", "\t")):
in_toctree = False
in_options_block = False
result_lines.append(line)
continue

# Skip empty lines within toctree
if not stripped:
# Blank line — marks the end of the options block
if not stripped:
if in_options_block:
in_options_block = False
if titlesonly and not toctree_has_titlesonly:
result_lines.append(" :titlesonly:")
result_lines.append(line)
continue

# Option line (starts with ':')
if stripped.startswith(":"):
if stripped.startswith(":titlesonly:"):
toctree_has_titlesonly = True
if max_depth is not None and stripped.startswith(":maxdepth:"):
result_lines.append(f"{current_indent}:maxdepth: {max_depth}")
else:
result_lines.append(line)
continue
continue

# Check if this is a toctree option (starts with :)
if stripped.startswith(":"):
if max_depth is not None and stripped.startswith(":maxdepth:"):
result_lines.append(f"{current_indent}:maxdepth: {max_depth}")
else:
result_lines.append(line)
continue
# First entry line with no preceding blank separator — close options block
if in_options_block:
in_options_block = False
if titlesonly and not toctree_has_titlesonly:
result_lines.append(" :titlesonly:")

# This is a toctree entry - extract the module name
# Entries can be in format: "module.name" or "display <module.name>"
match = re.match(r"^\s*(?:.*\s+<)?([^<>\s]+)>?\s*$", line)
if match:
module_name = match.group(1)
# Toctree entry — may be "module.name" or "display <module.name>"
match = re.match(r"^\s*(?:.*\s+<)?([^<>\s]+)>?\s*$", line)
if match:
module_name = match.group(1)

# Skip hidden modules
if should_hide_module(module_name, hidden_modules):
continue
# Skip hidden modules
if should_hide_module(module_name, hidden_modules):
continue

# Create short display name entry
# Rewrite to short display name
if short_names:
short_name = get_short_name(module_name)
if short_name != module_name:
result_lines.append(f"{current_indent}{short_name} <{module_name}>")
else:
result_lines.append(line)
else:
result_lines.append(line)
else:
result_lines.append(line)
continue

result_lines.append(line)

return "\n".join(result_lines)


def clean_autodocs():
def clean_autodocs() -> None:
"""Clean up autodocs RST files."""
hidden_modules = get_hidden_modules()
print(f"Hidden modules: {hidden_modules}")
print(
f"Flags: delete_hidden={_delete_hidden}, replace_automodule={_replace_automodule}, "
f"short_display_names={_short_display_names}, top_level_maxdepth={_top_level_maxdepth}, "
f"rename_titles={_rename_titles}, titlesonly={_titlesonly}"
)

if not AUTODOCS_DIR.exists():
print(f"Autodocs directory not found: {AUTODOCS_DIR}")
Expand All @@ -260,20 +367,35 @@ def clean_autodocs():
for rst_file in rst_files:
module_name = rst_file.stem # e.g., 'earthkit.transforms.temporal'

# Delete files for hidden/private modules
if should_hide_module(module_name, hidden_modules):
# --- Step 1: delete hidden/private modules -------------------------
if _delete_hidden and should_hide_module(module_name, hidden_modules):
print(f"Deleting: {rst_file.name}")
rst_file.unlink()
files_deleted += 1
continue

# Clean up toctree entries and replace automodule with autosummary
content = rst_file.read_text()
# Limit the toctree depth on the top-level package page to 1 so it only
# lists subpackage names. All child pages are still fully built.
toctree_depth = 1 if module_name == MODULE_PREFIX else None
new_content = clean_toctree(content, hidden_modules, max_depth=toctree_depth)
new_content = replace_automodule_with_autosummary(new_content, module_name)
new_content = content

# --- Step 2: clean toctree entries ---------------------------------
# Limit the toctree depth on the top-level package page to
# _top_level_maxdepth so it only lists subpackage names.
toctree_depth = _top_level_maxdepth if module_name == MODULE_PREFIX else None
new_content = clean_toctree(
new_content,
hidden_modules,
max_depth=toctree_depth,
short_names=_short_display_names,
titlesonly=_titlesonly,
)

# --- Step 3: replace automodule with autosummary ------------------
if _replace_automodule:
new_content = replace_automodule_with_autosummary(new_content, module_name)

# --- Step 4: rename page headings ---------------------------------
if _rename_titles:
new_content = rename_titles(new_content)

if new_content != content:
rst_file.write_text(new_content)
Expand Down
17 changes: 16 additions & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,14 @@
else:
source_branch = "main"

sys.path.insert(0, os.path.abspath("../../src"))
src_path = os.path.normpath(
os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "src")
)
sys.path.insert(0, src_path)

project = "earthkit-transforms"
module_prefix = project.replace("-", ".")
autodocs_dir = "autodocs"

copyright = f"{datetime.datetime.now().year}, European Centre for Medium-Range Weather Forecasts (ECMWF)"
author = "European Centre for Medium-Range Weather Forecasts (ECMWF)"
Expand Down Expand Up @@ -118,6 +123,16 @@
"version"
]

# clean_autodocs.py feature flags
# Set to False/None to disable or soften the corresponding processing step.
autodocs_delete_hidden = True # delete RST files for private/hidden modules
autodocs_replace_automodule = True # replace automodule directives with autosummary tables
autodocs_short_display_names = True # shorten toctree labels to the last module component
autodocs_top_level_maxdepth = 1 # :maxdepth: on top-level page (None = keep sphinx-apidoc value)
autodocs_rename_titles = False # strip " package"/" module" from RST page headings
autodocs_top_level_title = "API Reference" # top-level page heading (used when rename_titles=True)
autodocs_titlesonly = False # inject :titlesonly: into toctree directives

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
Expand Down
Loading