Skip to content

Commit

Permalink
docs: add scripts for notebook conversion (#3406)
Browse files Browse the repository at this point in the history
* Update notebook conversion code
* Convert one more file

---------

Co-authored-by: Ben Burns <[email protected]>
  • Loading branch information
eyurtsev and benjamincburns authored Feb 13, 2025
1 parent 65976f3 commit b310ce0
Show file tree
Hide file tree
Showing 13 changed files with 1,234 additions and 475 deletions.
97 changes: 75 additions & 22 deletions docs/_scripts/generate_api_reference_links.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import importlib
from importlib.machinery import ModuleSpec
import importlib.util
import inspect
import logging
import re
from functools import lru_cache
import sys
from typing import List, Literal, Optional

from typing_extensions import TypedDict
Expand Down Expand Up @@ -72,9 +75,8 @@ def _make_regular_expression(pkg_prefix: str) -> re.Pattern:
if not pkg_prefix.isidentifier():
raise ValueError(f"Invalid package prefix: {pkg_prefix}")
return re.compile(
r"from\s+(" + pkg_prefix + "(?:_\w+)?(?:\.\w+)*?)\s+import\s+"
r"((?:\w+(?:,\s*)?)*" # Match zero or more words separated by a comma+optional ws
r"(?:\s*\(.*?\))?)", # Match optional parentheses block
r"from\s+(" + pkg_prefix + r"(?:_\w+)?(?:\.\w+)*?)\s+import\s+\(?"
r"((?:\w+(?:,\s*)?)*)\s*\)?", # Match zero or more words separated by a comma+optional ws
re.DOTALL, # Match newlines as well
)

Expand All @@ -85,22 +87,57 @@ def _make_regular_expression(pkg_prefix: str) -> re.Pattern:


@lru_cache(maxsize=10_000)
def _get_full_module_name(module_path: str, class_name: str) -> Optional[str]:
def _get_full_module_name(
module_path: str, class_name: str | None, doc_title: str
) -> Optional[str]:
"""Get full module name using inspect, with LRU cache to memoize results."""
try:
module = importlib.import_module(module_path)
class_ = getattr(module, class_name)
module = inspect.getmodule(class_)
if module is None:
# For constants, inspect.getmodule() might return None
# In this case, we'll return the original module_path
return module_path
if module_path in sys.modules:
module = sys.modules[module_path]
else:
spec: ModuleSpec | None = importlib.util.find_spec(module_path)
if spec is not None:
module = importlib.util.module_from_spec(spec)
sys.modules[module_path] = module
spec.loader.exec_module(module)

if class_name is not None:
class_ = getattr(module, class_name)

if re.match(r"\w+\s+as\s+\w+", class_name):
# Handle cases like "A as B"
class_name, _ = class_name.split(" as ")

module = inspect.getmodule(class_)
if module is None:
# For constants, inspect.getmodule() might return None
# In this case, we'll return the original module_path
return module_path
return module.__name__
except AttributeError as e:
logger.warning(f"API Reference: Could not find module for {class_name}, {e}")
if class_name is not None:
# the class_name might actually be a module
# e.g. from langchain import hub
# try to import it as a module, and if that doesn't work, throw
if class_name is not None:
module_name = _get_full_module_name(
f"{module_path}.{class_name}", None, doc_title
)
if module_name is not None:
# return the name of the parent module, rather than the name of the class as though it were a module
return module.__name__
logger.warning(
f"API Reference: Could not find module for {class_name} in {module_path}, imported in doc {doc_title}, {e}"
)
# don't log if we're trying to import the "hub" part as though it were a module
logger.warning(
f"API Reference: Could not find module for {module_path}, imported in doc {doc_title}, {e}"
)
return None
except ImportError as e:
logger.warning(f"API Reference: Failed to load for class {class_name}, {e}")
logger.warning(
f"API Reference: Failed to import module {module_path} {doc_title}, {e}"
)
return None


Expand Down Expand Up @@ -160,7 +197,22 @@ def _get_imports(
if imp.strip()
]
for class_name in imported_classes:
module_path = _get_full_module_name(module, class_name)
if module == "langchain_core.messages" and class_name == ")":
print("WARNING: ", file=sys.stderr)
print(
f"WARNING: Trying to import {class_name} from {module} in doc {doc_title}",
file=sys.stderr,
)
print("WARNING: ", file=sys.stderr)
print("WARNING:", import_match.group(0), file=sys.stderr)
print("WARNING: ", file=sys.stderr)
print(
"\n".join([f"WARNING: {line}" for line in code.splitlines()]),
file=sys.stderr,
)
print("WARNING: ", file=sys.stderr)

module_path = _get_full_module_name(module, class_name, doc_title)
if not module_path:
continue
if len(module_path.split(".")) < 2:
Expand Down Expand Up @@ -230,7 +282,7 @@ def get_imports(code: str, doc_title: str) -> List[ImportInformation]:
return all_imports


def update_markdown_with_imports(markdown: str) -> str:
def update_markdown_with_imports(markdown: str, file_name: str) -> str:
"""Update markdown to include API reference links for imports in Python code blocks.
This function scans the markdown content for Python code blocks, extracts any imports, and appends links to their API documentation.
Expand All @@ -250,7 +302,8 @@ def update_markdown_with_imports(markdown: str) -> str:
This function will append an API reference link to the `TextGenerator` class from the `langchain.nlp` module if it's recognized.
"""
code_block_pattern = re.compile(
r'(?P<indent>[ \t]*)```(?P<language>python|py)\n(?P<code>.*?)\n(?P=indent)```', re.DOTALL
r"(?P<indent>[ \t]*)```(?P<language>python|py)\n(?P<code>.*?)\n(?P=indent)```",
re.DOTALL,
)

def replace_code_block(match: re.Match) -> str:
Expand All @@ -262,23 +315,23 @@ def replace_code_block(match: re.Match) -> str:
Returns:
str: The modified code block with API reference links appended if applicable.
"""
indent = match.group('indent')
code_block = match.group('code')
language = match.group('language') # Preserve the language from the regex match
indent = match.group("indent")
code_block = match.group("code")
language = match.group("language") # Preserve the language from the regex match
# Retrieve import information from the code block
imports = get_imports(code_block, "__unused__")
imports = get_imports(code_block, file_name)

original_code_block = match.group(0)
# If no imports are found, return the original code block
if not imports:
return original_code_block

# Generate API reference links for each import
api_links = ' | '.join(
api_links = " | ".join(
f'<a href="{imp["docs"]}">{imp["imported"]}</a>' for imp in imports
)
# Return the code block with appended API reference links
return f'{original_code_block}\n\n{indent}API Reference: {api_links}'
return f"{original_code_block}\n\n{indent}API Reference: {api_links}"

# Apply the replace_code_block function to all matches in the markdown
updated_markdown = code_block_pattern.sub(replace_code_block, markdown)
Expand Down
17 changes: 13 additions & 4 deletions docs/_scripts/notebook_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import nbformat
from nbconvert.exporters import MarkdownExporter
from nbconvert.preprocessors import Preprocessor
import glob


class EscapePreprocessor(Preprocessor):
Expand Down Expand Up @@ -181,11 +182,19 @@ def _convert_notebooks(
raise ValueError("Either --output_dir or --replace must be specified")

output_dir_path = DOCS if replace else Path(output_dir)
notebooks = list(DOCS.rglob(pattern))

file_names = [notebook.name for notebook in notebooks]
# Get the directory where the script was executed
base_dir = os.getcwd()
# Build the full search pattern using the current working directory as the base
full_pattern = os.path.join(base_dir, args.pattern)

for notebook in notebooks:
# Use glob with recursive search enabled
matching_files = glob.glob(full_pattern, recursive=True)
paths = [Path(file) for file in matching_files]

file_names = [notebook.name for notebook in paths]

for notebook in paths:
markdown = convert_notebook(notebook, mode="exec")
markdown_path = output_dir_path / notebook.relative_to(DOCS).with_suffix(".md")
markdown_path.parent.mkdir(parents=True, exist_ok=True)
Expand Down Expand Up @@ -215,7 +224,7 @@ def replace_link(match: re.Match) -> str:
return match.group(0)

# Process all markdown files in the output directory.
for path in output_dir_path.rglob("*.md"):
for path in output_dir_path.rglob("**/*.md"):
with open(path, "r", encoding="utf-8") as f:
content = f.read()
new_content = re.sub(link_pattern, replace_link, content)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
{%- if 'magics_language' in cell.metadata -%}
{{ cell.metadata.magics_language}}
{%- elif 'name' in nb.metadata.get('language_info', {}) -%}
{{ nb.metadata.language_info.name }} exec="on" source="above" session="1"
{{ nb.metadata.language_info.name }} exec="on" source="above" session="1" result="ansi"
{%- endif %}
{{ cell.source}}
```
Expand Down
7 changes: 3 additions & 4 deletions docs/_scripts/notebook_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def handle_vcr_setup(
id = _hash_string(code)

if session is not None and session != "":
logger.info(f"new session {session} on page {document_filename}")
logger.info(f"new {language} session {session} on page {document_filename}")

cassette_prefix = document_filename.replace(".md", "").replace(os.path.sep, "_")

Expand Down Expand Up @@ -239,8 +239,7 @@ def handle_vcr_teardown(
if document_filename is None:
logger.warning(f"no document filename found while tearing down {session}!")
else:
logger.info(f"tearing down {session} on {document_filename}")
logger.info(traceback.format_stack())
logger.info(f"tearing down {language} {session} on {document_filename}")

kwargs = dict(
code=code,
Expand Down Expand Up @@ -274,7 +273,7 @@ def _on_page_markdown_with_config(

# Append API reference links to code blocks
if add_api_references:
markdown = update_markdown_with_imports(markdown)
markdown = update_markdown_with_imports(markdown, page.file.src_path)
# Apply highlight comments to code blocks
markdown = _highlight_code_blocks(markdown)

Expand Down
2 changes: 1 addition & 1 deletion docs/docs/concepts/breakpoints.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ We recommend that you [**use the `interrupt` function instead**](#the-interrupt-

??? node "`NodeInterrupt` exception"

The developer can define some *condition* that must be met for a breakpoint to be triggered. This concept of [dynamic breakpoints](./low_level.md#dynamic-breakpoints) is useful when the developer wants to halt the graph under *a particular condition*. This uses a `NodeInterrupt`, which is a special type of exception that can be raised from within a node based upon some condition. As an example, we can define a dynamic breakpoint that triggers when the `input` is longer than 5 characters.
The developer can define some *condition* that must be met for a breakpoint to be triggered. This concept of _dynamic breakpoints_ is useful when the developer wants to halt the graph under *a particular condition*. This uses a `NodeInterrupt`, which is a special type of exception that can be raised from within a node based upon some condition. As an example, we can define a dynamic breakpoint that triggers when the `input` is longer than 5 characters.

```python
def my_node(state: State) -> State:
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/concepts/low_level.md
Original file line number Diff line number Diff line change
Expand Up @@ -494,7 +494,7 @@ Read more about how the `interrupt` is used for **human-in-the-loop** workflows

## Breakpoints

Breakpoints pause graph execution at specific points and enable stepping through execution step by step. Breakpoints are powered by LangGraph's [**persistence layer**](./persistence.md), which saves the state after each graph step. Breakpoints can also be used to enable [**human-in-the-loop**](./human_in_the_loop.md) workflows, though we recommend using the [`interrupt` function](#interrupt-function) for this purpose.
Breakpoints pause graph execution at specific points and enable stepping through execution step by step. Breakpoints are powered by LangGraph's [**persistence layer**](./persistence.md), which saves the state after each graph step. Breakpoints can also be used to enable [**human-in-the-loop**](./human_in_the_loop.md) workflows, though we recommend using the [`interrupt` function](#interrupt) for this purpose.

Read more about breakpoints in the [Breakpoints conceptual guide](./breakpoints.md).

Expand Down
2 changes: 1 addition & 1 deletion docs/docs/how-tos/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Here you’ll find answers to “How do I...?” types of questions. These guide

### Graph API Basics

- [How to update graph state from nodes](state-reducers.ipynb)
- [How to update graph state from nodes](state-reducers.md)
- [How to create a sequence of steps](sequence.ipynb)
- [How to create branches for parallel execution](branching.ipynb)
- [How to create and control loops with recursion limits](recursion-limit.ipynb)
Expand Down
Loading

0 comments on commit b310ce0

Please sign in to comment.