docs: add scripts for notebook conversion (#3406)

* Update notebook conversion code
* Convert one more file

---------

Co-authored-by: Ben Burns <[email protected]>
langchain-ai · Feb 13, 2025 · b310ce0 · b310ce0
1 parent 65976f3
commit b310ce0
Show file tree

Hide file tree

Showing 13 changed files with 1,234 additions and 475 deletions.
diff --git a/docs/_scripts/generate_api_reference_links.py b/docs/_scripts/generate_api_reference_links.py
@@ -1,8 +1,11 @@
 import importlib
+from importlib.machinery import ModuleSpec
+import importlib.util
 import inspect
 import logging
 import re
 from functools import lru_cache
+import sys
 from typing import List, Literal, Optional
 
 from typing_extensions import TypedDict
@@ -72,9 +75,8 @@ def _make_regular_expression(pkg_prefix: str) -> re.Pattern:
     if not pkg_prefix.isidentifier():
         raise ValueError(f"Invalid package prefix: {pkg_prefix}")
     return re.compile(
-        r"from\s+(" + pkg_prefix + "(?:_\w+)?(?:\.\w+)*?)\s+import\s+"
-        r"((?:\w+(?:,\s*)?)*"  # Match zero or more words separated by a comma+optional ws
-        r"(?:\s*\(.*?\))?)",  # Match optional parentheses block
+        r"from\s+(" + pkg_prefix + r"(?:_\w+)?(?:\.\w+)*?)\s+import\s+\(?"
+        r"((?:\w+(?:,\s*)?)*)\s*\)?",  # Match zero or more words separated by a comma+optional ws
         re.DOTALL,  # Match newlines as well
     )
 
@@ -85,22 +87,57 @@ def _make_regular_expression(pkg_prefix: str) -> re.Pattern:
 
 
 @lru_cache(maxsize=10_000)
-def _get_full_module_name(module_path: str, class_name: str) -> Optional[str]:
+def _get_full_module_name(
+    module_path: str, class_name: str | None, doc_title: str
+) -> Optional[str]:
     """Get full module name using inspect, with LRU cache to memoize results."""
     try:
-        module = importlib.import_module(module_path)
-        class_ = getattr(module, class_name)
-        module = inspect.getmodule(class_)
-        if module is None:
-            # For constants, inspect.getmodule() might return None
-            # In this case, we'll return the original module_path
-            return module_path
+        if module_path in sys.modules:
+            module = sys.modules[module_path]
+        else:
+            spec: ModuleSpec | None = importlib.util.find_spec(module_path)
+            if spec is not None:
+                module = importlib.util.module_from_spec(spec)
+                sys.modules[module_path] = module
+                spec.loader.exec_module(module)
+
+        if class_name is not None:
+            class_ = getattr(module, class_name)
+
+            if re.match(r"\w+\s+as\s+\w+", class_name):
+                # Handle cases like "A as B"
+                class_name, _ = class_name.split(" as ")
+
+            module = inspect.getmodule(class_)
+            if module is None:
+                # For constants, inspect.getmodule() might return None
+                # In this case, we'll return the original module_path
+                return module_path
         return module.__name__
     except AttributeError as e:
-        logger.warning(f"API Reference: Could not find module for {class_name}, {e}")
+        if class_name is not None:
+            # the class_name might actually be a module
+            # e.g. from langchain import hub
+            # try to import it as a module, and if that doesn't work, throw
+            if class_name is not None:
+                module_name = _get_full_module_name(
+                    f"{module_path}.{class_name}", None, doc_title
+                )
+                if module_name is not None:
+                    # return the name of the parent module, rather than the name of the class as though it were a module
+                    return module.__name__
+                logger.warning(
+                    f"API Reference: Could not find module for {class_name} in {module_path}, imported in doc {doc_title}, {e}"
+                )
+            # don't log if we're trying to import the "hub" part as though it were a module
+        logger.warning(
+            f"API Reference: Could not find module for {module_path}, imported in doc {doc_title}, {e}"
+        )
         return None
     except ImportError as e:
-        logger.warning(f"API Reference: Failed to load for class {class_name}, {e}")
+        logger.warning(
+            f"API Reference: Failed to import module {module_path} {doc_title}, {e}"
+        )
         return None
 
 
@@ -160,7 +197,22 @@ def _get_imports(
             if imp.strip()
         ]
         for class_name in imported_classes:
-            module_path = _get_full_module_name(module, class_name)
+            if module == "langchain_core.messages" and class_name == ")":
+                print("WARNING: ", file=sys.stderr)
+                print(
+                    f"WARNING: Trying to import {class_name} from {module} in doc {doc_title}",
+                    file=sys.stderr,
+                )
+                print("WARNING: ", file=sys.stderr)
+                print("WARNING:", import_match.group(0), file=sys.stderr)
+                print("WARNING: ", file=sys.stderr)
+                print(
+                    "\n".join([f"WARNING: {line}" for line in code.splitlines()]),
+                    file=sys.stderr,
+                )
+                print("WARNING: ", file=sys.stderr)
+
+            module_path = _get_full_module_name(module, class_name, doc_title)
             if not module_path:
                 continue
             if len(module_path.split(".")) < 2:
@@ -230,7 +282,7 @@ def get_imports(code: str, doc_title: str) -> List[ImportInformation]:
     return all_imports
 
 
-def update_markdown_with_imports(markdown: str) -> str:
+def update_markdown_with_imports(markdown: str, file_name: str) -> str:
     """Update markdown to include API reference links for imports in Python code blocks.
 
     This function scans the markdown content for Python code blocks, extracts any imports, and appends links to their API documentation.
@@ -250,7 +302,8 @@ def update_markdown_with_imports(markdown: str) -> str:
         This function will append an API reference link to the `TextGenerator` class from the `langchain.nlp` module if it's recognized.
     """
     code_block_pattern = re.compile(
-        r'(?P<indent>[ \t]*)```(?P<language>python|py)\n(?P<code>.*?)\n(?P=indent)```', re.DOTALL
+        r"(?P<indent>[ \t]*)```(?P<language>python|py)\n(?P<code>.*?)\n(?P=indent)```",
+        re.DOTALL,
     )
 
     def replace_code_block(match: re.Match) -> str:
@@ -262,23 +315,23 @@ def replace_code_block(match: re.Match) -> str:
         Returns:
             str: The modified code block with API reference links appended if applicable.
         """
-        indent = match.group('indent')
-        code_block = match.group('code')
-        language = match.group('language')  # Preserve the language from the regex match
+        indent = match.group("indent")
+        code_block = match.group("code")
+        language = match.group("language")  # Preserve the language from the regex match
         # Retrieve import information from the code block
-        imports = get_imports(code_block, "__unused__")
+        imports = get_imports(code_block, file_name)
 
         original_code_block = match.group(0)
         # If no imports are found, return the original code block
         if not imports:
             return original_code_block
 
         # Generate API reference links for each import
-        api_links = ' | '.join(
+        api_links = " | ".join(
             f'<a href="{imp["docs"]}">{imp["imported"]}</a>' for imp in imports
         )
         # Return the code block with appended API reference links
-        return f'{original_code_block}\n\n{indent}API Reference: {api_links}'
+        return f"{original_code_block}\n\n{indent}API Reference: {api_links}"
 
     # Apply the replace_code_block function to all matches in the markdown
     updated_markdown = code_block_pattern.sub(replace_code_block, markdown)

diff --git a/docs/_scripts/notebook_convert.py b/docs/_scripts/notebook_convert.py
@@ -7,6 +7,7 @@
 import nbformat
 from nbconvert.exporters import MarkdownExporter
 from nbconvert.preprocessors import Preprocessor
+import glob
 
 
 class EscapePreprocessor(Preprocessor):
@@ -181,11 +182,19 @@ def _convert_notebooks(
         raise ValueError("Either --output_dir or --replace must be specified")
 
     output_dir_path = DOCS if replace else Path(output_dir)
-    notebooks = list(DOCS.rglob(pattern))
 
-    file_names = [notebook.name for notebook in notebooks]
+    # Get the directory where the script was executed
+    base_dir = os.getcwd()
+    # Build the full search pattern using the current working directory as the base
+    full_pattern = os.path.join(base_dir, args.pattern)
 
-    for notebook in notebooks:
+    # Use glob with recursive search enabled
+    matching_files = glob.glob(full_pattern, recursive=True)
+    paths = [Path(file) for file in matching_files]
+
+    file_names = [notebook.name for notebook in paths]
+
+    for notebook in paths:
         markdown = convert_notebook(notebook, mode="exec")
         markdown_path = output_dir_path / notebook.relative_to(DOCS).with_suffix(".md")
         markdown_path.parent.mkdir(parents=True, exist_ok=True)
@@ -215,7 +224,7 @@ def replace_link(match: re.Match) -> str:
             return match.group(0)
 
         # Process all markdown files in the output directory.
-        for path in output_dir_path.rglob("*.md"):
+        for path in output_dir_path.rglob("**/*.md"):
             with open(path, "r", encoding="utf-8") as f:
                 content = f.read()
             new_content = re.sub(link_pattern, replace_link, content)

diff --git a/docs/_scripts/notebook_convert_templates/md_executable/index.md.j2 b/docs/_scripts/notebook_convert_templates/md_executable/index.md.j2
@@ -6,7 +6,7 @@
 {%- if 'magics_language' in cell.metadata  -%}
     {{ cell.metadata.magics_language}}
 {%- elif 'name' in nb.metadata.get('language_info', {}) -%}
-    {{ nb.metadata.language_info.name }} exec="on" source="above" session="1"
+    {{ nb.metadata.language_info.name }} exec="on" source="above" session="1" result="ansi"
 {%- endif %}
 {{ cell.source}}
 ```

diff --git a/docs/_scripts/notebook_hooks.py b/docs/_scripts/notebook_hooks.py
@@ -183,7 +183,7 @@ def handle_vcr_setup(
             id = _hash_string(code)
 
         if session is not None and session != "":
-            logger.info(f"new session {session} on page {document_filename}")
+            logger.info(f"new {language} session {session} on page {document_filename}")
 
         cassette_prefix = document_filename.replace(".md", "").replace(os.path.sep, "_")
 
@@ -239,8 +239,7 @@ def handle_vcr_teardown(
     if document_filename is None:
         logger.warning(f"no document filename found while tearing down {session}!")
     else:
-        logger.info(f"tearing down {session} on {document_filename}")
-        logger.info(traceback.format_stack())
+        logger.info(f"tearing down {language} {session} on {document_filename}")
 
     kwargs = dict(
         code=code,
@@ -274,7 +273,7 @@ def _on_page_markdown_with_config(
 
     # Append API reference links to code blocks
     if add_api_references:
-        markdown = update_markdown_with_imports(markdown)
+        markdown = update_markdown_with_imports(markdown, page.file.src_path)
     # Apply highlight comments to code blocks
     markdown = _highlight_code_blocks(markdown)
 

diff --git a/docs/docs/concepts/breakpoints.md b/docs/docs/concepts/breakpoints.md
@@ -88,7 +88,7 @@ We recommend that you [**use the `interrupt` function instead**](#the-interrupt-
 
 ??? node "`NodeInterrupt` exception"
 
-    The developer can define some *condition* that must be met for a breakpoint to be triggered. This concept of [dynamic breakpoints](./low_level.md#dynamic-breakpoints) is useful when the developer wants to halt the graph under *a particular condition*. This uses a `NodeInterrupt`, which is a special type of exception that can be raised from within a node based upon some condition. As an example, we can define a dynamic breakpoint that triggers when the `input` is longer than 5 characters.
+    The developer can define some *condition* that must be met for a breakpoint to be triggered. This concept of _dynamic breakpoints_ is useful when the developer wants to halt the graph under *a particular condition*. This uses a `NodeInterrupt`, which is a special type of exception that can be raised from within a node based upon some condition. As an example, we can define a dynamic breakpoint that triggers when the `input` is longer than 5 characters.
 
     ```python
     def my_node(state: State) -> State:

diff --git a/docs/docs/concepts/low_level.md b/docs/docs/concepts/low_level.md
@@ -494,7 +494,7 @@ Read more about how the `interrupt` is used for **human-in-the-loop** workflows
 
 ## Breakpoints
 
-Breakpoints pause graph execution at specific points and enable stepping through execution step by step. Breakpoints are powered by LangGraph's [**persistence layer**](./persistence.md), which saves the state after each graph step. Breakpoints can also be used to enable [**human-in-the-loop**](./human_in_the_loop.md) workflows, though we recommend using the [`interrupt` function](#interrupt-function) for this purpose.
+Breakpoints pause graph execution at specific points and enable stepping through execution step by step. Breakpoints are powered by LangGraph's [**persistence layer**](./persistence.md), which saves the state after each graph step. Breakpoints can also be used to enable [**human-in-the-loop**](./human_in_the_loop.md) workflows, though we recommend using the [`interrupt` function](#interrupt) for this purpose.
 
 Read more about breakpoints in the [Breakpoints conceptual guide](./breakpoints.md).
 

diff --git a/docs/docs/how-tos/index.md b/docs/docs/how-tos/index.md
@@ -11,7 +11,7 @@ Here you’ll find answers to “How do I...?” types of questions. These guide
 
 ### Graph API Basics
 
-- [How to update graph state from nodes](state-reducers.ipynb)
+- [How to update graph state from nodes](state-reducers.md)
 - [How to create a sequence of steps](sequence.ipynb)
 - [How to create branches for parallel execution](branching.ipynb)
 - [How to create and control loops with recursion limits](recursion-limit.ipynb)