diff --git a/docs/curate-text/process-data/language-management/language.md b/docs/curate-text/process-data/language-management/language.md index 2ca9d8852..f9c3b6bdb 100644 --- a/docs/curate-text/process-data/language-management/language.md +++ b/docs/curate-text/process-data/language-management/language.md @@ -20,7 +20,7 @@ NeMo Curator's language identification system works through a three-step process 1. **Text Preprocessing**: For FastText classification, normalize input text by stripping whitespace and converting newlines to spaces. -2. **FastText Language Detection**: The pre-trained FastText language identification model ([`lid.176.bin`]((https://fasttext.cc/docs/en/language-identification.html))) analyzes the preprocessed text and returns: +2. **FastText Language Detection**: The pre-trained FastText language identification model ([`lid.176.bin`](https://fasttext.cc/docs/en/language-identification.html)) analyzes the preprocessed text and returns: - A confidence score (0.0 to 1.0) indicating certainty of the prediction - A language code (for example, "EN", "ES", "FR") in FastText's two-letter uppercase format diff --git a/docs/get-started/video.md b/docs/get-started/video.md index 8d22faa9c..691df11ac 100644 --- a/docs/get-started/video.md +++ b/docs/get-started/video.md @@ -7,6 +7,7 @@ difficulty: "beginner" content_type: "tutorial" modality: "video-only" only: not ga +orphan: true --- (gs-video)= @@ -48,7 +49,7 @@ docker tag nvcr.io/nvidia/nemo/nemo-curator-video:0.6.0 nemo_video_curator:1.0.0 ``` ```{seealso} -For details on video container environments and configurations, see [Video Curator Environments](reference-infrastructure-container-environments-video). +For details on video container environments and configurations, see Video Curator Environments. ``` ::: @@ -146,4 +147,4 @@ export PATH="$PATH:$HOME/.local/bin" ## Next Steps -Explore the [Video Curation documentation](video-overview). +Explore the Video Curation documentation. diff --git a/pyproject.toml b/pyproject.toml index 0ac4481c0..fe02003bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -132,6 +132,18 @@ all_nightly = [ "nemo_curator[image_nightly]", ] +[dependency-groups] +docs = [ + "sphinx", + "myst-parser", + "sphinx-autodoc2", + "sphinx-copybutton", + "nvidia-sphinx-theme", + "sphinx-design", + "sphinxcontrib-mermaid", + "swagger-plugin-for-sphinx", +] + [project.scripts] get_common_crawl_urls = "nemo_curator.scripts.get_common_crawl_urls:console_script" get_wikipedia_urls = "nemo_curator.scripts.get_wikipedia_urls:console_script" diff --git a/ray-curator/pyproject.toml b/ray-curator/pyproject.toml index 47d9771da..789480d0e 100644 --- a/ray-curator/pyproject.toml +++ b/ray-curator/pyproject.toml @@ -85,6 +85,18 @@ all = [ "ray_curator[video]", ] +[dependency-groups] +docs = [ + "sphinx", + "myst-parser", + "sphinx-autodoc2", + "sphinx-copybutton", + "nvidia-sphinx-theme", + "sphinx-design", + "sphinxcontrib-mermaid", + "swagger-plugin-for-sphinx", +] + [tool.pixi.workspace] channels = ["conda-forge"] platforms = ["linux-64", "linux-aarch64"] diff --git a/requirements-docs.txt b/requirements-docs.txt index ee2377720..583fa265c 100644 --- a/requirements-docs.txt +++ b/requirements-docs.txt @@ -5,8 +5,7 @@ sphinx-copybutton nvidia-sphinx-theme sphinx-autobuild sphinx-design -pinecone -openai +docutils python-dotenv sphinxcontrib-mermaid swagger-plugin-for-sphinx \ No newline at end of file