From cb05d1b75e4dafb0c7b7df4f5925e6976263db70 Mon Sep 17 00:00:00 2001
From: Diego Canez
Date: Fri, 25 Oct 2024 17:50:56 +0200
Subject: [PATCH] docs: update docs

---
 .github/workflows/deploy_book.yaml | 60 +++++++++++++++++++
 cmake/CMakeLists.new.old.txt       | 60 -------------------
 cmake/CMakeLists_old.txt           | 55 ------------------
 cmake/FindTensorRT.cmake           | 93 ------------------------
 cu124.yaml                         |  1 +
 docs/src/part1/getting_started.md  | 76 ++++++++++++++------
 6 files changed, 118 insertions(+), 227 deletions(-)
 create mode 100644 .github/workflows/deploy_book.yaml
 delete mode 100644 cmake/CMakeLists.new.old.txt
 delete mode 100644 cmake/CMakeLists_old.txt
 delete mode 100644 cmake/FindTensorRT.cmake

diff --git a/.github/workflows/deploy_book.yaml b/.github/workflows/deploy_book.yaml
new file mode 100644
index 0000000..3bf9f48
--- /dev/null
+++ b/.github/workflows/deploy_book.yaml
@@ -0,0 +1,60 @@
+name: deploy-book
+
+# Run this when the master or main branch changes
+on:
+  push:
+    branches:
+    - master
+    - main
+    # If your git repository has the Jupyter Book within some-subfolder next to
+    # unrelated files, you can make this run only if a file within that specific
+    # folder has been modified.
+    #
+    paths:
+    - docs/src/**
+
+# This job installs dependencies, builds the book, and pushes it to `gh-pages`
+jobs:
+  deploy-book:
+    runs-on: ubuntu-latest
+    permissions:
+      pages: write
+      id-token: write
+    steps:
+    - uses: actions/checkout@v3
+
+    # Install dependencies
+    - name: Set up Python 3.11
+      uses: actions/setup-python@v4
+      with:
+        python-version: 3.11
+
+    - name: Install dependencies
+      run: |
+        pip install -r docs/requirements.txt
+
+    # (optional) Cache your executed notebooks between runs
+    # if you have config:
+    # execute:
+    #   execute_notebooks: cache
+    - name: cache executed notebooks
+      uses: actions/cache@v3
+      with:
+        path: docs/_build/.jupyter_cache
+        key: jupyter-book-cache-${{ hashFiles('docs/requirements.txt') }}
+
+    # Build the book
+    - name: Build the book
+      run: |
+        jupyter-book build docs/src --path-output docs
+
+    # Upload the book's HTML as an artifact
+    - name: Upload artifact
+      uses: actions/upload-pages-artifact@v2
+      with:
+        path: "docs/_build/html"
+
+    # Deploy the book's HTML to GitHub Pages
+    - name: Deploy to GitHub Pages
+      id: deployment
+      uses: actions/deploy-pages@v2
\ No newline at end of file
diff --git a/cmake/CMakeLists.new.old.txt b/cmake/CMakeLists.new.old.txt
deleted file mode 100644
index 0ffe137..0000000
--- a/cmake/CMakeLists.new.old.txt
+++ /dev/null
@@ -1,60 +0,0 @@
-cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
-project(example-app)
-
-# *** Set CUDA path ***
-set(CUDA_TOOLKIT_ROOT_DIR $ENV{CONDA_PREFIX})
-# *** Find python site-packages **
-find_package (Python COMPONENTS Interpreter Development)
-message(STATUS "PYTHON_EXECUTABLE: ${Python_EXECUTABLE}")
-execute_process(COMMAND ${Python_EXECUTABLE} -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())" OUTPUT_VARIABLE Python_PACKAGES OUTPUT_STRIP_TRAILING_WHITESPACE)
-message(STATUS "Python_PACKAGES: ${Python_PACKAGES}")
-
-
-# *** Set libtorch path ***
-# set libtorch path
-set(LIBTORCH_PATH "${PROJECT_SOURCE_DIR}/third-party/libtorch")
-set(LIBTORCH_TRT_PATH "${PROJECT_SOURCE_DIR}/third-party/torch_tensorrt")
-list(APPEND CMAKE_PREFIX_PATH "${LIBTORCH_PATH}")
-# *** Set libtensorrt path from Python_PACKAGES ***
-# list(APPEND CMAKE_PREFIX_PATH "${Python_PACKAGES}/tensorrt_libs")
-# list the files in the directory Python_PACKAGES/tensorrt_libs/*
-# file(GLOB TORCH_TENSORRT_LIBS ${PROJECT_SOURCE_DIR}/third-party/torch_tensorrt/lib/*runtime*)
-
-
-# file(GLOB TENSORRT_LIBS ${Python_PACKAGES}/tensorrt_libs/*)
-# list(REMOVE_ITEM TENSORRT_LIBS "${Python_PACKAGES}/tensorrt_libs/__init__.py")
-# message(STATUS "TENSORRT_LIBS: ${TENSORRT_LIBS}")
-# message(STATUS "TORCH_TENSORRT_LIBS: ${TORCH_TENSORRT_LIBS}")
-
-# print CMAKE_PREFIX_PATH
-message(STATUS "CMAKE_PREFIX_PATH: ${CMAKE_PREFIX_PATH}")
-set(TensorRT_DIR ${Python_PACKAGES}/tensorrt_libs)
-# find_package(TensorRT REQUIRED)
-# find_library(nvinfer_plugin)
-# add libnvinfer_plugin to TENSORRT_LIBS
-find_package(Torch REQUIRED)
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
-
-# add_executable(example-app example-app.cpp)
-add_executable(example-app example-app.cpp)
-
-target_link_libraries(example-app "${TORCH_LIBRARIES}")
-# -Wl,--no-as-needed -ltorchtrt -Wl,--as-needed
-# link directory to LIBTORCH_PATH/lib
-target_link_directories(example-app PRIVATE ${LIBTORCH_PATH}/lib)
-target_link_directories(example-app PRIVATE ${Python_PACKAGES}/tensorrt_libs)
-target_link_directories(example-app PRIVATE ${LIBTORCH_TRT_PATH}/lib)
-target_include_directories(example-app PRIVATE ${LIBTORCH_TRT_PATH}/include)
-# link libtorchtrt_runtime, libtorchtrt, libtorchtrt_plugins and nvinfer
-# target_link_libraries(example-app nvinfer)
-target_link_libraries(example-app -l:libnvinfer_plugin.so.10)
-target_link_libraries(example-app -l:libnvinfer.so.10)
-target_link_libraries(example-app torchtrt torch_global_deps)
-# target_link_libraries(example-app -Wl,--no-as-needed torchtrt_runtime )
-# include to tensorrt
-
-# target_link_libraries(example-app -Wl,--no-as-needed -ltorch_global_deps ${TENSORRT_LIBS})
-# target_link_libraries(example-app -Wl,--no-as-needed ${TORCH_TENSORRT_LIBS})
-
-
-set_property(TARGET example-app PROPERTY CXX_STANDARD 17)
diff --git a/cmake/CMakeLists_old.txt b/cmake/CMakeLists_old.txt
deleted file mode 100644
index 2221d2d..0000000
--- a/cmake/CMakeLists_old.txt
+++ /dev/null
@@ -1,55 +0,0 @@
-cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
-project(example-app)
-list(APPEND CMAKE_PREFIX_PATH "${PROJECT_SOURCE_DIR}/third-party/libtorch")
-set(CUDA_TOOLKIT_ROOT_DIR $ENV{CONDA_PREFIX})
-find_package(Torch REQUIRED)
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
-
-// done
-find_package (Python COMPONENTS Interpreter Development)
-# find_package(CUDA REQUIRED)
-# find_package(CUDAToolkit REQUIRED)
-message(STATUS "CUDA_INCLUDE_DIRS: ${CUDA_INCLUDE_DIRS}")
-# print cuda libraries
-message(STATUS "CUDA_LIBRARIES: ${CUDA_LIBRARIES}")
-
-add_executable(example-app example-app.cpp)
-# target_link_libraries(example-app "${CUDA_LIBRARIES}")
-# target_include_directories(example-app PRIVATE ${CUDA_INCLUDE_DIRS})
-include_directories(
-    ${PROJECT_SOURCE_DIR}/third-party/torch_tensorrt/include
-    )
-target_link_libraries(example-app "${TORCH_LIBRARIES}")
-# print the path to the python executable
-message(STATUS "PYTHON_EXECUTABLE: ${Python_EXECUTABLE}")
-# set variable Python_PACKAGES to site-packages from PYTHON_EXECUTABLE
-execute_process(COMMAND ${Python_EXECUTABLE} -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())" OUTPUT_VARIABLE Python_PACKAGES OUTPUT_STRIP_TRAILING_WHITESPACE)
-# print the path to the site-packages directory
-message(STATUS "Python_PACKAGES: ${Python_PACKAGES}")
-
-# link the torch_tensorrt libraries from Python_PACKAGES/tensorrt_libs/*
-# to the example-app executable
-# list all the files in the directory Python_PACKAGES/tensorrt_libs/*
-file(GLOB TORCH_TENSORRT_LIBS ${Python_PACKAGES}/tensorrt_libs/*)
-# remove __init__.py from the list of files
-list(REMOVE_ITEM TORCH_TENSORRT_LIBS "${Python_PACKAGES}/tensorrt_libs/__init__.py")
-# link the files in the variable TORCH_TENSORRT_LIBS to the example-app executable
-# -Wl,--no-as-needed is used to link the libraries even if they are not needed
-# target_link_libraries(example-app -Wl,--no-as-needed ${TORCH_TENSORRT_LIBS})
-
-# print the contents of the variable TORCH_TENSORRT_LIBS
-message(STATUS "TORCH_TENSORRT_LIBS: ${TORCH_TENSORRT_LIBS}")
-# target_link_libraries(example-app -Wl,--no-as-needed ${PROJECT_SOURCE_DIR}/third-party/torch_tensorrt/lib/libtorchtrt.so) #${PROJECT_SOURCE_DIR}/third-party/torch_tensorrt/lib/libtorchtrt_runtime.so ${PROJECT_SOURCE_DIR}/third-party/torch_tensorrt/lib/libtorchtrt_plugins.so)
-target_link_libraries(example-app "${TORCH_LIBRARIES}")
-target_link_libraries (example-app
-#    ${PROJECT_SOURCE_DIR}/third-party/libtorch/lib/libtorch.so
-#    ${PROJECT_SOURCE_DIR}/third-party/libtorch/lib/libtorch_cuda.so
-#    ${PROJECT_SOURCE_DIR}/third-party/libtorch/lib/libtorch_cpu.so
-    ${PROJECT_SOURCE_DIR}/third-party/libtorch/lib/libtorch_global_deps.so
-#    ${PROJECT_SOURCE_DIR}/third-party/libtorch/lib/libbackend_with_compiler.so
-#    ${PROJECT_SOURCE_DIR}/third-party/libtorch/lib/libc10.so
-#    ${PROJECT_SOURCE_DIR}/third-party/libtorch/lib/libc10_cuda.so
-    )
-
-# target_link_libraries (example-app
-set_property(TARGET example-app PROPERTY CXX_STANDARD 17)
\ No newline at end of file
diff --git a/cmake/FindTensorRT.cmake b/cmake/FindTensorRT.cmake
deleted file mode 100644
index 4658dd8..0000000
--- a/cmake/FindTensorRT.cmake
+++ /dev/null
@@ -1,93 +0,0 @@
-# source:
-# https://github.com/NVIDIA/tensorrt-laboratory/blob/master/cmake/FindTensorRT.cmake
-
-# This module defines the following variables:
-#
-# ::
-#
-#   TensorRT_INCLUDE_DIRS
-#   TensorRT_LIBRARIES
-#   TensorRT_FOUND
-#
-# ::
-#
-#   TensorRT_VERSION_STRING - version (x.y.z)
-#   TensorRT_VERSION_MAJOR  - major version (x)
-#   TensorRT_VERSION_MINOR  - minor version (y)
-#   TensorRT_VERSION_PATCH  - patch version (z)
-#
-# Hints
-# ^^^^^
-# A user may set ``TensorRT_DIR`` to an installation root to tell this module where to look.
-#
-set(_TensorRT_SEARCHES)
-
-if(TensorRT_DIR)
-  set(_TensorRT_SEARCH_ROOT PATHS ${TensorRT_DIR} NO_DEFAULT_PATH)
-  list(APPEND _TensorRT_SEARCHES _TensorRT_SEARCH_ROOT)
-endif()
-
-# appends some common paths
-set(_TensorRT_SEARCH_NORMAL
-  PATHS "/usr"
-)
-list(APPEND _TensorRT_SEARCHES _TensorRT_SEARCH_NORMAL)
-
-# Include dir
-foreach(search ${_TensorRT_SEARCHES})
-  find_path(TensorRT_INCLUDE_DIR NAMES NvInfer.h ${${search}} PATH_SUFFIXES include)
-endforeach()
-
-if(NOT TensorRT_LIBRARY)
-  foreach(search ${_TensorRT_SEARCHES})
-    find_library(TensorRT_LIBRARY NAMES nvinfer ${${search}} PATH_SUFFIXES lib)
-  endforeach()
-endif()
-
-if(NOT TensorRT_PARSERS_LIBRARY)
-  foreach(search ${_TensorRT_SEARCHES})
-    find_library(TensorRT_NVPARSERS_LIBRARY NAMES nvparsers ${${search}} PATH_SUFFIXES lib)
-  endforeach()
-endif()
-
-if(NOT TensorRT_NVONNXPARSER_LIBRARY)
-  foreach(search ${_TensorRT_SEARCHES})
-    find_library(TensorRT_NVONNXPARSER_LIBRARY NAMES nvonnxparser ${${search}} PATH_SUFFIXES lib)
-  endforeach()
-endif()
-
-if(NOT TensorRT_PLUGIN_LIBRARY)
-  foreach(search ${_TensorRT_SEARCHES})
-    find_library(TensorRT_PLUGIN_LIBRARY NAMES nvinfer_plugin ${${search}} PATH_SUFFIXES lib)
-  endforeach()
-endif()
-
-mark_as_advanced(TensorRT_INCLUDE_DIR)
-
-if(TensorRT_INCLUDE_DIR AND EXISTS "${TensorRT_INCLUDE_DIR}/NvInfer.h")
-  file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MAJOR REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$")
-  file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MINOR REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$")
-  file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInfer.h" TensorRT_PATCH REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$")
-
-  string(REGEX REPLACE "^#define NV_TENSORRT_MAJOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MAJOR "${TensorRT_MAJOR}")
-  string(REGEX REPLACE "^#define NV_TENSORRT_MINOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MINOR "${TensorRT_MINOR}")
-  string(REGEX REPLACE "^#define NV_TENSORRT_PATCH ([0-9]+).*$" "\\1" TensorRT_VERSION_PATCH "${TensorRT_PATCH}")
-  set(TensorRT_VERSION_STRING "${TensorRT_VERSION_MAJOR}.${TensorRT_VERSION_MINOR}.${TensorRT_VERSION_PATCH}")
-endif()
-
-include(FindPackageHandleStandardArgs)
-FIND_PACKAGE_HANDLE_STANDARD_ARGS(TensorRT REQUIRED_VARS TensorRT_LIBRARY TensorRT_INCLUDE_DIR VERSION_VAR TensorRT_VERSION_STRING)
-
-if(TensorRT_FOUND)
-  set(TensorRT_INCLUDE_DIRS ${TensorRT_INCLUDE_DIR})
-
-  if(NOT TensorRT_LIBRARIES)
-    set(TensorRT_LIBRARIES ${TensorRT_LIBRARY} ${TensorRT_PLUGIN_LIBRARY} ${TensorRT_NVONNXPARSER_LIBRARY} ${TensorRT_NVPARSERS_LIBRARY})
-  endif()
-
-  if(NOT TARGET TensorRT::TensorRT)
-    add_library(TensorRT::TensorRT UNKNOWN IMPORTED)
-    set_target_properties(TensorRT::TensorRT PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${TensorRT_INCLUDE_DIRS}")
-    set_property(TARGET TensorRT::TensorRT APPEND PROPERTY IMPORTED_LOCATION "${TensorRT_LIBRARY}")
-  endif()
-endif()
diff --git a/cu124.yaml b/cu124.yaml
index ab738a8..8a70061 100644
--- a/cu124.yaml
+++ b/cu124.yaml
@@ -11,3 +11,4 @@ dependencies:
   - gcc=13
   # nvidia-modelopt requires crypt.h, see: https://github.com/stanford-futuredata/ColBERT/issues/309
   - libxcrypt
+  - poetry
diff --git a/docs/src/part1/getting_started.md b/docs/src/part1/getting_started.md
index 367d66d..1d02a71 100644
--- a/docs/src/part1/getting_started.md
+++ b/docs/src/part1/getting_started.md
@@ -1,31 +1,68 @@
 # Getting Started
 
-TODO:
-- [ ] Introduce project structure
+```{contents}
+```
+
+## Project structure
+
+The project is structured as follows:
+
+```
+.
+├── artifacts       # Model weights and scripts I/O
+├── build           # Build directory (location of the cpp executables)
+├── cpp             # Source code for the cpp executables
+├── detrex          # Fork of detrex
+├── docs            # Documentation
+├── logs
+├── notebooks       # Jupyter notebooks
+├── output          # [Training] `scripts.train_net` outputs (tensorboard logs, weights, etc.)
+├── projects        # Configurations and model definitions
+├── scripts         # Utility scripts
+├── src             # Python source code
+├── third-party     # Third-party C libraries
+├── wandb_output    # Output from wandb
+├── CMakeLists.txt  # CMake configuration for the cpp executables
+├── cu124.yaml      # Conda environment file (only system dependencies: cuda, gcc, python)
+├── Makefile        # Makefile for project scripts
+├── poetry.lock     # Locked python dependencies
+├── pyproject.toml  # Poetry configuration
+└── README.md
+```
+
+The main folders to focus on are `src` and `scripts`, since that is where most of the source code lives.
 
 ## Installation
 
-TODO:
-- [ ] Write installation instructions nicely
+First, make sure the prerequisites are fulfilled (the bold ones are strictly required):
+- **Conda**
+- **Make**
+- CMake (for building the cpp executables)
+
+
+Let's start by creating the conda environment. This installs the CUDA runtime and libraries, Python, the Poetry dependency manager, and a few other system dependencies:
 
-Prerequirements:
-- Conda
-- Make
-- CMake
+```bash
+conda env create -f cu124.yaml
+conda activate cu124
+```
+
+To avoid TorchInductor and ModelOpt errors caused by a missing `crypt.h`, point `CPATH` at the environment's include directory:
 
-Create conda environment:
 ```bash
-conda create -f cu124.yaml
+conda env config vars set CPATH=$CONDA_PREFIX/include
 conda activate cu124
 ```
 
-Install python requirements:
+Installing the Python dependencies requires building some packages from source (`detrex`, `detectron2`), so use the provided make targets:
+
 ```bash
 make setup_python
 make setup_detrex
 ```
 
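+At this point a quick smoke test can confirm the environment works. This is a minimal check, assuming the `cu124` environment is active and the make targets above finished without errors:
+
+```bash
+# Should print the torch version and `True` on a machine with a working GPU setup
+python -c "import torch, detectron2; print(torch.__version__, torch.cuda.is_available())"
+```
+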
-If you need the C++ runtime with TensoRT:
+(Optional) If you need the C++ TensorRT runtime and the accompanying benchmark executables, you can build them with the following commands:
+
 ```bash
 make download_and_build_torchtrt
 # To build the `benchmark` executable
@@ -33,9 +70,12 @@
 make build_cpp
 make compile_cpp
 ```
 
-## Downloading datasets
+This will automatically download the necessary files and build the libraries for you.
+
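+As a sanity check, you can list the build outputs. This assumes the default layout from the project structure above; the exact file names may differ:
+
+```bash
+ls third-party/torch_tensorrt/lib  # the downloaded Torch-TensorRT libraries
+ls build/                          # should contain the `benchmark` executable
+```
+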
 
-If you have a designated folder for datasets, use it, for the purpose of this tutorial, we'll use `~/datasets`:
+## Downloading datasets (training only)
+
+If you have a designated folder for datasets, use it; for the purposes of this tutorial, we'll use `~/datasets`. We'll test with the COCO dataset, so let's download it:
 
 ```bash
 cd ~/datasets
 mkdir coco
 cd coco
 wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip
 wget http://images.cocodataset.org/zips/train2017.zip
 wget http://images.cocodataset.org/zips/val2017.zip
 unzip annotations_trainval2017.zip
 unzip train2017.zip
 unzip val2017.zip
 ```
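+
+After unzipping, the folder should look like this (the layout `detectron2` expects for COCO):
+
+```
+~/datasets/coco
+├── annotations/  # instances_train2017.json, instances_val2017.json, ...
+├── train2017/    # training images
+└── val2017/      # validation images
+```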
 
-## Setting up environment variables
+To point the `detectron2` library to the dataset directory, we need to set the `DETECTRON2_DATASETS` environment variable:
 
 ```bash
-# Necessary to avoid TorchInductor and ModelOpt errors looking for crypt.h
-export CPATH=$(CONDA_PREFIX)/include
-# To help detectron2 locate the dataset, set to your local path containing COCO
-export DETECTRON2_DATASETS=$DATASETS
+conda env config vars set DETECTRON2_DATASETS=~/datasets
+conda activate cu124
 ```
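+
+`conda env config vars set` only takes effect when the environment is activated, which is why we re-activate it here. A quick check that the variable is visible, assuming the steps above succeeded:
+
+```bash
+python -c "import os; print(os.environ['DETECTRON2_DATASETS'])"
+```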
\ No newline at end of file