docs: improve sphinx docs and add favicon (#82)

yxlao · Dec 30, 2024 · 7360efd · 7360efd
1 parent 309a25a
commit 7360efd
Show file tree

Hide file tree

Showing 29 changed files with 678 additions and 683 deletions.
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
@@ -26,8 +26,7 @@ jobs:
 
     - name: Build documentation
       run: |
-        cd docs
-        make clean && make html # SPHINXOPTS="-W --keep-going"
+        make -C docs clean && make -C docs html SPHINXOPTS="-W --keep-going"
 
     - name: Notice
       run: |

diff --git a/README.md b/README.md
@@ -184,7 +184,7 @@ $$
 We follow the standard OpenCV-style camera coordinate system as illustrated at
 the beginning of the README.
 
-- **Camera coordinate:** right-handed, with $Z$ pointing away from the camera
+- **Camera Coordinates:** right-handed, with $Z$ pointing away from the camera
   towards the view direction and $Y$ axis pointing down. Note that the OpenCV
   convention (camtools' default) is different from the OpenGL/Blender
   convention, where $Z$ points towards the opposite view direction, $Y$ points
@@ -194,20 +194,20 @@ the beginning of the README.
   - `ct.convert.T_opengl_to_opencv()`
   - `ct.convert.pose_opencv_to_opengl()`
   - `ct.convert.pose_opengl_to_opencv()`
-- **Image coordinate:** starts from the top-left corner of the image, with $x$
+- **Image Coordinates:** start from the top-left corner of the image, with $x$
   pointing right (corresponding to the image width) and $y$ pointing down
   (corresponding to the image height). This is consistent with OpenCV. Note
   that the 0th dimension in the image array is the height (i.e., $y$)
   and the 1st dimension is the width (i.e., $x$). That is:
   - $x$ <=> $u$ <=> width <=> column <=> the 1st dimension
   - $y$ <=> $v$ <=> height <=> row <=> the 0th dimension
-- `K`: `(3, 3)` camera intrinsic matrix.
+- `K`: `(3, 3)` camera intrinsic.
   ```python
   K = [[fx,  s, cx],
        [ 0, fy, cy],
        [ 0,  0,  1]]
   ```
-- `T` or `W2C`: `(4, 4)` camera extrinsic matrix.
+- `T` or `W2C`: `(4, 4)` camera extrinsic.
   ```python
   T = [[R  | t   = [[R00, R01, R02, t0],
         0  | 1]]    [R10, R11, R12, t1],
@@ -234,7 +234,7 @@ the beginning of the README.
   t = T[:3, 3]
   ```
   - `t`'s shape is `(3,)`, not `(3, 1)`.
-- `pose` or `C2W`: `(4, 4)` camera pose matrix. It is the inverse of `T`.
+- `pose` or `C2W`: `(4, 4)` camera pose. It is the inverse of `T`.
   - `pose` is also known as the camera-to-world `C2W` matrix, which transforms a
     point from camera coordinates to world coordinates.
   - `pose` is the inverse of `T`, i.e., `pose == np.linalg.inv(T)`.

diff --git a/camtools/artifact.py b/camtools/artifact.py
@@ -1,3 +1,7 @@
+"""
+Functions for downloading artifacts from the camtools-artifacts repository.
+"""
+
 import requests
 from pathlib import Path
 

diff --git a/camtools/assets/camtools_logo_squre_dark.png b/camtools/assets/camtools_logo_squre_dark.png
diff --git a/camtools/assets/camtools_logo_squre_light.png b/camtools/assets/camtools_logo_squre_light.png
diff --git a/camtools/camera.py b/camtools/camera.py
@@ -1,3 +1,7 @@
+"""
+Functions for creating camera frustums and related visualizations.
+"""
+
 import open3d as o3d
 import numpy as np
 from typing import List, Optional, Dict, Tuple

diff --git a/camtools/colmap.py b/camtools/colmap.py
@@ -1,3 +1,7 @@
+"""
+Functions for reading and writing COLMAP models.
+"""
+
 # Copyright (c) 2022, ETH Zurich and UNC Chapel Hill.
 # All rights reserved.
 #

diff --git a/camtools/colormap.py b/camtools/colormap.py
@@ -1,3 +1,7 @@
+"""
+Functions for querying matplotlib's colormaps.
+"""
+
 import matplotlib
 import numpy as np
 from jaxtyping import Float

diff --git a/camtools/convert.py b/camtools/convert.py
@@ -1,3 +1,7 @@
+"""
+Functions for converting between different camera parameters and representations.
+"""
+
 import cv2
 import numpy as np
 import open3d as o3d
@@ -159,7 +163,7 @@ def T_to_C(T: Float[np.ndarray, "4 4"]) -> Float[np.ndarray, "3"]:
         T: Extrinsic matrix (world-to-camera) of shape (4, 4).
 
     Returns:
-        C: Camera center in world coordinates of shape (3,).
+        Camera center in world coordinates of shape (3,).
     """
     sanity.assert_T(T)
     R, t = T[:3, :3], T[:3, 3]
@@ -174,7 +178,7 @@ def pose_to_C(pose: Float[np.ndarray, "4 4"]) -> Float[np.ndarray, "3"]:
         pose: Pose matrix (camera-to-world) of shape (4, 4).
 
     Returns:
-        C: Camera center in world coordinates of shape (3,).
+        Camera center in world coordinates of shape (3,).
     """
     sanity.assert_pose(pose)
     C = pose[:3, 3]
@@ -189,8 +193,7 @@ def T_to_pose(T):
         T: Extrinsic matrix (world-to-camera) of shape (4, 4).
 
     Returns:
-        pose: Pose matrix (camera-to-world) of shape (4, 4),
-              which is the inverse of T.
+        Pose matrix (camera-to-world) of shape (4, 4), which is the inverse of T.
     """
     sanity.assert_T(T)
     return np.linalg.inv(T)
@@ -204,8 +207,8 @@ def pose_to_T(pose):
         pose: Pose matrix (camera-to-world) of shape (4, 4).
 
     Returns:
-        T: Extrinsic matrix (world-to-camera) of shape (4, 4),
-           which is the inverse of pose.
+        Extrinsic matrix (world-to-camera) of shape (4, 4), which is the inverse
+        of pose.
     """
     sanity.assert_T(pose)
     return np.linalg.inv(pose)
@@ -227,6 +230,12 @@ def T_opengl_to_opencv(T: Float[np.ndarray, "4 4"]) -> Float[np.ndarray, "4 4"]:
         - -Z: The view direction
         - Used in: OpenGL, Blender, Nerfstudio
           https://docs.nerf.studio/quickstart/data_conventions.html#coordinate-conventions
+
+    Args:
+        T: Extrinsic matrix (world-to-camera) of shape (4, 4) in OpenGL convention.
+
+    Returns:
+        Extrinsic matrix (world-to-camera) of shape (4, 4) in OpenCV convention.
     """
     sanity.assert_T(T)
     # pose = T_to_pose(T)
@@ -255,6 +264,12 @@ def T_opencv_to_opengl(T: Float[np.ndarray, "4 4"]) -> Float[np.ndarray, "4 4"]:
         - -Z: The view direction
         - Used in: OpenGL, Blender, Nerfstudio
           https://docs.nerf.studio/quickstart/data_conventions.html#coordinate-conventions
+
+    Args:
+        T: Extrinsic matrix (world-to-camera) of shape (4, 4) in OpenCV convention.
+
+    Returns:
+        Extrinsic matrix (world-to-camera) of shape (4, 4) in OpenGL convention.
     """
     sanity.assert_T(T)
     # pose = T_to_pose(T)
@@ -283,6 +298,12 @@ def pose_opengl_to_opencv(pose: Float[np.ndarray, "4 4"]) -> Float[np.ndarray, "
         - -Z: The view direction
         - Used in: OpenGL, Blender, Nerfstudio
           https://docs.nerf.studio/quickstart/data_conventions.html#coordinate-conventions
+
+    Args:
+        pose: Pose matrix (camera-to-world) of shape (4, 4) in OpenGL convention.
+
+    Returns:
+        Pose matrix (camera-to-world) of shape (4, 4) in OpenCV convention.
     """
     sanity.assert_pose(pose)
     pose = np.copy(pose)
@@ -308,6 +329,12 @@ def pose_opencv_to_opengl(pose: Float[np.ndarray, "4 4"]) -> Float[np.ndarray, "
         - -Z: The view direction
         - Used in: OpenGL, Blender, Nerfstudio
           https://docs.nerf.studio/quickstart/data_conventions.html#coordinate-conventions
+
+    Args:
+        pose: Pose matrix (camera-to-world) of shape (4, 4) in OpenCV convention.
+
+    Returns:
+        Pose matrix (camera-to-world) of shape (4, 4) in OpenGL convention.
     """
     sanity.assert_pose(pose)
     pose = np.copy(pose)
@@ -329,7 +356,7 @@ def R_t_to_C(
         t: Translation vector of shape (3,).
 
     Returns:
-        C: Camera center in world coordinates of shape (3,).
+        Camera center in world coordinates of shape (3,).
     """
     # Equivalently,
     # C = - R.T @ t
@@ -354,7 +381,7 @@ def R_C_to_t(
         C: Camera center in world coordinates of shape (3,) or (N, 3).
 
     Returns:
-        t: Translation vector of shape (3,) or (N, 3).
+        Translation vector of shape (3,) or (N, 3).
     """
     # https://github.com/isl-org/StableViewSynthesis/blob/main/data/create_custom_track.py
     C = C.reshape(-1, 3, 1)
@@ -416,7 +443,7 @@ def R_t_to_T(
         t: Translation vector of shape (3,).
 
     Returns:
-        T: Extrinsic matrix (world-to-camera) of shape (4, 4).
+        Extrinsic matrix (world-to-camera) of shape (4, 4).
     """
     T = np.eye(4)
     T[:3, :3] = R
@@ -434,9 +461,9 @@ def T_to_R_t(
         T: Extrinsic matrix (world-to-camera) of shape (4, 4).
 
     Returns:
-        Tuple containing:
-        - R: Rotation matrix of shape (3, 3)
-        - t: Translation vector of shape (3,)
+        Tuple[Float[np.ndarray, "3 3"], Float[np.ndarray, "3"]]:
+            - R: Rotation matrix of shape (3, 3)
+            - t: Translation vector of shape (3,)
     """
     sanity.assert_T(T)
     R = T[:3, :3]
@@ -455,10 +482,10 @@ def P_to_K_R_t(
         P: Projection matrix of shape (3, 4).
 
     Returns:
-        Tuple containing:
-        - K: Intrinsic matrix of shape (3, 3)
-        - R: Rotation matrix of shape (3, 3)
-        - t: Translation vector of shape (3,)
+        Tuple[Float[np.ndarray, "3 3"], Float[np.ndarray, "3 3"], Float[np.ndarray, "3"]]:
+            - K: Intrinsic matrix of shape (3, 3)
+            - R: Rotation matrix of shape (3, 3)
+            - t: Translation vector of shape (3,)
     """
     (
         camera_matrix,
@@ -488,9 +515,9 @@ def P_to_K_T(
         P: Projection matrix of shape (3, 4).
 
     Returns:
-        Tuple containing:
-        - K: Intrinsic matrix of shape (3, 3)
-        - T: Extrinsic matrix (world-to-camera) of shape (4, 4)
+        Tuple[Float[np.ndarray, "3 3"], Float[np.ndarray, "4 4"]]:
+            - K: Intrinsic matrix of shape (3, 3)
+            - T: Extrinsic matrix (world-to-camera) of shape (4, 4)
     """
     K, R, t = P_to_K_R_t(P)
     T = R_t_to_T(R, t)
@@ -509,7 +536,7 @@ def K_T_to_P(
         T: Extrinsic matrix (world-to-camera) of shape (4, 4).
 
     Returns:
-        P: Projection matrix of shape (3, 4).
+        Projection matrix of shape (3, 4).
     """
     return K @ T[:3, :]
 
@@ -647,11 +674,11 @@ def K_to_fx_fy_cx_cy(
         K: Intrinsic matrix of shape (3, 3).
 
     Returns:
-        Tuple containing:
-        - fx: Focal length in x direction
-        - fy: Focal length in y direction
-        - cx: Principal point x coordinate
-        - cy: Principal point y coordinate
+        Tuple[float, float, float, float]:
+            - fx: Focal length in x direction
+            - fy: Focal length in y direction
+            - cx: Principal point x coordinate
+            - cy: Principal point y coordinate
     """
     fx = K[0, 0]
     fy = K[1, 1]

diff --git a/camtools/geometry.py b/camtools/geometry.py
@@ -1,3 +1,7 @@
+"""
+Functions for creating and manipulating 3D geometries.
+"""
+
 import open3d as o3d
 import numpy as np
 from jaxtyping import Float