autonomousvision · btoussai · Aug 13, 2024
diff --git a/gaussian_renderer/__init__.py b/gaussian_renderer/__init__.py
@@ -41,6 +41,8 @@ def render(viewpoint_camera, pc : GaussianModel, pipe, bg_color : torch.Tensor,
         image_width=int(viewpoint_camera.image_width),
         tanfovx=tanfovx,
         tanfovy=tanfovy,
+        cx=viewpoint_camera.cx,
+        cy=viewpoint_camera.cy,
         kernel_size=kernel_size,
         subpixel_offset=subpixel_offset,
         bg=bg_color,
@@ -141,6 +143,8 @@ def integrate(points3D, viewpoint_camera, pc : GaussianModel, pipe, bg_color : t
         image_width=int(viewpoint_camera.image_width),
         tanfovx=tanfovx,
         tanfovy=tanfovy,
+        cx=viewpoint_camera.cx,
+        cy=viewpoint_camera.cy,
         kernel_size=kernel_size,
         subpixel_offset=subpixel_offset,
         bg=bg_color,

diff --git a/scene/cameras.py b/scene/cameras.py
@@ -12,10 +12,10 @@
 import torch
 from torch import nn
 import numpy as np
-from utils.graphics_utils import getWorld2View2, getProjectionMatrix
+from utils.graphics_utils import getWorld2View2, getProjectionMatrix, getProjectionMatrixShift
 
 class Camera(nn.Module):
-    def __init__(self, colmap_id, R, T, FoVx, FoVy, image, gt_alpha_mask,
+    def __init__(self, colmap_id, R, T, FoVx, FoVy, focal_x, focal_y, cx, cy, image, gt_alpha_mask,
                  image_name, uid,
                  trans=np.array([0.0, 0.0, 0.0]), scale=1.0, data_device = "cuda"
                  ):
@@ -27,6 +27,10 @@ def __init__(self, colmap_id, R, T, FoVx, FoVy, image, gt_alpha_mask,
         self.T = T
         self.FoVx = FoVx
         self.FoVy = FoVy
+        self.focal_x = focal_x
+        self.focal_y = focal_y
+        self.cx = cx
+        self.cy = cy
         self.image_name = image_name
 
         try:
@@ -54,7 +58,11 @@ def __init__(self, colmap_id, R, T, FoVx, FoVy, image, gt_alpha_mask,
         self.scale = scale
 
         self.world_view_transform = torch.tensor(getWorld2View2(R, T, trans, scale)).transpose(0, 1).cuda()
-        self.projection_matrix = getProjectionMatrix(znear=self.znear, zfar=self.zfar, fovX=self.FoVx, fovY=self.FoVy).transpose(0,1).cuda()
+        # Pass focal lengths and the principal point (cx, cy) to the projection; the previous getProjectionMatrix call took only znear/zfar and the FoVs.
+        self.projection_matrix = getProjectionMatrixShift(znear=self.znear, zfar=self.zfar, 
+                                                          focal_x=self.focal_x, focal_y=self.focal_y, cx=self.cx, cy=self.cy,
+                                                          width=self.image_width, height=self.image_height,
+                                                          fovX=self.FoVx, fovY=self.FoVy).transpose(0,1).cuda()
         self.full_proj_transform = (self.world_view_transform.unsqueeze(0).bmm(self.projection_matrix.unsqueeze(0))).squeeze(0)
         self.camera_center = self.world_view_transform.inverse()[3, :3]
 
@@ -69,6 +77,8 @@ def __init__(self, width, height, fovy, fovx, znear, zfar, world_view_transform,
         self.image_height = height    
         self.FoVy = fovy
         self.FoVx = fovx
+        self.cx = width / 2.0
+        self.cy = height / 2.0
         self.znear = znear
         self.zfar = zfar
         self.world_view_transform = world_view_transform

diff --git a/scene/dataset_readers.py b/scene/dataset_readers.py
@@ -29,6 +29,10 @@ class CameraInfo(NamedTuple):
     T: np.array
     FovY: np.array
     FovX: np.array
+    focal_length_x: np.array
+    focal_length_y: np.array
+    cx: np.array
+    cy: np.array
     image: np.array
     image_path: str
     image_name: str
@@ -83,12 +87,16 @@ def readColmapCameras(cam_extrinsics, cam_intrinsics, images_folder):
         T = np.array(extr.tvec)
 
         if intr.model=="SIMPLE_PINHOLE":
-            focal_length_x = intr.params[0]
-            FovY = focal2fov(focal_length_x, height)
+            focal_length_y = focal_length_x = intr.params[0]  # SIMPLE_PINHOLE has one focal length shared by both axes
+            FovY = focal2fov(focal_length_y, height)
             FovX = focal2fov(focal_length_x, width)
+            cx = intr.params[1]  # COLMAP SIMPLE_PINHOLE params are (f, cx, cy): use the calibrated principal point
+            cy = intr.params[2]  # rather than assuming the image centre, matching the PINHOLE branch below
         elif intr.model=="PINHOLE":
             focal_length_x = intr.params[0]
             focal_length_y = intr.params[1]
+            cx = intr.params[2]
+            cy = intr.params[3]
             FovY = focal2fov(focal_length_y, height)
             FovX = focal2fov(focal_length_x, width)
         else:
@@ -103,8 +111,9 @@ def readColmapCameras(cam_extrinsics, cam_intrinsics, images_folder):
 
         image = Image.open(image_path)
 
-        cam_info = CameraInfo(uid=uid, R=R, T=T, FovY=FovY, FovX=FovX, image=image,
-                              image_path=image_path, image_name=image_name, width=width, height=height)
+        cam_info = CameraInfo(uid=uid, R=R, T=T, FovY=FovY, FovX=FovX, 
+                              focal_length_x=focal_length_x, focal_length_y=focal_length_y, cx=cx, cy=cy, 
+                              image=image, image_path=image_path, image_name=image_name, width=width, height=height)
         cam_infos.append(cam_info)
     sys.stdout.write('\n')
     return cam_infos

diff --git a/submodules/diff-gaussian-rasterization/cuda_rasterizer/backward.cu b/submodules/diff-gaussian-rasterization/cuda_rasterizer/backward.cu
@@ -636,8 +636,9 @@ __global__ void __launch_bounds__(BLOCK_X * BLOCK_Y)
 renderCUDA(
 	const uint2* __restrict__ ranges,
 	const uint32_t* __restrict__ point_list,
-	int W, int H,
-	float focal_x, float focal_y,
+	const int W, const int H,
+	const float focal_x, const float focal_y,
+	const float cx, const float cy,
 	const float2* __restrict__ subpixel_offset,
 	const float* __restrict__ bg_color,
 	const float2* __restrict__ points_xy_image,
@@ -679,7 +680,7 @@ renderCUDA(
 	int toDo = range.y - range.x;
 
 	// create the ray
-	float2 ray = { (pixf.x - W/2.) / focal_x, (pixf.y - H/2.) / focal_y };
+	float2 ray = { (pixf.x - cx) / focal_x, (pixf.y - cy) / focal_y };
 
 	__shared__ int collected_id[BLOCK_SIZE];
 	__shared__ float2 collected_xy[BLOCK_SIZE];
@@ -968,6 +969,7 @@ void BACKWARD::preprocess(
 	const float* viewmatrix,
 	const float* projmatrix,
 	const float focal_x, float focal_y,
+	const float cx, const float cy,
 	const float tan_fovx, float tan_fovy,
 	const float kernel_size,
 	const glm::vec3* campos,
@@ -1036,8 +1038,9 @@ void BACKWARD::render(
 	const dim3 grid, const dim3 block,
 	const uint2* ranges,
 	const uint32_t* point_list,
-	int W, int H,
-	float focal_x, float focal_y,
+	const int W, const int H,
+	const float focal_x, const float focal_y,
+	const float cx, const float cy,
 	const float2* subpixel_offset,
 	const float* bg_color,
 	const float2* means2D,
@@ -1066,6 +1069,7 @@ void BACKWARD::render(
 		point_list,
 		W, H,
 		focal_x, focal_y,
+		cx, cy,
 		subpixel_offset,
 		bg_color,
 		means2D,

diff --git a/submodules/diff-gaussian-rasterization/cuda_rasterizer/backward.h b/submodules/diff-gaussian-rasterization/cuda_rasterizer/backward.h
@@ -25,8 +25,9 @@ namespace BACKWARD
 		const dim3 grid, dim3 block,
 		const uint2* ranges,
 		const uint32_t* point_list,
-		int W, int H,
-		float focal_x, float focal_y,
+		const int W, const int H,
+		const float focal_x, const float focal_y,
+		const float cx, const float cy,
 		const float2* subpixel_offset,
 		const float* bg_color,
 		const float2* means2D,
@@ -63,8 +64,9 @@ namespace BACKWARD
 		const float* view2gaussian,
 		const float* view,
 		const float* proj,
-		const float focal_x, float focal_y,
-		const float tan_fovx, float tan_fovy,
+		const float focal_x, const float focal_y,
+		const float cx, const float cy,
+		const float tan_fovx, const float tan_fovy,
 		const float kernel_size,
 		const glm::vec3* campos,
 		float3* dL_dmean2D,

diff --git a/submodules/diff-gaussian-rasterization/cuda_rasterizer/forward.cu b/submodules/diff-gaussian-rasterization/cuda_rasterizer/forward.cu
@@ -411,8 +411,9 @@ __global__ void __launch_bounds__(BLOCK_X * BLOCK_Y)
 renderCUDA(
 	const uint2* __restrict__ ranges,
 	const uint32_t* __restrict__ point_list,
-	int W, int H,
-	float focal_x, float focal_y,
+	const int W, const int H,
+	const float focal_x, const float focal_y,
+	const float cx, const float cy,
 	const float2* __restrict__ subpixel_offset,
 	const float2* __restrict__ points_xy_image,
 	const float* __restrict__ features,
@@ -445,7 +446,7 @@ renderCUDA(
 	bool done = !inside;
 
 	// create the ray
-	float2 ray = { (pixf.x - W/2.) / focal_x, (pixf.y - H/2.) / focal_y };
+	float2 ray = { (pixf.x - cx) / focal_x, (pixf.y - cy) / focal_y };
 
 	// Load start/end range of IDs to process in bit sorted list.
 	uint2 range = ranges[block.group_index().y * horizontal_blocks + block.group_index().x];
@@ -615,8 +616,9 @@ void FORWARD::render(
 	const dim3 grid, dim3 block,
 	const uint2* ranges,
 	const uint32_t* point_list,
-	int W, int H,
-	float focal_x, float focal_y,
+	const int W, const int H,
+	const float focal_x, const float focal_y,
+	const float cx, const float cy,
 	const float2* subpixel_offset,
 	const float2* means2D,
 	const float* colors,
@@ -639,6 +641,7 @@ void FORWARD::render(
 		point_list,
 		W, H,
 		focal_x, focal_y,
+		cx, cy,
 		subpixel_offset,
 		means2D,
 		colors,
@@ -671,9 +674,10 @@ void FORWARD::preprocess(int P, int D, int M,
 	const float* viewmatrix,
 	const float* projmatrix,
 	const glm::vec3* cam_pos,
-	const int W, int H,
-	const float focal_x, float focal_y,
-	const float tan_fovx, float tan_fovy,
+	const int W, const int H,
+	const float focal_x, const float focal_y,
+	const float cx, const float cy,
+	const float tan_fovx, const float tan_fovy,
 	const float kernel_size,
 	int* radii,
 	float2* means2D,
@@ -725,9 +729,10 @@ __global__ void preprocessPointsCUDA(int P, int D, int M,
 	const float* viewmatrix,
 	const float* projmatrix,
 	const glm::vec3* cam_pos,
-	const int W, int H,
-	const float tan_fovx, float tan_fovy,
-	const float focal_x, float focal_y,
+	const int W, const int H,
+	const float tan_fovx, const float tan_fovy,
+	const float focal_x, const float focal_y,
+	const float cx, const float cy,
 	float2* points2D,
 	float* depths,
 	const dim3 grid,
@@ -753,7 +758,7 @@ __global__ void preprocessPointsCUDA(int P, int D, int M,
 	float p_w = 1.0f / (p_hom.w + 0.0000001f);
 	float3 p_proj = { p_hom.x * p_w, p_hom.y * p_w, p_hom.z * p_w };
 
-	float2 point_image = {focal_x * p_view.x / (p_view.z + 0.0000001f) + W/2., focal_y * p_view.y / (p_view.z + 0.0000001f) + H/2.};
+	float2 point_image = {focal_x * p_view.x / (p_view.z + 0.0000001f) + cx, focal_y * p_view.y / (p_view.z + 0.0000001f) + cy};
 
 	// If the point is outside the image, quit.
 	if (point_image.x < 0 || point_image.x >= W || point_image.y < 0 || point_image.y >= H)
@@ -770,9 +775,10 @@ void FORWARD::preprocess_points(int PN, int D, int M,
 		const float* viewmatrix,
 		const float* projmatrix,
 		const glm::vec3* cam_pos,
-		const int W, int H,
-		const float focal_x, float focal_y,
-		const float tan_fovx, float tan_fovy,
+		const int W, const int H,
+		const float focal_x, const float focal_y,
+		const float cx, const float cy,
+		const float tan_fovx, const float tan_fovy,
 		float2* points2D,
 		float* depths,
 		const dim3 grid,
@@ -788,6 +794,7 @@ void FORWARD::preprocess_points(int PN, int D, int M,
 		W, H,
 		tan_fovx, tan_fovy,
 		focal_x, focal_y,
+		cx, cy,
 		points2D,
 		depths,
 		grid,
@@ -807,8 +814,9 @@ integrateCUDA(
 	const uint2* __restrict__ point_ranges,
 	const uint32_t* __restrict__ gaussian_list,
 	const uint32_t* __restrict__ point_list,
-	int W, int H,
-	float focal_x, float focal_y,
+	const int W, const int H,
+	const float focal_x, const float focal_y,
+	const float cx, const float cy,
 	const float2* __restrict__ subpixel_offset,
 	const float2* __restrict__ points2D,
 	const float* __restrict__ features,
@@ -845,7 +853,7 @@ integrateCUDA(
 	const float depth_input = inside ? subpixel_offset[pix_id].x : 0.0f;
 
 	// create the ray
-	float2 ray = { (pixf.x - W/2.) / focal_x, (pixf.y - H/2.) / focal_y };
+	float2 ray = { (pixf.x - cx) / focal_x, (pixf.y - cy) / focal_y };
 
 	// Load start/end range of IDs to process in bit sorted list.
 	uint2 range = gaussian_ranges[block.group_index().y * horizontal_blocks + block.group_index().x];
@@ -917,7 +925,7 @@ integrateCUDA(
 
 			bool used = false;
 			for (int k = 0; k < 5; ++k){
-				float3 ray_point = { (pixf.x + offset_xs[k] - W/2.) / focal_x, (pixf.y + offset_ys[k] - H/2.) / focal_y, 1.0f };
+				float3 ray_point = { (pixf.x + offset_xs[k] - cx) / focal_x, (pixf.y + offset_ys[k] - cy) / focal_y, 1.0f };
 
 				const float normal[3] = { view2gaussian_j[0] * ray_point.x + view2gaussian_j[1] * ray_point.y + view2gaussian_j[2], 
 									      view2gaussian_j[1] * ray_point.x + view2gaussian_j[3] * ray_point.y + view2gaussian_j[4],
@@ -1155,7 +1163,7 @@ integrateCUDA(
 				// iterate over all projected points
 				for (int k = 0; k < num_projected; k++){
 					// create the ray
-					float3 ray_point = { (projected_xy[k].x - W/2.) / focal_x, (projected_xy[k].y - H/2.) / focal_y, 1.0 };
+					float3 ray_point = { (projected_xy[k].x - cx) / focal_x, (projected_xy[k].y - cy) / focal_y, 1.0 };
 					float ray_depth = projected_depth[k];
 
 					const float normal[3] = { view2gaussian_j[0] * ray_point.x + view2gaussian_j[1] * ray_point.y + view2gaussian_j[2], 
@@ -1223,8 +1231,9 @@ void FORWARD::integrate(
 	const uint2* point_ranges,
 	const uint32_t* gaussian_list,
 	const uint32_t* point_list,
-	int W, int H,
-	float focal_x, float focal_y,
+	const int W, const int H,
+	const float focal_x, const float focal_y,
+	const float cx, const float cy,
 	const float2* subpixel_offset,
 	const float2* points2D,
 	const float* colors,
@@ -1251,6 +1260,7 @@ void FORWARD::integrate(
 		point_list,
 		W, H,
 		focal_x, focal_y,
+		cx, cy,
 		subpixel_offset,
 		points2D,
 		colors,

diff --git a/submodules/diff-gaussian-rasterization/cuda_rasterizer/forward.h b/submodules/diff-gaussian-rasterization/cuda_rasterizer/forward.h
@@ -35,9 +35,10 @@ namespace FORWARD
 		const float* viewmatrix,
 		const float* projmatrix,
 		const glm::vec3* cam_pos,
-		const int W, int H,
-		const float focal_x, float focal_y,
-		const float tan_fovx, float tan_fovy,
+		const int W, const int H,
+		const float focal_x, const float focal_y,
+		const float cx, const float cy,
+		const float tan_fovx, const float tan_fovy,
 		const float kernel_size,
 		int* radii,
 		float2* points_xy_image,
@@ -55,8 +56,9 @@ namespace FORWARD
 		const dim3 grid, dim3 block,
 		const uint2* ranges,
 		const uint32_t* point_list,
-		int W, int H,
-		float focal_x, float focal_y,
+		const int W, const int H,
+		const float focal_x, const float focal_y,
+		const float cx, const float cy,
 		const float2* subpixel_offset,
 		const float2* points_xy_image,
 		const float* features,
@@ -80,9 +82,10 @@ namespace FORWARD
 		const float* viewmatrix,
 		const float* projmatrix,
 		const glm::vec3* cam_pos,
-		const int W, int H,
-		const float focal_x, float focal_y,
-		const float tan_fovx, float tan_fovy,
+		const int W, const int H,
+		const float focal_x, const float focal_y,
+		const float cx, const float cy,
+		const float tan_fovx, const float tan_fovy,
 		float2* points2D,
 		float* depths,
 		const dim3 grid,
@@ -96,8 +99,9 @@ namespace FORWARD
 		const uint2* point_ranges,
 		const uint32_t* gaussian_list,
 		const uint32_t* point_list,
-		int W, int H,
-		float focal_x, float focal_y,
+		const int W, const int H,
+		const float focal_x, const float focal_y,
+		const float cx, const float cy,
 		const float2* subpixel_offset,
 		const float2* points2D,
 		const float* features,