From 4962caeabdeef7e1bcc5368b2433a839c6f2ee25 Mon Sep 17 00:00:00 2001
From: pablovela5620
Date: Tue, 4 Apr 2023 08:38:30 -0500
Subject: [PATCH] add high resolution monocular priors script

---
 compare_mono_priors.py                     |  50 +++
 scripts/datasets/extract_highres_cues.py   | 353 ++++++++++++++++++
 .../process_nerfstudio_to_sdfstudio.py     |  50 ++-
 3 files changed, 436 insertions(+), 17 deletions(-)
 create mode 100644 compare_mono_priors.py
 create mode 100644 scripts/datasets/extract_highres_cues.py

diff --git a/compare_mono_priors.py b/compare_mono_priors.py
new file mode 100644
index 00000000..8f0ba259
--- /dev/null
+++ b/compare_mono_priors.py
@@ -0,0 +1,50 @@
+import argparse
+from pathlib import Path
+
+import cv2
+import rerun as rr
+
+
+def main(lowres_path: Path, highres_path: Path):
+    lowres_img_paths = sorted(lowres_path.glob("*_rgb.png"))
+    highres_img_paths = sorted(highres_path.glob("*_rgb.png"))
+    lowres_depth_paths = sorted(lowres_path.glob("*_depth.png"))
+    highres_depth_paths = sorted(highres_path.glob("*_depth.png"))
+    lowres_normal_paths = sorted(lowres_path.glob("*_normal.png"))
+    highres_normal_paths = sorted(highres_path.glob("*_normal.png"))
+
+    num_samples = len(lowres_img_paths)
+
+    for i in range(num_samples):
+        rr.set_time_sequence("idx", i)
+        lowres_img = cv2.imread(str(lowres_img_paths[i]))
+        highres_img = cv2.imread(str(highres_img_paths[i]))
+        lowres_depth = cv2.imread(str(lowres_depth_paths[i]))
+        highres_depth = cv2.imread(str(highres_depth_paths[i]))
+        lowres_normal = cv2.imread(str(lowres_normal_paths[i]))
+        highres_normal = cv2.imread(str(highres_normal_paths[i]))
+
+        # log priors together to visualize easily
+        rr.log_image("compare-rgb/lowres", lowres_img[..., ::-1])
+        rr.log_image("compare-rgb/highres", highres_img[..., ::-1])
+        # rr.log_image("compare-depth/lowres", lowres_depth[..., ::-1])
+        # rr.log_image("compare-depth/highres", highres_depth[..., ::-1])
+        # rr.log_image("compare-normal/lowres", lowres_normal[..., ::-1])
+        # rr.log_image("compare-normal/highres", highres_normal[..., ::-1])
+
+        rr.log_image("depth-lowres", lowres_depth[..., ::-1])
+        rr.log_image("depth-highres", highres_depth[..., ::-1])
+        rr.log_image("normal-lowres", lowres_normal[..., ::-1])
+        rr.log_image("normal-highres", highres_normal[..., ::-1])
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="compare mono priors")
+    parser.add_argument('--lowres-path', type=Path, required=True, help='Path to directory containing lowres data')
+    parser.add_argument('--highres-path', type=Path, required=True, help='Path to directory containing highres data')
+    rr.script_add_args(parser)
+    args = parser.parse_args()
+    rr.script_setup(args, "my_application")
+    main(args.lowres_path, args.highres_path)
+    rr.script_teardown(args)
+
diff --git a/scripts/datasets/extract_highres_cues.py b/scripts/datasets/extract_highres_cues.py
new file mode 100644
index 00000000..3dd67d29
--- /dev/null
+++ b/scripts/datasets/extract_highres_cues.py
@@ -0,0 +1,353 @@
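+"""Extract high-resolution monocular depth and normal cues.
+
+Overview of the pipeline implemented below: each 768x768 RGB image is split
+into overlapping 384x384 patches, omnidata depth/normal predictions are
+generated per patch via extract_monocular_cues.py, and the per-patch
+predictions are stitched back together using scale/shift alignment for depth,
+rotation alignment for normals, and linear blending over the overlap regions.
+"""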
+import torch
+import numpy as np
+import cv2
+import os
+import glob
+from pathlib import Path
+from tqdm import tqdm
+import matplotlib.pyplot as plt
+import argparse
+
+# copy from vis-mvsnet
+def find_files(dir, exts=['*.png', '*.jpg']):
+    if os.path.isdir(dir):
+        # types should be ['*.png', '*.jpg']
+        files_grabbed = []
+        for ext in exts:
+            files_grabbed.extend(glob.glob(os.path.join(dir, ext)))
+        if len(files_grabbed) > 0:
+            files_grabbed = sorted(files_grabbed)
+        return files_grabbed
+    else:
+        return []
+
+# copy from vis-mvsnet
+def load_cam(file: str):
+    """ read camera txt file """
+    cam = np.zeros((2, 4, 4))
+    with open(file) as f:
+        words = f.read().split()
+    # read extrinsic
+    for i in range(0, 4):
+        for j in range(0, 4):
+            extrinsic_index = 4 * i + j + 1
+            cam[0][i][j] = words[extrinsic_index]
+
+    # read intrinsic
+    for i in range(0, 3):
+        for j in range(0, 3):
+            intrinsic_index = 3 * i + j + 18
+            cam[1][i][j] = words[intrinsic_index]
+
+    return cam
+
+# copy from MiDaS
+def compute_scale_and_shift(prediction, target, mask):
+    # system matrix: A = [[a_00, a_01], [a_10, a_11]]
+    a_00 = torch.sum(mask * prediction * prediction, (1, 2))
+    a_01 = torch.sum(mask * prediction, (1, 2))
+    a_11 = torch.sum(mask, (1, 2))
+
+    # right hand side: b = [b_0, b_1]
+    b_0 = torch.sum(mask * prediction * target, (1, 2))
+    b_1 = torch.sum(mask * target, (1, 2))
+
+    # solution: x = A^-1 . b = [[a_11, -a_01], [-a_10, a_00]] / (a_00 * a_11 - a_01 * a_10) . b
+    x_0 = torch.zeros_like(b_0)
+    x_1 = torch.zeros_like(b_1)
+
+    det = a_00 * a_11 - a_01 * a_01
+    valid = det.nonzero()
+
+    x_0[valid] = (a_11[valid] * b_0[valid] - a_01[valid] * b_1[valid]) / det[valid]
+    x_1[valid] = (-a_01[valid] * b_0[valid] + a_00[valid] * b_1[valid]) / det[valid]
+
+    return x_0, x_1
+
+# adapted from https://github.com/dakshaau/ICP/blob/master/icp.py#L4 for rotation only
+def best_fit_transform(A, B):
+    assert A.shape == B.shape
+
+    # get number of dimensions
+    m = A.shape[1]
+
+    # rotation only, so the points are used as-is (no centroid translation)
+    AA = A
+    BB = B
+
+    # rotation matrix
+    H = np.dot(AA.T, BB)
+    U, S, Vt = np.linalg.svd(H)
+    R = np.dot(Vt.T, U.T)
+
+    # special reflection case
+    if np.linalg.det(R) < 0:
+        Vt[m-1, :] *= -1
+        R = np.dot(Vt.T, U.T)
+
+    return R
+
+
+# TODO: merge the following 4 functions into a single function
+
+# align depth map in the x direction from left to right
+def align_x(depth1, depth2, s1, e1, s2, e2):
+    assert depth1.shape[0] == depth2.shape[0]
+    assert depth1.shape[1] == depth2.shape[1]
+
+    assert (e1 - s1) == (e2 - s2), f"{e1 - s1} | {e2 - s2}"
+    # align depth2 to depth1
+    scale, shift = compute_scale_and_shift(depth2[:, :, s2:e2], depth1[:, :, s1:e1], torch.ones_like(depth1[:, :, s1:e1]))
+
+    depth2_aligned = scale * depth2 + shift
+    result = torch.ones((1, depth1.shape[1], depth1.shape[2] + depth2.shape[2] - (e1 - s1)))
+
+    result[:, :, :s1] = depth1[:, :, :s1]
+    result[:, :, depth1.shape[2]:] = depth2_aligned[:, :, e2:]
+
+    weight = np.linspace(1, 0, (e1 - s1))[None, None, :]
+    result[:, :, s1:depth1.shape[2]] = depth1[:, :, s1:] * weight + depth2_aligned[:, :, :e2] * (1 - weight)
+
+    return result
+
+# align depth map in the y direction from top to bottom
+def align_y(depth1, depth2, s1, e1, s2, e2):
+    assert depth1.shape[0] == depth2.shape[0]
+    assert depth1.shape[2] == depth2.shape[2]
+
+    assert (e1 - s1) == (e2 - s2)
+    # align depth2 to depth1
+    scale, shift = compute_scale_and_shift(depth2[:, s2:e2, :], depth1[:, s1:e1, :], torch.ones_like(depth1[:, s1:e1, :]))
+
+    depth2_aligned = scale * depth2 + shift
+    result = torch.ones((1, depth1.shape[1] + depth2.shape[1] - (e1 - s1), depth1.shape[2]))
+
+    result[:, :s1, :] = depth1[:, :s1, :]
+    result[:, depth1.shape[1]:, :] = depth2_aligned[:, e2:, :]
+
+    weight = np.linspace(1, 0, (e1 - s1))[None, :, None]
+    result[:, s1:depth1.shape[1], :] = depth1[:, s1:, :] * weight + depth2_aligned[:, :e2, :] * (1 - weight)
+
+    return result
+
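+# Illustrative sketch of the scale/shift alignment used by align_x/align_y
+# above (hypothetical values, not taken from real data): for a noise-free
+# overlap the closed-form least-squares solution recovers the exact mapping,
+# e.g.
+#   pred = torch.rand(1, 8, 8)
+#   target = 2.0 * pred + 0.5
+#   s, t = compute_scale_and_shift(pred, target, torch.ones_like(pred))
+#   # s is approximately 2.0 and t approximately 0.5, so s * pred + t
+#   # reproduces target before the linear blending step.
+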
+# align normal map in the x direction from left to right
+def align_normal_x(normal1, normal2, s1, e1, s2, e2):
+    assert normal1.shape[0] == normal2.shape[0]
+    assert normal1.shape[1] == normal2.shape[1]
+
+    assert (e1 - s1) == (e2 - s2)
+
+    R = best_fit_transform(normal2[:, :, s2:e2].reshape(3, -1).T, normal1[:, :, s1:e1].reshape(3, -1).T)
+
+    normal2_aligned = (R @ normal2.reshape(3, -1)).reshape(normal2.shape)
+    result = np.ones((3, normal1.shape[1], normal1.shape[2] + normal2.shape[2] - (e1 - s1)))
+
+    result[:, :, :s1] = normal1[:, :, :s1]
+    result[:, :, normal1.shape[2]:] = normal2_aligned[:, :, e2:]
+
+    weight = np.linspace(1, 0, (e1 - s1))[None, None, :]
+
+    result[:, :, s1:normal1.shape[2]] = normal1[:, :, s1:] * weight + normal2_aligned[:, :, :e2] * (1 - weight)
+    result = result / (np.linalg.norm(result, axis=0) + 1e-15)[None]
+
+    return result
+
+# align normal map in the y direction from top to bottom
+def align_normal_y(normal1, normal2, s1, e1, s2, e2):
+    assert normal1.shape[0] == normal2.shape[0]
+    assert normal1.shape[2] == normal2.shape[2]
+
+    assert (e1 - s1) == (e2 - s2)
+
+    R = best_fit_transform(normal2[:, s2:e2, :].reshape(3, -1).T, normal1[:, s1:e1, :].reshape(3, -1).T)
+
+    normal2_aligned = (R @ normal2.reshape(3, -1)).reshape(normal2.shape)
+    result = np.ones((3, normal1.shape[1] + normal2.shape[1] - (e1 - s1), normal1.shape[2]))
+
+    result[:, :s1, :] = normal1[:, :s1, :]
+    result[:, normal1.shape[1]:, :] = normal2_aligned[:, e2:, :]
+
+    weight = np.linspace(1, 0, (e1 - s1))[None, :, None]
+
+    result[:, s1:normal1.shape[1], :] = normal1[:, s1:, :] * weight + normal2_aligned[:, :e2, :] * (1 - weight)
+    result = result / (np.linalg.norm(result, axis=0) + 1e-15)[None]
+
+    return result
+
+def create_patches(image_path: Path, image_dir: Path, out_index: int) -> None:
+    image = cv2.imread(str(image_path))
+    # images are assumed to be square
+    H, W = image.shape[:2]
+
+    assert H == W == 384*2, f"image size is not 384*2, but {H}x{W}"
+    size = 384
+    overlap = 128  # (128 + 128) -> 256 px of overlap between adjacent 384 px patches, with the middle section untouched
+    x = W // overlap
+    y = H // overlap
+
+    # crop images
+    for j in range(y-2):
+        for i in range(x-2):
+            image_cur = image[j*overlap:j*overlap+size, i*overlap:i*overlap+size, :]
+            # add _rgb to the end of the file name so that extract_monocular_cues.py can find it
+            target_file = image_dir / "patches" / f"{out_index:06d}_{j:02d}_{i:02d}_rgb.png"
+            target_file.parent.mkdir(exist_ok=True)
+            cv2.imwrite(str(target_file), image_cur)
+
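+# Note on the patch layout (derived from the constants above): with
+# H = W = 768, size = 384 and overlap = 128, the loops over range(y - 2) and
+# range(x - 2) produce a 4x4 grid of 384x384 patches whose top-left corners
+# step by 128 px, so horizontally or vertically adjacent patches share a
+# 256 px strip that merge_patches() below aligns and blends back together.
+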
+def merge_patches(image_dir, out_index, depth_patches_path, normal_patches_path):
+
+    H, W = 768, 768
+
+    assert H == W == 384*2
+    overlap = 128  # (128 + 128) -> 256 px of overlap between adjacent 384 px patches, with the middle section untouched
+    x = W // overlap
+    y = H // overlap
+
+    # align depth map
+    depths_row = []
+    # align depth maps from left to right row by row
+    for j in range(y-2):
+        depths = []
+        for i in range(x-2):
+            # depth_path = os.path.join(out_path, "%06d_%02d_%02d_depth.npy"%(out_index, j, i))
+            depth_path = depth_patches_path / f"{out_index:06d}_{j:02d}_{i:02d}_depth.npy"
+            depth = np.load(depth_path)
+            depth = torch.from_numpy(depth)[None]
+            depths.append(depth)
+
+        # align from left to right
+        depth_left = depths[0]
+        s1 = 128
+        s2 = 0
+        e2 = 128 * 2
+        for depth_right in depths[1:]:
+            depth_left = align_x(depth_left, depth_right, s1, depth_left.shape[2], s2, e2)
+            s1 += 128
+        depths_row.append(depth_left)
+
+    depth_top = depths_row[0]
+    # align depth maps from top to bottom
+    s1 = 128
+    s2 = 0
+    e2 = 128 * 2
+    for depth_bottom in depths_row[1:]:
+        depth_top = align_y(depth_top, depth_bottom, s1, depth_top.shape[1], s2, e2)
+        s1 += 128
+
+    depth_top = (depth_top - depth_top.min()) / (depth_top.max() - depth_top.min())
+
+    # final_depth_path = os.path.join(out_path_for_training, "%06d_depth.png"%(out_index))
+    final_depth_path = image_dir  # / "final_depth"
+
+    final_depth_path.mkdir(exist_ok=True)
+    plt.imsave(final_depth_path / f"{out_index:06d}_depth.png", depth_top[0].numpy(), cmap='viridis')
+    np.save(final_depth_path / f"{out_index:06d}_depth.npy", depth_top.detach().cpu().numpy()[0])
+
+
+    # normal
+    normals_row = []
+    # align normal maps from left to right row by row
+    for j in range(y-2):
+        normals = []
+        for i in range(x-2):
+            # normal_path = os.path.join(out_path, "%06d_%02d_%02d_normal.npy"%(out_index, j, i))
+            normal_path = normal_patches_path / f"{out_index:06d}_{j:02d}_{i:02d}_normal.npy"
+            normal = np.load(normal_path)
+            normal = normal * 2. - 1.
+            normal = normal / (np.linalg.norm(normal, axis=0) + 1e-15)[None]
+            normals.append(normal)
+
+        # align from left to right
+        normal_left = normals[0]
+        s1 = 128
+        s2 = 0
+        e2 = 128 * 2
+        for normal_right in normals[1:]:
+            normal_left = align_normal_x(normal_left, normal_right, s1, normal_left.shape[2], s2, e2)
+            s1 += 128
+        normals_row.append(normal_left)
+
+    normal_top = normals_row[0]
+    # align normal maps from top to bottom
+    s1 = 128
+    s2 = 0
+    e2 = 128 * 2
+    for normal_bottom in normals_row[1:]:
+        normal_top = align_normal_y(normal_top, normal_bottom, s1, normal_top.shape[1], s2, e2)
+        s1 += 128
+
+
+    final_normal_path = image_dir  # / "final_normal"
+    final_normal_path.mkdir(exist_ok=True)
+    plt.imsave(final_normal_path / f"{out_index:06d}_normal.png", np.moveaxis(normal_top, [0, 1, 2], [2, 0, 1]) * 0.5 + 0.5)
+    np.save(final_normal_path / f"{out_index:06d}_normal.npy", (normal_top + 1.) / 2.)
+
+def generate_monocular_priors(image_dir: Path,
+                              omnidata_path,
+                              pretrained_models):
+    assert image_dir.exists()
+
+    image_paths = sorted(image_dir.glob("*_rgb.png"))
+    patches_path = image_dir / "patches"
+
+    out_index = 0
+    # create patches
+    if not patches_path.exists():
+        for image_path in tqdm(image_paths, desc="Creating Patches"):
+            create_patches(image_path, image_dir, out_index)
+            out_index += 1
+
+    # merge patches
+    out_index = 0
+    # predict normals/depths for all patches
+    depth_patches_path = image_dir / 'depth_patches'
+    normal_patches_path = image_dir / 'normal_patches'
+
+    num_patches = len(list(patches_path.glob("*.png")))
+    num_depth_patches = len(list(depth_patches_path.glob('*.png')))
+    num_normal_patches = len(list(normal_patches_path.glob('*.png')))
+
+    # check to make sure that monocular patch predictions don't already exist
+    if num_patches != num_depth_patches:
+        print("Generating mono depth...")
+        os.system(
+            f"python scripts/datasets/extract_monocular_cues.py \
+            --omnidata_path {omnidata_path} \
+            --pretrained_model {pretrained_models} \
+            --img_path {image_dir / 'patches'} --output_path {depth_patches_path} \
+            --task depth"
+        )
+    if num_patches != num_normal_patches:
+        print("Generating mono normal...")
+        os.system(
+            f"python scripts/datasets/extract_monocular_cues.py \
+            --omnidata_path {omnidata_path} \
+            --pretrained_model {pretrained_models} \
+            --img_path {image_dir / 'patches'} --output_path {normal_patches_path} \
+            --task normal"
+        )
+    for image_path in tqdm(image_paths, desc="Merging Patches"):
+        merge_patches(image_dir, out_index, depth_patches_path, normal_patches_path)
+        out_index += 1
+
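+# Example invocation (hypothetical paths, shown for illustration only):
+#   python scripts/datasets/extract_highres_cues.py \
+#       --image-dir data/scene/ \
+#       --omnidata-path ~/omnidata/omnidata_tools/torch/ \
+#       --pretrained-models ~/omnidata/omnidata_tools/torch/pretrained_models/
+# The --image-dir folder is expected to hold the 768x768 *_rgb.png images
+# written by process_nerfstudio_to_sdfstudio.py when run with --crop-mult 2.
+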
images", type=Path) + parser.add_argument("--omnidata-path", dest="omnidata_path", + default="/home/user/omnidata/omnidata_tools/torch/", + help="path to omnidata model") + parser.add_argument("--pretrained-models", dest="pretrained_models", + default="/home/user/omnidata//omnidata_tools/torch/pretrained_models/", + help="path to pretrained models") + args = parser.parse_args() + + generate_monocular_priors(args.image_dir,args.omnidata_path, args.pretrained_models) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/datasets/process_nerfstudio_to_sdfstudio.py b/scripts/datasets/process_nerfstudio_to_sdfstudio.py index 9f32edbd..7ddb92d3 100644 --- a/scripts/datasets/process_nerfstudio_to_sdfstudio.py +++ b/scripts/datasets/process_nerfstudio_to_sdfstudio.py @@ -222,23 +222,36 @@ def main(args): if args.mono_prior: assert os.path.exists(args.pretrained_models), "Pretrained model path not found" assert os.path.exists(args.omnidata_path), "omnidata l path not found" - # generate mono depth and normal - print("Generating mono depth...") - os.system( - f"python scripts/datasets/extract_monocular_cues.py \ - --omnidata_path {args.omnidata_path} \ - --pretrained_model {args.pretrained_models} \ - --img_path {output_dir} --output_path {output_dir} \ - --task depth" - ) - print("Generating mono normal...") - os.system( - f"python scripts/datasets/extract_monocular_cues.py \ - --omnidata_path {args.omnidata_path} \ - --pretrained_model {args.pretrained_models} \ - --img_path {output_dir} --output_path {output_dir} \ - --task normal" - ) + if args.highres_mono_prior: + # make sure that crop mult is 2 + assert args.crop_mult == 2, "Crop mult must be 2 for highres mono prior" + # generate mono depth and normal + print("Generating High Resolution Monopriors...") + os.system( + f"python scripts/datasets/extract_highres_cues.py \ + --image-dir {output_dir} \ + --omnidata-path {args.omnidata_path} \ + --pretrained-model {args.pretrained_models} \ + " + ) + else: + # generate mono depth and normal + print("Generating mono depth...") + os.system( + f"python scripts/datasets/extract_monocular_cues.py \ + --omnidata_path {args.omnidata_path} \ + --pretrained_model {args.pretrained_models} \ + --img_path {output_dir} --output_path {output_dir} \ + --task depth" + ) + print("Generating mono normal...") + os.system( + f"python scripts/datasets/extract_monocular_cues.py \ + --omnidata_path {args.omnidata_path} \ + --pretrained_model {args.pretrained_models} \ + --img_path {output_dir} --output_path {output_dir} \ + --task normal" + ) print(f"Done! The processed data has been saved in {output_dir}") @@ -261,6 +274,9 @@ def main(args): parser.add_argument("--mono-prior", dest="mono_prior", action="store_true", help="Whether to generate mono-prior depths and normals. " "If enabled, the images will be cropped to 384*384") + parser.add_argument("--highres-mono-prior", action="store_true", + help="Whether to generate higher resolution mono-prior depths and normals. " + "If enabled, the images will be size to 768*768") parser.add_argument("--crop-mult", dest="crop_mult", type=int, default=1, help="image size will be resized to crop_mult*384, only take effect when enabling mono-prior") parser.add_argument("--omnidata-path", dest="omnidata_path",