diff --git a/droid_slam/data_readers/rgbd_utils.py b/droid_slam/data_readers/rgbd_utils.py index 1e674864..f3c726e0 100644 --- a/droid_slam/data_readers/rgbd_utils.py +++ b/droid_slam/data_readers/rgbd_utils.py @@ -2,7 +2,7 @@ import os.path as osp import torch -from lietorch import SE3 +from thirdparty.lietorch.lietorch import SE3 import geom.projective_ops as pops from scipy.spatial.transform import Rotation diff --git a/droid_slam/data_readers/tartan.py b/droid_slam/data_readers/tartan.py index 49780ca9..a98f25e6 100644 --- a/droid_slam/data_readers/tartan.py +++ b/droid_slam/data_readers/tartan.py @@ -6,7 +6,7 @@ import os import os.path as osp -from lietorch import SE3 +from thirdparty.lietorch.lietorch import SE3 from .base import RGBDDataset from .stream import RGBDStream diff --git a/droid_slam/depth_video.py b/droid_slam/depth_video.py index d209ece0..d87a41cc 100644 --- a/droid_slam/depth_video.py +++ b/droid_slam/depth_video.py @@ -1,6 +1,6 @@ import numpy as np import torch -import lietorch +from thirdparty.lietorch.lietorch import SE3 import droid_backends from torch.multiprocessing import Process, Queue, Lock, Value @@ -139,7 +139,7 @@ def normalize(self): def reproject(self, ii, jj): """ project points from ii -> jj """ ii, jj = DepthVideo.format_indicies(ii, jj) - Gs = lietorch.SE3(self.poses[None]) + Gs = SE3(self.poses[None]) coords, valid_mask = \ pops.projective_transform(Gs, self.disps[None], self.intrinsics[None], ii, jj) diff --git a/droid_slam/droid_backend.py b/droid_slam/droid_backend.py index 665eee0e..bce15c7b 100644 --- a/droid_slam/droid_backend.py +++ b/droid_slam/droid_backend.py @@ -2,7 +2,7 @@ import lietorch import numpy as np -from lietorch import SE3 +from thirdparty.lietorch.lietorch import SE3 from factor_graph import FactorGraph @@ -20,7 +20,7 @@ def __init__(self, net, video, args): self.backend_thresh = args.backend_thresh self.backend_radius = args.backend_radius self.backend_nms = args.backend_nms - + @torch.no_grad() def __call__(self, steps=12): """ main update """ @@ -31,9 +31,9 @@ def __call__(self, steps=12): graph = FactorGraph(self.video, self.update_op, corr_impl="alt", max_factors=16*t, upsample=self.upsample) - graph.add_proximity_factors(rad=self.backend_radius, - nms=self.backend_nms, - thresh=self.backend_thresh, + graph.add_proximity_factors(rad=self.backend_radius, + nms=self.backend_nms, + thresh=self.backend_thresh, beta=self.beta) graph.update_lowmem(steps=steps) diff --git a/droid_slam/droid_frontend.py b/droid_slam/droid_frontend.py index 5d2c56d6..ebadceda 100644 --- a/droid_slam/droid_frontend.py +++ b/droid_slam/droid_frontend.py @@ -2,7 +2,7 @@ import lietorch import numpy as np -from lietorch import SE3 +from thirdparty.lietorch.lietorch import SE3 from factor_graph import FactorGraph diff --git a/droid_slam/droid_net.py b/droid_slam/droid_net.py index 0e360314..639a75d0 100644 --- a/droid_slam/droid_net.py +++ b/droid_slam/droid_net.py @@ -9,7 +9,6 @@ from modules.gru import ConvGRU from modules.clipping import GradientClip -from lietorch import SE3 from geom.ba import BA import geom.projective_ops as pops @@ -77,6 +76,23 @@ def forward(self, net, ii): class UpdateModule(nn.Module): def __init__(self): + """ + Initialize the UpdateModule class. + This method sets up the neural network layers and components used in the UpdateModule. + It includes the following components: + - corr_encoder: A sequential model consisting of two convolutional layers with ReLU activations. + It processes the correlation planes. 
+ - flow_encoder: A sequential model consisting of two convolutional layers with ReLU activations. + It processes the optical flow. + - weight: A sequential model consisting of two convolutional layers with ReLU activations, + a GradientClip layer, and a Sigmoid activation. It computes the weights. + - delta: A sequential model consisting of two convolutional layers with ReLU activations + and a GradientClip layer. It computes the delta values. + - gru: A ConvGRU layer that processes the concatenated features from corr_encoder, flow_encoder, and other inputs. + - agg: A GraphAgg layer that performs graph aggregation. + The method also calculates the number of correlation planes (cor_planes) based on the input dimensions. + """ + super(UpdateModule, self).__init__() cor_planes = 4 * (2*3 + 1)**2 diff --git a/droid_slam/factor_graph.py b/droid_slam/factor_graph.py index eb605d63..49fc01ea 100644 --- a/droid_slam/factor_graph.py +++ b/droid_slam/factor_graph.py @@ -3,7 +3,7 @@ import numpy as np import matplotlib.pyplot as plt -from lietorch import SE3 +from thirdparty.lietorch.lietorch import SE3 from modules.corr import CorrBlock, AltCorrBlock import geom.projective_ops as pops @@ -119,7 +119,7 @@ def add_factors(self, ii, jj, remove=False): inp = self.video.inps[ii].to(self.device).unsqueeze(0) self.inp = inp if self.inp is None else torch.cat([self.inp, inp], 1) - with torch.cuda.amp.autocast(enabled=False): + with torch.amp.autocast('cuda', enabled=False): target, _ = self.video.reproject(ii, jj) weight = torch.zeros_like(target) diff --git a/droid_slam/geom/chol.py b/droid_slam/geom/chol.py index 86bc8f9b..21a20b99 100644 --- a/droid_slam/geom/chol.py +++ b/droid_slam/geom/chol.py @@ -1,6 +1,5 @@ import torch import torch.nn.functional as F -import geom.projective_ops as pops class CholeskySolver(torch.autograd.Function): @staticmethod diff --git a/droid_slam/geom/losses.py b/droid_slam/geom/losses.py index 7fa91cf3..b12048ba 100644 --- a/droid_slam/geom/losses.py +++ b/droid_slam/geom/losses.py @@ -1,7 +1,7 @@ from collections import OrderedDict import numpy as np import torch -from lietorch import SO3, SE3, Sim3 +from thirdparty.lietorch.lietorch import SO3, SE3, Sim3 from .graph_utils import graph_to_edge_list from .projective_ops import projective_transform diff --git a/droid_slam/geom/projective_ops.py b/droid_slam/geom/projective_ops.py index ad31ffde..e7c3318a 100644 --- a/droid_slam/geom/projective_ops.py +++ b/droid_slam/geom/projective_ops.py @@ -1,7 +1,7 @@ import torch import torch.nn.functional as F -from lietorch import SE3, Sim3 +from thirdparty.lietorch.lietorch import SE3, Sim3 MIN_DEPTH = 0.2 @@ -9,6 +9,17 @@ def extract_intrinsics(intrinsics): return intrinsics[...,None,None,:].unbind(dim=-1) def coords_grid(ht, wd, **kwargs): + """ + Generates a grid of coordinates. + Args: + ht (int): Height of the grid. + wd (int): Width of the grid. + **kwargs: Additional arguments to be passed to the `to` method of the tensors. + Returns: + torch.Tensor: A tensor of shape (ht, wd, 2) containing the coordinates grid. + The first channel contains the x-coordinates and the second channel contains the y-coordinates. 
+ """ + y, x = torch.meshgrid( torch.arange(ht).to(**kwargs).float(), torch.arange(wd).to(**kwargs).float()) @@ -100,7 +111,7 @@ def projective_transform(poses, depths, intrinsics, ii, jj, jacobian=False, retu X0, Jz = iproj(depths[:,ii], intrinsics[:,ii], jacobian=jacobian) # transform - Gij = poses[:,jj] * poses[:,ii].inv() + Gij = poses[:,jj] * poses[:,ii].inv() # TODO: se rompe aqui!!! Gij.data[:,ii==jj] = torch.as_tensor([-0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], device="cuda") X1, Ja = actp(Gij, X0, jacobian=jacobian) diff --git a/droid_slam/modules/corr.py b/droid_slam/modules/corr.py index 6a1e32ad..af5eff8f 100644 --- a/droid_slam/modules/corr.py +++ b/droid_slam/modules/corr.py @@ -22,6 +22,19 @@ def backward(ctx, grad_output): class CorrBlock: def __init__(self, fmap1, fmap2, num_levels=4, radius=3): + """ + Initializes the CorrBlock class. + Args: + fmap1 (torch.Tensor): The first feature map tensor. + fmap2 (torch.Tensor): The second feature map tensor. + num_levels (int, optional): The number of levels in the correlation pyramid. Default is 4. + radius (int, optional): The radius for correlation. Default is 3. + Attributes: + num_levels (int): The number of levels in the correlation pyramid. + radius (int): The radius for correlation. + corr_pyramid (list): A list to store the correlation pyramid. + """ + self.num_levels = num_levels self.radius = radius self.corr_pyramid = [] diff --git a/droid_slam/modules/extractor.py b/droid_slam/modules/extractor.py index 5807d45f..543f5709 100644 --- a/droid_slam/modules/extractor.py +++ b/droid_slam/modules/extractor.py @@ -181,9 +181,16 @@ def _make_layer(self, dim, stride=1): return nn.Sequential(*layers) def forward(self, x): - b, n, c1, h1, w1 = x.shape - x = x.view(b*n, c1, h1, w1) - + batch_size, num_views, \ + input_channels, input_height, input_width = x.shape + # Flatten batch and views together + x = x.view( + batch_size*num_views, + input_channels, + input_height, + input_width + ) + # Apply convolutions x = self.conv1(x) x = self.norm1(x) x = self.relu1(x) @@ -194,5 +201,5 @@ def forward(self, x): x = self.conv2(x) - _, c2, h2, w2 = x.shape - return x.view(b, n, c2, h2, w2) + batchxviews_size, output_channels, output_height, output_width = x.shape + return x.view(batch_size, num_views, output_channels, output_height, output_width) diff --git a/droid_slam/motion_filter.py b/droid_slam/motion_filter.py index 5cb18d9d..d39d7ebf 100644 --- a/droid_slam/motion_filter.py +++ b/droid_slam/motion_filter.py @@ -1,7 +1,6 @@ import cv2 import torch -import lietorch - +from thirdparty.lietorch.lietorch import SE3 from collections import OrderedDict from droid_net import DroidNet @@ -15,7 +14,7 @@ class MotionFilter: def __init__(self, net, video, thresh=2.5, device="cuda:0"): # split net modules - self.cnet = net.cnet + self.context_encoder = net.cnet self.fnet = net.fnet self.update = net.update @@ -28,55 +27,66 @@ def __init__(self, net, video, thresh=2.5, device="cuda:0"): # mean, std for image normalization self.MEAN = torch.as_tensor([0.485, 0.456, 0.406], device=self.device)[:, None, None] self.STDV = torch.as_tensor([0.229, 0.224, 0.225], device=self.device)[:, None, None] - - @torch.cuda.amp.autocast(enabled=True) + + @torch.autocast("cuda", enabled=True) def __context_encoder(self, image): """ context features """ - net, inp = self.cnet(image).split([128,128], dim=2) - return net.tanh().squeeze(0), inp.relu().squeeze(0) + feature_map = self.context_encoder(image) + context_features, input_features = feature_map.split([128, 128], 
dim=2) + return context_features.tanh().squeeze(0), input_features.relu().squeeze(0) - @torch.cuda.amp.autocast(enabled=True) + @torch.autocast("cuda", enabled=True) def __feature_encoder(self, image): """ features for correlation volume """ return self.fnet(image).squeeze(0) - @torch.cuda.amp.autocast(enabled=True) + @torch.autocast("cuda", enabled=True) @torch.no_grad() def track(self, tstamp, image, depth=None, intrinsics=None): """ main update operation - run on every frame in video """ - Id = lietorch.SE3.Identity(1,).data.squeeze() - ht = image.shape[-2] // 8 - wd = image.shape[-1] // 8 + IdentityTransformation = SE3.Identity(1,).data.squeeze() + height_div_8 = image.shape[-2] // 8 + width_div_8 = image.shape[-1] // 8 # normalize images - inputs = image[None, :, [2,1,0]].to(self.device) / 255.0 - inputs = inputs.sub_(self.MEAN).div_(self.STDV) + normalized_image_input = image[None, :, [2,1,0]].to(self.device) / 255.0 + normalized_image_input = normalized_image_input.sub_(self.MEAN).div_(self.STDV) # extract features - gmap = self.__feature_encoder(inputs) + image_feature_representation = self.__feature_encoder(normalized_image_input) ### always add first frame to the depth video ### if self.video.counter.value == 0: - net, inp = self.__context_encoder(inputs[:,[0]]) - self.net, self.inp, self.fmap = net, inp, gmap - self.video.append(tstamp, image[0], Id, 1.0, depth, intrinsics / 8.0, gmap, net[0,0], inp[0,0]) + context_features, context_input_features = self.__context_encoder(normalized_image_input[:,[0]]) + self.context_features, self.context_input_features, self.image_feature_map = context_features, context_input_features, image_feature_representation + self.video.append( + tstamp, + image[0], + IdentityTransformation, + 1.0, # Initial pose = Identity + depth, + intrinsics / 8.0, + image_feature_representation, + context_features[0,0], + context_input_features[0,0] + ) ### only add new frame if there is enough motion ### - else: + else: # index correlation volume - coords0 = pops.coords_grid(ht, wd, device=self.device)[None,None] - corr = CorrBlock(self.fmap[None,[0]], gmap[None,[0]])(coords0) + grid_coordinates = pops.coords_grid(height_div_8, width_div_8, device=self.device)[None,None] + correlation_features = CorrBlock(self.image_feature_map[None,[0]], image_feature_representation[None,[0]])(grid_coordinates) # approximate flow magnitude using 1 update iteration - _, delta, weight = self.update(self.net[None], self.inp[None], corr) + _, delta, weight = self.update(self.context_features[None], self.context_input_features[None], correlation_features) # check motion magnitue / add new frame to video if delta.norm(dim=-1).mean().item() > self.thresh: self.count = 0 - net, inp = self.__context_encoder(inputs[:,[0]]) - self.net, self.inp, self.fmap = net, inp, gmap - self.video.append(tstamp, image[0], None, None, depth, intrinsics / 8.0, gmap, net[0], inp[0]) + context_features, context_input_features = self.__context_encoder(normalized_image_input[:,[0]]) + self.context_features, self.context_input_features, self.image_feature_map = context_features, context_input_features, image_feature_representation + self.video.append(tstamp, image[0], None, None, depth, intrinsics / 8.0, image_feature_representation, context_features[0], context_input_features[0]) else: self.count += 1 diff --git a/droid_slam/trajectory_filler.py b/droid_slam/trajectory_filler.py index efa6269b..fcebeb5f 100644 --- a/droid_slam/trajectory_filler.py +++ b/droid_slam/trajectory_filler.py @@ -2,7 +2,8 @@ import 
torch import lietorch -from lietorch import SE3 +from thirdparty.lietorch.lietorch import SE3 +from thirdparty.lietorch.lietorch.groups import cat from collections import OrderedDict from factor_graph import FactorGraph from droid_net import DroidNet @@ -100,5 +101,5 @@ def __call__(self, image_stream): pose_list += self.__fill(tstamps, images, intrinsics) # stitch pose segments together - return lietorch.cat(pose_list, 0) + return cat(pose_list, 0) diff --git a/droid_slam/visualization.py b/droid_slam/visualization.py index b28cc755..32154676 100644 --- a/droid_slam/visualization.py +++ b/droid_slam/visualization.py @@ -7,7 +7,7 @@ import numpy as np import open3d as o3d -from lietorch import SE3 +from thirdparty.lietorch.lietorch import SE3 import geom.projective_ops as pops CAM_POINTS = np.array([ diff --git a/evaluation_scripts/test_euroc.py b/evaluation_scripts/test_euroc.py index d0214f5f..4010f93b 100644 --- a/evaluation_scripts/test_euroc.py +++ b/evaluation_scripts/test_euroc.py @@ -18,6 +18,7 @@ + def show_image(image): image = image.permute(1, 2, 0).cpu().numpy() cv2.imshow('image', image / 255.0) @@ -44,7 +45,7 @@ def image_stream(datapath, image_size=[320, 512], stereo=False, stride=1): 0.003680398547259526, 0.9999684752771629, -0.007035845251224894, -0.007729688520722713, 0.007064130529506649, 0.999945173484644 ]).reshape(3,3) - + P_r = np.array([435.2046959714599, 0, 367.4517211914062, -47.90639384423901, 0, 435.2046959714599, 252.2008514404297, 0, 0, 0, 1, 0]).reshape(3,4) map_r = cv2.initUndistortRectifyMap(K_r, d_r, R_r, P_r[:3,:3], (752, 480), cv2.CV_32F) @@ -58,15 +59,15 @@ def image_stream(datapath, image_size=[320, 512], stereo=False, stride=1): for t, (imgL, imgR) in enumerate(zip(images_left, images_right)): if stereo and not os.path.isfile(imgR): continue - tstamp = float(imgL.split('/')[-1][:-4]) + tstamp = float(imgL.split(os.sep)[-1][:-4]) images = [cv2.remap(cv2.imread(imgL), map_l[0], map_l[1], interpolation=cv2.INTER_LINEAR)] if stereo: images += [cv2.remap(cv2.imread(imgR), map_r[0], map_r[1], interpolation=cv2.INTER_LINEAR)] - + images = torch.from_numpy(np.stack(images, 0)) images = images.permute(0, 3, 1, 2).to("cuda:0", dtype=torch.float32) images = F.interpolate(images, image_size, mode="bilinear", align_corners=False) - + intrinsics = torch.as_tensor(intrinsics_vec).cuda() intrinsics[0] *= image_size[1] / wd0 intrinsics[1] *= image_size[0] / ht0 @@ -98,6 +99,9 @@ def image_stream(datapath, image_size=[320, 512], stereo=False, stride=1): parser.add_argument("--backend_thresh", type=float, default=24.0) parser.add_argument("--backend_radius", type=int, default=2) parser.add_argument("--backend_nms", type=int, default=2) + + parser.add_argument("--upsample", action="store_true", default=False) + args = parser.parse_args() torch.multiprocessing.set_start_method('spawn') @@ -107,11 +111,11 @@ def image_stream(datapath, image_size=[320, 512], stereo=False, stride=1): droid = Droid(args) time.sleep(5) - for (t, image, intrinsics) in tqdm(image_stream(args.datapath, stereo=args.stereo, stride=2)): droid.track(t, image, intrinsics=intrinsics) - traj_est = droid.terminate(image_stream(args.datapath, stride=1)) + img_stream = image_stream(args.datapath, stride=1) + traj_est = droid.terminate(img_stream) ### run evaluation ### @@ -121,9 +125,11 @@ def image_stream(datapath, image_size=[320, 512], stereo=False, stride=1): from evo.core import sync import evo.main_ape as main_ape from evo.core.metrics import PoseRelation + from evo.tools.plot import PlotMode + 
import matplotlib.pyplot as plt images_list = sorted(glob.glob(os.path.join(args.datapath, 'mav0/cam0/data/*.png'))) - tstamps = [float(x.split('/')[-1][:-4]) for x in images_list] + tstamps = [float(x.split('\\')[-1][:-4]) for x in images_list] traj_est = PoseTrajectory3D( positions_xyz=1.10 * traj_est[:,:3], @@ -139,4 +145,15 @@ def image_stream(datapath, image_size=[320, 512], stereo=False, stride=1): print(result) + # Plot trajectories + fig = plt.figure() + ax = fig.add_subplot(111, projection='3d') + ax.plot(traj_ref.positions_xyz[:, 0], traj_ref.positions_xyz[:, 1], traj_ref.positions_xyz[:, 2], label='Reference') + ax.plot(traj_est.positions_xyz[:, 0], traj_est.positions_xyz[:, 1], traj_est.positions_xyz[:, 2], label='Estimate') + ax.set_xlabel('X') + ax.set_ylabel('Y') + ax.set_label('Z') + ax.legend() + plt.show() + diff --git a/src/altcorr_kernel.cu b/src/altcorr_kernel.cu index f418f5cd..b53be293 100644 --- a/src/altcorr_kernel.cu +++ b/src/altcorr_kernel.cu @@ -306,7 +306,7 @@ std::vector altcorr_cuda_forward( const dim3 threads(BLOCK_H, BLOCK_W); - AT_DISPATCH_FLOATING_TYPES_AND_HALF(fmap1.type(), "altcorr_forward_kernel", ([&] { + AT_DISPATCH_FLOATING_TYPES_AND_HALF(fmap1.scalar_type(), "altcorr_forward_kernel", ([&] { altcorr_forward_kernel<<>>( fmap1.packed_accessor32(), fmap2.packed_accessor32(), diff --git a/src/correlation_kernels.cu b/src/correlation_kernels.cu index 2812067b..df5735cf 100644 --- a/src/correlation_kernels.cu +++ b/src/correlation_kernels.cu @@ -142,7 +142,7 @@ std::vector corr_index_cuda_forward( torch::Tensor corr = torch::zeros( {batch_size, 2*radius+1, 2*radius+1, ht, wd}, opts); - AT_DISPATCH_FLOATING_TYPES_AND_HALF(volume.type(), "sampler_forward_kernel", ([&] { + AT_DISPATCH_FLOATING_TYPES_AND_HALF(volume.scalar_type(), "sampler_forward_kernel", ([&] { corr_index_forward_kernel<<>>( volume.packed_accessor32(), coords.packed_accessor32(), @@ -173,7 +173,7 @@ std::vector corr_index_cuda_backward( const dim3 threads(BLOCK, BLOCK); - AT_DISPATCH_FLOATING_TYPES_AND_HALF(volume.type(), "sampler_backward_kernel", ([&] { + AT_DISPATCH_FLOATING_TYPES_AND_HALF(volume.scalar_type(), "sampler_backward_kernel", ([&] { corr_index_backward_kernel<<>>( coords.packed_accessor32(), corr_grad.packed_accessor32(), diff --git a/src/droid_kernels.cu b/src/droid_kernels.cu index 16ac697f..de7ec2de 100644 --- a/src/droid_kernels.cu +++ b/src/droid_kernels.cu @@ -16,9 +16,16 @@ #include #include +#ifdef _WIN32 + #include + typedef int64_t LongType; +#else + typedef long LongType; +#endif + typedef Eigen::SparseMatrix SpMat; typedef Eigen::Triplet T; -typedef std::vector> graph_t; +typedef std::vector> graph_t; typedef std::vector tensor_list_t; @@ -179,8 +186,8 @@ __global__ void projective_transform_kernel( const torch::PackedTensorAccessor32 poses, const torch::PackedTensorAccessor32 disps, const torch::PackedTensorAccessor32 intrinsics, - const torch::PackedTensorAccessor32 ii, - const torch::PackedTensorAccessor32 jj, + const torch::PackedTensorAccessor32 ii, + const torch::PackedTensorAccessor32 jj, torch::PackedTensorAccessor32 Hs, torch::PackedTensorAccessor32 vs, torch::PackedTensorAccessor32 Eii, @@ -428,8 +435,8 @@ __global__ void projmap_kernel( const torch::PackedTensorAccessor32 poses, const torch::PackedTensorAccessor32 disps, const torch::PackedTensorAccessor32 intrinsics, - const torch::PackedTensorAccessor32 ii, - const torch::PackedTensorAccessor32 jj, + const torch::PackedTensorAccessor32 ii, + const torch::PackedTensorAccessor32 jj, 
torch::PackedTensorAccessor32 coords, torch::PackedTensorAccessor32 valid) { @@ -519,8 +526,8 @@ __global__ void frame_distance_kernel( const torch::PackedTensorAccessor32 poses, const torch::PackedTensorAccessor32 disps, const torch::PackedTensorAccessor32 intrinsics, - const torch::PackedTensorAccessor32 ii, - const torch::PackedTensorAccessor32 jj, + const torch::PackedTensorAccessor32 ii, + const torch::PackedTensorAccessor32 jj, torch::PackedTensorAccessor32 dist, const float beta) { @@ -662,7 +669,7 @@ __global__ void depth_filter_kernel( const torch::PackedTensorAccessor32 poses, const torch::PackedTensorAccessor32 disps, const torch::PackedTensorAccessor32 intrinsics, - const torch::PackedTensorAccessor32 inds, + const torch::PackedTensorAccessor32 inds, const torch::PackedTensorAccessor32 thresh, torch::PackedTensorAccessor32 counter) { @@ -853,8 +860,8 @@ __global__ void iproj_kernel( __global__ void accum_kernel( const torch::PackedTensorAccessor32 inps, - const torch::PackedTensorAccessor32 ptrs, - const torch::PackedTensorAccessor32 idxs, + const torch::PackedTensorAccessor32 ptrs, + const torch::PackedTensorAccessor32 idxs, torch::PackedTensorAccessor32 outs) { @@ -933,7 +940,7 @@ __global__ void pose_retr_kernel( __global__ void disp_retr_kernel( torch::PackedTensorAccessor32 disps, const torch::PackedTensorAccessor32 dz, - const torch::PackedTensorAccessor32 inds) + const torch::PackedTensorAccessor32 inds) { const int i = inds[blockIdx.x]; const int ht = disps.size(1); @@ -950,9 +957,9 @@ torch::Tensor accum_cuda(torch::Tensor data, torch::Tensor ix, torch::Tensor jx) torch::Tensor jx_cpu = jx.to(torch::kCPU); torch::Tensor inds = torch::argsort(ix_cpu); - long* ix_data = ix_cpu.data_ptr(); - long* jx_data = jx_cpu.data_ptr(); - long* kx_data = inds.data_ptr(); + LongType* ix_data = ix_cpu.data_ptr(); + LongType* jx_data = jx_cpu.data_ptr(); + LongType* kx_data = inds.data_ptr(); int count = jx.size(0); std::vector cols; @@ -960,7 +967,7 @@ torch::Tensor accum_cuda(torch::Tensor data, torch::Tensor ix, torch::Tensor jx) torch::Tensor ptrs_cpu = torch::zeros({count+1}, torch::TensorOptions().dtype(torch::kInt64)); - long* ptrs_data = ptrs_cpu.data_ptr(); + LongType* ptrs_data = ptrs_cpu.data_ptr(); ptrs_data[0] = 0; int i = 0; @@ -973,10 +980,10 @@ torch::Tensor accum_cuda(torch::Tensor data, torch::Tensor ix, torch::Tensor jx) ptrs_data[j+1] = cols.size(); } - torch::Tensor idxs_cpu = torch::zeros({long(cols.size())}, + torch::Tensor idxs_cpu = torch::zeros({LongType(cols.size())}, torch::TensorOptions().dtype(torch::kInt64)); - long* idxs_data = idxs_cpu.data_ptr(); + LongType* idxs_data = idxs_cpu.data_ptr(); for (int i=0; i>>( data.packed_accessor32(), - ptrs.packed_accessor32(), - idxs.packed_accessor32(), + ptrs.packed_accessor32(), + idxs.packed_accessor32(), out.packed_accessor32()); return out; @@ -1001,7 +1008,7 @@ torch::Tensor accum_cuda(torch::Tensor data, torch::Tensor ix, torch::Tensor jx) __global__ void EEt6x6_kernel( const torch::PackedTensorAccessor32 E, const torch::PackedTensorAccessor32 Q, - const torch::PackedTensorAccessor32 idx, + const torch::PackedTensorAccessor32 idx, torch::PackedTensorAccessor32 S) { @@ -1060,7 +1067,7 @@ __global__ void Ev6x1_kernel( const torch::PackedTensorAccessor32 E, const torch::PackedTensorAccessor32 Q, const torch::PackedTensorAccessor32 w, - const torch::PackedTensorAccessor32 idx, + const torch::PackedTensorAccessor32 idx, torch::PackedTensorAccessor32 v) { const int D = E.size(2); @@ -1095,7 +1102,7 @@ __global__ 
void Ev6x1_kernel( __global__ void EvT6x1_kernel( const torch::PackedTensorAccessor32 E, const torch::PackedTensorAccessor32 x, - const torch::PackedTensorAccessor32 idx, + const torch::PackedTensorAccessor32 idx, torch::PackedTensorAccessor32 w) { @@ -1135,8 +1142,8 @@ class SparseBlock { auto jj_cpu = jj.to(torch::kCPU).to(torch::kInt64); auto As_acc = As_cpu.accessor(); - auto ii_acc = ii_cpu.accessor(); - auto jj_acc = jj_cpu.accessor(); + auto ii_acc = ii_cpu.accessor(); + auto jj_acc = jj_cpu.accessor(); std::vector tripletList; for (int n=0; n(); - auto ii_acc = ii_cpu.accessor(); + auto ii_acc = ii_cpu.accessor(); for (int n=0; n(); - const long* jj_data = jj_cpu.data_ptr(); - const long* kk_data = kk_cpu.data_ptr(); + const LongType* ii_data = ii_cpu.data_ptr(); + const LongType* jj_data = jj_cpu.data_ptr(); + const LongType* kk_data = kk_cpu.data_ptr(); - std::vector> graph(P); - std::vector> index(P); + std::vector> graph(P); + std::vector> index(P); for (int n=0; n ii_list, jj_list, idx, jdx; + std::vector ii_list, jj_list, idx, jdx; for (int i=0; i>>( E.packed_accessor32(), Q.packed_accessor32(), - ix_cuda.packed_accessor32(), + ix_cuda.packed_accessor32(), S.packed_accessor32()); Ev6x1_kernel<<>>( E.packed_accessor32(), Q.packed_accessor32(), w.packed_accessor32(), - jx_cuda.packed_accessor32(), + jx_cuda.packed_accessor32(), v.packed_accessor32()); // schur block @@ -1362,8 +1369,8 @@ std::vector ba_cuda( poses.packed_accessor32(), disps.packed_accessor32(), intrinsics.packed_accessor32(), - ii.packed_accessor32(), - jj.packed_accessor32(), + ii.packed_accessor32(), + jj.packed_accessor32(), Hs.packed_accessor32(), vs.packed_accessor32(), Eii.packed_accessor32(), @@ -1411,7 +1418,7 @@ std::vector ba_cuda( EvT6x1_kernel<<>>( E.packed_accessor32(), dx.packed_accessor32(), - ix.packed_accessor32(), + ix.packed_accessor32(), dw.packed_accessor32()); dz = Q * (w - accum_cuda(dw, ii_exp, kx)); @@ -1425,7 +1432,7 @@ std::vector ba_cuda( disp_retr_kernel<<>>( disps.packed_accessor32(), dz.packed_accessor32(), - kx.packed_accessor32()); + kx.packed_accessor32()); } } @@ -1452,8 +1459,8 @@ torch::Tensor frame_distance_cuda( poses.packed_accessor32(), disps.packed_accessor32(), intrinsics.packed_accessor32(), - ii.packed_accessor32(), - jj.packed_accessor32(), + ii.packed_accessor32(), + jj.packed_accessor32(), dist.packed_accessor32(), beta); return dist; @@ -1479,8 +1486,8 @@ std::vector projmap_cuda( poses.packed_accessor32(), disps.packed_accessor32(), intrinsics.packed_accessor32(), - ii.packed_accessor32(), - jj.packed_accessor32(), + ii.packed_accessor32(), + jj.packed_accessor32(), coords.packed_accessor32(), valid.packed_accessor32()); @@ -1507,7 +1514,7 @@ torch::Tensor depth_filter_cuda( poses.packed_accessor32(), disps.packed_accessor32(), intrinsics.packed_accessor32(), - ix.packed_accessor32(), + ix.packed_accessor32(), thresh.packed_accessor32(), counter.packed_accessor32()); diff --git a/train.py b/train.py index 77a14d21..ab83a939 100644 --- a/train.py +++ b/train.py @@ -10,7 +10,7 @@ from torch.utils.data import DataLoader from data_readers.factory import dataset_factory -from lietorch import SO3, SE3, Sim3 +from thirdparty.lietorch.lietorch import SO3, SE3, Sim3 from geom import losses from geom.losses import geodesic_loss, residual_loss, flow_loss from geom.graph_utils import build_frame_graph
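
Note on the recurring import rewrite: the patch changes every occurrence of "from lietorch import SE3" (and "import lietorch") to "from thirdparty.lietorch.lietorch import ...". If the only goal is to resolve the vendored copy under thirdparty/, a one-time path shim would avoid touching each call site. The sketch below is hypothetical and not part of this patch (the helper name ensure_lietorch and where it is called are assumptions); it assumes the vendored package lives at <repo_root>/thirdparty/lietorch/lietorch:

    # hypothetical helper, not part of this patch
    import os
    import sys

    def ensure_lietorch(repo_root):
        """Let `from lietorch import SE3` resolve to the vendored copy when no
        installed lietorch package is available."""
        try:
            import lietorch  # noqa: F401 -- already importable, nothing to do
        except ImportError:
            vendored = os.path.join(repo_root, "thirdparty", "lietorch")
            if vendored not in sys.path:
                sys.path.insert(0, vendored)

Called once near the top of an entry point (for example train.py), before any module imports lietorch, this keeps the original import statements working whether lietorch is pip-installed or vendored.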