2 changes: 1 addition & 1 deletion droid_slam/data_readers/rgbd_utils.py
@@ -2,7 +2,7 @@
 import os.path as osp
 
 import torch
-from lietorch import SE3
+from thirdparty.lietorch.lietorch import SE3
 
 import geom.projective_ops as pops
 from scipy.spatial.transform import Rotation
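Reviewer note: this PR swaps every `from lietorch import SE3` for the copy vendored under `thirdparty/lietorch`. A minimal sketch of a fallback shim that would keep both layouts working, assuming the repository root is on `sys.path` (the shim itself is not part of this PR):

```python
# Hypothetical compatibility shim -- not part of this PR.
# Prefer the vendored copy under thirdparty/, fall back to an
# installed lietorch package if the submodule is absent.
try:
    from thirdparty.lietorch.lietorch import SE3
except ImportError:
    from lietorch import SE3  # pip/conda-installed package
```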
2 changes: 1 addition & 1 deletion droid_slam/data_readers/tartan.py
@@ -6,7 +6,7 @@
 import os
 import os.path as osp
 
-from lietorch import SE3
+from thirdparty.lietorch.lietorch import SE3
 from .base import RGBDDataset
 from .stream import RGBDStream
 
4 changes: 2 additions & 2 deletions droid_slam/depth_video.py
@@ -1,6 +1,6 @@
 import numpy as np
 import torch
-import lietorch
+from thirdparty.lietorch.lietorch import SE3
 import droid_backends
 
 from torch.multiprocessing import Process, Queue, Lock, Value
@@ -139,7 +139,7 @@ def normalize(self):
     def reproject(self, ii, jj):
         """ project points from ii -> jj """
         ii, jj = DepthVideo.format_indicies(ii, jj)
-        Gs = lietorch.SE3(self.poses[None])
+        Gs = SE3(self.poses[None])
 
         coords, valid_mask = \
             pops.projective_transform(Gs, self.disps[None], self.intrinsics[None], ii, jj)
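For context on the `Gs = SE3(self.poses[None])` change: lietorch stores each SE3 element as a 7-vector (translation plus unit quaternion, xyzw), and wrapping the raw pose tensor yields a batched group object whose operators compose rigid motions. A rough sketch of the pattern, assuming the vendored import path from this PR; the data here is illustrative only:

```python
import torch
from thirdparty.lietorch.lietorch import SE3  # vendored path from this PR

# 12 poses, each stored as (tx, ty, tz, qx, qy, qz, qw)
poses = torch.zeros(12, 7)
poses[:, -1] = 1.0                 # identity quaternions
Gs = SE3(poses[None])              # batched SE3 with batch shape (1, 12)

ii = torch.tensor([0, 1]); jj = torch.tensor([1, 2])
Gij = Gs[:, jj] * Gs[:, ii].inv()  # relative motions frame ii -> jj
```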
10 changes: 5 additions & 5 deletions droid_slam/droid_backend.py
@@ -2,7 +2,7 @@
 import lietorch
 import numpy as np
 
-from lietorch import SE3
+from thirdparty.lietorch.lietorch import SE3
 from factor_graph import FactorGraph
 
 
@@ -20,7 +20,7 @@ def __init__(self, net, video, args):
         self.backend_thresh = args.backend_thresh
         self.backend_radius = args.backend_radius
         self.backend_nms = args.backend_nms
-        
+
     @torch.no_grad()
     def __call__(self, steps=12):
         """ main update """
@@ -31,9 +31,9 @@ def __call__(self, steps=12):
 
         graph = FactorGraph(self.video, self.update_op, corr_impl="alt", max_factors=16*t, upsample=self.upsample)
 
-        graph.add_proximity_factors(rad=self.backend_radius, 
-                                    nms=self.backend_nms, 
-                                    thresh=self.backend_thresh, 
+        graph.add_proximity_factors(rad=self.backend_radius,
+                                    nms=self.backend_nms,
+                                    thresh=self.backend_thresh,
                                     beta=self.beta)
 
         graph.update_lowmem(steps=steps)
2 changes: 1 addition & 1 deletion droid_slam/droid_frontend.py
@@ -2,7 +2,7 @@
 import lietorch
 import numpy as np
 
-from lietorch import SE3
+from thirdparty.lietorch.lietorch import SE3
 from factor_graph import FactorGraph
 
 
18 changes: 17 additions & 1 deletion droid_slam/droid_net.py
@@ -9,7 +9,6 @@
 from modules.gru import ConvGRU
 from modules.clipping import GradientClip
 
-from lietorch import SE3
 from geom.ba import BA
 
 import geom.projective_ops as pops
@@ -77,6 +76,23 @@ def forward(self, net, ii):
 
 class UpdateModule(nn.Module):
     def __init__(self):
+        """
+        Initialize the UpdateModule class.
+        This method sets up the neural network layers and components used in the UpdateModule.
+        It includes the following components:
+        - corr_encoder: a sequential model of two convolutional layers with ReLU activations;
+          it processes the correlation features.
+        - flow_encoder: a sequential model of two convolutional layers with ReLU activations;
+          it processes the optical flow.
+        - weight: a sequential model of two convolutional layers with ReLU activations,
+          a GradientClip layer, and a Sigmoid activation; it computes the confidence weights.
+        - delta: a sequential model of two convolutional layers with ReLU activations
+          and a GradientClip layer; it computes the flow revisions (delta).
+        - gru: a ConvGRU layer that processes the concatenated correlation, flow, and context features.
+        - agg: a GraphAgg layer that performs aggregation over the frame graph.
+        The method also computes the number of correlation planes (cor_planes) from the pyramid levels and lookup radius.
+        """
+
         super(UpdateModule, self).__init__()
         cor_planes = 4 * (2*3 + 1)**2
 
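The docstring's last line can be made concrete: with the 4-level correlation pyramid and lookup radius 3 used here, each level contributes a (2*3 + 1)^2 = 49-sample lookup window, so `cor_planes = 4 * 49 = 196` input channels for `corr_encoder`. A quick check:

```python
num_levels, radius = 4, 3                        # pyramid levels and lookup radius
cor_planes = num_levels * (2 * radius + 1) ** 2  # samples per level, times levels
assert cor_planes == 196                         # channels fed into corr_encoder
```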
4 changes: 2 additions & 2 deletions droid_slam/factor_graph.py
@@ -3,7 +3,7 @@
 import numpy as np
 
 import matplotlib.pyplot as plt
-from lietorch import SE3
+from thirdparty.lietorch.lietorch import SE3
 from modules.corr import CorrBlock, AltCorrBlock
 import geom.projective_ops as pops
 
@@ -119,7 +119,7 @@ def add_factors(self, ii, jj, remove=False):
         inp = self.video.inps[ii].to(self.device).unsqueeze(0)
         self.inp = inp if self.inp is None else torch.cat([self.inp, inp], 1)
 
-        with torch.cuda.amp.autocast(enabled=False):
+        with torch.amp.autocast('cuda', enabled=False):
            target, _ = self.video.reproject(ii, jj)
            weight = torch.zeros_like(target)
 
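Reviewer note on the `torch.amp.autocast('cuda', enabled=False)` change: `torch.cuda.amp.autocast(...)` is deprecated in recent PyTorch releases in favor of the device-generic API, which takes the device type as its first argument. A minimal sketch of the two spellings (PyTorch 2.x assumed):

```python
import torch

# old, deprecated spelling:
#   with torch.cuda.amp.autocast(enabled=False): ...

# new device-generic spelling, as used in this PR:
with torch.amp.autocast('cuda', enabled=False):
    pass  # ops here run in full precision even inside an outer autocast region
```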
1 change: 0 additions & 1 deletion droid_slam/geom/chol.py
@@ -1,6 +1,5 @@
 import torch
 import torch.nn.functional as F
-import geom.projective_ops as pops
 
 class CholeskySolver(torch.autograd.Function):
     @staticmethod
2 changes: 1 addition & 1 deletion droid_slam/geom/losses.py
@@ -1,7 +1,7 @@
 from collections import OrderedDict
 import numpy as np
 import torch
-from lietorch import SO3, SE3, Sim3
+from thirdparty.lietorch.lietorch import SO3, SE3, Sim3
 from .graph_utils import graph_to_edge_list
 from .projective_ops import projective_transform
 
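Since `losses.py` only changes its import source, a brief note for reviewers: the SO3/SE3/Sim3 groups imported here are used to compare predicted and ground-truth relative motions in the tangent space. A rough sketch of that general pattern, not this file's exact loss; the function and argument names are illustrative:

```python
from thirdparty.lietorch.lietorch import SE3  # vendored path from this PR

def pose_geodesic_error(Gs, Gs_gt, ii, jj):
    # relative motion along each frame-graph edge ii -> jj
    dG_hat = Gs[:, jj] * Gs[:, ii].inv()      # predicted
    dG = Gs_gt[:, jj] * Gs_gt[:, ii].inv()    # ground truth
    # residual measured in the Lie-algebra (tangent) space
    return (dG.inv() * dG_hat).log().norm(dim=-1)
```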
15 changes: 13 additions & 2 deletions droid_slam/geom/projective_ops.py
@@ -1,14 +1,25 @@
 import torch
 import torch.nn.functional as F
 
-from lietorch import SE3, Sim3
+from thirdparty.lietorch.lietorch import SE3, Sim3
 
 MIN_DEPTH = 0.2
 
 def extract_intrinsics(intrinsics):
     return intrinsics[...,None,None,:].unbind(dim=-1)
 
 def coords_grid(ht, wd, **kwargs):
+    """
+    Generates a grid of pixel coordinates.
+    Args:
+        ht (int): Height of the grid.
+        wd (int): Width of the grid.
+        **kwargs: Additional arguments passed to the `to` method of the tensors.
+    Returns:
+        torch.Tensor: A tensor of shape (ht, wd, 2) containing the coordinate grid.
+        The first channel contains the x-coordinates and the second the y-coordinates.
+    """
+
     y, x = torch.meshgrid(
         torch.arange(ht).to(**kwargs).float(),
         torch.arange(wd).to(**kwargs).float())
@@ -100,7 +111,7 @@ def projective_transform(poses, depths, intrinsics, ii, jj, jacobian=False, retu
     X0, Jz = iproj(depths[:,ii], intrinsics[:,ii], jacobian=jacobian)
 
     # transform
-    Gij = poses[:,jj] * poses[:,ii].inv()
+    Gij = poses[:,jj] * poses[:,ii].inv()  # TODO: it breaks here!!!
 
     Gij.data[:,ii==jj] = torch.as_tensor([-0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], device="cuda")
     X1, Ja = actp(Gij, X0, jacobian=jacobian)
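A small usage check for the newly documented `coords_grid`, matching the shape contract in the docstring (`**kwargs` are forwarded to `.to`, so device placement works the same way the callers in this PR use it):

```python
import torch
import geom.projective_ops as pops  # repo-local import, as used above

grid = pops.coords_grid(4, 6)       # CPU float grid
print(grid.shape)                   # torch.Size([4, 6, 2])
print(grid[0, 5])                   # tensor([5., 0.]) -> x=5, y=0
# grid = pops.coords_grid(4, 6, device='cuda')  # forwarded via .to(**kwargs)
```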
13 changes: 13 additions & 0 deletions droid_slam/modules/corr.py
@@ -22,6 +22,19 @@ def backward(ctx, grad_output):
 
 class CorrBlock:
     def __init__(self, fmap1, fmap2, num_levels=4, radius=3):
+        """
+        Initializes the CorrBlock class.
+        Args:
+            fmap1 (torch.Tensor): The first feature map tensor.
+            fmap2 (torch.Tensor): The second feature map tensor.
+            num_levels (int, optional): The number of levels in the correlation pyramid. Default is 4.
+            radius (int, optional): The lookup radius for correlation. Default is 3.
+        Attributes:
+            num_levels (int): The number of levels in the correlation pyramid.
+            radius (int): The lookup radius for correlation.
+            corr_pyramid (list): A list storing the levels of the correlation pyramid.
+        """
+
         self.num_levels = num_levels
         self.radius = radius
         self.corr_pyramid = []
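For reviewers new to the correlation pyramid the docstring describes: each level is an all-pairs feature correlation, average-pooled to a coarser resolution, RAFT-style. A self-contained sketch of the idea under simplified (B, C, H, W) shapes — not the repo's exact implementation, which fuses this with custom kernels and an extra views dimension:

```python
import torch
import torch.nn.functional as F

def build_corr_pyramid(fmap1, fmap2, num_levels=4):
    """All-pairs correlation, pooled into a pyramid (RAFT-style sketch)."""
    B, C, H, W = fmap1.shape
    corr = torch.einsum('bchw,bcuv->bhwuv', fmap1, fmap2) / C**0.5
    corr = corr.reshape(B * H * W, 1, H, W)   # one H x W response map per pixel
    pyramid = [corr]
    for _ in range(num_levels - 1):
        corr = F.avg_pool2d(corr, 2, stride=2)
        pyramid.append(corr)
    return pyramid

pyr = build_corr_pyramid(torch.randn(1, 32, 8, 8), torch.randn(1, 32, 8, 8))
print([p.shape for p in pyr])  # levels: 8x8, 4x4, 2x2, 1x1
```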
17 changes: 12 additions & 5 deletions droid_slam/modules/extractor.py
@@ -181,9 +181,16 @@ def _make_layer(self, dim, stride=1):
         return nn.Sequential(*layers)
 
     def forward(self, x):
-        b, n, c1, h1, w1 = x.shape
-        x = x.view(b*n, c1, h1, w1)
-
+        batch_size, num_views, \
+            input_channels, input_height, input_width = x.shape
+        # Flatten batch and views together
+        x = x.view(
+            batch_size*num_views,
+            input_channels,
+            input_height,
+            input_width
+        )
+        # Apply convolutions
         x = self.conv1(x)
         x = self.norm1(x)
         x = self.relu1(x)
@@ -194,5 +201,5 @@ def forward(self, x):
 
         x = self.conv2(x)
 
-        _, c2, h2, w2 = x.shape
-        return x.view(b, n, c2, h2, w2)
+        batchxviews_size, output_channels, output_height, output_width = x.shape
+        return x.view(batch_size, num_views, output_channels, output_height, output_width)
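The renamed `forward` makes the shape bookkeeping explicit: the 2D conv stack only accepts 4-D input, so batch and views are flattened on the way in and restored on the way out. A standalone sketch of the pattern with illustrative shapes; note that `batchxviews_size` in the new code is assigned but never read, so `_` would signal that more clearly:

```python
import torch

x = torch.randn(2, 7, 3, 48, 64)   # (batch, views, channels, H, W)
b, n, c, h, w = x.shape
flat = x.view(b * n, c, h, w)      # convolutions see one flat 4-D batch
# ... conv stack runs on `flat`; channels/resolution may change ...
_, c2, h2, w2 = flat.shape
out = flat.view(b, n, c2, h2, w2)  # restore (batch, views, ...) at the end
print(out.shape)                   # torch.Size([2, 7, 3, 48, 64])
```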
60 changes: 35 additions & 25 deletions droid_slam/motion_filter.py
@@ -1,7 +1,6 @@
 import cv2
 import torch
-import lietorch
-
+from thirdparty.lietorch.lietorch import SE3
 from collections import OrderedDict
 from droid_net import DroidNet
 
@@ -15,7 +14,7 @@ class MotionFilter:
     def __init__(self, net, video, thresh=2.5, device="cuda:0"):
 
         # split net modules
-        self.cnet = net.cnet
+        self.context_encoder = net.cnet
         self.fnet = net.fnet
         self.update = net.update
 
@@ -28,55 +27,66 @@ def __init__(self, net, video, thresh=2.5, device="cuda:0"):
         # mean, std for image normalization
         self.MEAN = torch.as_tensor([0.485, 0.456, 0.406], device=self.device)[:, None, None]
         self.STDV = torch.as_tensor([0.229, 0.224, 0.225], device=self.device)[:, None, None]
-    @torch.cuda.amp.autocast(enabled=True)
+
+    @torch.autocast("cuda", enabled=True)
     def __context_encoder(self, image):
         """ context features """
-        net, inp = self.cnet(image).split([128,128], dim=2)
-        return net.tanh().squeeze(0), inp.relu().squeeze(0)
+        feature_map = self.context_encoder(image)
+        context_features, input_features = feature_map.split([128, 128], dim=2)
+        return context_features.tanh().squeeze(0), input_features.relu().squeeze(0)
 
-    @torch.cuda.amp.autocast(enabled=True)
+    @torch.autocast("cuda", enabled=True)
     def __feature_encoder(self, image):
         """ features for correlation volume """
         return self.fnet(image).squeeze(0)
 
-    @torch.cuda.amp.autocast(enabled=True)
+    @torch.autocast("cuda", enabled=True)
     @torch.no_grad()
     def track(self, tstamp, image, depth=None, intrinsics=None):
         """ main update operation - run on every frame in video """
 
-        Id = lietorch.SE3.Identity(1,).data.squeeze()
-        ht = image.shape[-2] // 8
-        wd = image.shape[-1] // 8
+        IdentityTransformation = SE3.Identity(1,).data.squeeze()
+        height_div_8 = image.shape[-2] // 8
+        width_div_8 = image.shape[-1] // 8
 
         # normalize images
-        inputs = image[None, :, [2,1,0]].to(self.device) / 255.0
-        inputs = inputs.sub_(self.MEAN).div_(self.STDV)
+        normalized_image_input = image[None, :, [2,1,0]].to(self.device) / 255.0
+        normalized_image_input = normalized_image_input.sub_(self.MEAN).div_(self.STDV)
 
         # extract features
-        gmap = self.__feature_encoder(inputs)
+        image_feature_representation = self.__feature_encoder(normalized_image_input)
 
         ### always add first frame to the depth video ###
         if self.video.counter.value == 0:
-            net, inp = self.__context_encoder(inputs[:,[0]])
-            self.net, self.inp, self.fmap = net, inp, gmap
-            self.video.append(tstamp, image[0], Id, 1.0, depth, intrinsics / 8.0, gmap, net[0,0], inp[0,0])
+            context_features, context_input_features = self.__context_encoder(normalized_image_input[:,[0]])
+            self.context_features, self.context_input_features, self.image_feature_map = context_features, context_input_features, image_feature_representation
+            self.video.append(
+                tstamp,
+                image[0],
+                IdentityTransformation,
+                1.0,  # initial disparity (the pose above is the identity)
+                depth,
+                intrinsics / 8.0,
+                image_feature_representation,
+                context_features[0,0],
+                context_input_features[0,0]
+            )
 
         ### only add new frame if there is enough motion ###
-        else:             
+        else:
            # index correlation volume
-            coords0 = pops.coords_grid(ht, wd, device=self.device)[None,None]
-            corr = CorrBlock(self.fmap[None,[0]], gmap[None,[0]])(coords0)
+            grid_coordinates = pops.coords_grid(height_div_8, width_div_8, device=self.device)[None,None]
+            correlation_features = CorrBlock(self.image_feature_map[None,[0]], image_feature_representation[None,[0]])(grid_coordinates)
 
             # approximate flow magnitude using 1 update iteration
-            _, delta, weight = self.update(self.net[None], self.inp[None], corr)
+            _, delta, weight = self.update(self.context_features[None], self.context_input_features[None], correlation_features)
 
             # check motion magnitue / add new frame to video
             if delta.norm(dim=-1).mean().item() > self.thresh:
                 self.count = 0
-                net, inp = self.__context_encoder(inputs[:,[0]])
-                self.net, self.inp, self.fmap = net, inp, gmap
-                self.video.append(tstamp, image[0], None, None, depth, intrinsics / 8.0, gmap, net[0], inp[0])
+                context_features, context_input_features = self.__context_encoder(normalized_image_input[:,[0]])
+                self.context_features, self.context_input_features, self.image_feature_map = context_features, context_input_features, image_feature_representation
+                self.video.append(tstamp, image[0], None, None, depth, intrinsics / 8.0, image_feature_representation, context_features[0], context_input_features[0])
 
             else:
                 self.count += 1
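To summarize the renamed `track` logic for reviewers: one GRU update predicts a flow revision against the last keyframe, and a new keyframe is inserted only when the mean flow magnitude (measured at 1/8 resolution) exceeds `thresh`. A condensed, hypothetical sketch of just the gate:

```python
# Hypothetical condensation of the keyframe gate in MotionFilter.track:
mean_flow = delta.norm(dim=-1).mean().item()  # avg predicted flow, pixels at 1/8 res
if mean_flow > self.thresh:
    self.count = 0      # enough motion: add the frame as a keyframe
    # ... re-encode context, refresh cached features, append to the depth video ...
else:
    self.count += 1     # too little motion: skip the frame
```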
5 changes: 3 additions & 2 deletions droid_slam/trajectory_filler.py
@@ -2,7 +2,8 @@
 import torch
 import lietorch
 
-from lietorch import SE3
+from thirdparty.lietorch.lietorch import SE3
+from thirdparty.lietorch.lietorch.groups import cat
 from collections import OrderedDict
 from factor_graph import FactorGraph
 from droid_net import DroidNet
@@ -100,5 +101,5 @@ def __call__(self, image_stream):
             pose_list += self.__fill(tstamps, images, intrinsics)
 
         # stitch pose segments together
-        return lietorch.cat(pose_list, 0)
+        return cat(pose_list, 0)
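On the `cat` change: lietorch's `cat` concatenates batched group elements along a dimension, much like `torch.cat` for plain tensors; the PR now imports it directly from the vendored `groups` module instead of going through `lietorch.cat`. A small sketch, assuming the vendored layout introduced by this PR:

```python
from thirdparty.lietorch.lietorch import SE3
from thirdparty.lietorch.lietorch.groups import cat  # vendored path from this PR

seg1 = SE3.Identity(3)       # batch of 3 identity poses
seg2 = SE3.Identity(2)
full = cat([seg1, seg2], 0)  # SE3 with batch shape (5,)
```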

2 changes: 1 addition & 1 deletion droid_slam/visualization.py
@@ -7,7 +7,7 @@
 import numpy as np
 import open3d as o3d
 
-from lietorch import SE3
+from thirdparty.lietorch.lietorch import SE3
 import geom.projective_ops as pops
 
 CAM_POINTS = np.array([