Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Evaluation scripts #16

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -165,3 +165,4 @@ tests/resources/results
tests/resources/*.png
tests/resources/*.pkl
blearn/output
*.out
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,15 @@ Contains the metric network based on [Depth Anything V2](https://github.com/Dept
All important code diffs compared to this upstream codebase are outlined in general.

### Pretrained networks
We provide the networks obtained from training and used for evaluation in the paper [here](tbd).
We provide the networks obtained from training and used for evaluation in the paper [here](https://drive.google.com/file/d/1jJN_75OLDWyFMOjH_NrAl0Set08Gpzh0/view?usp=drive_link).

## Training datasets
**blearn** is a tool that allows you to generate a synthetic image and depth training dataset using Blender.
Together with a mesh and texture obtained from photogrammetry, realistic synthetic datasets can be generated.
The script is executed with Blender's built-in Python interpreter, which has the advantage that the Blender Python API is already loaded correctly.

### Download existing datasets
Existing datasets can be downloaded from [here](tdb). The download contains the datasets, as well as the blender projects used to obtain the datasets.
Existing datasets can be downloaded from [here](https://drive.google.com/file/d/1jJN_75OLDWyFMOjH_NrAl0Set08Gpzh0/view?usp=drive_link). The download contains the datasets, as well as the blender projects used to obtain the datasets.

### Generating training datasets
In order to (re-)generate the training datasets, the following steps are needed:
Expand All @@ -41,4 +41,4 @@ In order to (re-)generate the training datasets, the following steps are needed:
4. The dataset rendering can then be started using: `blender -b <path to blender project file> --python blearn.py`. Ensure that you adjust the config file accordingly in `blearn.py`.

## Validation datasets
The method for obtaining the validation datasets is described in the paper. The datasets are made available [here](tbd).
The method for obtaining the validation datasets is described in the paper. The datasets are made available [here](https://drive.google.com/file/d/1jJN_75OLDWyFMOjH_NrAl0Set08Gpzh0/view?usp=drive_link).
53 changes: 33 additions & 20 deletions radarmeetsvision/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@
logger = logging.getLogger(__name__)

class Interface:
def __init__(self):
def __init__(self, force_gpu=False):
self.batch_size = None
self.criterion = None
self.depth_min_max = None
self.depth_prior_dir = None
self.device = get_device()
self.device = get_device(force_gpu=force_gpu)
self.encoder = None
self.lr = None
self.max_depth = None
Expand All @@ -32,6 +32,7 @@ def __init__(self):
self.optimizer = None
self.output_channels = None
self.previous_best = self.reset_previous_best()
self.relative_depth = None
self.results = None
self.results_path = None
self.use_depth_prior = None
Expand Down Expand Up @@ -78,6 +79,9 @@ def set_results_path(self, results_path):
else:
logger.error(f'{self.results_path} does not exist')

def set_relative_depth(self, relative_depth):
    """Store the relative-depth flag on the interface.

    Args:
        relative_depth: Any value; only its truthiness is kept, as a bool.
    """
    self.relative_depth = True if relative_depth else False

def get_results(self):
    """Return the stored metrics as a ``(results, results_per_sample)`` pair."""
    aggregated = self.results
    per_sample = self.results_per_sample
    return aggregated, per_sample

Expand All @@ -88,8 +92,14 @@ def load_model(self, pretrained_from=None):
logger.info(f'Using encoder: {self.encoder}')
logger.info(f'Using max depth: {self.max_depth}')
logger.info(f'Using output channels: {self.output_channels}')
logger.info(f'Using relative depth: {self.relative_depth}')

# The network should predict from 0 to 1
max_depth = self.max_depth
if self.relative_depth:
max_depth = 1.0

self.model = get_model(pretrained_from, self.use_depth_prior, self.encoder, self.max_depth, output_channels=self.output_channels)
self.model = get_model(pretrained_from, self.use_depth_prior, self.encoder, max_depth, output_channels=self.output_channels)
self.model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(self.model)
self.model.to(self.device)
else:
Expand Down Expand Up @@ -150,18 +160,17 @@ def get_single_dataset(self, dataset_dir, min_index=0, max_index=-1):
loader = DataLoader(dataset, batch_size=self.batch_size, pin_memory=True, drop_last=True)
return dataset, loader

def update_best_result(self, results, nsamples):
if nsamples:
logger.info('==========================================================================================')
logger.info('{:>8}, {:>8}, {:>8}, {:>8}, {:>8}, {:>8}, {:>8}, {:>8}, {:>8}'.format(*tuple(results.keys())))
logger.info('{:8.3f}, {:8.3f}, {:8.3f}, {:8.3f}, {:8.3f}, {:8.3f}, {:8.3f}, {:8.3f}, {:8.3f}'.format(*tuple([(v / nsamples).item() for v in results.values()])))
logger.info('==========================================================================================')

for k in results.keys():
if k in ['d1', 'd2', 'd3']:
self.previous_best[k] = max(self.previous_best[k], (results[k] / nsamples).item())
else:
self.previous_best[k] = min(self.previous_best[k], (results[k] / nsamples).item())
def update_best_result(self, results):
    """Log the given metrics as a table and fold them into the running best.

    Args:
        results: Mapping from metric name to a scalar that the ``{:8.3f}``
            format spec accepts. Every key must already be present in
            ``self.previous_best``.

    ``d1``, ``d2`` and ``d3`` are kept as a running maximum; every other
    metric is kept as a running minimum.
    """
    separator = '=========================================================================================='

    # Build the table from however many metrics are present instead of a
    # fixed nine-column template: a shorter dict used to raise IndexError
    # and a longer one was silently truncated in the log.
    header = ', '.join('{:>8}'.format(name) for name in results)
    values = ', '.join('{:8.3f}'.format(value) for value in results.values())

    logger.info(separator)
    logger.info(header)
    logger.info(values)
    logger.info(separator)

    higher_is_better = ('d1', 'd2', 'd3')
    for name, value in results.items():
        pick = max if name in higher_is_better else min
        self.previous_best[name] = pick(self.previous_best[name], value)


def prepare_sample(self, sample, random_flip=False):
Expand All @@ -183,7 +192,6 @@ def prepare_sample(self, sample, random_flip=False):
else:
depth_target, mask = None, None


return image, depth_prior, depth_target, mask


Expand All @@ -198,7 +206,7 @@ def train_epoch(self, epoch, train_loader):
image, _, depth_target, mask = self.prepare_sample(sample, random_flip=True)

prediction = self.model(image)
depth_prediction = get_depth_from_prediction(prediction, image)
depth_prediction = get_depth_from_prediction(prediction, image, relative_depth=self.relative_depth)
if depth_prediction is not None and mask.sum() > 0:
loss = self.criterion(depth_prediction, depth_target, mask)
if loss is not None:
Expand Down Expand Up @@ -228,7 +236,7 @@ def validate_epoch(self, epoch, val_loader, iteration_callback=None):
with torch.no_grad():
prediction = self.model(image)
prediction = interpolate_shape(prediction, depth_target)
depth_prediction = get_depth_from_prediction(prediction, image)
depth_prediction = get_depth_from_prediction(prediction, image, relative_depth=self.relative_depth)

# TODO: Expand on this interface
if iteration_callback is not None:
Expand All @@ -246,8 +254,13 @@ def validate_epoch(self, epoch, val_loader, iteration_callback=None):
abs_rel = (self.results["abs_rel"]/nsamples).item()
logger.info(f'Iter: {i}/{len(val_loader)}, Absrel: {abs_rel:.3f}')

self.update_best_result(self.results, nsamples)
self.save_checkpoint(epoch)
# Save the results
if nsamples:
for k in self.results.keys():
self.results[k] = (self.results[k]/nsamples).item()

self.update_best_result(self.results)
self.save_checkpoint(epoch)


def save_checkpoint(self, epoch):
Expand Down
30 changes: 20 additions & 10 deletions radarmeetsvision/metric_depth_network/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,27 @@

logger = logging.getLogger(__name__)

def get_depth_from_prediction(prediction, input, relative_depth=False):
    """Extract the depth map from a network output, using the depth prior if present.

    Args:
        prediction: Network output. Channel 0 is read as depth and, when a
            second channel exists, channel 1 is read as a per-pixel confidence.
        input: Network input. When it has more than three channels, channel 3
            is read as the depth prior; prior values > 0 count as valid.
        relative_depth: When true and ``prediction`` has a single channel,
            the depth is rescaled by the mean ``prior / depth`` ratio over
            pixels where both are positive.

    Returns:
        The depth tensor, or ``None`` when a prior is needed but no valid
        prior pixel is available.
    """
    depth = prediction[:, 0, :, :]

    # Without a prior channel there is nothing to blend or rescale with.
    if input.shape[1] <= 3:
        return depth

    # NOTE(review): squeeze(0) only removes the leading dim when it is 1 --
    # confirm against interpolate_shape's output shape for batch sizes > 1.
    prior = interpolate_shape(input[:, 3, :, :], depth, mode='nearest').squeeze(0)

    # Confidence output channel + depth prior: blend depth with the mean prior.
    if prediction.shape[1] > 1:
        prior_mask = prior > 0
        if not prior_mask.sum():
            return None
        prior_mean = prior[prior_mask].mean()
        confidence = prediction[:, 1, :, :]
        return depth * confidence + prior_mean * (1.0 - confidence)

    # Relative depth: recover metric scale from the prior.
    if relative_depth:
        mask_valid_depth = (prior > 0.0) & (depth > 0.0)
        # The mean over an empty selection is NaN and would poison the whole
        # map; signal "no usable prior" the same way the blend branch does.
        if not mask_valid_depth.any():
            return None
        scale = (prior[mask_valid_depth] / depth[mask_valid_depth]).mean()
        # Out-of-place on purpose: `depth` is a view of `prediction`, so an
        # in-place multiply would also rewrite the caller's tensor.
        depth = depth * scale

    return depth

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,10 +147,12 @@ def get_depth(self, index):
depth = None
depth_path = self.depth_dir / self.depth_template.format(index)
depth_normalized_path = self.depth_dir / self.depth_normalized_template.format(index)

if depth_path.is_file():
depth = np.load(depth_path)

elif depth_normalized_path.is_file():
# TODO: This does not work probably
if self.depth_range is not None and self.depth_min is not None:
depth_normalized = np.load(depth_normalized_path)
depth_valid_mask = (depth_normalized > 0.0) & (depth_normalized <= 1.0)
Expand Down
8 changes: 5 additions & 3 deletions radarmeetsvision/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@
import torch
from datetime import datetime

def get_device(min_memory_gb=8, force_gpu=False):
    """Pick the torch device string to run on.

    ``min_memory_gb`` stays the first parameter so positional calls written
    against the earlier one-argument signature keep their meaning;
    ``force_gpu`` is meant to be passed by keyword.

    Args:
        min_memory_gb: CUDA device 0 must report strictly more total memory
            than this (in GiB) to be selected automatically.
        force_gpu: Select CUDA whenever it is available, regardless of the
            memory check.

    Returns:
        ``'cuda'`` or ``'cpu'``.
    """
    if not torch.cuda.is_available():
        return 'cpu'

    # The override makes the memory query unnecessary.
    if force_gpu:
        return 'cuda'

    total_memory_gb = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3)
    return 'cuda' if total_memory_gb > min_memory_gb else 'cpu'

Expand Down Expand Up @@ -51,11 +51,13 @@ def setup_global_logger(output_dir=None):
file_handler = logging.FileHandler(log_file)
file_handler.setLevel(logging.INFO)
file_formatter = logging.Formatter(
'%%Y-%%m-%%d %%H:%%M:%%S - %(name)s - %(levelname)s - %(message)s'
'%(asctime)s - %(name)s - %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
file_handler.setFormatter(file_formatter)
logger.addHandler(file_handler)


def load_config(config_path):
with open(config_path, 'r') as file:
config = json.load(file)
Expand Down
36 changes: 18 additions & 18 deletions scripts/evaluation/config.json
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
{
"scenarios": {
"Industrial Hall": "maschinenhalle0",
"Agricultural Field": "outdoor0",
"Agricultural Field": "outdoor3",
"Rhône Glacier": "rhone_flight"
},
"index": {
"maschinenhalle0": [0, -1],
"outdoor0": [0, -1],
"outdoor3": [0, -1],
"rhone_flight": [1205, 1505]
},
"networks": {
"Metric Depth \\cite{depthanythingv2}-S": "rgb_s_bs8_e9.pth",
"Metric Depth \\cite{depthanythingv2}-B": "rgb_b_bs4_e8.pth",
"Scaled Relative Depth \\cite{depthanythingv2}-S": "relrgb_s_bs8_e9.pth",
"Scaled Relative Depth \\cite{depthanythingv2}-B": "relrgb_b_bs4_e9.pth",
"Ours-S": "radar_s_bs8_e19.pth",
"Ours-B": "radar_b_bs4_e21.pth"
"Metric Depth \\cite{depthanythingv2}-S": "rgb-s-e16.pth",
"Metric Depth \\cite{depthanythingv2}-B": "rgb-b-e8.pth",
"Scaled Relative Depth \\cite{depthanythingv2}-S": "relative-s-e6.pth",
"Scaled Relative Depth \\cite{depthanythingv2}-B": "relative-b-e0.pth",
"Ours-S": "radar-s-e12.pth",
"Ours-B": "radar-b-e24.pth"
},
"Metric Depth \\cite{depthanythingv2}-S": {
"use_depth_prior": false,
Expand All @@ -24,7 +24,7 @@
"depth_min": 0.19983673095703125,
"depth_max": 120.49285888671875,
"encoder": "vits",
"marker": "X",
"marker": "o",
"plot_prediction": 0
},
"Metric Depth \\cite{depthanythingv2}-B": {
Expand All @@ -34,25 +34,25 @@
"depth_min": 0.19983673095703125,
"depth_max": 120.49285888671875,
"encoder": "vitb",
"marker": "X",
"marker": "o",
"plot_prediction": 0
},
"Scaled Relative Depth \\cite{depthanythingv2}-S": {
"use_depth_prior": false,
"use_depth_prior": true,
"output_channels": 1,
"relative_depth": 1,
"depth_min": 0.0,
"depth_max": 1.0,
"depth_min": 0.19983673095703125,
"depth_max": 120.49285888671875,
"encoder": "vits",
"marker": "D",
"plot_prediction": 0
},
"Scaled Relative Depth \\cite{depthanythingv2}-B": {
"use_depth_prior": false,
"use_depth_prior": true,
"output_channels": 1,
"relative_depth": 1,
"depth_min": 0.0,
"depth_max": 1.0,
"depth_min": 0.19983673095703125,
"depth_max": 120.49285888671875,
"encoder": "vitb",
"marker": "D",
"plot_prediction": 0
Expand All @@ -64,7 +64,7 @@
"depth_min": 0.19983673095703125,
"depth_max": 120.49285888671875,
"encoder": "vits",
"marker": "o",
"marker": "X",
"plot_prediction": 0
},
"Ours-B": {
Expand All @@ -74,7 +74,7 @@
"depth_min": 0.19983673095703125,
"depth_max": 120.49285888671875,
"encoder": "vitb",
"marker": "o",
"marker": "X",
"plot_prediction": 1
},
"height": 480,
Expand Down
20 changes: 20 additions & 0 deletions scripts/evaluation/config_best_metric_b.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"scenarios": {
"Industrial Hall": "maschinenhalle0",
"Agricultural Field": "outdoor3",
"Rhône Glacier": "rhone_flight"
},
"use_depth_prior": false,
"output_channels": 1,
"relative_depth": 0,
"depth_min": 0.19983673095703125,
"depth_max": 120.49285888671875,
"height": 480,
"width": 640,
"encoder": "vitb",
"index": {
"maschinenhalle0": [0, -1],
"outdoor3": [0, -1],
"rhone_flight": [1205, 1505]
}
}
20 changes: 20 additions & 0 deletions scripts/evaluation/config_best_metric_s.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"scenarios": {
"Industrial Hall": "maschinenhalle0",
"Agricultural Field": "outdoor3",
"Rhône Glacier": "rhone_flight"
},
"use_depth_prior": false,
"output_channels": 1,
"relative_depth": 0,
"depth_min": 0.19983673095703125,
"depth_max": 120.49285888671875,
"height": 480,
"width": 640,
"encoder": "vits",
"index": {
"maschinenhalle0": [0, -1],
"outdoor3": [0, -1],
"rhone_flight": [1205, 1505]
}
}
20 changes: 20 additions & 0 deletions scripts/evaluation/config_best_radar_b.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"scenarios": {
"Industrial Hall": "maschinenhalle0",
"Agricultural Field": "outdoor3",
"Rhône Glacier": "rhone_flight"
},
"use_depth_prior": true,
"output_channels": 2,
"relative_depth": 0,
"depth_min": 0.19983673095703125,
"depth_max": 120.49285888671875,
"height": 480,
"width": 640,
"encoder": "vitb",
"index": {
"maschinenhalle0": [0, -1],
"outdoor3": [0, -1],
"rhone_flight": [1205, 1505]
}
}
Loading