From db799419411d4fceb19ec896d845089725ced1c4 Mon Sep 17 00:00:00 2001 From: marcojob <44396071+marcojob@users.noreply.github.com> Date: Thu, 19 Dec 2024 16:33:43 +0100 Subject: [PATCH 1/7] eval: initial script to find best configuration --- scripts/evaluation/config_best.json | 15 ++++ scripts/evaluation/find_best.py | 111 ++++++++++++++++++++++++++++ 2 files changed, 126 insertions(+) create mode 100644 scripts/evaluation/config_best.json create mode 100644 scripts/evaluation/find_best.py diff --git a/scripts/evaluation/config_best.json b/scripts/evaluation/config_best.json new file mode 100644 index 0000000..71f1327 --- /dev/null +++ b/scripts/evaluation/config_best.json @@ -0,0 +1,15 @@ +{ + "scenarios": { + "Industrial Hall": "maschinenhalle0", + "Agricultural Field": "outdoor3", + "Rhône Glacier": "rhone_flight" + }, + "use_depth_prior": true, + "output_channels": 2, + "relative_depth": 0, + "depth_min": 0.19983673095703125, + "depth_max": 120.49285888671875, + "height": 480, + "width": 640, + "encoder": "vitb" +} diff --git a/scripts/evaluation/find_best.py b/scripts/evaluation/find_best.py new file mode 100644 index 0000000..94ceb10 --- /dev/null +++ b/scripts/evaluation/find_best.py @@ -0,0 +1,111 @@ +###################################################################### +# +# Copyright (c) 2024 ETHZ Autonomous Systems Lab. All rights reserved. +# +###################################################################### + +import argparse +import json +import logging +import re +import radarmeetsvision as rmv + +from pathlib import Path + +logger = logging.getLogger(__name__) + +# TODO: This is just copy pasta, not so nice +class Evaluation: + def __init__(self, config, scenario_key, network_key, args): + self.results_per_sample = {} + self.results_dict = {} + self.interface = rmv.Interface() + self.networks_dir = None + if args.network is not None: + self.networks_dir = Path(args.network) + + self.datasets_dir = args.dataset + self.results, self.results_per_sample = None, None + self.setup_interface(config, scenario_key, network_key) + self.run(network_key) + + def run(self, network_key): + self.interface.validate_epoch(0, self.loader) + self.results, self.results_per_sample = self.interface.get_results() + self.results['method'] = network_key + + def setup_interface(self, config, scenario_key, network_key): + self.interface.set_epochs(1) + self.interface.set_encoder(config['encoder']) + self.interface.set_depth_range((config['depth_min'], config['depth_max'])) + self.interface.set_output_channels(config['output_channels']) + self.interface.set_use_depth_prior(config['use_depth_prior']) + self.interface.load_model(pretrained_from=network_key) + self.interface.set_size(config['height'], config['width']) + self.interface.set_batch_size(1) + self.interface.set_criterion() + + dataset_list = [config['scenarios'][scenario_key]] + index_list = [config['index'][config["scenarios"][scenario_key]]] + self.loader, _ = self.interface.get_dataset_loader('val_all', self.datasets_dir, dataset_list, index_list) + + def get_results_per_sample(self): + return self.results_per_sample + + def get_results(self): + return self.results + +def load_config(config_path): + with open(config_path, 'r') as file: + config = json.load(file) + return config + + +def main(): + parser = argparse.ArgumentParser(description='Create results table for paper') + parser.add_argument('--config', type=str, default='scripts/evaluation/config_best.json', help='Path to the JSON config file.') + parser.add_argument('--dataset', type=str, required=True, help='Path to the dataset directory') + parser.add_argument('--output', type=str, required=True, help='Path to the output directory') + parser.add_argument('--network', type=str, help='Path to the network directory') + + args = parser.parse_args() + + rmv.setup_global_logger(args.output) + + if not Path(args.config).is_file(): + raise FileNotFoundError(f'The config file {args.config} does not exist.') + + if not Path(args.dataset).is_dir(): + raise FileNotFoundError(f'The dataset dir {args.dataset} does not exist.') + + config = load_config(args.config) + network_dir = Path(args.network) + pth_files = list(network_dir.rglob("*.pth")) + + best_abs_rel, best_abs_rel_file = 1000.0, None + file_mask = r"latest_([0-9]*).pth" + for pth_file in pth_files: + out = re.search(file_mask, str(pth_file)) + if out is not None: + + # NOTE: Consider only 25 epochs for all, no improvements observed after that + index = int(out.group(1)) + if index < 24: + abs_rel = 0.0 + for scenario_key in config['scenarios'].keys(): + logger.info(f'Evaluation: {scenario_key} {pth_file}') + eval_obj = Evaluation(config, scenario_key, pth_file, args) + results_dict = eval_obj.get_results_dict() + abs_rel += results_dict['abs_rel'] + + abs_rel /= len(config['scenarios'].keys()) + if abs_rel < best_abs_rel: + best_abs_rel = abs_rel + best_abs_rel_file = pth_file + logger.info("==========================================") + logger.info(f"{pth_file}: {abs_rel:.3f}") + logger.info("==========================================") + + +if __name__ == "__main__": + main() \ No newline at end of file From a8f0a3bda5a25dea89697cf862baa2be5a6c1287 Mon Sep 17 00:00:00 2001 From: marcojob <44396071+marcojob@users.noreply.github.com> Date: Thu, 19 Dec 2024 17:01:06 +0100 Subject: [PATCH 2/7] eval: add configs --- scripts/evaluation/config_best_metric_b.json | 20 +++++++++++++++++++ scripts/evaluation/config_best_metric_s.json | 20 +++++++++++++++++++ ...fig_best.json => config_best_radar_b.json} | 7 ++++++- scripts/evaluation/config_best_radar_s.json | 20 +++++++++++++++++++ .../evaluation/config_best_relative_b.json | 20 +++++++++++++++++++ .../evaluation/config_best_relative_s.json | 20 +++++++++++++++++++ 6 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 scripts/evaluation/config_best_metric_b.json create mode 100644 scripts/evaluation/config_best_metric_s.json rename scripts/evaluation/{config_best.json => config_best_radar_b.json} (70%) create mode 100644 scripts/evaluation/config_best_radar_s.json create mode 100644 scripts/evaluation/config_best_relative_b.json create mode 100644 scripts/evaluation/config_best_relative_s.json diff --git a/scripts/evaluation/config_best_metric_b.json b/scripts/evaluation/config_best_metric_b.json new file mode 100644 index 0000000..9b7ece2 --- /dev/null +++ b/scripts/evaluation/config_best_metric_b.json @@ -0,0 +1,20 @@ +{ + "scenarios": { + "Industrial Hall": "maschinenhalle0", + "Agricultural Field": "outdoor3", + "Rhône Glacier": "rhone_flight" + }, + "use_depth_prior": false, + "output_channels": 1, + "relative_depth": 0, + "depth_min": 0.19983673095703125, + "depth_max": 120.49285888671875, + "height": 480, + "width": 640, + "encoder": "vitb", + "index": { + "maschinenhalle0": [0, -1], + "outdoor3": [0, -1], + "rhone_flight": [1205, 1505] + } +} diff --git a/scripts/evaluation/config_best_metric_s.json b/scripts/evaluation/config_best_metric_s.json new file mode 100644 index 0000000..6ffb453 --- /dev/null +++ b/scripts/evaluation/config_best_metric_s.json @@ -0,0 +1,20 @@ +{ + "scenarios": { + "Industrial Hall": "maschinenhalle0", + "Agricultural Field": "outdoor3", + "Rhône Glacier": "rhone_flight" + }, + "use_depth_prior": false, + "output_channels": 1, + "relative_depth": 0, + "depth_min": 0.19983673095703125, + "depth_max": 120.49285888671875, + "height": 480, + "width": 640, + "encoder": "vits", + "index": { + "maschinenhalle0": [0, -1], + "outdoor3": [0, -1], + "rhone_flight": [1205, 1505] + } +} diff --git a/scripts/evaluation/config_best.json b/scripts/evaluation/config_best_radar_b.json similarity index 70% rename from scripts/evaluation/config_best.json rename to scripts/evaluation/config_best_radar_b.json index 71f1327..38dab3b 100644 --- a/scripts/evaluation/config_best.json +++ b/scripts/evaluation/config_best_radar_b.json @@ -11,5 +11,10 @@ "depth_max": 120.49285888671875, "height": 480, "width": 640, - "encoder": "vitb" + "encoder": "vitb", + "index": { + "maschinenhalle0": [0, -1], + "outdoor3": [0, -1], + "rhone_flight": [1205, 1505] + } } diff --git a/scripts/evaluation/config_best_radar_s.json b/scripts/evaluation/config_best_radar_s.json new file mode 100644 index 0000000..c6a824e --- /dev/null +++ b/scripts/evaluation/config_best_radar_s.json @@ -0,0 +1,20 @@ +{ + "scenarios": { + "Industrial Hall": "maschinenhalle0", + "Agricultural Field": "outdoor3", + "Rhône Glacier": "rhone_flight" + }, + "use_depth_prior": true, + "output_channels": 2, + "relative_depth": 0, + "depth_min": 0.19983673095703125, + "depth_max": 120.49285888671875, + "height": 480, + "width": 640, + "encoder": "vits", + "index": { + "maschinenhalle0": [0, -1], + "outdoor3": [0, -1], + "rhone_flight": [1205, 1505] + } +} diff --git a/scripts/evaluation/config_best_relative_b.json b/scripts/evaluation/config_best_relative_b.json new file mode 100644 index 0000000..7300e8c --- /dev/null +++ b/scripts/evaluation/config_best_relative_b.json @@ -0,0 +1,20 @@ +{ + "scenarios": { + "Industrial Hall": "maschinenhalle0", + "Agricultural Field": "outdoor3", + "Rhône Glacier": "rhone_flight" + }, + "use_depth_prior": true, + "output_channels": 1, + "relative_depth": 0, + "depth_min": 0.0, + "depth_max": 1.0, + "height": 480, + "width": 640, + "encoder": "vitb", + "index": { + "maschinenhalle0": [0, -1], + "outdoor3": [0, -1], + "rhone_flight": [1205, 1505] + } +} diff --git a/scripts/evaluation/config_best_relative_s.json b/scripts/evaluation/config_best_relative_s.json new file mode 100644 index 0000000..8864600 --- /dev/null +++ b/scripts/evaluation/config_best_relative_s.json @@ -0,0 +1,20 @@ +{ + "scenarios": { + "Industrial Hall": "maschinenhalle0", + "Agricultural Field": "outdoor3", + "Rhône Glacier": "rhone_flight" + }, + "use_depth_prior": true, + "output_channels": 1, + "relative_depth": 0, + "depth_min": 0.0, + "depth_max": 1.0, + "height": 480, + "width": 640, + "encoder": "vits", + "index": { + "maschinenhalle0": [0, -1], + "outdoor3": [0, -1], + "rhone_flight": [1205, 1505] + } +} From 1413ead5f7cd3eefaaf535cba53ddd04a709d571 Mon Sep 17 00:00:00 2001 From: marcojob <44396071+marcojob@users.noreply.github.com> Date: Fri, 20 Dec 2024 11:09:14 +0100 Subject: [PATCH 3/7] eval: various fixes --- .gitignore | 1 + radarmeetsvision/interface.py | 32 ++++++++++++++++++-------------- scripts/evaluation/find_best.py | 2 +- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index 6c61047..039d94b 100644 --- a/.gitignore +++ b/.gitignore @@ -165,3 +165,4 @@ tests/resources/results tests/resources/*.png tests/resources/*.pkl blearn/output +*.out diff --git a/radarmeetsvision/interface.py b/radarmeetsvision/interface.py index 8cfc757..ba741c2 100644 --- a/radarmeetsvision/interface.py +++ b/radarmeetsvision/interface.py @@ -150,18 +150,17 @@ def get_single_dataset(self, dataset_dir, min_index=0, max_index=-1): loader = DataLoader(dataset, batch_size=self.batch_size, pin_memory=True, drop_last=True) return dataset, loader - def update_best_result(self, results, nsamples): - if nsamples: - logger.info('==========================================================================================') - logger.info('{:>8}, {:>8}, {:>8}, {:>8}, {:>8}, {:>8}, {:>8}, {:>8}, {:>8}'.format(*tuple(results.keys()))) - logger.info('{:8.3f}, {:8.3f}, {:8.3f}, {:8.3f}, {:8.3f}, {:8.3f}, {:8.3f}, {:8.3f}, {:8.3f}'.format(*tuple([(v / nsamples).item() for v in results.values()]))) - logger.info('==========================================================================================') - - for k in results.keys(): - if k in ['d1', 'd2', 'd3']: - self.previous_best[k] = max(self.previous_best[k], (results[k] / nsamples).item()) - else: - self.previous_best[k] = min(self.previous_best[k], (results[k] / nsamples).item()) + def update_best_result(self, results): + logger.info('==========================================================================================') + logger.info('{:>8}, {:>8}, {:>8}, {:>8}, {:>8}, {:>8}, {:>8}, {:>8}, {:>8}'.format(*tuple(results.keys()))) + logger.info('{:8.3f}, {:8.3f}, {:8.3f}, {:8.3f}, {:8.3f}, {:8.3f}, {:8.3f}, {:8.3f}, {:8.3f}'.format(*tuple([v for v in results.values()]))) + logger.info('==========================================================================================') + + for k in results.keys(): + if k in ['d1', 'd2', 'd3']: + self.previous_best[k] = max(self.previous_best[k], results[k]) + else: + self.previous_best[k] = min(self.previous_best[k], results[k]) def prepare_sample(self, sample, random_flip=False): @@ -246,8 +245,13 @@ def validate_epoch(self, epoch, val_loader, iteration_callback=None): abs_rel = (self.results["abs_rel"]/nsamples).item() logger.info(f'Iter: {i}/{len(val_loader)}, Absrel: {abs_rel:.3f}') - self.update_best_result(self.results, nsamples) - self.save_checkpoint(epoch) + # Save the results + if nsamples: + for k in self.results.keys(): + self.results[k] = (self.results[k]/nsamples).item() + + self.update_best_result(self.results) + self.save_checkpoint(epoch) def save_checkpoint(self, epoch): diff --git a/scripts/evaluation/find_best.py b/scripts/evaluation/find_best.py index 94ceb10..4b21451 100644 --- a/scripts/evaluation/find_best.py +++ b/scripts/evaluation/find_best.py @@ -95,7 +95,7 @@ def main(): for scenario_key in config['scenarios'].keys(): logger.info(f'Evaluation: {scenario_key} {pth_file}') eval_obj = Evaluation(config, scenario_key, pth_file, args) - results_dict = eval_obj.get_results_dict() + results_dict = eval_obj.get_results() abs_rel += results_dict['abs_rel'] abs_rel /= len(config['scenarios'].keys()) From e05a46c6b9af964cdb7869873318133d4be30eb4 Mon Sep 17 00:00:00 2001 From: marcojob <44396071+marcojob@users.noreply.github.com> Date: Fri, 20 Dec 2024 13:50:13 +0100 Subject: [PATCH 4/7] eval: Fix relative prediction --- radarmeetsvision/interface.py | 17 ++++++++--- .../metric_depth_network/common.py | 30 ++++++++++++------- .../dataset/blearndataset.py | 2 ++ radarmeetsvision/utils.py | 4 ++- .../evaluation/config_best_relative_b.json | 6 ++-- .../evaluation/config_best_relative_s.json | 6 ++-- scripts/evaluation/evaluate.py | 1 + scripts/evaluation/find_best.py | 4 ++- 8 files changed, 48 insertions(+), 22 deletions(-) diff --git a/radarmeetsvision/interface.py b/radarmeetsvision/interface.py index ba741c2..8398436 100644 --- a/radarmeetsvision/interface.py +++ b/radarmeetsvision/interface.py @@ -32,6 +32,7 @@ def __init__(self): self.optimizer = None self.output_channels = None self.previous_best = self.reset_previous_best() + self.relative_depth = None self.results = None self.results_path = None self.use_depth_prior = None @@ -78,6 +79,9 @@ def set_results_path(self, results_path): else: logger.error(f'{self.results_path} does not exist') + def set_relative_depth(self, relative_depth): + self.relative_depth = bool(relative_depth) + def get_results(self): return self.results, self.results_per_sample @@ -88,8 +92,14 @@ def load_model(self, pretrained_from=None): logger.info(f'Using encoder: {self.encoder}') logger.info(f'Using max depth: {self.max_depth}') logger.info(f'Using output channels: {self.output_channels}') + logger.info(f'Using relative depth: {self.relative_depth}') + + # The network should predict from 0 to 1 + max_depth = self.max_depth + if self.relative_depth: + max_depth = 1.0 - self.model = get_model(pretrained_from, self.use_depth_prior, self.encoder, self.max_depth, output_channels=self.output_channels) + self.model = get_model(pretrained_from, self.use_depth_prior, self.encoder, max_depth, output_channels=self.output_channels) self.model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(self.model) self.model.to(self.device) else: @@ -182,7 +192,6 @@ def prepare_sample(self, sample, random_flip=False): else: depth_target, mask = None, None - return image, depth_prior, depth_target, mask @@ -197,7 +206,7 @@ def train_epoch(self, epoch, train_loader): image, _, depth_target, mask = self.prepare_sample(sample, random_flip=True) prediction = self.model(image) - depth_prediction = get_depth_from_prediction(prediction, image) + depth_prediction = get_depth_from_prediction(prediction, image, relative_depth=self.relative_depth) if depth_prediction is not None and mask.sum() > 0: loss = self.criterion(depth_prediction, depth_target, mask) if loss is not None: @@ -227,7 +236,7 @@ def validate_epoch(self, epoch, val_loader, iteration_callback=None): with torch.no_grad(): prediction = self.model(image) prediction = interpolate_shape(prediction, depth_target) - depth_prediction = get_depth_from_prediction(prediction, image) + depth_prediction = get_depth_from_prediction(prediction, image, relative_depth=self.relative_depth) # TODO: Expand on this interface if iteration_callback is not None: diff --git a/radarmeetsvision/metric_depth_network/common.py b/radarmeetsvision/metric_depth_network/common.py index b5f98a0..4071638 100644 --- a/radarmeetsvision/metric_depth_network/common.py +++ b/radarmeetsvision/metric_depth_network/common.py @@ -10,17 +10,27 @@ logger = logging.getLogger(__name__) -def get_depth_from_prediction(prediction, input): +def get_depth_from_prediction(prediction, input, relative_depth=False): depth = prediction[:, 0, :, :] - if prediction.shape[1] > 1 and input.shape[1] > 3: - prior = input[:, 3, :, :] - prior_mask = prior > 0 - if prior_mask.sum(): - prior_mean = prior[prior_mask].mean() - confidence = prediction[:, 1, :, :] - depth = depth * confidence + prior_mean * (1.0 - confidence) - else: - depth = None + + if input.shape[1] > 3: + prior = interpolate_shape(input[:, 3, :, :], depth, mode='nearest').squeeze(0) + + # If using a weight output channel + depth priors + if prediction.shape[1] > 1: + prior_mask = prior > 0 + if prior_mask.sum(): + prior_mean = prior[prior_mask].mean() + confidence = prediction[:, 1, :, :] + depth = depth * confidence + prior_mean * (1.0 - confidence) + else: + depth = None + + # If predicting relative depth + elif relative_depth: + mask_valid_depth = (prior > 0.0) & (depth > 0.0) + prior_mean = (prior[mask_valid_depth] / depth[mask_valid_depth]).mean() + depth *= prior_mean return depth diff --git a/radarmeetsvision/metric_depth_network/dataset/blearndataset.py b/radarmeetsvision/metric_depth_network/dataset/blearndataset.py index ffef778..db3e5ae 100644 --- a/radarmeetsvision/metric_depth_network/dataset/blearndataset.py +++ b/radarmeetsvision/metric_depth_network/dataset/blearndataset.py @@ -147,10 +147,12 @@ def get_depth(self, index): depth = None depth_path = self.depth_dir / self.depth_template.format(index) depth_normalized_path = self.depth_dir / self.depth_normalized_template.format(index) + if depth_path.is_file(): depth = np.load(depth_path) elif depth_normalized_path.is_file(): + # TODO: This does not work probably if self.depth_range is not None and self.depth_min is not None: depth_normalized = np.load(depth_normalized_path) depth_valid_mask = (depth_normalized > 0.0) & (depth_normalized <= 1.0) diff --git a/radarmeetsvision/utils.py b/radarmeetsvision/utils.py index fd1029f..570f7f7 100644 --- a/radarmeetsvision/utils.py +++ b/radarmeetsvision/utils.py @@ -51,11 +51,13 @@ def setup_global_logger(output_dir=None): file_handler = logging.FileHandler(log_file) file_handler.setLevel(logging.INFO) file_formatter = logging.Formatter( - '%%Y-%%m-%%d %%H:%%M:%%S - %(name)s - %(levelname)s - %(message)s' + '%(asctime)s - %(name)s - %(levelname)s - %(message)s', + datefmt='%Y-%m-%d %H:%M:%S' ) file_handler.setFormatter(file_formatter) logger.addHandler(file_handler) + def load_config(config_path): with open(config_path, 'r') as file: config = json.load(file) diff --git a/scripts/evaluation/config_best_relative_b.json b/scripts/evaluation/config_best_relative_b.json index 7300e8c..4abd8bf 100644 --- a/scripts/evaluation/config_best_relative_b.json +++ b/scripts/evaluation/config_best_relative_b.json @@ -6,9 +6,9 @@ }, "use_depth_prior": true, "output_channels": 1, - "relative_depth": 0, - "depth_min": 0.0, - "depth_max": 1.0, + "relative_depth": 1, + "depth_min": 0.19983673095703125, + "depth_max": 120.49285888671875, "height": 480, "width": 640, "encoder": "vitb", diff --git a/scripts/evaluation/config_best_relative_s.json b/scripts/evaluation/config_best_relative_s.json index 8864600..f89cb15 100644 --- a/scripts/evaluation/config_best_relative_s.json +++ b/scripts/evaluation/config_best_relative_s.json @@ -6,9 +6,9 @@ }, "use_depth_prior": true, "output_channels": 1, - "relative_depth": 0, - "depth_min": 0.0, - "depth_max": 1.0, + "relative_depth": 1, + "depth_min": 0.19983673095703125, + "depth_max": 120.49285888671875, "height": 480, "width": 640, "encoder": "vits", diff --git a/scripts/evaluation/evaluate.py b/scripts/evaluation/evaluate.py index 0a08c57..83af348 100644 --- a/scripts/evaluation/evaluate.py +++ b/scripts/evaluation/evaluate.py @@ -45,6 +45,7 @@ def setup_interface(self, config, scenario_key, network_key): self.interface.set_depth_range((depth_min, depth_max)) self.interface.set_output_channels(network_config['output_channels']) self.interface.set_use_depth_prior(network_config['use_depth_prior']) + self.interface.set_relative_depth(network_config['relative_depth']) network_file = None if self.networks_dir is not None and config['networks'][network_key] is not None: diff --git a/scripts/evaluation/find_best.py b/scripts/evaluation/find_best.py index 4b21451..0fd1df6 100644 --- a/scripts/evaluation/find_best.py +++ b/scripts/evaluation/find_best.py @@ -40,6 +40,8 @@ def setup_interface(self, config, scenario_key, network_key): self.interface.set_depth_range((config['depth_min'], config['depth_max'])) self.interface.set_output_channels(config['output_channels']) self.interface.set_use_depth_prior(config['use_depth_prior']) + self.interface.set_relative_depth(config['relative_depth']) + self.interface.load_model(pretrained_from=network_key) self.interface.set_size(config['height'], config['width']) self.interface.set_batch_size(1) @@ -90,7 +92,7 @@ def main(): # NOTE: Consider only 25 epochs for all, no improvements observed after that index = int(out.group(1)) - if index < 24: + if index <= 24: abs_rel = 0.0 for scenario_key in config['scenarios'].keys(): logger.info(f'Evaluation: {scenario_key} {pth_file}') From 019928d4854c708de8f079e13ce28d7bdf945f55 Mon Sep 17 00:00:00 2001 From: marcojob <44396071+marcojob@users.noreply.github.com> Date: Mon, 23 Dec 2024 10:18:09 +0100 Subject: [PATCH 5/7] evaluation: Improve plotting --- scripts/evaluation/config.json | 36 ++++++++--------- scripts/evaluation/create_scatter_plot.py | 49 +++++++++++++---------- scripts/evaluation/run_best.bash | 22 ++++++++++ 3 files changed, 68 insertions(+), 39 deletions(-) create mode 100755 scripts/evaluation/run_best.bash diff --git a/scripts/evaluation/config.json b/scripts/evaluation/config.json index 8a989d6..28d066b 100644 --- a/scripts/evaluation/config.json +++ b/scripts/evaluation/config.json @@ -1,21 +1,21 @@ { "scenarios": { "Industrial Hall": "maschinenhalle0", - "Agricultural Field": "outdoor0", + "Agricultural Field": "outdoor3", "Rhône Glacier": "rhone_flight" }, "index": { "maschinenhalle0": [0, -1], - "outdoor0": [0, -1], + "outdoor3": [0, -1], "rhone_flight": [1205, 1505] }, "networks": { - "Metric Depth \\cite{depthanythingv2}-S": "rgb_s_bs8_e9.pth", - "Metric Depth \\cite{depthanythingv2}-B": "rgb_b_bs4_e8.pth", - "Scaled Relative Depth \\cite{depthanythingv2}-S": "relrgb_s_bs8_e9.pth", - "Scaled Relative Depth \\cite{depthanythingv2}-B": "relrgb_b_bs4_e9.pth", - "Ours-S": "radar_s_bs8_e19.pth", - "Ours-B": "radar_b_bs4_e21.pth" + "Metric Depth \\cite{depthanythingv2}-S": "rgb-s-e16.pth", + "Metric Depth \\cite{depthanythingv2}-B": "rgb-b-e8.pth", + "Scaled Relative Depth \\cite{depthanythingv2}-S": "relative-s-e6.pth", + "Scaled Relative Depth \\cite{depthanythingv2}-B": "relative-b-e0.pth", + "Ours-S": "radar-s-e12.pth", + "Ours-B": "radar-b-e24.pth" }, "Metric Depth \\cite{depthanythingv2}-S": { "use_depth_prior": false, @@ -24,7 +24,7 @@ "depth_min": 0.19983673095703125, "depth_max": 120.49285888671875, "encoder": "vits", - "marker": "X", + "marker": "o", "plot_prediction": 0 }, "Metric Depth \\cite{depthanythingv2}-B": { @@ -34,25 +34,25 @@ "depth_min": 0.19983673095703125, "depth_max": 120.49285888671875, "encoder": "vitb", - "marker": "X", + "marker": "o", "plot_prediction": 0 }, "Scaled Relative Depth \\cite{depthanythingv2}-S": { - "use_depth_prior": false, + "use_depth_prior": true, "output_channels": 1, "relative_depth": 1, - "depth_min": 0.0, - "depth_max": 1.0, + "depth_min": 0.19983673095703125, + "depth_max": 120.49285888671875, "encoder": "vits", "marker": "D", "plot_prediction": 0 }, "Scaled Relative Depth \\cite{depthanythingv2}-B": { - "use_depth_prior": false, + "use_depth_prior": true, "output_channels": 1, "relative_depth": 1, - "depth_min": 0.0, - "depth_max": 1.0, + "depth_min": 0.19983673095703125, + "depth_max": 120.49285888671875, "encoder": "vitb", "marker": "D", "plot_prediction": 0 @@ -64,7 +64,7 @@ "depth_min": 0.19983673095703125, "depth_max": 120.49285888671875, "encoder": "vits", - "marker": "o", + "marker": "X", "plot_prediction": 0 }, "Ours-B": { @@ -74,7 +74,7 @@ "depth_min": 0.19983673095703125, "depth_max": 120.49285888671875, "encoder": "vitb", - "marker": "o", + "marker": "X", "plot_prediction": 1 }, "height": 480, diff --git a/scripts/evaluation/create_scatter_plot.py b/scripts/evaluation/create_scatter_plot.py index c4319f2..0a9994f 100644 --- a/scripts/evaluation/create_scatter_plot.py +++ b/scripts/evaluation/create_scatter_plot.py @@ -25,7 +25,7 @@ mpl.rcParams['text.usetex'] = True mpl.rcParams['font.family'] = 'serif' -def create_scatter_plot(results_per_scenario_sample, config, output_dir, subsample=10): +def create_scatter_plot(results_per_scenario_sample, config, output_dir, subsample=1): scenario_color = { 'Industrial Hall': 'k', 'Agricultural Field': 'green', @@ -33,12 +33,19 @@ def create_scatter_plot(results_per_scenario_sample, config, output_dir, subsamp } label_dict = { "Metric Depth \\cite{depthanythingv2}-B": "Metric Depth-B", - "Ours-B": "Ours-B" + "Ours-B": "Ours-B", + "Scaled Relative Depth \\cite{depthanythingv2}-B": "Scaled Relative-B" } samples = [['00000_rgb.jpg', '00000_dp.jpg'], ['00050_rgb.jpg', '00050_dp.jpg'], ['00250_rgb.jpg', '00250_dp.jpg']] + colors = { + "Ours-B": "#0072B2", + "Metric Depth \\cite{depthanythingv2}-B": "#D55E00", + "Scaled Relative Depth \\cite{depthanythingv2}-B": "#009E73", + } + sample_dir = Path('scripts/evaluation/samples') img_out = None for i, sample in enumerate(samples): @@ -52,42 +59,42 @@ def create_scatter_plot(results_per_scenario_sample, config, output_dir, subsamp img_out = img else: img_out = np.concatenate((img_out, img), axis=0) - border=5 + border = 5 img_out[:border, :] = (0.0, 0.0, 0.0) img_out[-border:, :] = (0.0, 0.0, 0.0) img_out[:, :border] = (0.0, 0.0, 0.0) img_out[:, -border:] = (0.0, 0.0, 0.0) + # Update figure layout for 2 columns and 3 rows in the right column + fig = plt.figure(figsize=(12, 6)) # Adjust width to fit two columns + gs = GridSpec(3, 2, width_ratios=[2, 3], height_ratios=[1, 1, 1]) # 3 rows, 2 columns - fig = plt.figure(figsize=(9, 2.7)) # Adjust the figure size if needed - gs = GridSpec(1, 2, width_ratios=[1, 2]) # Set the width ratios to 1:2 - - ax0 = fig.add_subplot(gs[0]) + # Left column: Combined image + ax0 = fig.add_subplot(gs[:, 0]) # Use all 3 rows of the left column for the image ax0.imshow(img_out) ax0.axis('off') - ax1 = fig.add_subplot(gs[1]) - for scenario_key in config['scenarios'].keys(): - for i, network_key in enumerate(config['networks'].keys()): - if '-B' in network_key and not 'Scaled' in network_key: + # Right column: 3 rows of scatter plots + axs = [fig.add_subplot(gs[0, 1]), fig.add_subplot(gs[1, 1]), fig.add_subplot(gs[2, 1])] + for i, scenario_key in enumerate(config['scenarios'].keys()): + for network_key in config['networks'].keys(): + if '-B' in network_key: # and not 'Scaled' in network_key: average_depths = results_per_scenario_sample[scenario_key][network_key]['average_depth'] abs_rel_values = results_per_scenario_sample[scenario_key][network_key]['abs_rel'] average_depths_subsampled = average_depths[::subsample] abs_rel_values_subsampled = abs_rel_values[::subsample] label = label_dict[network_key] + ' ' + (scenario_key if 'Metric' in network_key else '') - ax1.scatter(average_depths_subsampled, abs_rel_values_subsampled, label=label, marker=config[network_key]['marker'], c=scenario_color[scenario_key], s=25, alpha=0.5) + axs[i].scatter(average_depths_subsampled, abs_rel_values_subsampled, label=label, marker=config[network_key]['marker'], s=25, alpha=0.5, c=colors[network_key]) + if i == 2: + axs[i].set_xlabel('Average Scene Depth [m]') + axs[i].set_ylabel('Absolute Relative Error [ ]') + axs[i].legend(loc='upper right') + axs[i].grid() - # Set axis labels, title, and legend - plt.xlabel('Average Scene Depth [m]') - plt.ylabel('Absolute Relative Error [ ]') - plt.legend(loc='upper right') - ax1.grid() plt.tight_layout() - - # Save the plot - output_file = Path(output_dir) / f'results_overview.png' - plt.savefig(str(output_file), transparent=True, bbox_inches='tight', dpi=400) + output_file = Path(output_dir) / 'results_overview.png' + plt.savefig(str(output_file), transparent=False, bbox_inches='tight', dpi=400) plt.close() # Post-process the saved image to crop any unnecessary white space diff --git a/scripts/evaluation/run_best.bash b/scripts/evaluation/run_best.bash new file mode 100755 index 0000000..d14f3be --- /dev/null +++ b/scripts/evaluation/run_best.bash @@ -0,0 +1,22 @@ +DATASET_DIR='/home/asl/Storage/publish/validation/' +NETWORK_DIR='/home/asl/Storage/training_for_publication/' +CONFIG_DIR='scripts/evaluation/' +OUTPUT_DIR='/home/asl/Storage/output' + +# Radar +mkdir $OUTPUT_DIR/radar_b +mkdir $OUTPUT_DIR/radar_s +python3 scripts/evaluation/find_best.py --dataset $DATASET_DIR --output $OUTPUT_DIR/radar_b --network $NETWORK_DIR/training_base/radar --config $CONFIG_DIR/config_best_radar_b.json & +python3 scripts/evaluation/find_best.py --dataset $DATASET_DIR --output $OUTPUT_DIR/radar_s --network $NETWORK_DIR/training_small2/radar --config $CONFIG_DIR/config_best_radar_s.json & + +# Metric RGB +mkdir $OUTPUT_DIR/metric_b +mkdir $OUTPUT_DIR/metric_s +python3 scripts/evaluation/find_best.py --dataset $DATASET_DIR --output $OUTPUT_DIR/metric_b --network $NETWORK_DIR/training_base2/metric_rgb --config $CONFIG_DIR/config_best_metric_b.json & +python3 scripts/evaluation/find_best.py --dataset $DATASET_DIR --output $OUTPUT_DIR/metric_s --network $NETWORK_DIR/training_small/metric_rgb --config $CONFIG_DIR/config_best_metric_s.json & + +# Relative +mkdir $OUTPUT_DIR/relative_b +mkdir $OUTPUT_DIR/relative_s +python3 scripts/evaluation/find_best.py --dataset $DATASET_DIR --output $OUTPUT_DIR/relative_b --network $NETWORK_DIR/training_base/relative_rgb --config $CONFIG_DIR/config_best_relative_b.json & +python3 scripts/evaluation/find_best.py --dataset $DATASET_DIR --output $OUTPUT_DIR/relative_s --network $NETWORK_DIR/training_small/relative_rgb/ --config $CONFIG_DIR/config_best_relative_s.json & From 91b51fd291c92e8545bbfff514ac11bb7beff9c8 Mon Sep 17 00:00:00 2001 From: marcojob <44396071+marcojob@users.noreply.github.com> Date: Mon, 23 Dec 2024 17:45:59 +0100 Subject: [PATCH 6/7] export: Add script --- README.md | 6 +-- radarmeetsvision/interface.py | 4 +- radarmeetsvision/utils.py | 4 +- scripts/export/export.py | 75 +++++++++++++++++++++++++++++++++++ 4 files changed, 82 insertions(+), 7 deletions(-) create mode 100644 scripts/export/export.py diff --git a/README.md b/README.md index f0cf3c7..c6ccaa5 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ Contains the metric network based on [Depth Anything V2](https://github.com/Dept All important code diffs compared to this upstream codebase are outlined in general. ### Pretrained networks -We provide the networks obtained from training and used for evaluation in the paper [here](tbd). +We provide the networks obtained from training and used for evaluation in the paper [here](https://drive.google.com/file/d/1jJN_75OLDWyFMOjH_NrAl0Set08Gpzh0/view?usp=drive_link). ## Training datasets **blearn** is a tool that allows you to generate a synthetic image and depth training dataset using Blender. @@ -31,7 +31,7 @@ Together with a mesh and texture obtained from photogrammetry, realistic synthet The script is executed with Blender's built-in Python interpreter, which has the advantage that the Blender Python API is correctly loaded already. ### Download existing datasets -Existing datasets can be downloaded from [here](tdb). The download contains the datasets, as well as the blender projects used to obtain the datasets. +Existing datasets can be downloaded from [here](https://drive.google.com/file/d/1jJN_75OLDWyFMOjH_NrAl0Set08Gpzh0/view?usp=drive_link). The download contains the datasets, as well as the blender projects used to obtain the datasets. ### Generating training datasets In order to (re-)generate the training datasets, the following steps are needed: @@ -41,4 +41,4 @@ In order to (re-)generate the training datasets, the following steps are needed: 4. The dataset rendering can then be started using: `blender -b --python blearn.py`. Ensure that you adjust the config file accordingly in `blearn.py`. ## Validation datasets -The method for obtaining the validation datasets is described in the paper. The datasets are made available [here](tbd). \ No newline at end of file +The method for obtaining the validation datasets is described in the paper. The datasets are made available [here](https://drive.google.com/file/d/1jJN_75OLDWyFMOjH_NrAl0Set08Gpzh0/view?usp=drive_link). \ No newline at end of file diff --git a/radarmeetsvision/interface.py b/radarmeetsvision/interface.py index 8398436..57d73e5 100644 --- a/radarmeetsvision/interface.py +++ b/radarmeetsvision/interface.py @@ -18,12 +18,12 @@ logger = logging.getLogger(__name__) class Interface: - def __init__(self): + def __init__(self, force_gpu=False): self.batch_size = None self.criterion = None self.depth_min_max = None self.depth_prior_dir = None - self.device = get_device() + self.device = get_device(force_gpu=force_gpu) self.encoder = None self.lr = None self.max_depth = None diff --git a/radarmeetsvision/utils.py b/radarmeetsvision/utils.py index 570f7f7..1c5a731 100644 --- a/radarmeetsvision/utils.py +++ b/radarmeetsvision/utils.py @@ -10,12 +10,12 @@ import torch from datetime import datetime -def get_device(min_memory_gb=8): +def get_device(force_gpu=False, min_memory_gb=8): device_str = 'cpu' if torch.cuda.is_available(): device = torch.cuda.get_device_properties(0) total_memory_gb = device.total_memory / (1024 ** 3) - if total_memory_gb > min_memory_gb: + if total_memory_gb > min_memory_gb or force_gpu: device_str = 'cuda' return device_str diff --git a/scripts/export/export.py b/scripts/export/export.py new file mode 100644 index 0000000..7131ab6 --- /dev/null +++ b/scripts/export/export.py @@ -0,0 +1,75 @@ +import argparse +import torch +from pathlib import Path + +from radarmeetsvision import Interface + + +def export_to_onnx(interface, output_path, input_size=(518, 518), opset_version=16): + """ + Exports the model to an ONNX file without dynamic axes. + + Args: + interface (Interface): The Interface object containing the model. + output_path (str): Path to save the ONNX model. + input_size (tuple): Input size of the model (height, width). + opset_version (int): ONNX opset version to use. + """ + # Create dummy input with the correct dimensions + batch_size = 1 + channels = 4 if interface.use_depth_prior else 3 # Use 4 if depth prior is enabled + dummy_input = torch.randn(batch_size, channels, *input_size, device=interface.device) + + # Ensure the model is on the correct device and in evaluation mode + model = interface.model + model.eval() + + dummy_output = model.forward(dummy_input) + + # Export the model + torch.onnx.export( + model, + dummy_input, + output_path, + input_names=["input"], + output_names=["output"], + opset_version=opset_version, + ) + print(f"Model exported to {output_path}") + + +def main(args): + # Initialize the Interface and configure it + interface = Interface(force_gpu=True) + interface.set_encoder(args.encoder) + interface.set_depth_range((args.min_depth, args.max_depth)) + interface.set_output_channels(args.output_channels) + interface.set_size(args.height, args.width) + interface.set_batch_size(1) + interface.set_use_depth_prior(bool(args.use_depth_prior)) + interface.load_model(pretrained_from=args.network) + + # Output directory for ONNX file + output_dir = Path(args.output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + onnx_path = output_dir / f"{args.encoder}_model.onnx" + + # Export the model to ONNX + export_to_onnx(interface, onnx_path, input_size=(args.height, args.width), opset_version=args.opset) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Export a model to ONNX format") + parser.add_argument("--network", type=str, required=True, help="Path to the pretrained network file") + parser.add_argument("--encoder", type=str, required=True, help="Encoder type (e.g., 'vitb')") + parser.add_argument("--min-depth", type=float, default=0.19983673095703125, help="Minimum depth value") + parser.add_argument("--max-depth", type=float, default=120.49285888671875, help="Maximum depth value") + parser.add_argument("--output-channels", type=int, default=2, help="Number of output channels") + parser.add_argument("--height", type=int, default=518, help="Input image height") + parser.add_argument("--width", type=int, default=518, help="Input image width") + parser.add_argument("--use-depth-prior", type=int, default=1, help="Whether to use depth prior (1 for yes, 0 for no)") + parser.add_argument("--output-dir", type=str, required=True, help="Directory to save the ONNX file") + parser.add_argument("--opset", type=int, default=16, help="ONNX opset version") + args = parser.parse_args() + + main(args) From 949225c38a3a6302036887b179504c0b20d9ae57 Mon Sep 17 00:00:00 2001 From: marcojob <44396071+marcojob@users.noreply.github.com> Date: Tue, 24 Dec 2024 09:19:04 +0100 Subject: [PATCH 7/7] export: slight rename --- scripts/export/export.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/export/export.py b/scripts/export/export.py index 7131ab6..bbe140a 100644 --- a/scripts/export/export.py +++ b/scripts/export/export.py @@ -50,9 +50,9 @@ def main(args): interface.load_model(pretrained_from=args.network) # Output directory for ONNX file - output_dir = Path(args.output_dir) + output_dir = Path(args.output) output_dir.mkdir(parents=True, exist_ok=True) - onnx_path = output_dir / f"{args.encoder}_model.onnx" + onnx_path = output_dir / f"rmv_{args.encoder}.onnx" # Export the model to ONNX export_to_onnx(interface, onnx_path, input_size=(args.height, args.width), opset_version=args.opset) @@ -68,7 +68,7 @@ def main(args): parser.add_argument("--height", type=int, default=518, help="Input image height") parser.add_argument("--width", type=int, default=518, help="Input image width") parser.add_argument("--use-depth-prior", type=int, default=1, help="Whether to use depth prior (1 for yes, 0 for no)") - parser.add_argument("--output-dir", type=str, required=True, help="Directory to save the ONNX file") + parser.add_argument("--output", type=str, required=True, help="Directory to save the ONNX file") parser.add_argument("--opset", type=int, default=16, help="ONNX opset version") args = parser.parse_args()