From f9dccb3294439572cecc772e17b4ad3e6dde4115 Mon Sep 17 00:00:00 2001
From: Masafumi
Date: Thu, 7 Mar 2024 21:48:41 +0900
Subject: [PATCH] change datasets and trained models folder organization

---
 scripts/generate_terrain_dataset.py |  4 +-
 scripts/test_slip_regressors.py     | 15 ++---
 scripts/test_terrain_classifier.py  |  8 +--
 scripts/train_slip_regressors.py    |  4 +-
 scripts/train_terrain_classifier.py |  7 +-
 src/data/dataset_generator.py       | 65 +++++++++++++++----
 src/data/terrain_dataset.py         | 16 ++++-
 .../trainers/regressor_trainer.py   | 16 ++---
 8 files changed, 87 insertions(+), 48 deletions(-)

diff --git a/scripts/generate_terrain_dataset.py b/scripts/generate_terrain_dataset.py
index fdfdfc2..cda5d5f 100644
--- a/scripts/generate_terrain_dataset.py
+++ b/scripts/generate_terrain_dataset.py
@@ -18,8 +18,7 @@ def main():
     subset_index = 1
     script_directory = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
     data_directory = os.path.join(
-        script_directory,
-        f"datasets/dataset{dataset_index:02d}/subset{subset_index:02d}/",
+        script_directory, f"datasets/dataset{dataset_index:02d}/"
     )
     grid_size = 64
     resolution = 0.5
@@ -69,6 +68,7 @@ def main():
         instance_count=10,
         params_terrain_geometry=params_terrain_geometry,
         params_terrain_coloring=params_terrain_coloring,
+        subset_index=subset_index,
     )
 
     generator_test.generate_dataset()
diff --git a/scripts/test_slip_regressors.py b/scripts/test_slip_regressors.py
index 666894c..a3b76e6 100644
--- a/scripts/test_slip_regressors.py
+++ b/scripts/test_slip_regressors.py
@@ -61,7 +61,9 @@ def main(device: str) -> None:
     # Load all the trained and actual models and test them
     for i in range(10):
         # Load the training data
-        train_data = torch.load(os.path.join(data_directory, f"{i:02d}_class_data.pth"))
+        train_data = torch.load(
+            os.path.join(data_directory, f"observations/{i:02d}_class.pth")
+        )
         train_x = train_data["train_x"].to(device=device)
         train_y = train_data["train_y"].to(device=device)
         # Initialize the GP model
@@ -70,9 +72,7 @@ def main(device: str) -> None:
         # Load the trained model
         model = load_model_state_dict(
             model=model,
-            model_directory=os.path.join(
-                model_directory, f"learned_models/{i:02d}_terrain_class_model.pth"
-            ),
+            model_directory=os.path.join(model_directory, f"models/{i:02d}_class.pth"),
             device=device,
         )
 
@@ -80,12 +80,7 @@ def main(device: str) -> None:
         mean, lower, upper = model.predict(test_phis)
 
         # Load the actual model for reference
-        with open(
-            os.path.join(
-                model_directory, f"actual_models/{i:02d}_terrain_class_model.pkl"
-            ),
-            "rb",
-        ) as f:
+        with open(os.path.join(data_directory, f"models/{i:02d}_class.pkl"), "rb") as f:
             slip_model = pickle.load(f)
 
         test_slips = slip_model.observe_slip(test_phis)
diff --git a/scripts/test_terrain_classifier.py b/scripts/test_terrain_classifier.py
index 541f56b..7bc512c 100644
--- a/scripts/test_terrain_classifier.py
+++ b/scripts/test_terrain_classifier.py
@@ -31,13 +31,11 @@ def main(device: str) -> None:
     subset_index = 1
     script_directory = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
     data_directory = os.path.join(
-        script_directory,
-        f"datasets/dataset{dataset_index:02d}/subset{subset_index:02d}/",
+        script_directory, f"datasets/dataset{dataset_index:02d}/"
     )
     # Set the model directory
     model_directory = os.path.join(
-        script_directory,
-        f"trained_models/dataset{dataset_index:02d}/subset{subset_index:02d}/Unet/",
+        script_directory, f"trained_models/dataset{dataset_index:02d}/Unet/"
     )
     # Set the parameters for model training
     params_model_training = ParamsModelTraining(
@@ -61,7 +59,7 @@ def main(device: str) -> None:
     model = load_model_state_dict(model, model_directory, device)
 
     # Load the test dataset
-    test_dataset = TerrainDataset(data_directory, "test")
+    test_dataset = TerrainDataset(data_directory, "test", subset_index)
     test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
 
     # Test the model
diff --git a/scripts/train_slip_regressors.py b/scripts/train_slip_regressors.py
index 564eec9..4d3ad37 100644
--- a/scripts/train_slip_regressors.py
+++ b/scripts/train_slip_regressors.py
@@ -21,11 +21,9 @@ def main(device: str) -> None:
     # Set the model directory
     dataset_index = 1
-    subset_index = 1
     script_directory = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
     model_directory = os.path.join(
-        script_directory,
-        f"trained_models/dataset{dataset_index:02d}/subset{subset_index:02d}/GPR/",
+        script_directory, f"trained_models/dataset{dataset_index:02d}/GPR/"
     )
     data_directory = os.path.join(
         script_directory, f"datasets/dataset{dataset_index:02d}/"
     )
diff --git a/scripts/train_terrain_classifier.py b/scripts/train_terrain_classifier.py
index 6593718..f0e82c6 100644
--- a/scripts/train_terrain_classifier.py
+++ b/scripts/train_terrain_classifier.py
@@ -25,16 +25,13 @@ def main(device: str) -> None:
     model = Unet(classes=10).to(device)
     # Set the data directory
     dataset_index = 1
-    subset_index = 1
     script_directory = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
     data_directory = os.path.join(
-        script_directory,
-        f"datasets/dataset{dataset_index:02d}/subset{subset_index:02d}/",
+        script_directory, f"datasets/dataset{dataset_index:02d}/"
    )
     # Set the model directory
     model_directory = os.path.join(
-        script_directory,
-        f"trained_models/dataset{dataset_index:02d}/subset{subset_index:02d}/Unet/",
+        script_directory, f"trained_models/dataset{dataset_index:02d}/Unet/"
     )
 
     # Set the parameters for model training
diff --git a/src/data/dataset_generator.py b/src/data/dataset_generator.py
index 46d2a0f..36e241c 100644
--- a/src/data/dataset_generator.py
+++ b/src/data/dataset_generator.py
@@ -8,7 +8,7 @@
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 from tqdm import tqdm
-from typing import Tuple
+from typing import Tuple, Optional
 import torch
 import multiprocessing
 
@@ -33,6 +33,7 @@ def __init__(
         num_selected_terrain_classes: int = 4,
         params_terrain_geometry: ParamsTerrainGeometry = None,
         params_terrain_coloring: ParamsTerrainColoring = None,
+        subset_index: Optional[int] = None,
     ) -> None:
         """
         Initializes the DatasetGenerator with the specified parameters.
@@ -48,19 +49,27 @@ def __init__(
         - num_selected_terrain_classes (int): The number of terrain classes to select for each environment.
         - params_terrain_geometry (ParamsTerrainGeometry): The parameters for the terrain geometry.
        - params_terrain_coloring (ParamsTerrainColoring): The parameters for the terrain coloring.
+        - subset_index (Optional[int]): The index of the subset for the test split.
""" # Validate the data_split argument if data_split not in ["train", "valid", "test"]: raise ValueError("data_split must be one of 'train', 'valid', 'test'") + if data_split == "test" and subset_index is None: + raise ValueError("subset_index must be specified for generating test data.") self.data_directory = data_directory self.data_split = data_split + self.subset_index = ( + subset_index if data_split == "test" else None + ) # make sure subset_index is None for train and valid self.grid_size = grid_size self.resolution = resolution + self.environment_count = environment_count self.instance_count = instance_count self.num_total_terrain_classes = num_total_terrain_classes self.num_selected_terrain_classes = num_selected_terrain_classes + self.params_terrain_geometry = params_terrain_geometry self.params_terrain_coloring = params_terrain_coloring @@ -106,9 +115,17 @@ def generate_dataset(self, processes: int = 4) -> None: last_instance_seed = ( base_instance_seed + self.environment_count * self.instance_count ) - file_path = os.path.join( - self.data_directory, self.data_split, "seed_information.pt" - ) + if self.data_split == "test": + file_path = os.path.join( + self.data_directory, + self.data_split, + f"subset{self.subset_index:02d}", + "seed_information.pt", + ) + else: + file_path = os.path.join( + self.data_directory, self.data_split, "seed_information.pt" + ) os.makedirs(os.path.dirname(file_path), exist_ok=True) torch.save( { @@ -135,9 +152,23 @@ def set_seed(self) -> Tuple[int, int]: environment_seed = seed_information["environment_seed"] base_instance_seed = seed_information["last_instance_seed"] elif self.data_split == "test": - seed_information = torch.load( - os.path.join(self.data_directory, "valid", "seed_information.pt") - ) + if self.subset_index is None: + raise ValueError( + "subset_index must be specified for generating test data." 
+                )
+
+            if self.subset_index == 1:
+                data_directory = os.path.join(
+                    self.data_directory, "valid", "seed_information.pt"
+                )
+            else:
+                data_directory = os.path.join(
+                    self.data_directory,
+                    "test",
+                    f"subset{self.subset_index - 1:02d}",
+                    "seed_information.pt",
+                )
+            seed_information = torch.load(data_directory)
             environment_seed = seed_information["environment_seed"]
             base_instance_seed = seed_information["last_instance_seed"]
         return environment_seed, base_instance_seed
@@ -229,11 +260,21 @@ def generate_and_save_environment_group(
             grid_map = self.generate_map_instance(seed=instance_seed)
 
             # Save map instance
-            file_path = os.path.join(
-                self.data_directory,
-                self.data_split,
-                f"{environment_index:03d}_{instance_index:03d}.pt",
-            )
+            if self.data_split == "test":
+                # Save map instance with subset index
+                file_path = os.path.join(
+                    self.data_directory,
+                    self.data_split,
+                    f"subset{self.subset_index:02d}",
+                    f"{environment_index:03d}_{instance_index:03d}.pt",
+                )
+            else:
+                # Save map instance without subset index
+                file_path = os.path.join(
+                    self.data_directory,
+                    self.data_split,
+                    f"{environment_index:03d}_{instance_index:03d}.pt",
+                )
             os.makedirs(os.path.dirname(file_path), exist_ok=True)
             torch.save(grid_map.tensor_data, file_path)
 
diff --git a/src/data/terrain_dataset.py b/src/data/terrain_dataset.py
index b58bd05..124dd63 100644
--- a/src/data/terrain_dataset.py
+++ b/src/data/terrain_dataset.py
@@ -3,7 +3,7 @@
 """
 
 import os
-from typing import Tuple
+from typing import Tuple, Optional
 
 import torch
 from torch.utils.data import Dataset
@@ -14,19 +14,29 @@ class TerrainDataset(Dataset):
     Supports loading color maps and mask maps as well as converting them into a format suitable for model training or evaluation.
     """
 
-    def __init__(self, data_directory: str, data_split: str):
+    def __init__(
+        self, data_directory: str, data_split: str, subset_index: Optional[int] = None
+    ):
         """
         Initializes the TerrainDataset with the specified directory, and data split.
 
         Parameters:
         - data_directory (str): The directory containing the dataset.
         - data_split (str): The dataset split ('train', 'valid', 'test').
+        - subset_index (Optional[int]): The index of the testing subset.
""" # Validate the data_split argument if data_split not in ["train", "valid", "test"]: raise ValueError("data_split must be one of 'train', 'valid', 'test'") + if data_split == "test" and subset_index is None: + raise ValueError("subset index must be specified for the test split") - self.data_directory = os.path.join(data_directory, data_split + "/") + if data_split == "test": + self.data_directory = os.path.join( + data_directory, data_split, f"subset{subset_index:02d}/" + ) + else: + self.data_directory = os.path.join(data_directory, data_split + "/") self.data_indices = [ file for file in os.listdir(self.data_directory) diff --git a/src/prediction_models/trainers/regressor_trainer.py b/src/prediction_models/trainers/regressor_trainer.py index 5a06393..d223061 100644 --- a/src/prediction_models/trainers/regressor_trainer.py +++ b/src/prediction_models/trainers/regressor_trainer.py @@ -73,9 +73,7 @@ def __init__( f"lr{self.params_model_training.learning_rate:.0e}_" f"iters{self.params_model_training.num_iterations:03d}", ) - self.learned_models_directory = os.path.join( - self.model_directory, "learned_models" - ) + self.learned_models_directory = os.path.join(self.model_directory, "models") if not os.path.exists(self.learned_models_directory): os.makedirs(self.learned_models_directory) @@ -84,7 +82,8 @@ def __init__( os.makedirs(data_directory) self.data_directory = os.path.join(data_directory, "slip_models") if not os.path.exists(self.data_directory): - os.makedirs(self.data_directory) + os.makedirs(os.path.join(self.data_directory, "models")) + os.makedirs(os.path.join(self.data_directory, "observations")) def validate_minmax(self, minmax: Tuple[float, float]) -> Tuple[float, float]: """ @@ -229,7 +228,7 @@ def save( """ # Save the actual slip model with open( - os.path.join(self.data_directory, f"{terrain_class:02d}_class_model.pkl"), + os.path.join(self.data_directory, f"models/{terrain_class:02d}_class.pkl"), "wb", ) as f: pickle.dump(slip_model, f) @@ -237,14 +236,15 @@ def save( # Save the training inputs and outputs as a dictionary torch.save( {"train_x": train_x, "train_y": train_y}, - os.path.join(self.data_directory, f"{terrain_class:02d}_class_data.pth"), + os.path.join( + self.data_directory, f"observations/{terrain_class:02d}_class.pth" + ), ) # Save the learned regressor model torch.save( model.state_dict(), os.path.join( - self.learned_models_directory, - f"{terrain_class:02d}_terrain_class_model.pth", + self.learned_models_directory, f"{terrain_class:02d}_class.pth" ), )