Skip to content

Commit

Permalink
change datasets and trained models folder organization
Browse files Browse the repository at this point in the history
  • Loading branch information
masafumiendo committed Mar 7, 2024
1 parent 3d3cf96 commit f9dccb3
Show file tree
Hide file tree
Showing 8 changed files with 87 additions and 48 deletions.
4 changes: 2 additions & 2 deletions scripts/generate_terrain_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@ def main():
subset_index = 1
script_directory = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
data_directory = os.path.join(
script_directory,
f"datasets/dataset{dataset_index:02d}/subset{subset_index:02d}/",
script_directory, f"datasets/dataset{dataset_index:02d}/"
)
grid_size = 64
resolution = 0.5
Expand Down Expand Up @@ -69,6 +68,7 @@ def main():
instance_count=10,
params_terrain_geometry=params_terrain_geometry,
params_terrain_coloring=params_terrain_coloring,
subset_index=subset_index,
)
generator_test.generate_dataset()

Expand Down
15 changes: 5 additions & 10 deletions scripts/test_slip_regressors.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,9 @@ def main(device: str) -> None:
# Load all the trained and actual models and test them
for i in range(10):
# Load the training data
train_data = torch.load(os.path.join(data_directory, f"{i:02d}_class_data.pth"))
train_data = torch.load(
os.path.join(data_directory, f"observations/{i:02d}_class.pth")
)
train_x = train_data["train_x"].to(device=device)
train_y = train_data["train_y"].to(device=device)
# Initialize the GP model
Expand All @@ -70,22 +72,15 @@ def main(device: str) -> None:
# Load the trained model
model = load_model_state_dict(
model=model,
model_directory=os.path.join(
model_directory, f"learned_models/{i:02d}_terrain_class_model.pth"
),
model_directory=os.path.join(model_directory, f"models/{i:02d}_class.pth"),
device=device,
)

# Test the trained model
mean, lower, upper = model.predict(test_phis)

# Load the actual model for reference
with open(
os.path.join(
model_directory, f"actual_models/{i:02d}_terrain_class_model.pkl"
),
"rb",
) as f:
with open(os.path.join(data_directory, f"models/{i:02d}_class.pkl"), "rb") as f:
slip_model = pickle.load(f)
test_slips = slip_model.observe_slip(test_phis)

Expand Down
8 changes: 3 additions & 5 deletions scripts/test_terrain_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,11 @@ def main(device: str) -> None:
subset_index = 1
script_directory = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
data_directory = os.path.join(
script_directory,
f"datasets/dataset{dataset_index:02d}/subset{subset_index:02d}/",
script_directory, f"datasets/dataset{dataset_index:02d}/"
)
# Set the model directory
model_directory = os.path.join(
script_directory,
f"trained_models/dataset{dataset_index:02d}/subset{subset_index:02d}/Unet/",
script_directory, f"trained_models/dataset{dataset_index:02d}/Unet/"
)
# Set the parameters for model training
params_model_training = ParamsModelTraining(
Expand All @@ -61,7 +59,7 @@ def main(device: str) -> None:
model = load_model_state_dict(model, model_directory, device)

# Load the test dataset
test_dataset = TerrainDataset(data_directory, "test")
test_dataset = TerrainDataset(data_directory, "test", subset_index)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

# Test the model
Expand Down
4 changes: 1 addition & 3 deletions scripts/train_slip_regressors.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,9 @@ def main(device: str) -> None:

# Set the model directory
dataset_index = 1
subset_index = 1
script_directory = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
model_directory = os.path.join(
script_directory,
f"trained_models/dataset{dataset_index:02d}/subset{subset_index:02d}/GPR/",
script_directory, f"trained_models/dataset{dataset_index:02d}/GPR/"
)
data_directory = os.path.join(
script_directory, f"datasets/dataset{dataset_index:02d}/"
Expand Down
7 changes: 2 additions & 5 deletions scripts/train_terrain_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,13 @@ def main(device: str) -> None:
model = Unet(classes=10).to(device)
# Set the data directory
dataset_index = 1
subset_index = 1
script_directory = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
data_directory = os.path.join(
script_directory,
f"datasets/dataset{dataset_index:02d}/subset{subset_index:02d}/",
script_directory, f"datasets/dataset{dataset_index:02d}/"
)
# Set the model directory
model_directory = os.path.join(
script_directory,
f"trained_models/dataset{dataset_index:02d}/subset{subset_index:02d}/Unet/",
script_directory, f"trained_models/dataset{dataset_index:02d}/Unet/"
)

# Set the parameters for model training
Expand Down
65 changes: 53 additions & 12 deletions src/data/dataset_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from tqdm import tqdm
from typing import Tuple
from typing import Tuple, Optional
import torch
import multiprocessing

Expand All @@ -33,6 +33,7 @@ def __init__(
num_selected_terrain_classes: int = 4,
params_terrain_geometry: ParamsTerrainGeometry = None,
params_terrain_coloring: ParamsTerrainColoring = None,
subset_index: Optional[int] = None,
) -> None:
"""
Initializes the DatasetGenerator with the specified parameters.
Expand All @@ -48,19 +49,27 @@ def __init__(
- num_selected_terrain_classes (int): The number of terrain classes to select for each environment.
- params_terrain_geometry (ParamsTerrainGeometry): The parameters for the terrain geometry.
- params_terrain_coloring (ParamsTerrainColoring): The parameters for the terrain coloring.
- subset_index (Optional[int]): The index of the subset for the test split. Required when data_split is 'test'; ignored (stored as None) for 'train' and 'valid'.
"""
# Validate the data_split argument
if data_split not in ["train", "valid", "test"]:
raise ValueError("data_split must be one of 'train', 'valid', 'test'")
if data_split == "test" and subset_index is None:
raise ValueError("subset_index must be specified for generating test data.")
self.data_directory = data_directory
self.data_split = data_split
self.subset_index = (
subset_index if data_split == "test" else None
) # make sure subset_index is None for train and valid

self.grid_size = grid_size
self.resolution = resolution

self.environment_count = environment_count
self.instance_count = instance_count
self.num_total_terrain_classes = num_total_terrain_classes
self.num_selected_terrain_classes = num_selected_terrain_classes

self.params_terrain_geometry = params_terrain_geometry
self.params_terrain_coloring = params_terrain_coloring

Expand Down Expand Up @@ -106,9 +115,17 @@ def generate_dataset(self, processes: int = 4) -> None:
last_instance_seed = (
base_instance_seed + self.environment_count * self.instance_count
)
file_path = os.path.join(
self.data_directory, self.data_split, "seed_information.pt"
)
if self.data_split == "test":
file_path = os.path.join(
self.data_directory,
self.data_split,
f"subset{self.subset_index:02d}",
"seed_information.pt",
)
else:
file_path = os.path.join(
self.data_directory, self.data_split, "seed_information.pt"
)
os.makedirs(os.path.dirname(file_path), exist_ok=True)
torch.save(
{
Expand All @@ -135,9 +152,23 @@ def set_seed(self) -> Tuple[int, int]:
environment_seed = seed_information["environment_seed"]
base_instance_seed = seed_information["last_instance_seed"]
elif self.data_split == "test":
seed_information = torch.load(
os.path.join(self.data_directory, "valid", "seed_information.pt")
)
if self.subset_index is None:
raise ValueError(
"subset_index must be specified for generating test data."
)

if self.subset_index == 1:
data_directory = os.path.join(
self.data_directory, "valid", "seed_information.pt"
)
else:
data_directory = os.path.join(
self.data_directory,
"test",
f"subset{self.subset_index - 1:02d}",
"seed_information.pt",
)
seed_information = torch.load(data_directory)
environment_seed = seed_information["environment_seed"]
base_instance_seed = seed_information["last_instance_seed"]
return environment_seed, base_instance_seed
Expand Down Expand Up @@ -229,11 +260,21 @@ def generate_and_save_environment_group(
grid_map = self.generate_map_instance(seed=instance_seed)

# Save map instance
file_path = os.path.join(
self.data_directory,
self.data_split,
f"{environment_index:03d}_{instance_index:03d}.pt",
)
if self.data_split == "test":
# Save map instance with subset index
file_path = os.path.join(
self.data_directory,
self.data_split,
f"subset{self.subset_index:02d}",
f"{environment_index:03d}_{instance_index:03d}.pt",
)
else:
# Save map instance without subset index
file_path = os.path.join(
self.data_directory,
self.data_split,
f"{environment_index:03d}_{instance_index:03d}.pt",
)
os.makedirs(os.path.dirname(file_path), exist_ok=True)

torch.save(grid_map.tensor_data, file_path)
Expand Down
16 changes: 13 additions & 3 deletions src/data/terrain_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""

import os
from typing import Tuple
from typing import Tuple, Optional
import torch
from torch.utils.data import Dataset

Expand All @@ -14,19 +14,29 @@ class TerrainDataset(Dataset):
Supports loading color maps and mask maps as well as converting them into a format suitable for model training or evaluation.
"""

def __init__(self, data_directory: str, data_split: str):
def __init__(
self, data_directory: str, data_split: str, subset_index: Optional[int] = None
):
"""
Initializes the TerrainDataset with the specified directory and data split.
Parameters:
- data_directory (str): The directory containing the dataset.
- data_split (str): The dataset split ('train', 'valid', 'test').
- subset_index (Optional[int]): The index of the testing subset. Required when data_split is 'test'.
"""
# Validate the data_split argument
if data_split not in ["train", "valid", "test"]:
raise ValueError("data_split must be one of 'train', 'valid', 'test'")
if data_split == "test" and subset_index is None:
raise ValueError("subset index must be specified for the test split")

self.data_directory = os.path.join(data_directory, data_split + "/")
if data_split == "test":
self.data_directory = os.path.join(
data_directory, data_split, f"subset{subset_index:02d}/"
)
else:
self.data_directory = os.path.join(data_directory, data_split + "/")
self.data_indices = [
file
for file in os.listdir(self.data_directory)
Expand Down
16 changes: 8 additions & 8 deletions src/prediction_models/trainers/regressor_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,7 @@ def __init__(
f"lr{self.params_model_training.learning_rate:.0e}_"
f"iters{self.params_model_training.num_iterations:03d}",
)
self.learned_models_directory = os.path.join(
self.model_directory, "learned_models"
)
self.learned_models_directory = os.path.join(self.model_directory, "models")
if not os.path.exists(self.learned_models_directory):
os.makedirs(self.learned_models_directory)

Expand All @@ -84,7 +82,8 @@ def __init__(
os.makedirs(data_directory)
self.data_directory = os.path.join(data_directory, "slip_models")
if not os.path.exists(self.data_directory):
os.makedirs(self.data_directory)
os.makedirs(os.path.join(self.data_directory, "models"))
os.makedirs(os.path.join(self.data_directory, "observations"))

def validate_minmax(self, minmax: Tuple[float, float]) -> Tuple[float, float]:
"""
Expand Down Expand Up @@ -229,22 +228,23 @@ def save(
"""
# Save the actual slip model
with open(
os.path.join(self.data_directory, f"{terrain_class:02d}_class_model.pkl"),
os.path.join(self.data_directory, f"models/{terrain_class:02d}_class.pkl"),
"wb",
) as f:
pickle.dump(slip_model, f)

# Save the training inputs and outputs as a dictionary
torch.save(
{"train_x": train_x, "train_y": train_y},
os.path.join(self.data_directory, f"{terrain_class:02d}_class_data.pth"),
os.path.join(
self.data_directory, f"observations/{terrain_class:02d}_class.pth"
),
)

# Save the learned regressor model
torch.save(
model.state_dict(),
os.path.join(
self.learned_models_directory,
f"{terrain_class:02d}_terrain_class_model.pth",
self.learned_models_directory, f"{terrain_class:02d}_class.pth"
),
)

0 comments on commit f9dccb3

Please sign in to comment.