From b4975fc227735ce4d8a89e0916f3273379ab0a3e Mon Sep 17 00:00:00 2001 From: mali-git Date: Thu, 11 Jul 2024 10:06:19 +0200 Subject: [PATCH 1/4] feat: create unique experiment id --- src/modalities/util.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/modalities/util.py b/src/modalities/util.py index 50d20964..cd5ac55c 100644 --- a/src/modalities/util.py +++ b/src/modalities/util.py @@ -1,9 +1,10 @@ import time +import uuid import warnings from datetime import datetime from enum import Enum from types import TracebackType -from typing import Callable, Dict, Generic, Type, TypeVar +from typing import Callable, Dict, Generic, Optional, Type, TypeVar import torch import torch.distributed as dist @@ -36,12 +37,14 @@ def get_callback_interval_in_batches_per_rank( return num_local_train_micro_batches_ret -def get_date_of_run(): - """create date and time for file save uniqueness - example: 2022-05-07__14-31-22' +def get_experiment_id_of_run(hash_length: Optional[int] = 8): + """create experiment ID including the date and time for file save uniqueness + example: 2022-05-07__14-31-22_fdh1xaj2' """ + random_uuid = uuid.uuid4().hex[:hash_length] date_of_run = datetime.now().strftime("%Y-%m-%d__%H-%M-%S") - return date_of_run + experiment_id = f"{date_of_run}_{random_uuid}" + return experiment_id def format_metrics_to_gb(item): From fbdd24114c811274f957fac31cb8fd31114ab51b Mon Sep 17 00:00:00 2001 From: mali-git Date: Thu, 11 Jul 2024 10:06:52 +0200 Subject: [PATCH 2/4] refactor: rename fct. --- src/modalities/config/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/modalities/config/config.py b/src/modalities/config/config.py index 1db00b5a..f0acd4ee 100644 --- a/src/modalities/config/config.py +++ b/src/modalities/config/config.py @@ -26,7 +26,7 @@ ) from modalities.config.utils import parse_torch_device from modalities.running_env.env_utils import MixedPrecisionSettings, has_bfloat_support -from modalities.util import get_date_of_run, parse_enum_by_name +from modalities.util import get_experiment_id_of_run, parse_enum_by_name class ProcessGroupBackendType(LookupEnum): @@ -358,7 +358,7 @@ def cuda_env_resolver_fun(var_name: str) -> int: def modalities_env_resolver_fun(var_name: str) -> int: if var_name == "experiment_id": - return get_date_of_run() + return get_experiment_id_of_run() if var_name == "config_file_path": return config_file_path From 54809faa61bb68f69e6db39d99ec0f279b3377ad Mon Sep 17 00:00:00 2001 From: mali-git Date: Thu, 11 Jul 2024 10:55:20 +0200 Subject: [PATCH 3/4] refactor: create hash based on path --- src/modalities/config/config.py | 11 +++++++---- src/modalities/util.py | 8 +++++--- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/modalities/config/config.py b/src/modalities/config/config.py index f0acd4ee..045909ee 100644 --- a/src/modalities/config/config.py +++ b/src/modalities/config/config.py @@ -1,3 +1,4 @@ +from functools import partial import os from pathlib import Path from typing import Annotated, Callable, Dict, List, Literal, Optional, Tuple @@ -356,18 +357,20 @@ def cuda_env_resolver_fun(var_name: str) -> int: int_env_variable_names = ["LOCAL_RANK", "WORLD_SIZE", "RANK"] return int(os.getenv(var_name)) if var_name in int_env_variable_names else os.getenv(var_name) - def modalities_env_resolver_fun(var_name: str) -> int: + def modalities_env_resolver_fun(var_name: str, config_file_path: Path) -> str | Path: if var_name == "experiment_id": - return get_experiment_id_of_run() - if var_name == "config_file_path": + return get_experiment_id_of_run(config_file_path=config_file_path) + elif var_name == "config_file_path": return config_file_path + else: + raise ValueError(f"Unknown modalities_env variable: {var_name}.") def node_env_resolver_fun(var_name: str) -> int: if var_name == "num_cpus": return os.cpu_count() OmegaConf.register_new_resolver("cuda_env", cuda_env_resolver_fun, replace=True) - OmegaConf.register_new_resolver("modalities_env", modalities_env_resolver_fun, replace=True) + OmegaConf.register_new_resolver("modalities_env", partial(modalities_env_resolver_fun, config_file_path=config_file_path), replace=True) OmegaConf.register_new_resolver("node_env", node_env_resolver_fun, replace=True) cfg = OmegaConf.load(config_file_path) diff --git a/src/modalities/util.py b/src/modalities/util.py index cd5ac55c..6210f707 100644 --- a/src/modalities/util.py +++ b/src/modalities/util.py @@ -1,3 +1,5 @@ +import hashlib +from pathlib import Path import time import uuid import warnings @@ -37,13 +39,13 @@ def get_callback_interval_in_batches_per_rank( return num_local_train_micro_batches_ret -def get_experiment_id_of_run(hash_length: Optional[int] = 8): +def get_experiment_id_of_run(config_file_path: Path, hash_length: Optional[int] = 8) -> str: """create experiment ID including the date and time for file save uniqueness example: 2022-05-07__14-31-22_fdh1xaj2' """ - random_uuid = uuid.uuid4().hex[:hash_length] + hash = hashlib.sha256(str(config_file_path).encode()).hexdigest()[:hash_length] date_of_run = datetime.now().strftime("%Y-%m-%d__%H-%M-%S") - experiment_id = f"{date_of_run}_{random_uuid}" + experiment_id = f"{date_of_run}_{hash}" return experiment_id From dc0edadb783f934362862fb30f13bd507132ef04 Mon Sep 17 00:00:00 2001 From: Max Luebbering Date: Thu, 11 Jul 2024 12:23:53 +0200 Subject: [PATCH 4/4] chore: removed unused import --- src/modalities/util.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/modalities/util.py b/src/modalities/util.py index 6210f707..62b61c40 100644 --- a/src/modalities/util.py +++ b/src/modalities/util.py @@ -1,10 +1,9 @@ import hashlib -from pathlib import Path import time -import uuid import warnings from datetime import datetime from enum import Enum +from pathlib import Path from types import TracebackType from typing import Callable, Dict, Generic, Optional, Type, TypeVar @@ -46,7 +45,7 @@ def get_experiment_id_of_run(config_file_path: Path, hash_length: Optional[int] hash = hashlib.sha256(str(config_file_path).encode()).hexdigest()[:hash_length] date_of_run = datetime.now().strftime("%Y-%m-%d__%H-%M-%S") experiment_id = f"{date_of_run}_{hash}" - return experiment_id + return experiment_id def format_metrics_to_gb(item): @@ -142,8 +141,9 @@ def get_all_reduced_value( ) return value -def get_module_class_from_name(module: torch.nn.Module, name:str) -> Type[torch.nn.Module] | None: - """ From Accelerate source code + +def get_module_class_from_name(module: torch.nn.Module, name: str) -> Type[torch.nn.Module] | None: + """From Accelerate source code (https://github.com/huggingface/accelerate/blob/1f7a79b428749f45187ec69485f2c966fe21926e/src/accelerate/utils/dataclasses.py#L1902) Gets a class from a module by its name. @@ -160,4 +160,4 @@ def get_module_class_from_name(module: torch.nn.Module, name:str) -> Type[torch. for child_module in modules_children: module_class = get_module_class_from_name(child_module, name) if module_class is not None: - return module_class \ No newline at end of file + return module_class