diff --git a/docs/evaluation.md b/docs/evaluation.md
index ecce4d2c..99af8294 100644
--- a/docs/evaluation.md
+++ b/docs/evaluation.md
@@ -21,6 +21,7 @@ python src/eval.py --config-name=eval.yaml \
 - `--config-name=eval.yaml`- sets task to be [`configs/eval.yaml`](../configs/eval.yaml)
 - `experiment=eval/tofu/default`- set experiment to use [`configs/eval/tofu/default.yaml`](../configs/eval/tofu/default.yaml)
 - `model=Llama-3.2-3B-Instruct`- override the default (`Llama-3.2-1B-Instruct`) model config to use [`configs/model/Llama-3.2-3B-Instruct`](../configs/model/Phi-3.5-mini-instruct.yaml).
+- Output directory: constructed as `saves/eval/SAMPLE_EVAL`
 
 Run the MUSE-Books benchmark evaluation on a checkpoint of a Phi-3.5 model:
 
diff --git a/docs/experiments.md b/docs/experiments.md
index d61009b0..4aa1462d 100644
--- a/docs/experiments.md
+++ b/docs/experiments.md
@@ -11,6 +11,8 @@ The large number of component variants supported in this repository creates the
 
 At the core, three main Hydra configs—`train.yaml` (generic training), `eval.yaml` (running evaluation), and `unlearn.yaml` (unlearning training)—provide the base configuration for the main types of experiments. These are then extended by experiment-specific configs and command-line overrides. We set up experiment configs for common usecases like LLaMA-2 unlearning on TOFU, LLaMA-2 evaluation on MUSE etc. which set the required datasets, models, and base train and eval configs to make things easier.
 
+Experiment output directories are constructed from the task mode (`train` / `eval` / `unlearn`) and the task name provided by the user, as `./saves/${mode}/${task_name}`. The experiment logs display where the model checkpoints, logs, and evaluation dumps are stored.
+
 ---
 
 ### Table of Contents
@@ -34,6 +36,7 @@
 python src/train.py --config-name=train.yaml experiment=finetune/tofu/default task_name=SAMPLE_TRAIN
 
 ## runs an unlearning training using experiment details from configs/unlearn/tofu/default.yaml
+# output directory will be constructed as: saves/unlearn/SAMPLE_TRAIN
 python src/train.py --config-name=unlearn.yaml experiment=unlearn/tofu/default task_name=SAMPLE_TRAIN
 
diff --git a/src/evals/base.py b/src/evals/base.py
index 3d794d5a..3beb68a5 100644
--- a/src/evals/base.py
+++ b/src/evals/base.py
@@ -12,7 +12,9 @@ def __init__(self, name, eval_cfg, **kwargs):
         self.eval_cfg = eval_cfg
         self.metrics_cfg = self.eval_cfg.metrics
         self.metrics = self.load_metrics(self.metrics_cfg)
-        logger.info(f"Output directory {self.eval_cfg.output_dir}")
+        logger.info(
+            f"Evaluations stored in the experiment directory: {self.eval_cfg.output_dir}"
+        )
 
     def get_logs_file_path(self, output_dir, suffix="EVAL"):
         """Returns the path to json file to store results"""
@@ -71,10 +73,14 @@ def evaluate(self, model, output_dir=None, overwrite=None, **kwargs):
         logs_file_path = self.get_logs_file_path(output_dir)
         summary_file_path = self.get_logs_file_path(output_dir, suffix="SUMMARY")
 
-        # Load exisiting results from file if any.
+        # Load existing results from file if any.
         logs = self.load_logs_from_file(logs_file_path) if not overwrite else {}
 
         logger.info(f"***** Running {self.name} evaluation suite *****")
+        logger.info(f"Fine-grained evaluations will be saved to: {logs_file_path}")
+        logger.info(
+            f"Aggregated evaluations will be summarised in: {summary_file_path}"
+        )
         for metric_name, metric_fn in self.metrics.items():
             if not overwrite and metric_name in logs and logs[metric_name]:
                 logger.info(f"Skipping {metric_name}, already evaluated.")
@@ -100,7 +106,7 @@ def evaluate(self, model, output_dir=None, overwrite=None, **kwargs):
             )
             if "agg_value" in result:
                 logger.info(f"Result for metric {metric_name}:\t{result['agg_value']}")
-
         self.save_logs(logs, logs_file_path)
         self.save_logs(self.summarize(logs), summary_file_path)
+
         return logs
diff --git a/src/trainer/__init__.py b/src/trainer/__init__.py
index 7e195fa9..66774f2f 100644
--- a/src/trainer/__init__.py
+++ b/src/trainer/__init__.py
@@ -11,6 +11,10 @@ from trainer.unlearn.simnpo import SimNPO
 from trainer.unlearn.rmu import RMU
 
+import logging
+
+logger = logging.getLogger(__name__)
+
 TRAINER_REGISTRY: Dict[str, Any] = {}
 
@@ -67,6 +71,9 @@ def load_trainer(
         template_args=template_args,
         **method_args,
     )
+    logger.info(
+        f"{trainer_handler_name} Trainer loaded, output_dir: {trainer_args.output_dir}"
+    )
     return trainer, trainer_args
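For a quick end-to-end check of the new output-directory logging, the eval entrypoint can be run with the TOFU defaults. This is a minimal smoke test, assuming the experiment config from docs/evaluation.md; the task name `SAMPLE_EVAL` is illustrative, and the resulting `saves/eval/SAMPLE_EVAL` path follows the `./saves/${mode}/${task_name}` convention documented in docs/experiments.md.

```bash
# Run the default TOFU evaluation; task_name determines the output directory.
python src/eval.py --config-name=eval.yaml \
  experiment=eval/tofu/default \
  task_name=SAMPLE_EVAL

# The run should now log the experiment directory along with the fine-grained
# (EVAL) and aggregated (SUMMARY) json paths, all under:
ls saves/eval/SAMPLE_EVAL
```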