From 6070021fe1a12a2d25717daa228528a39bc83fbd Mon Sep 17 00:00:00 2001
From: Wout Bittremieux
Date: Mon, 31 Jul 2023 10:50:59 +0200
Subject: [PATCH 1/3] Rename every_n_train_steps to val_check_interval

---
 casanovo/config.py              | 2 +-
 casanovo/config.yaml            | 2 +-
 casanovo/denovo/model_runner.py | 2 +-
 docs/faq.md                     | 4 ++--
 tests/conftest.py               | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/casanovo/config.py b/casanovo/config.py
index 7aec89bc..f2d24e3b 100644
--- a/casanovo/config.py
+++ b/casanovo/config.py
@@ -65,7 +65,7 @@ class Config:
         train_from_scratch=bool,
         save_top_k=int,
         model_save_folder_path=str,
-        every_n_train_steps=int,
+        val_check_interval=int,
         accelerator=str,
         devices=int,
         calculate_precision=bool,
diff --git a/casanovo/config.yaml b/casanovo/config.yaml
index 46fa63ea..c2816dd8 100644
--- a/casanovo/config.yaml
+++ b/casanovo/config.yaml
@@ -117,7 +117,7 @@ save_top_k: 5
 # Path to saved checkpoints
 model_save_folder_path: ""
 # Model validation and checkpointing frequency in training steps
-every_n_train_steps: 50_000
+val_check_interval: 50_000
 # Calculate peptide and amino acid precision during training. this
 # is expensive, so we recommend against it.
 calculate_precision: False
diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py
index c622b345..759de0b6 100644
--- a/casanovo/denovo/model_runner.py
+++ b/casanovo/denovo/model_runner.py
@@ -193,7 +193,7 @@ def initialize_trainer(self, train: bool) -> None:
             max_epochs=self.config.max_epochs,
             num_sanity_val_steps=self.config.num_sanity_val_steps,
             strategy=self._get_strategy(),
-            val_check_interval=self.config.every_n_train_steps,
+            val_check_interval=self.config.val_check_interval,
         )

         trainer_cfg.update(additional_cfg)
diff --git a/docs/faq.md b/docs/faq.md
index 269d09bf..3dac6858 100644
--- a/docs/faq.md
+++ b/docs/faq.md
@@ -60,8 +60,8 @@ Using the filename (column "filename") you can then retrieve the corresponding p
 
 By default, Casanovo saves a snapshot of the model weights after every 50,000 training steps.
 Note that the number of samples that are processed during a single training step depends on the batch size.
-Therefore, when using the default training batch size of 32, this correspond to saving a model snapshot after every 1.6 million training samples.
-You can optionally modify the snapshot frequency in the [config file](https://github.com/Noble-Lab/casanovo/blob/main/casanovo/config.yaml) (parameter `every_n_train_steps`), depending on your dataset size.
+Therefore, when using the default training batch size of 32, this corresponds to saving a model snapshot after every 1.6 million training samples.
+You can optionally modify the snapshot (and validation) frequency in the [config file](https://github.com/Noble-Lab/casanovo/blob/main/casanovo/config.yaml) (parameter `val_check_interval`), depending on your dataset size.
 Note that taking very frequent model snapshots will result in somewhat slower training time because Casanovo will evaluate its performance on the validation data for every snapshot.
 When saving a model snapshot, Casanovo will use the validation data to compute performance measures (training loss, validation loss, amino acid precision, and peptide precision) and print this information to the console and log file.
 
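Note: the snapshot arithmetic in the FAQ change above can be checked directly. A minimal Python sketch, not part of the patch, assuming the batch size is configured via the `train_batch_size` key with its default value of 32:

    # Samples processed between two model snapshots (illustrative only).
    train_batch_size = 32        # assumed default training batch size
    val_check_interval = 50_000  # default snapshot/validation interval, in steps

    samples_per_snapshot = train_batch_size * val_check_interval
    print(f"{samples_per_snapshot:,} training samples per snapshot")  # 1,600,000
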
diff --git a/tests/conftest.py b/tests/conftest.py
index 5eb55979..6dcda9c9 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -193,7 +193,7 @@ def tiny_config(tmp_path):
         "warmup_iters": 1,
         "max_iters": 1,
         "max_epochs": 20,
-        "every_n_train_steps": 1,
+        "val_check_interval": 1,
         "model_save_folder_path": str(tmp_path),
         "accelerator": "cpu",
     }

From fa93da10b792134e815ba8af72b60115ab24b771 Mon Sep 17 00:00:00 2001
From: Wout Bittremieux
Date: Mon, 31 Jul 2023 10:51:18 +0200
Subject: [PATCH 2/3] Disable check_val_every_n_epoch

---
 casanovo/denovo/model_runner.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py
index 759de0b6..8f5ff801 100644
--- a/casanovo/denovo/model_runner.py
+++ b/casanovo/denovo/model_runner.py
@@ -194,6 +194,7 @@ def initialize_trainer(self, train: bool) -> None:
             num_sanity_val_steps=self.config.num_sanity_val_steps,
             strategy=self._get_strategy(),
             val_check_interval=self.config.val_check_interval,
+            check_val_every_n_epoch=None,
         )

         trainer_cfg.update(additional_cfg)

From 04fc7de120ca78d788c08c37a120e1f486fbf08c Mon Sep 17 00:00:00 2001
From: Wout Bittremieux
Date: Mon, 31 Jul 2023 10:53:06 +0200
Subject: [PATCH 3/3] Update changelog

---
 CHANGELOG.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ddf1891d..4a4fcae9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - The CLI has been overhauled to use subcommands.
 - Upgraded to Lightning >=2.0
 - Checkpointing is now configured to save the top-k models instead of all.
+- `every_n_train_steps` has been renamed to `val_check_interval`, in accordance with the corresponding PyTorch Lightning parameter.
 
 ### Fixed
 
@@ -21,7 +22,7 @@
 - Checkpoints now include model parameters, allowing for mismatches with the provided configuration file.
 - `accelerator` parameter now controls the accelerator (CPU, GPU, etc) that is used.
 - `devices` parameter controls the number of accelerators used.
-- `every_n_train_steps` parameter now controls the frequency of both validation epochs and model checkpointing during training.
+- `val_check_interval` parameter now controls the frequency of both validation epochs and model checkpointing during training.
 
 ### Changed
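
Note on [PATCH 2/3]: in Lightning >=2.0, an integer `val_check_interval` is counted within each epoch unless `check_val_every_n_epoch` is set to `None`, in which case validation is triggered by the cumulative number of training batches across epoch boundaries; this is what allows intervals longer than a single epoch. A minimal standalone sketch of the resulting Trainer setup (argument values illustrative, not Casanovo's full configuration):

    from lightning.pytorch import Trainer

    trainer = Trainer(
        max_epochs=30,
        # Run validation (and thus checkpointing) every 50,000 training
        # steps, counted across epochs because epoch-based validation is
        # disabled below.
        val_check_interval=50_000,
        # Default is 1 (validate every epoch); None defers entirely to
        # the step-based interval above.
        check_val_every_n_epoch=None,
    )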