diff --git a/composer/algorithms/__init__.py b/composer/algorithms/__init__.py index 96f57b4f52..c5035a55f9 100755 --- a/composer/algorithms/__init__.py +++ b/composer/algorithms/__init__.py @@ -53,9 +53,8 @@ def apply(self, state: State, event: Event, logger: Logger): ColOutHparams, CutMixHparams, CutOutHparams, EMAHparams, FactorizeHparams, GhostBatchNormHparams, LabelSmoothingHparams, LayerFreezingHparams, MixUpHparams, NoOpModelHparams, ProgressiveResizingHparams, RandAugmentHparams, - SAMHparams, ScaleScheduleHparams, SelectiveBackpropHparams, - SeqLengthWarmupHparams, SqueezeExciteHparams, StochasticDepthHparams, - SWAHparams) + SAMHparams, SelectiveBackpropHparams, SeqLengthWarmupHparams, + SqueezeExciteHparams, StochasticDepthHparams, SWAHparams) from composer.algorithms.label_smoothing import LabelSmoothing from composer.algorithms.layer_freezing import LayerFreezing from composer.algorithms.mixup import MixUp @@ -63,7 +62,6 @@ def apply(self, state: State, event: Event, logger: Logger): from composer.algorithms.progressive_resizing import ProgressiveResizing from composer.algorithms.randaugment import RandAugment, RandAugmentTransform from composer.algorithms.sam import SAM -from composer.algorithms.scale_schedule import ScaleSchedule from composer.algorithms.selective_backprop import SelectiveBackprop from composer.algorithms.seq_length_warmup import SeqLengthWarmup from composer.algorithms.squeeze_excite import SqueezeExcite, SqueezeExcite2d, SqueezeExciteConv2d @@ -99,7 +97,6 @@ def apply(self, state: State, event: Event, logger: Logger): "RandAugment", "RandAugmentTransform", "SAM", - "ScaleSchedule", "SelectiveBackprop", "SeqLengthWarmup", "SqueezeExcite", @@ -128,7 +125,6 @@ def apply(self, state: State, event: Event, logger: Logger): "ProgressiveResizingHparams", "RandAugmentHparams", "SAMHparams", - "ScaleScheduleHparams", "SelectiveBackpropHparams", "SeqLengthWarmupHparams", "SqueezeExciteHparams", diff --git a/composer/algorithms/algorithm_registry.py b/composer/algorithms/algorithm_registry.py index 7798191adf..8ff0b00e1d 100755 --- a/composer/algorithms/algorithm_registry.py +++ b/composer/algorithms/algorithm_registry.py @@ -7,9 +7,8 @@ ColOutHparams, CutMixHparams, CutOutHparams, EMAHparams, FactorizeHparams, GhostBatchNormHparams, LabelSmoothingHparams, LayerFreezingHparams, MixUpHparams, NoOpModelHparams, ProgressiveResizingHparams, RandAugmentHparams, - SAMHparams, ScaleScheduleHparams, SelectiveBackpropHparams, - SeqLengthWarmupHparams, SqueezeExciteHparams, StochasticDepthHparams, - SWAHparams) + SAMHparams, SelectiveBackpropHparams, SeqLengthWarmupHparams, + SqueezeExciteHparams, StochasticDepthHparams, SWAHparams) from composer.core.algorithm import Algorithm registry: Dict[str, Type[AlgorithmHparams]] = { @@ -27,7 +26,6 @@ 'swa': SWAHparams, 'no_op_model': NoOpModelHparams, 'mixup': MixUpHparams, - 'scale_schedule': ScaleScheduleHparams, 'stochastic_depth': StochasticDepthHparams, 'colout': ColOutHparams, 'progressive_resizing': ProgressiveResizingHparams, diff --git a/composer/algorithms/hparams.py b/composer/algorithms/hparams.py index a18115dd5a..d003f45359 100755 --- a/composer/algorithms/hparams.py +++ b/composer/algorithms/hparams.py @@ -25,7 +25,6 @@ from composer.algorithms.progressive_resizing import ProgressiveResizing from composer.algorithms.randaugment import RandAugment from composer.algorithms.sam import SAM -from composer.algorithms.scale_schedule import ScaleSchedule from composer.algorithms.selective_backprop import SelectiveBackprop from 
composer.algorithms.seq_length_warmup import SeqLengthWarmup from composer.algorithms.squeeze_excite import SqueezeExcite @@ -307,16 +306,6 @@ def initialize_object(self) -> SAM: return SAM(**asdict(self)) -@dataclass -class ScaleScheduleHparams(AlgorithmHparams): - """See :class:`ScaleSchedule`""" - - ratio: float = hp.optional('Ratio to scale the schedule.', default=1.0) - - def initialize_object(self) -> "ScaleSchedule": - return ScaleSchedule(**asdict(self)) - - @dataclass class SelectiveBackpropHparams(AlgorithmHparams): """See :class:`SelectiveBackprop`""" diff --git a/composer/algorithms/scale_schedule/README.md b/composer/algorithms/scale_schedule/README.md deleted file mode 100644 index 38e1e2660b..0000000000 --- a/composer/algorithms/scale_schedule/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# 🗜️ Scale Schedule - -This method is deprecated. It has been replaced by the `scale_schedule_ratio` param supported directly by the Composer Trainer. For backwards compatibility, the Composer Trainer detects when this algorithm has been initialized and pulls the `ratio` param accordingly. diff --git a/composer/algorithms/scale_schedule/__init__.py b/composer/algorithms/scale_schedule/__init__.py deleted file mode 100644 index cf0de899e2..0000000000 --- a/composer/algorithms/scale_schedule/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright 2021 MosaicML. All Rights Reserved. - -"""Deprecated - do not use. Currently does not make any changes to the trainer. -Instead, use the ``scale_schedule_ratio`` parameter of the Composer Trainer. -""" - -from composer.algorithms.scale_schedule.scale_schedule import ScaleSchedule as ScaleSchedule - -__all__ = ['ScaleSchedule'] diff --git a/composer/algorithms/scale_schedule/metadata.json b/composer/algorithms/scale_schedule/metadata.json deleted file mode 100644 index 80eaff9dd4..0000000000 --- a/composer/algorithms/scale_schedule/metadata.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "scale_schedule": { - "name": "Scale Schedule", - "class_name": "ScaleSchedule", - "functional": "", - "tldr": "Scale the learning rate schedule by a factor", - "attribution": "Many", - "link": "https://www.mosaicml.com", - "domains": [], - "summary": "Changes the number of training steps by a dilation factor and dilating learning rate changes accordingly.", - "use": "Generally applicable" - } -} \ No newline at end of file diff --git a/composer/algorithms/scale_schedule/scale_schedule.py b/composer/algorithms/scale_schedule/scale_schedule.py deleted file mode 100644 index 1da1020498..0000000000 --- a/composer/algorithms/scale_schedule/scale_schedule.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright 2021 MosaicML. All Rights Reserved. - -import logging -import warnings -from typing import Optional - -from composer.core import Algorithm, Event, State -from composer.loggers import Logger - -log = logging.getLogger(__name__) - - -class ScaleSchedule(Algorithm): - """Deprecated - do not use. - - This algorithm is deprecated, and is being replaced by the scale_schedule_ratio param - supported directly by the Composer Trainer. For backwards compatibility, the Composer - Trainer detects when this algorithm has been initialized, and pulls the `ratio` param - accordingly. - - Args: - ratio (float, optional): The factor by which to scale the duration of the schedule. E.g., 0.5 - makes the schedule take half as long and 2.0 makes it - take twice as long. Default: ``1.0``. 
- """ - - def __init__(self, ratio: float = 1.0): - self.ratio = ratio - warnings.warn( - "ScaleScheduleDeprecationWarning: The scale schedule algorithm is deprecated. " - "Please instead use the scale_schedule_ratio parameter of the Composer Trainer.", - category=DeprecationWarning) - - def match(self, event: Event, state: State) -> bool: - """Run on Event.INIT. - - Args: - event (:class:`Event`): The current event. - state (:class:`State`): The current state. - Returns: - bool: True if this algorithm should run. - """ - return event == Event.INIT - - def apply(self, event: Event, state: State, logger: Logger) -> Optional[int]: - """No-op.""" diff --git a/composer/trainer/trainer.py b/composer/trainer/trainer.py index cb3d6f83da..342468c511 100644 --- a/composer/trainer/trainer.py +++ b/composer/trainer/trainer.py @@ -82,7 +82,6 @@ from torchmetrics import Metric, MetricCollection import composer -from composer.algorithms import ScaleSchedule from composer.callbacks import CheckpointSaver from composer.core import Algorithm, Callback, DataSpec, Engine, Evaluator, Event, Precision, State, Time, Timestamp from composer.core.evaluator import evaluate_periodically @@ -630,12 +629,6 @@ def __init__( # self._use_grad_scaling() will raise a RuntimeError if grad scaling is not available when it is required warnings.filterwarnings(action="ignore", message="torch.cuda.amp.GradScaler") - # ScaleSchedule is a deprecated algorithm, but if it is used, updated SSR with its ratio. - # TODO(#434): Remove this completely. - for algorithm in ensure_tuple(algorithms): - if isinstance(algorithm, ScaleSchedule): - scale_schedule_ratio = algorithm.ratio - if isinstance(max_duration, str): max_duration = Time.from_timestring(max_duration) elif isinstance(max_duration, int): diff --git a/composer/yamls/algorithms/scale_schedule/0.25.yaml b/composer/yamls/algorithms/scale_schedule/0.25.yaml deleted file mode 100644 index afbc1165b5..0000000000 --- a/composer/yamls/algorithms/scale_schedule/0.25.yaml +++ /dev/null @@ -1 +0,0 @@ -ratio: 0.25 diff --git a/composer/yamls/algorithms/scale_schedule/0.29.yaml b/composer/yamls/algorithms/scale_schedule/0.29.yaml deleted file mode 100644 index 1ff99ce4c8..0000000000 --- a/composer/yamls/algorithms/scale_schedule/0.29.yaml +++ /dev/null @@ -1 +0,0 @@ -ratio: 0.29 diff --git a/composer/yamls/algorithms/scale_schedule/0.33.yaml b/composer/yamls/algorithms/scale_schedule/0.33.yaml deleted file mode 100644 index 95e6b4c74d..0000000000 --- a/composer/yamls/algorithms/scale_schedule/0.33.yaml +++ /dev/null @@ -1 +0,0 @@ -ratio: 0.33 diff --git a/composer/yamls/algorithms/scale_schedule/0.36.yaml b/composer/yamls/algorithms/scale_schedule/0.36.yaml deleted file mode 100644 index 3a84b35436..0000000000 --- a/composer/yamls/algorithms/scale_schedule/0.36.yaml +++ /dev/null @@ -1 +0,0 @@ -ratio: 0.36 diff --git a/composer/yamls/algorithms/scale_schedule/0.43.yaml b/composer/yamls/algorithms/scale_schedule/0.43.yaml deleted file mode 100644 index 0f6170c576..0000000000 --- a/composer/yamls/algorithms/scale_schedule/0.43.yaml +++ /dev/null @@ -1 +0,0 @@ -ratio: 0.43 diff --git a/composer/yamls/algorithms/scale_schedule/0.45.yaml b/composer/yamls/algorithms/scale_schedule/0.45.yaml deleted file mode 100644 index 9f08ec313c..0000000000 --- a/composer/yamls/algorithms/scale_schedule/0.45.yaml +++ /dev/null @@ -1 +0,0 @@ -ratio: 0.45 diff --git a/composer/yamls/algorithms/scale_schedule/0.50.yaml b/composer/yamls/algorithms/scale_schedule/0.50.yaml deleted file mode 100644 index 
79ed866824..0000000000 --- a/composer/yamls/algorithms/scale_schedule/0.50.yaml +++ /dev/null @@ -1 +0,0 @@ -ratio: 0.5 diff --git a/composer/yamls/algorithms/scale_schedule/0.55.yaml b/composer/yamls/algorithms/scale_schedule/0.55.yaml deleted file mode 100644 index bc1449ceb4..0000000000 --- a/composer/yamls/algorithms/scale_schedule/0.55.yaml +++ /dev/null @@ -1 +0,0 @@ -ratio: 0.55 diff --git a/composer/yamls/algorithms/scale_schedule/0.56.yaml b/composer/yamls/algorithms/scale_schedule/0.56.yaml deleted file mode 100644 index 7a5f036e9d..0000000000 --- a/composer/yamls/algorithms/scale_schedule/0.56.yaml +++ /dev/null @@ -1 +0,0 @@ -ratio: 0.56 diff --git a/composer/yamls/algorithms/scale_schedule/0.57.yaml b/composer/yamls/algorithms/scale_schedule/0.57.yaml deleted file mode 100644 index 4a09ed8f71..0000000000 --- a/composer/yamls/algorithms/scale_schedule/0.57.yaml +++ /dev/null @@ -1 +0,0 @@ -ratio: 0.57 diff --git a/composer/yamls/algorithms/scale_schedule/0.64.yaml b/composer/yamls/algorithms/scale_schedule/0.64.yaml deleted file mode 100644 index ecd9a979ec..0000000000 --- a/composer/yamls/algorithms/scale_schedule/0.64.yaml +++ /dev/null @@ -1 +0,0 @@ -ratio: 0.64 diff --git a/composer/yamls/algorithms/scale_schedule/0.67.yaml b/composer/yamls/algorithms/scale_schedule/0.67.yaml deleted file mode 100644 index 74c84d87ae..0000000000 --- a/composer/yamls/algorithms/scale_schedule/0.67.yaml +++ /dev/null @@ -1 +0,0 @@ -ratio: 0.67 diff --git a/composer/yamls/algorithms/scale_schedule/0.71.yaml b/composer/yamls/algorithms/scale_schedule/0.71.yaml deleted file mode 100644 index bdeca7a0df..0000000000 --- a/composer/yamls/algorithms/scale_schedule/0.71.yaml +++ /dev/null @@ -1 +0,0 @@ -ratio: 0.71 diff --git a/composer/yamls/algorithms/scale_schedule/0.73.yaml b/composer/yamls/algorithms/scale_schedule/0.73.yaml deleted file mode 100644 index 8819c40353..0000000000 --- a/composer/yamls/algorithms/scale_schedule/0.73.yaml +++ /dev/null @@ -1 +0,0 @@ -ratio: 0.73 diff --git a/composer/yamls/algorithms/scale_schedule/0.75.yaml b/composer/yamls/algorithms/scale_schedule/0.75.yaml deleted file mode 100644 index 3f40f7b9f3..0000000000 --- a/composer/yamls/algorithms/scale_schedule/0.75.yaml +++ /dev/null @@ -1 +0,0 @@ -ratio: 0.75 diff --git a/composer/yamls/algorithms/scale_schedule/0.78.yaml b/composer/yamls/algorithms/scale_schedule/0.78.yaml deleted file mode 100644 index f805699e93..0000000000 --- a/composer/yamls/algorithms/scale_schedule/0.78.yaml +++ /dev/null @@ -1 +0,0 @@ -ratio: 0.78 diff --git a/composer/yamls/algorithms/scale_schedule/0.79.yaml b/composer/yamls/algorithms/scale_schedule/0.79.yaml deleted file mode 100644 index 57f7d43af6..0000000000 --- a/composer/yamls/algorithms/scale_schedule/0.79.yaml +++ /dev/null @@ -1 +0,0 @@ -ratio: 0.79 diff --git a/composer/yamls/algorithms/scale_schedule/0.80.yaml b/composer/yamls/algorithms/scale_schedule/0.80.yaml deleted file mode 100644 index 448f14854f..0000000000 --- a/composer/yamls/algorithms/scale_schedule/0.80.yaml +++ /dev/null @@ -1 +0,0 @@ -ratio: 0.80 diff --git a/composer/yamls/algorithms/scale_schedule/0.82.yaml b/composer/yamls/algorithms/scale_schedule/0.82.yaml deleted file mode 100644 index e5fb3ef640..0000000000 --- a/composer/yamls/algorithms/scale_schedule/0.82.yaml +++ /dev/null @@ -1 +0,0 @@ -ratio: 0.82 diff --git a/composer/yamls/algorithms/scale_schedule/0.86.yaml b/composer/yamls/algorithms/scale_schedule/0.86.yaml deleted file mode 100644 index 05ee93fb2b..0000000000 --- 
a/composer/yamls/algorithms/scale_schedule/0.86.yaml +++ /dev/null @@ -1 +0,0 @@ -ratio: 0.86 diff --git a/composer/yamls/algorithms/scale_schedule/0.89.yaml b/composer/yamls/algorithms/scale_schedule/0.89.yaml deleted file mode 100644 index d69b6553c1..0000000000 --- a/composer/yamls/algorithms/scale_schedule/0.89.yaml +++ /dev/null @@ -1 +0,0 @@ -ratio: 0.89 diff --git a/composer/yamls/algorithms/scale_schedule/0.91.yaml b/composer/yamls/algorithms/scale_schedule/0.91.yaml deleted file mode 100644 index a79c7685ac..0000000000 --- a/composer/yamls/algorithms/scale_schedule/0.91.yaml +++ /dev/null @@ -1 +0,0 @@ -ratio: 0.91 diff --git a/composer/yamls/algorithms/scale_schedule/0.93.yaml b/composer/yamls/algorithms/scale_schedule/0.93.yaml deleted file mode 100644 index fd0a4ec82a..0000000000 --- a/composer/yamls/algorithms/scale_schedule/0.93.yaml +++ /dev/null @@ -1 +0,0 @@ -ratio: 0.93 diff --git a/composer/yamls/algorithms/scale_schedule/1.00.yaml b/composer/yamls/algorithms/scale_schedule/1.00.yaml deleted file mode 100644 index 67d75b0fd8..0000000000 --- a/composer/yamls/algorithms/scale_schedule/1.00.yaml +++ /dev/null @@ -1 +0,0 @@ -ratio: 1.0 diff --git a/composer/yamls/algorithms/scale_schedule/1.25.yaml b/composer/yamls/algorithms/scale_schedule/1.25.yaml deleted file mode 100644 index 61381bccb1..0000000000 --- a/composer/yamls/algorithms/scale_schedule/1.25.yaml +++ /dev/null @@ -1 +0,0 @@ -ratio: 1.25 diff --git a/composer/yamls/algorithms/scale_schedule/1.50.yaml b/composer/yamls/algorithms/scale_schedule/1.50.yaml deleted file mode 100644 index acb707d6d7..0000000000 --- a/composer/yamls/algorithms/scale_schedule/1.50.yaml +++ /dev/null @@ -1 +0,0 @@ -ratio: 1.50 diff --git a/composer/yamls/algorithms/scale_schedule/2.00.yaml b/composer/yamls/algorithms/scale_schedule/2.00.yaml deleted file mode 100644 index 398d503114..0000000000 --- a/composer/yamls/algorithms/scale_schedule/2.00.yaml +++ /dev/null @@ -1 +0,0 @@ -ratio: 2.00 diff --git a/docs/source/method_cards/scale_schedule.md b/docs/source/method_cards/scale_schedule.md deleted file mode 120000 index ce0f3cb5db..0000000000 --- a/docs/source/method_cards/scale_schedule.md +++ /dev/null @@ -1 +0,0 @@ -../../../composer/algorithms/scale_schedule/README.md \ No newline at end of file diff --git a/docs/source/method_cards/scale_schedule.md b/docs/source/method_cards/scale_schedule.md new file mode 100644 index 0000000000..2fb934587a --- /dev/null +++ b/docs/source/method_cards/scale_schedule.md @@ -0,0 +1,115 @@
+# ⚖️ Scale Schedule
+
+![scale_schedule.png](https://storage.googleapis.com/docs.mosaicml.com/images/methods/scale_schedule.png)
+
+Tags: `Best Practice`, `Speedup`
+
+## TL;DR
+
+Scale Schedule changes the number of training steps by a dilation factor and dilates learning rate changes
+accordingly. Doing so varies the training budget, making it possible to explore tradeoffs between cost (measured in
+time or money) and the quality of the final model.
+
+## Attribution
+
+The number of training steps to perform is an important hyperparameter to tune when developing a model. This technique
+appears implicitly throughout the deep learning literature. One example of a systematic study of this approach is the
+*scan-SGD* technique in
+[How Important is Importance Sampling for Deep Budgeted Training](https://openreview.net/forum?id=TqQ0oOzJlai) by
+Eric Arazo, Diego Ortega, Paul Albert, Noel O'Connor, and Kevin McGuinness. Posted to OpenReview in 2020.
+
+## Hyperparameters
+
+- `ratio` - The ratio of the scaled learning rate schedule to the full learning rate schedule. For example, a ratio
+of 0.8 would train for 80% as many steps as the original schedule.
+
+## Example Effects
+
+Changing the length of training will affect the final accuracy of the model. For example, training ResNet-50 on
+ImageNet for the standard schedule in the `composer` library leads to final validation accuracy of 76.6%, while
+using scale schedule with a ratio of 0.5 leads to final validation accuracy of 75.6%. Training for longer can lead
+to diminishing returns or even overfitting and worse validation accuracy. In general, the cost of training is
+proportional to the length of training when using scale schedule (assuming all other techniques, such as progressive
+resizing, have their schedules scaled accordingly).
+
+```{note}
+The warmup periods of schedulers are not scaled by the scale schedule ratio.
+```
+
+## Implementation Details
+
+Scale schedule is implemented as part of the {class}`~.Trainer` via the `scale_schedule_ratio` argument.
+The trainer will scale the ``max_duration`` by the ``scale_schedule_ratio``, and also adjust non-warmup milestones
+for the learning rate schedulers.
+
+Scale schedule supports all Composer Schedulers:
+
+```{eval-rst}
+.. currentmodule:: composer.optim.scheduler
+.. autosummary::
+    :nosignatures:
+
+    StepScheduler
+    MultiStepScheduler
+    MultiStepWithWarmupScheduler
+    ConstantScheduler
+    LinearScheduler
+    LinearWithWarmupScheduler
+    ExponentialScheduler
+    CosineAnnealingScheduler
+    CosineAnnealingWithWarmupScheduler
+    CosineAnnealingWarmRestartsScheduler
+    PolynomialScheduler
+```
+
+```{eval-rst}
+.. seealso:: The :ref:`Scheduling Guide <Composer Schedulers>` for more information about Composer Schedulers.
+```
+
+Scale schedule also supports the following PyTorch Schedulers:
+* {class}`~torch.optim.lr_scheduler.StepLR`
+* {class}`~torch.optim.lr_scheduler.MultiStepLR`
+* {class}`~torch.optim.lr_scheduler.ExponentialLR`
+* {class}`~torch.optim.lr_scheduler.CosineAnnealingLR`
+* {class}`~torch.optim.lr_scheduler.CosineAnnealingWarmRestarts`
+
+
+For example, the code below will scale the training time by half
+(to 10 epochs) and also scale the learning rate schedule.
+
+```{eval-rst}
+.. testcode::
+
+    from composer import Trainer
+    from composer.optim.scheduler import MultiStepScheduler
+
+    trainer = Trainer(
+        ...,
+        max_duration="20ep",
+        schedulers=MultiStepScheduler(milestones=["10ep", "16ep"]),
+        scale_schedule_ratio=0.5,
+    )
+
+    # or equivalently, with default SSR=1.0:
+
+    trainer = Trainer(
+        ...,
+        max_duration="10ep",
+        schedulers=MultiStepScheduler(milestones=["5ep", "8ep"])
+    )
+```
+
+For additional details on using the scale schedule ratio, see the {ref}`Scale Schedule Ratio <Scale Schedule Ratio>`
+section in the schedulers guide.
+
+## Suggested Hyperparameters
+
+The default scale schedule ratio is 1.0. For a standard maximum number of epochs (these will differ depending on the
+task), scaling down the learning rate schedule will lead to a monotonic decrease in accuracy. Increasing the scale
+schedule ratio will often improve the accuracy up to a plateau, although this leads to longer training time and added
+cost.
+
+## Composability
+
+As a general rule, scale schedule can be applied in conjunction with any method. If other methods also perform actions
+according to a schedule, it is important to modify their schedules to coincide with the altered number of epochs.
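+
+The example above uses a Composer scheduler. As a rough sketch (assuming `model` is a `ComposerModel` defined
+elsewhere, and that the optimizer and scheduler are passed through the trainer's `optimizers` and `schedulers`
+arguments), the same ratio can also be applied to one of the PyTorch schedulers listed above:
+
+```python
+import torch
+from composer import Trainer
+
+# `model` and the "..." placeholder (the usual model/dataloader arguments) are assumed
+# to be defined as in the example above.
+optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
+scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 16], gamma=0.1)
+
+trainer = Trainer(
+    ...,
+    max_duration="20ep",
+    optimizers=optimizer,
+    schedulers=scheduler,
+    scale_schedule_ratio=0.5,  # with SSR=0.5: trains for 10 epochs, milestones scaled to [5, 8]
+)
+```
+
+Because PyTorch schedulers are stepped once per epoch by default, the integer milestones above are scaled by the
+ratio in the same way as the `"10ep"` and `"16ep"` milestones in the Composer example.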
diff --git a/docs/source/trainer/schedulers.rst b/docs/source/trainer/schedulers.rst index ccb639df32..e05e3e3efa 100644 --- a/docs/source/trainer/schedulers.rst +++ b/docs/source/trainer/schedulers.rst @@ -26,6 +26,8 @@ For PyTorch schedulers, we step every epoch by default. To instead step every ba Our experiments have shown better accuracy using stepwise schedulers, so it is the recommended setting in most cases. +.. _Composer Schedulers: + Composer Schedulers ------------------- @@ -81,6 +83,8 @@ Below are the supported schedulers found at :mod:`composer.optim.scheduler`. an optimizer directly. The trainer will handle binding the optimizer when it compiles the scheduler later. +.. _Scale Schedule Ratio: + Scale Schedule Ratio -------------------- diff --git a/tests/test_load.py b/tests/test_load.py index 2f8a319c8c..8c97b40bff 100644 --- a/tests/test_load.py +++ b/tests/test_load.py @@ -9,9 +9,7 @@ import composer import composer.algorithms as algorithms import composer.trainer as trainer -from composer.algorithms import ScaleScheduleHparams from composer.core.precision import Precision -from composer.datasets.hparams import SyntheticHparamsMixin from composer.trainer.devices import CPUDeviceHparams from tests.common import configure_dataset_hparams_for_synthetic, configure_model_hparams_for_synthetic @@ -46,7 +44,6 @@ def get_model_algs(model_name: str) -> List[str]: algs.remove('squeeze_excite') algs.remove('swa') algs.remove('mixup') - algs.remove('scale_schedule') algs.remove('stochastic_depth') algs.remove('colout') algs.remove('progressive_resizing') @@ -103,26 +100,3 @@ def test_load(model_name: str): my_trainer = trainer_hparams.initialize_object() assert isinstance(my_trainer, trainer.Trainer) - - -@pytest.mark.parametrize("ssr", ["0.25", "0.33", "0.50", "0.67", "0.75", "1.00", "1.25"]) -@pytest.mark.filterwarnings("ignore:ScaleScheduleDeprecationWarning:DeprecationWarning") -def test_scale_schedule_load(ssr: str): - trainer_hparams = trainer.load("classify_mnist") - trainer_hparams.precision = Precision.FP32 - algs = [f"scale_schedule/{ssr}"] - trainer_hparams.algorithms = algorithms.load_multiple(*algs) - assert isinstance(trainer_hparams.train_dataset, SyntheticHparamsMixin) - trainer_hparams.train_subset_num_batches = 1 - trainer_hparams.train_dataset.use_synthetic = True - - assert isinstance(trainer_hparams.val_dataset, SyntheticHparamsMixin) - trainer_hparams.eval_subset_num_batches = 1 - trainer_hparams.val_dataset.use_synthetic = True - trainer_hparams.device = CPUDeviceHparams() - assert len(trainer_hparams.algorithms) == 1 - alg = trainer_hparams.algorithms[0] - assert isinstance(alg, ScaleScheduleHparams) - assert alg.ratio == float(ssr) - my_trainer = trainer_hparams.initialize_object() - assert isinstance(my_trainer, trainer.Trainer) diff --git a/tests/trainer/test_scale_schedule.py b/tests/trainer/test_scale_schedule.py index d08909ed32..efabbedae1 100644 --- a/tests/trainer/test_scale_schedule.py +++ b/tests/trainer/test_scale_schedule.py @@ -8,10 +8,7 @@ from torch.optim import Optimizer from torch.optim.lr_scheduler import ExponentialLR -from composer.algorithms import ScaleScheduleHparams -from composer.core import State -from composer.core.callback import Callback -from composer.core.time import TimeUnit +from composer.core import Callback, State, TimeUnit from composer.core.types import PyTorchScheduler from composer.loggers.logger import Logger from composer.optim import MultiStepSchedulerHparams, SGDHparams @@ -99,17 +96,12 @@ def fit_start(self, state: 
State, logger: Logger) -> None: @pytest.mark.parametrize('ssr', [0.5, 0.75, 1.0]) -@pytest.mark.parametrize('use_algorithm', [ - False, - pytest.param(True, marks=pytest.mark.filterwarnings(r"ignore:.*ScaleScheduleDeprecationWarning.*")), -]) class TestScaleScheduleTrainer(): @pytest.mark.filterwarnings(r"ignore:.*Detected call of \`lr_schedule.*:UserWarning") def test_epochs_scaled( self, ssr: float, - use_algorithm: bool, composer_trainer_hparams: TrainerHparams, ): @@ -117,10 +109,8 @@ def test_epochs_scaled( composer_trainer_hparams.max_duration = '10ep' composer_trainer_hparams.schedulers = [MultiStepSchedulerHparams(milestones=['30ba', '50ba'], gamma=0.1)] - if use_algorithm: - composer_trainer_hparams.algorithms = [ScaleScheduleHparams(ratio=ssr)] - else: - composer_trainer_hparams.scale_schedule_ratio = ssr + composer_trainer_hparams.scale_schedule_ratio = ssr + trainer = composer_trainer_hparams.initialize_object() trainer = composer_trainer_hparams.initialize_object() trainer.state.callbacks.append(CheckScaleSchedule(ssr))