diff --git a/src/lightning/pytorch/CHANGELOG.md b/src/lightning/pytorch/CHANGELOG.md
index 863a3a4a7e939..86ef3a9c91a4f 100644
--- a/src/lightning/pytorch/CHANGELOG.md
+++ b/src/lightning/pytorch/CHANGELOG.md
@@ -49,6 +49,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Fixed missing reset when `ModelPruning` is applied with lottery ticket hypothesis ([#21191](https://github.com/Lightning-AI/pytorch-lightning/pull/21191))
 
+- Fixed LR Finder: compute gradients w.r.t. `log10(lr)` in exponential mode ([#21171](https://github.com/Lightning-AI/pytorch-lightning/pull/21171))
+
+
 - Fixed preventing recursive symlink creation when `save_last='link'` and `save_top_k=-1` ([#21186](https://github.com/Lightning-AI/pytorch-lightning/pull/21186))
 
 
diff --git a/src/lightning/pytorch/tuner/lr_finder.py b/src/lightning/pytorch/tuner/lr_finder.py
index 5ef35dcd6d992..47a25555bcb73 100644
--- a/src/lightning/pytorch/tuner/lr_finder.py
+++ b/src/lightning/pytorch/tuner/lr_finder.py
@@ -187,7 +187,8 @@ def suggestion(self, skip_begin: int = 10, skip_end: int = 1) -> Optional[float]
             self._optimal_idx = None
             return None
 
-        gradients = torch.gradient(losses, spacing=[lrs])[0]  # Compute the gradient of losses w.r.t. learning rates
+        spacing = [torch.log10(lrs)] if self.mode == "exponential" else [lrs]
+        gradients = torch.gradient(losses, spacing=spacing)[0]  # Gradient of losses w.r.t. lr or log10(lr)
         min_grad = torch.argmin(gradients).item()
         all_losses_idx = torch.arange(len(self.results["loss"]))
         idx_non_skipped = all_losses_idx[skip_begin:-skip_end]
diff --git a/tests/tests_pytorch/tuner/test_lr_finder.py b/tests/tests_pytorch/tuner/test_lr_finder.py
index 81352ebe256ef..0e7f83fb5199e 100644
--- a/tests/tests_pytorch/tuner/test_lr_finder.py
+++ b/tests/tests_pytorch/tuner/test_lr_finder.py
@@ -606,18 +606,21 @@ def configure_optimizers(self):
 
 
 def test_gradient_correctness():
-    """Test that torch.gradient uses correct spacing parameter."""
+    """Test that gradients are computed with log-spaced learning rates in exponential mode."""
     lr_finder = _LRFinder(mode="exponential", lr_min=1e-6, lr_max=1e-1, num_training=20)
 
-    # Synthetic example
-    lrs = torch.linspace(0, 2 * math.pi, steps=1000)
+    lrs = torch.logspace(-6, -1, steps=1000)
     losses = torch.sin(lrs)
     lr_finder.results = {"lr": lrs.tolist(), "loss": losses.tolist()}
 
-    # Test the suggestion method
     suggestion = lr_finder.suggestion(skip_begin=2, skip_end=2)
     assert suggestion is not None
-    assert abs(suggestion - math.pi) < 1e-2, "Suggestion should be close to pi for this synthetic example"
+
+    losses_t = torch.tensor(lr_finder.results["loss"][2:-2])
+    lrs_t = torch.tensor(lr_finder.results["lr"][2:-2])
+    gradients = torch.gradient(losses_t, spacing=[torch.log10(lrs_t)])[0]
+    expected_idx = torch.argmin(gradients).item() + 2
+    assert math.isclose(suggestion, lrs[expected_idx].item())
 
 
 def test_lr_finder_callback_applies_lr_after_restore(tmp_path):
@@ -738,15 +741,12 @@ def __init__(self):
     lrs_filtered = lrs[is_finite]
 
     if len(losses_filtered) >= 2:
-        # Test that gradient computation works and produces finite results
-        gradients = torch.gradient(losses_filtered, spacing=[lrs_filtered])[0]
+        spacing = [torch.log10(lrs_filtered)] if mode == "exponential" else [lrs_filtered]
+        gradients = torch.gradient(losses_filtered, spacing=spacing)[0]
        assert torch.isfinite(gradients).all(), f"Non-finite gradients in {mode} mode"
         assert len(gradients) == len(losses_filtered)
 
-        # Verify gradients with spacing differ from gradients without spacing
         gradients_no_spacing = torch.gradient(losses_filtered)[0]
-
-        # For exponential mode, these should definitely be different, for linear mode, they might be similar
         if mode == "exponential":
             assert not torch.allclose(gradients, gradients_no_spacing, rtol=0.1), (
                 "Gradients should differ significantly in exponential mode when using proper spacing"
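
Reviewer note (not part of the patch): the suggestion heuristic picks the point of steepest loss descent via `torch.argmin(gradients)`. In exponential mode the sweep is uniform in `log10(lr)`, so differencing against the raw `lr` values divides by ever-smaller spacings at the low end, which can skew the argmin toward tiny learning rates. Below is a minimal standalone sketch of that effect, assuming only `torch`; the loss curve and variable names are made up for illustration.

    import torch

    # Exponentially spaced learning rates, as the LR finder generates in "exponential" mode.
    lrs = torch.logspace(-6, -1, steps=100)
    log_lrs = torch.log10(lrs)

    # Hypothetical smooth loss curve with a single dip around lr ~ 3e-4 (illustrative only).
    losses = 1.0 - torch.exp(-((log_lrs + 3.5) ** 2))

    # Old behaviour: finite differences against raw lr; the tiny spacings at the low end
    # inflate gradient magnitudes there and pull the argmin toward smaller learning rates.
    grad_raw = torch.gradient(losses, spacing=[lrs])[0]

    # Patched behaviour: finite differences against log10(lr), the axis the sweep is uniform on.
    grad_log = torch.gradient(losses, spacing=[log_lrs])[0]

    print("steepest descent (raw lr spacing):  ", lrs[torch.argmin(grad_raw)].item())
    print("steepest descent (log10 lr spacing):", lrs[torch.argmin(grad_log)].item())

With a curve like this the two spacings can disagree on where the descent is steepest, which is the discrepancy the updated tests guard against.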