From e97d0c0b105feb365ab4450a7ce1f5919cdbfc6c Mon Sep 17 00:00:00 2001 From: Mateusz Baran Date: Fri, 10 May 2024 12:00:02 +0200 Subject: [PATCH] `:reinitialize_direction_update` for quasi-Newton (#388) * :reinitialize_direction_update for quasi-Newton * properly initialize limited memory qN * Manopt.initialize_update! docs * bump tolerance * update changelog * change default nondescent behavior and improve coverage * more details in changelog * add note to changelog --- Changelog.md | 15 +++++++ Project.toml | 2 +- docs/src/solvers/quasi_Newton.md | 1 + src/plans/quasi_newton_plan.jl | 52 +++++++++++++++++------ src/solvers/quasi_Newton.jl | 18 +++++--- test/solvers/test_convex_bundle_method.jl | 2 +- test/solvers/test_quasi_Newton.jl | 38 ++++++++++++++--- 7 files changed, 101 insertions(+), 27 deletions(-) diff --git a/Changelog.md b/Changelog.md index 2e59dc09b7..7b9d18b3e4 100644 --- a/Changelog.md +++ b/Changelog.md @@ -5,6 +5,21 @@ All notable Changes to the Julia package `Manopt.jl` will be documented in this The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.4.63] unreleased + +### Added + +* `:reinitialize_direction_update` option for quasi-Newton behavior when the direction is not a descent one. It is now the new default for `QuasiNewtonState`. +* Quasi-Newton direction update rules are now initialized upon start of the solver with the new internal function `initialize_update!`. + +### Fixed + +* ALM and EPM no longer keep a part of the quasi-Newton subsolver state between runs. + +### Changed + +* Quasi-Newton solvers: `:reinitialize_direction_update` is the new default behavior in case of detection of non-descent direction instead of `:step_towards_negative_gradient`. `:step_towards_negative_gradient` is still available when explicitly set using the `nondescent_direction_behavior` keyword argument. + ## [0.4.62] May 3, 2024 ### Changed diff --git a/Project.toml b/Project.toml index 997993e86d..ffbdcda676 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Manopt" uuid = "0fc0a36d-df90-57f3-8f93-d78a9fc72bb5" authors = ["Ronny Bergmann "] -version = "0.4.62" +version = "0.4.63" [deps] ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4" diff --git a/docs/src/solvers/quasi_Newton.md b/docs/src/solvers/quasi_Newton.md index 76522639d8..a32809043b 100644 --- a/docs/src/solvers/quasi_Newton.md +++ b/docs/src/solvers/quasi_Newton.md @@ -86,6 +86,7 @@ AbstractQuasiNewtonDirectionUpdate QuasiNewtonMatrixDirectionUpdate QuasiNewtonLimitedMemoryDirectionUpdate QuasiNewtonCautiousDirectionUpdate +Manopt.initialize_update! ``` ## Hessian update rules diff --git a/src/plans/quasi_newton_plan.jl b/src/plans/quasi_newton_plan.jl index 1642c9fa27..98588086d2 100644 --- a/src/plans/quasi_newton_plan.jl +++ b/src/plans/quasi_newton_plan.jl @@ -10,6 +10,13 @@ abstract type AbstractQuasiNewtonDirectionUpdate end get_message(::AbstractQuasiNewtonDirectionUpdate) = "" +""" + initialize_update!(s::AbstractQuasiNewtonDirectionUpdate) + +Initialize direction update. By default no change is made. +""" +initialize_update!(s::AbstractQuasiNewtonDirectionUpdate) = s + @doc raw""" AbstractQuasiNewtonUpdateRule @@ -413,6 +420,11 @@ function (d::QuasiNewtonMatrixDirectionUpdate{T})( get_vector!(M, r, p, -d.matrix \ get_coordinates(M, p, X, d.basis), d.basis) return r end +function initialize_update!(d::QuasiNewtonMatrixDirectionUpdate) + copyto!(d.matrix, I) + return d +end + @doc raw""" QuasiNewtonLimitedMemoryDirectionUpdate <: AbstractQuasiNewtonDirectionUpdate @@ -584,6 +596,19 @@ function (d::QuasiNewtonLimitedMemoryDirectionUpdate{InverseBFGS})(r, mp, st) return r end +""" + initialize_update!(d::QuasiNewtonLimitedMemoryDirectionUpdate) + +Initialize the limited memory direction update by emptying the memory buffers. +""" +function initialize_update!(d::QuasiNewtonLimitedMemoryDirectionUpdate) + empty!(d.memory_s) + empty!(d.memory_y) + fill!(d.ρ, 0) + fill!(d.ξ, 0) + return d +end + @doc raw""" QuasiNewtonCautiousDirectionUpdate <: AbstractQuasiNewtonDirectionUpdate @@ -622,8 +647,8 @@ taking into account that the corresponding step size is chosen. # Constructor - QuasiNewtonCautiousDirectionUpdate(U::QuasiNewtonMatrixDirectionUpdate; θ = x -> x) - QuasiNewtonCautiousDirectionUpdate(U::QuasiNewtonLimitedMemoryDirectionUpdate; θ = x -> x) + QuasiNewtonCautiousDirectionUpdate(U::QuasiNewtonMatrixDirectionUpdate; θ = identity) + QuasiNewtonCautiousDirectionUpdate(U::QuasiNewtonLimitedMemoryDirectionUpdate; θ = identity) Generate a cautious update for either a matrix based or a limited memory based update rule. @@ -632,22 +657,17 @@ Generate a cautious update for either a matrix based or a limited memory based u [`QuasiNewtonMatrixDirectionUpdate`](@ref) [`QuasiNewtonLimitedMemoryDirectionUpdate`](@ref) """ -mutable struct QuasiNewtonCautiousDirectionUpdate{U} <: +mutable struct QuasiNewtonCautiousDirectionUpdate{U,Tθ} <: AbstractQuasiNewtonDirectionUpdate where { - U<:Union{QuasiNewtonMatrixDirectionUpdate,QuasiNewtonLimitedMemoryDirectionUpdate{T}} -} where {T<:AbstractQuasiNewtonUpdateRule} + U<:Union{QuasiNewtonMatrixDirectionUpdate,QuasiNewtonLimitedMemoryDirectionUpdate} +} update::U - θ::Function + θ::Tθ end function QuasiNewtonCautiousDirectionUpdate( - update::U; θ::Function=x -> x -) where { - U<:Union{ - QuasiNewtonMatrixDirectionUpdate, - QuasiNewtonLimitedMemoryDirectionUpdate{T} where T<:AbstractQuasiNewtonUpdateRule, - }, -} - return QuasiNewtonCautiousDirectionUpdate{U}(update, θ) + update::U; θ::Function=identity +) where {U<:Union{QuasiNewtonMatrixDirectionUpdate,QuasiNewtonLimitedMemoryDirectionUpdate}} + return QuasiNewtonCautiousDirectionUpdate{U,typeof(θ)}(update, θ) end (d::QuasiNewtonCautiousDirectionUpdate)(mp, st) = d.update(mp, st) (d::QuasiNewtonCautiousDirectionUpdate)(r, mp, st) = d.update(r, mp, st) @@ -659,3 +679,7 @@ end function get_update_vector_transport(u::QuasiNewtonCautiousDirectionUpdate) return get_update_vector_transport(u.update) end +function initialize_update!(d::QuasiNewtonCautiousDirectionUpdate) + initialize_update!(d.update) + return d +end diff --git a/src/solvers/quasi_Newton.jl b/src/solvers/quasi_Newton.jl index 58236da7a3..db430317a7 100644 --- a/src/solvers/quasi_Newton.jl +++ b/src/solvers/quasi_Newton.jl @@ -83,7 +83,7 @@ function QuasiNewtonState( retraction_method=retraction_method, vector_transport_method=vector_transport_method, ), - nondescent_direction_behavior::Symbol=:step_towards_negative_gradient, + nondescent_direction_behavior::Symbol=:reinitialize_direction_update, kwargs..., # collect but ignore rest to be more tolerant ) where { P, @@ -206,8 +206,8 @@ The ``k``th iteration consists of at 0 and strictly increasing at 0 for the cautious update. * `direction_update`: ([`InverseBFGS`](@ref)`()`) the update rule to use. * `evaluation`: ([`AllocatingEvaluation`](@ref)) specify whether the gradient works by - allocation (default) form `gradF(M, x)` or [`InplaceEvaluation`](@ref) in place of form `gradF!(M, X, x)`. -* `initial_operator`: (`Matrix{Float64}(I,n,n)`) initial matrix to use die the + allocation (default) form `gradF(M, p)` or [`InplaceEvaluation`](@ref) in place of form `gradF!(M, X, p)`. +* `initial_operator`: (`Matrix{Float64}(I, n, n)`) initial matrix to use die the approximation, where `n=manifold_dimension(M)`, see also `scale_initial_operator`. * `memory_size`: (`20`) limited memory, number of ``s_k, y_k`` to store. Set to a negative value to use a full memory representation @@ -221,10 +221,11 @@ The ``k``th iteration consists of * `stopping_criterion`: ([`StopAfterIteration`](@ref)`(max(1000, memory_size)) | `[`StopWhenGradientNormLess`](@ref)`(1e-6)`) specify a [`StoppingCriterion`](@ref) * `vector_transport_method`: (`default_vector_transport_method(M, typeof(p))`) a vector transport to use. -* `nondescent_direction_behavior`: (`:step_towards_negative_gradient`) specify how non-descent direction is handled. +* `nondescent_direction_behavior`: (`:reinitialize_direction_update`) specify how non-descent direction is handled. This can be - * ``:step_towards_negative_gradient` – the direction is replaced with negative gradient, a message is stored. + * `:step_towards_negative_gradient` – the direction is replaced with negative gradient, a message is stored. * `:ignore` – the check is not performed, so any computed direction is accepted. No message is stored. + * `:reinitialize_direction_update` – discards operator state stored in direction update rules. * any other value performs the check, keeps the direction but stores a message. A stored message can be displayed using [`DebugMessages`](@ref). @@ -370,6 +371,7 @@ function initialize_solver!(amp::AbstractManoptProblem, qns::QuasiNewtonState) get_gradient!(amp, qns.X, qns.p) copyto!(M, qns.sk, qns.p, qns.X) copyto!(M, qns.yk, qns.p, qns.X) + initialize_update!(qns.direction_update) return qns end function step_solver!(mp::AbstractManoptProblem, qns::QuasiNewtonState, iter) @@ -379,10 +381,14 @@ function step_solver!(mp::AbstractManoptProblem, qns::QuasiNewtonState, iter) if !(qns.nondescent_direction_behavior === :ignore) qns.nondescent_direction_value = real(inner(M, qns.p, qns.η, qns.X)) if qns.nondescent_direction_value > 0 - if qns.nondescent_direction_behavior === :step_towards_negative_gradient + if qns.nondescent_direction_behavior === :step_towards_negative_gradient || + qns.nondescent_direction_behavior === :reinitialize_direction_update copyto!(M, qns.η, qns.X) qns.η .*= -1 end + if qns.nondescent_direction_behavior === :reinitialize_direction_update + initialize_update!(qns.direction_update) + end end end α = qns.stepsize(mp, qns, iter, qns.η) diff --git a/test/solvers/test_convex_bundle_method.jl b/test/solvers/test_convex_bundle_method.jl index 90c6fa0c55..05e12862a0 100644 --- a/test/solvers/test_convex_bundle_method.jl +++ b/test/solvers/test_convex_bundle_method.jl @@ -166,7 +166,7 @@ using Manopt: estimate_sectional_curvature, ζ_1, ζ_2, close_point ) q = get_solver_result(cbm_s) m = median(M, data) - @test distance(M, q, m) < 1.5e-2 #with default params this is not very precise + @test distance(M, q, m) < 2e-2 #with default params this is not very precise # test the other stopping criterion mode q2 = convex_bundle_method( M, diff --git a/test/solvers/test_quasi_Newton.jl b/test/solvers/test_quasi_Newton.jl index a0995c32d0..f6bb37e678 100644 --- a/test/solvers/test_quasi_Newton.jl +++ b/test/solvers/test_quasi_Newton.jl @@ -1,10 +1,15 @@ using Manopt, Manifolds, Test using LinearAlgebra: I, eigvecs, tr, Diagonal -struct QuasiNewtonGradientDirectionUpdate{VT<:AbstractVectorTransportMethod} <: - AbstractQuasiNewtonDirectionUpdate +mutable struct QuasiNewtonGradientDirectionUpdate{VT<:AbstractVectorTransportMethod} <: + AbstractQuasiNewtonDirectionUpdate vector_transport_method::VT + num_times_init::Int +end +function QuasiNewtonGradientDirectionUpdate(vtm::AbstractVectorTransportMethod) + return QuasiNewtonGradientDirectionUpdate{typeof(vtm)}(vtm, 0) end + function (d::QuasiNewtonGradientDirectionUpdate)(mp, st) return get_gradient(st) end @@ -12,6 +17,15 @@ function (d::QuasiNewtonGradientDirectionUpdate)(r, mp, st) r .= get_gradient(st) return r end +function Manopt.initialize_update!(d::QuasiNewtonGradientDirectionUpdate) + d.num_times_init += 1 + return d +end + +struct QuasiNewtonTestDirectionUpdate{VT<:AbstractVectorTransportMethod} <: + AbstractQuasiNewtonDirectionUpdate + vector_transport_method::VT +end @testset "Riemannian quasi-Newton Methods" begin @testset "Show & Status" begin @@ -141,6 +155,8 @@ end @test norm(x_direction - x_solution) ≈ 0 atol = 10.0^(-14) end end + tdu = QuasiNewtonTestDirectionUpdate(ParallelTransport()) + @test Manopt.initialize_update!(tdu) === tdu end @testset "Rayleigh Quotient Minimization" begin @@ -347,9 +363,9 @@ end fc(::Euclidean, p) = real(p' * A * p) grad_fc(::Euclidean, p) = 2 * A * p p0 = [2.0, 1 + im] - @test_logs (:info,) set_manopt_parameter!(:Mode, "Tutorial") + @test_logs (:info,) Manopt.set_manopt_parameter!(:Mode, "Tutorial") p4 = quasi_Newton(M, fc, grad_fc, p0; stopoing_criterion=StopAfterIteration(3)) - @test_logs (:info,) set_manopt_parameter!(:Mode, "") + @test_logs (:info,) Manopt.set_manopt_parameter!(:Mode, "") @test fc(M, p4) ≤ fc(M, p0) end @@ -383,6 +399,7 @@ end M, copy(M, p); direction_update=QuasiNewtonGradientDirectionUpdate(ParallelTransport()), + nondescent_direction_behavior=:step_towards_negative_gradient, ) dqns = DebugSolverState(qns, DebugMessages(:Warning, :Once)) @test_logs ( @@ -395,12 +412,23 @@ end qns = QuasiNewtonState( M, - p; + copy(M, p); direction_update=QuasiNewtonGradientDirectionUpdate(ParallelTransport()), nondescent_direction_behavior=:step_towards_negative_gradient, ) @test_nowarn solve!(mp, qns) + @test qns.direction_update.num_times_init == 1 + + qns = QuasiNewtonState( + M, + copy(M, p); + direction_update=QuasiNewtonGradientDirectionUpdate(ParallelTransport()), + nondescent_direction_behavior=:reinitialize_direction_update, + ) + + @test_nowarn solve!(mp, qns) + @test qns.direction_update.num_times_init == 1001 end @testset "A Circle example" begin