Skip to content

:reinitialize_direction_update for quasi-Newton #388

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
May 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,21 @@ All notable Changes to the Julia package `Manopt.jl` will be documented in this
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.4.63] unreleased

### Added

* `:reinitialize_direction_update` option for quasi-Newton behavior when the direction is not a descent one. It is now the new default for `QuasiNewtonState`.
* Quasi-Newton direction update rules are now initialized upon start of the solver with the new internal function `initialize_update!`.

### Fixed

* ALM and EPM no longer keep a part of the quasi-Newton subsolver state between runs.

### Changed

* Quasi-Newton solvers: `:reinitialize_direction_update` is the new default behavior in case of detection of non-descent direction instead of `:step_towards_negative_gradient`. `:step_towards_negative_gradient` is still available when explicitly set using the `nondescent_direction_behavior` keyword argument.

## [0.4.62] May 3, 2024

### Changed
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Manopt"
uuid = "0fc0a36d-df90-57f3-8f93-d78a9fc72bb5"
authors = ["Ronny Bergmann <[email protected]>"]
version = "0.4.62"
version = "0.4.63"

[deps]
ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4"
Expand Down
1 change: 1 addition & 0 deletions docs/src/solvers/quasi_Newton.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ AbstractQuasiNewtonDirectionUpdate
QuasiNewtonMatrixDirectionUpdate
QuasiNewtonLimitedMemoryDirectionUpdate
QuasiNewtonCautiousDirectionUpdate
Manopt.initialize_update!
```

## Hessian update rules
Expand Down
52 changes: 38 additions & 14 deletions src/plans/quasi_newton_plan.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,13 @@ abstract type AbstractQuasiNewtonDirectionUpdate end

get_message(::AbstractQuasiNewtonDirectionUpdate) = ""

"""
initialize_update!(s::AbstractQuasiNewtonDirectionUpdate)

Initialize direction update. By default no change is made.
"""
initialize_update!(s::AbstractQuasiNewtonDirectionUpdate) = s

@doc raw"""
AbstractQuasiNewtonUpdateRule

Expand Down Expand Up @@ -413,6 +420,11 @@ function (d::QuasiNewtonMatrixDirectionUpdate{T})(
get_vector!(M, r, p, -d.matrix \ get_coordinates(M, p, X, d.basis), d.basis)
return r
end
function initialize_update!(d::QuasiNewtonMatrixDirectionUpdate)
copyto!(d.matrix, I)
return d
end

@doc raw"""
QuasiNewtonLimitedMemoryDirectionUpdate <: AbstractQuasiNewtonDirectionUpdate

Expand Down Expand Up @@ -584,6 +596,19 @@ function (d::QuasiNewtonLimitedMemoryDirectionUpdate{InverseBFGS})(r, mp, st)
return r
end

"""
initialize_update!(d::QuasiNewtonLimitedMemoryDirectionUpdate)

Initialize the limited memory direction update by emptying the memory buffers.
"""
function initialize_update!(d::QuasiNewtonLimitedMemoryDirectionUpdate)
empty!(d.memory_s)
empty!(d.memory_y)
fill!(d.ρ, 0)
fill!(d.ξ, 0)
return d
end

@doc raw"""
QuasiNewtonCautiousDirectionUpdate <: AbstractQuasiNewtonDirectionUpdate

Expand Down Expand Up @@ -622,8 +647,8 @@ taking into account that the corresponding step size is chosen.

# Constructor

QuasiNewtonCautiousDirectionUpdate(U::QuasiNewtonMatrixDirectionUpdate; θ = x -> x)
QuasiNewtonCautiousDirectionUpdate(U::QuasiNewtonLimitedMemoryDirectionUpdate; θ = x -> x)
QuasiNewtonCautiousDirectionUpdate(U::QuasiNewtonMatrixDirectionUpdate; θ = identity)
QuasiNewtonCautiousDirectionUpdate(U::QuasiNewtonLimitedMemoryDirectionUpdate; θ = identity)

Generate a cautious update for either a matrix based or a limited memory based update rule.

Expand All @@ -632,22 +657,17 @@ Generate a cautious update for either a matrix based or a limited memory based u
[`QuasiNewtonMatrixDirectionUpdate`](@ref)
[`QuasiNewtonLimitedMemoryDirectionUpdate`](@ref)
"""
mutable struct QuasiNewtonCautiousDirectionUpdate{U} <:
mutable struct QuasiNewtonCautiousDirectionUpdate{U,Tθ} <:
AbstractQuasiNewtonDirectionUpdate where {
U<:Union{QuasiNewtonMatrixDirectionUpdate,QuasiNewtonLimitedMemoryDirectionUpdate{T}}
} where {T<:AbstractQuasiNewtonUpdateRule}
U<:Union{QuasiNewtonMatrixDirectionUpdate,QuasiNewtonLimitedMemoryDirectionUpdate}
}
update::U
θ::Function
θ::
end
function QuasiNewtonCautiousDirectionUpdate(
update::U; θ::Function=x -> x
) where {
U<:Union{
QuasiNewtonMatrixDirectionUpdate,
QuasiNewtonLimitedMemoryDirectionUpdate{T} where T<:AbstractQuasiNewtonUpdateRule,
},
}
return QuasiNewtonCautiousDirectionUpdate{U}(update, θ)
update::U; θ::Function=identity
) where {U<:Union{QuasiNewtonMatrixDirectionUpdate,QuasiNewtonLimitedMemoryDirectionUpdate}}
return QuasiNewtonCautiousDirectionUpdate{U,typeof(θ)}(update, θ)
end
(d::QuasiNewtonCautiousDirectionUpdate)(mp, st) = d.update(mp, st)
(d::QuasiNewtonCautiousDirectionUpdate)(r, mp, st) = d.update(r, mp, st)
Expand All @@ -659,3 +679,7 @@ end
function get_update_vector_transport(u::QuasiNewtonCautiousDirectionUpdate)
return get_update_vector_transport(u.update)
end
function initialize_update!(d::QuasiNewtonCautiousDirectionUpdate)
initialize_update!(d.update)
return d
end
18 changes: 12 additions & 6 deletions src/solvers/quasi_Newton.jl
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ function QuasiNewtonState(
retraction_method=retraction_method,
vector_transport_method=vector_transport_method,
),
nondescent_direction_behavior::Symbol=:step_towards_negative_gradient,
nondescent_direction_behavior::Symbol=:reinitialize_direction_update,
kwargs..., # collect but ignore rest to be more tolerant
) where {
P,
Expand Down Expand Up @@ -206,8 +206,8 @@ The ``k``th iteration consists of
at 0 and strictly increasing at 0 for the cautious update.
* `direction_update`: ([`InverseBFGS`](@ref)`()`) the update rule to use.
* `evaluation`: ([`AllocatingEvaluation`](@ref)) specify whether the gradient works by
allocation (default) form `gradF(M, x)` or [`InplaceEvaluation`](@ref) in place of form `gradF!(M, X, x)`.
* `initial_operator`: (`Matrix{Float64}(I,n,n)`) initial matrix to use die the
allocation (default) form `gradF(M, p)` or [`InplaceEvaluation`](@ref) in place of form `gradF!(M, X, p)`.
* `initial_operator`: (`Matrix{Float64}(I, n, n)`) initial matrix to use die the
approximation, where `n=manifold_dimension(M)`, see also `scale_initial_operator`.
* `memory_size`: (`20`) limited memory, number of ``s_k, y_k`` to store. Set to a negative
value to use a full memory representation
Expand All @@ -221,10 +221,11 @@ The ``k``th iteration consists of
* `stopping_criterion`: ([`StopAfterIteration`](@ref)`(max(1000, memory_size)) | `[`StopWhenGradientNormLess`](@ref)`(1e-6)`)
specify a [`StoppingCriterion`](@ref)
* `vector_transport_method`: (`default_vector_transport_method(M, typeof(p))`) a vector transport to use.
* `nondescent_direction_behavior`: (`:step_towards_negative_gradient`) specify how non-descent direction is handled.
* `nondescent_direction_behavior`: (`:reinitialize_direction_update`) specify how non-descent direction is handled.
This can be
* ``:step_towards_negative_gradient` – the direction is replaced with negative gradient, a message is stored.
* `:step_towards_negative_gradient` – the direction is replaced with negative gradient, a message is stored.
* `:ignore` – the check is not performed, so any computed direction is accepted. No message is stored.
* `:reinitialize_direction_update` – discards operator state stored in direction update rules.
* any other value performs the check, keeps the direction but stores a message.
A stored message can be displayed using [`DebugMessages`](@ref).

Expand Down Expand Up @@ -370,6 +371,7 @@ function initialize_solver!(amp::AbstractManoptProblem, qns::QuasiNewtonState)
get_gradient!(amp, qns.X, qns.p)
copyto!(M, qns.sk, qns.p, qns.X)
copyto!(M, qns.yk, qns.p, qns.X)
initialize_update!(qns.direction_update)
return qns
end
function step_solver!(mp::AbstractManoptProblem, qns::QuasiNewtonState, iter)
Expand All @@ -379,10 +381,14 @@ function step_solver!(mp::AbstractManoptProblem, qns::QuasiNewtonState, iter)
if !(qns.nondescent_direction_behavior === :ignore)
qns.nondescent_direction_value = real(inner(M, qns.p, qns.η, qns.X))
if qns.nondescent_direction_value > 0
if qns.nondescent_direction_behavior === :step_towards_negative_gradient
if qns.nondescent_direction_behavior === :step_towards_negative_gradient ||
qns.nondescent_direction_behavior === :reinitialize_direction_update
copyto!(M, qns.η, qns.X)
qns.η .*= -1
end
if qns.nondescent_direction_behavior === :reinitialize_direction_update
initialize_update!(qns.direction_update)
end
end
end
α = qns.stepsize(mp, qns, iter, qns.η)
Expand Down
2 changes: 1 addition & 1 deletion test/solvers/test_convex_bundle_method.jl
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ using Manopt: estimate_sectional_curvature, ζ_1, ζ_2, close_point
)
q = get_solver_result(cbm_s)
m = median(M, data)
@test distance(M, q, m) < 1.5e-2 #with default params this is not very precise
@test distance(M, q, m) < 2e-2 #with default params this is not very precise
# test the other stopping criterion mode
q2 = convex_bundle_method(
M,
Expand Down
38 changes: 33 additions & 5 deletions test/solvers/test_quasi_Newton.jl
Original file line number Diff line number Diff line change
@@ -1,17 +1,31 @@
using Manopt, Manifolds, Test
using LinearAlgebra: I, eigvecs, tr, Diagonal

struct QuasiNewtonGradientDirectionUpdate{VT<:AbstractVectorTransportMethod} <:
AbstractQuasiNewtonDirectionUpdate
mutable struct QuasiNewtonGradientDirectionUpdate{VT<:AbstractVectorTransportMethod} <:
AbstractQuasiNewtonDirectionUpdate
vector_transport_method::VT
num_times_init::Int
end
function QuasiNewtonGradientDirectionUpdate(vtm::AbstractVectorTransportMethod)
return QuasiNewtonGradientDirectionUpdate{typeof(vtm)}(vtm, 0)
end

function (d::QuasiNewtonGradientDirectionUpdate)(mp, st)
return get_gradient(st)
end
function (d::QuasiNewtonGradientDirectionUpdate)(r, mp, st)
r .= get_gradient(st)
return r
end
function Manopt.initialize_update!(d::QuasiNewtonGradientDirectionUpdate)
d.num_times_init += 1
return d
end

struct QuasiNewtonTestDirectionUpdate{VT<:AbstractVectorTransportMethod} <:
AbstractQuasiNewtonDirectionUpdate
vector_transport_method::VT
end

@testset "Riemannian quasi-Newton Methods" begin
@testset "Show & Status" begin
Expand Down Expand Up @@ -141,6 +155,8 @@ end
@test norm(x_direction - x_solution) ≈ 0 atol = 10.0^(-14)
end
end
tdu = QuasiNewtonTestDirectionUpdate(ParallelTransport())
@test Manopt.initialize_update!(tdu) === tdu
end

@testset "Rayleigh Quotient Minimization" begin
Expand Down Expand Up @@ -347,9 +363,9 @@ end
fc(::Euclidean, p) = real(p' * A * p)
grad_fc(::Euclidean, p) = 2 * A * p
p0 = [2.0, 1 + im]
@test_logs (:info,) set_manopt_parameter!(:Mode, "Tutorial")
@test_logs (:info,) Manopt.set_manopt_parameter!(:Mode, "Tutorial")
p4 = quasi_Newton(M, fc, grad_fc, p0; stopoing_criterion=StopAfterIteration(3))
@test_logs (:info,) set_manopt_parameter!(:Mode, "")
@test_logs (:info,) Manopt.set_manopt_parameter!(:Mode, "")
@test fc(M, p4) ≤ fc(M, p0)
end

Expand Down Expand Up @@ -383,6 +399,7 @@ end
M,
copy(M, p);
direction_update=QuasiNewtonGradientDirectionUpdate(ParallelTransport()),
nondescent_direction_behavior=:step_towards_negative_gradient,
)
dqns = DebugSolverState(qns, DebugMessages(:Warning, :Once))
@test_logs (
Expand All @@ -395,12 +412,23 @@ end

qns = QuasiNewtonState(
M,
p;
copy(M, p);
direction_update=QuasiNewtonGradientDirectionUpdate(ParallelTransport()),
nondescent_direction_behavior=:step_towards_negative_gradient,
)

@test_nowarn solve!(mp, qns)
@test qns.direction_update.num_times_init == 1

qns = QuasiNewtonState(
M,
copy(M, p);
direction_update=QuasiNewtonGradientDirectionUpdate(ParallelTransport()),
nondescent_direction_behavior=:reinitialize_direction_update,
)

@test_nowarn solve!(mp, qns)
@test qns.direction_update.num_times_init == 1001
end

@testset "A Circle example" begin
Expand Down
Loading