From e97d0c0b105feb365ab4450a7ce1f5919cdbfc6c Mon Sep 17 00:00:00 2001
From: Mateusz Baran <mateuszbaran89@gmail.com>
Date: Fri, 10 May 2024 12:00:02 +0200
Subject: [PATCH] `:reinitialize_direction_update` for quasi-Newton (#388)

* :reinitialize_direction_update for quasi-Newton

* properly initialize limited memory qN

* Manopt.initialize_update! docs

* bump tolerance

* update changelog

* change default nondescent behavior and improve coverage

* more details in changelog

* add note to changelog
---
 Changelog.md                              | 15 +++++++
 Project.toml                              |  2 +-
 docs/src/solvers/quasi_Newton.md          |  1 +
 src/plans/quasi_newton_plan.jl            | 52 +++++++++++++++++------
 src/solvers/quasi_Newton.jl               | 18 +++++---
 test/solvers/test_convex_bundle_method.jl |  2 +-
 test/solvers/test_quasi_Newton.jl         | 38 ++++++++++++++---
 7 files changed, 101 insertions(+), 27 deletions(-)

diff --git a/Changelog.md b/Changelog.md
index 2e59dc09b7..7b9d18b3e4 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -5,6 +5,21 @@ All notable Changes to the Julia package `Manopt.jl` will be documented in this
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.4.63] unreleased
+
+### Added
+
+* `:reinitialize_direction_update` option for quasi-Newton behavior when the direction is not a descent one. It is now the new default for `QuasiNewtonState`.
+* Quasi-Newton direction update rules are now initialized upon start of the solver with the new internal function `initialize_update!`.
+
+### Fixed
+
+* ALM and EPM no longer keep a part of the quasi-Newton subsolver state between runs.
+
+### Changed
+
+* Quasi-Newton solvers: `:reinitialize_direction_update` is the new default behavior in case of detection of non-descent direction instead of `:step_towards_negative_gradient`. `:step_towards_negative_gradient` is still available when explicitly set using the `nondescent_direction_behavior` keyword argument.
+
 ## [0.4.62] May 3, 2024
 
 ### Changed
diff --git a/Project.toml b/Project.toml
index 997993e86d..ffbdcda676 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "Manopt"
 uuid = "0fc0a36d-df90-57f3-8f93-d78a9fc72bb5"
 authors = ["Ronny Bergmann <manopt@ronnybergmann.net>"]
-version = "0.4.62"
+version = "0.4.63"
 
 [deps]
 ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4"
diff --git a/docs/src/solvers/quasi_Newton.md b/docs/src/solvers/quasi_Newton.md
index 76522639d8..a32809043b 100644
--- a/docs/src/solvers/quasi_Newton.md
+++ b/docs/src/solvers/quasi_Newton.md
@@ -86,6 +86,7 @@ AbstractQuasiNewtonDirectionUpdate
 QuasiNewtonMatrixDirectionUpdate
 QuasiNewtonLimitedMemoryDirectionUpdate
 QuasiNewtonCautiousDirectionUpdate
+Manopt.initialize_update!
 ```
 
 ## Hessian update rules
diff --git a/src/plans/quasi_newton_plan.jl b/src/plans/quasi_newton_plan.jl
index 1642c9fa27..98588086d2 100644
--- a/src/plans/quasi_newton_plan.jl
+++ b/src/plans/quasi_newton_plan.jl
@@ -10,6 +10,13 @@ abstract type AbstractQuasiNewtonDirectionUpdate end
 
 get_message(::AbstractQuasiNewtonDirectionUpdate) = ""
 
+"""
+    initialize_update!(s::AbstractQuasiNewtonDirectionUpdate)
+
+Initialize direction update. By default no change is made.
+"""
+initialize_update!(s::AbstractQuasiNewtonDirectionUpdate) = s
+
 @doc raw"""
     AbstractQuasiNewtonUpdateRule
 
@@ -413,6 +420,11 @@ function (d::QuasiNewtonMatrixDirectionUpdate{T})(
     get_vector!(M, r, p, -d.matrix \ get_coordinates(M, p, X, d.basis), d.basis)
     return r
 end
+function initialize_update!(d::QuasiNewtonMatrixDirectionUpdate)
+    copyto!(d.matrix, I)
+    return d
+end
+
 @doc raw"""
     QuasiNewtonLimitedMemoryDirectionUpdate <: AbstractQuasiNewtonDirectionUpdate
 
@@ -584,6 +596,19 @@ function (d::QuasiNewtonLimitedMemoryDirectionUpdate{InverseBFGS})(r, mp, st)
     return r
 end
 
+"""
+    initialize_update!(d::QuasiNewtonLimitedMemoryDirectionUpdate)
+
+Initialize the limited memory direction update by emptying the memory buffers.
+"""
+function initialize_update!(d::QuasiNewtonLimitedMemoryDirectionUpdate)
+    empty!(d.memory_s)
+    empty!(d.memory_y)
+    fill!(d.ρ, 0)
+    fill!(d.ξ, 0)
+    return d
+end
+
 @doc raw"""
     QuasiNewtonCautiousDirectionUpdate <: AbstractQuasiNewtonDirectionUpdate
 
@@ -622,8 +647,8 @@ taking into account that the corresponding step size is chosen.
 
 # Constructor
 
-    QuasiNewtonCautiousDirectionUpdate(U::QuasiNewtonMatrixDirectionUpdate; θ = x -> x)
-    QuasiNewtonCautiousDirectionUpdate(U::QuasiNewtonLimitedMemoryDirectionUpdate; θ = x -> x)
+    QuasiNewtonCautiousDirectionUpdate(U::QuasiNewtonMatrixDirectionUpdate; θ = identity)
+    QuasiNewtonCautiousDirectionUpdate(U::QuasiNewtonLimitedMemoryDirectionUpdate; θ = identity)
 
 Generate a cautious update for either a matrix based or a limited memory based update rule.
 
@@ -632,22 +657,17 @@ Generate a cautious update for either a matrix based or a limited memory based u
 [`QuasiNewtonMatrixDirectionUpdate`](@ref)
 [`QuasiNewtonLimitedMemoryDirectionUpdate`](@ref)
 """
-mutable struct QuasiNewtonCautiousDirectionUpdate{U} <:
+mutable struct QuasiNewtonCautiousDirectionUpdate{U,Tθ} <:
                AbstractQuasiNewtonDirectionUpdate where {
-    U<:Union{QuasiNewtonMatrixDirectionUpdate,QuasiNewtonLimitedMemoryDirectionUpdate{T}}
-} where {T<:AbstractQuasiNewtonUpdateRule}
+    U<:Union{QuasiNewtonMatrixDirectionUpdate,QuasiNewtonLimitedMemoryDirectionUpdate}
+}
     update::U
-    θ::Function
+    θ::Tθ
 end
 function QuasiNewtonCautiousDirectionUpdate(
-    update::U; θ::Function=x -> x
-) where {
-    U<:Union{
-        QuasiNewtonMatrixDirectionUpdate,
-        QuasiNewtonLimitedMemoryDirectionUpdate{T} where T<:AbstractQuasiNewtonUpdateRule,
-    },
-}
-    return QuasiNewtonCautiousDirectionUpdate{U}(update, θ)
+    update::U; θ::Function=identity
+) where {U<:Union{QuasiNewtonMatrixDirectionUpdate,QuasiNewtonLimitedMemoryDirectionUpdate}}
+    return QuasiNewtonCautiousDirectionUpdate{U,typeof(θ)}(update, θ)
 end
 (d::QuasiNewtonCautiousDirectionUpdate)(mp, st) = d.update(mp, st)
 (d::QuasiNewtonCautiousDirectionUpdate)(r, mp, st) = d.update(r, mp, st)
@@ -659,3 +679,7 @@ end
 function get_update_vector_transport(u::QuasiNewtonCautiousDirectionUpdate)
     return get_update_vector_transport(u.update)
 end
+function initialize_update!(d::QuasiNewtonCautiousDirectionUpdate)
+    initialize_update!(d.update)
+    return d
+end
diff --git a/src/solvers/quasi_Newton.jl b/src/solvers/quasi_Newton.jl
index 58236da7a3..db430317a7 100644
--- a/src/solvers/quasi_Newton.jl
+++ b/src/solvers/quasi_Newton.jl
@@ -83,7 +83,7 @@ function QuasiNewtonState(
         retraction_method=retraction_method,
         vector_transport_method=vector_transport_method,
     ),
-    nondescent_direction_behavior::Symbol=:step_towards_negative_gradient,
+    nondescent_direction_behavior::Symbol=:reinitialize_direction_update,
     kwargs..., # collect but ignore rest to be more tolerant
 ) where {
     P,
@@ -206,8 +206,8 @@ The ``k``th iteration consists of
   at 0 and strictly increasing at 0 for the cautious update.
 * `direction_update`:        ([`InverseBFGS`](@ref)`()`) the update rule to use.
 * `evaluation`:              ([`AllocatingEvaluation`](@ref)) specify whether the gradient works by
-   allocation (default) form `gradF(M, x)` or [`InplaceEvaluation`](@ref) in place of form `gradF!(M, X, x)`.
-* `initial_operator`:        (`Matrix{Float64}(I,n,n)`) initial matrix to use die the
+   allocation (default) form `gradF(M, p)` or [`InplaceEvaluation`](@ref) in place of form `gradF!(M, X, p)`.
+* `initial_operator`:        (`Matrix{Float64}(I, n, n)`) initial matrix to use die the
   approximation, where `n=manifold_dimension(M)`, see also `scale_initial_operator`.
 * `memory_size`:             (`20`) limited memory, number of ``s_k, y_k`` to store. Set to a negative
   value to use a full memory representation
@@ -221,10 +221,11 @@ The ``k``th iteration consists of
 * `stopping_criterion`:      ([`StopAfterIteration`](@ref)`(max(1000, memory_size)) | `[`StopWhenGradientNormLess`](@ref)`(1e-6)`)
   specify a [`StoppingCriterion`](@ref)
 * `vector_transport_method`: (`default_vector_transport_method(M, typeof(p))`) a vector transport to use.
-* `nondescent_direction_behavior`: (`:step_towards_negative_gradient`) specify how non-descent direction is handled.
+* `nondescent_direction_behavior`: (`:reinitialize_direction_update`) specify how non-descent direction is handled.
   This can be
-  * ``:step_towards_negative_gradient` – the direction is replaced with negative gradient, a message is stored.
+  * `:step_towards_negative_gradient` – the direction is replaced with negative gradient, a message is stored.
   * `:ignore` – the check is not performed, so any computed direction is accepted. No message is stored.
+  * `:reinitialize_direction_update` – discards operator state stored in direction update rules.
   * any other value performs the check, keeps the direction but stores a message.
   A stored message can be displayed using [`DebugMessages`](@ref).
 
@@ -370,6 +371,7 @@ function initialize_solver!(amp::AbstractManoptProblem, qns::QuasiNewtonState)
     get_gradient!(amp, qns.X, qns.p)
     copyto!(M, qns.sk, qns.p, qns.X)
     copyto!(M, qns.yk, qns.p, qns.X)
+    initialize_update!(qns.direction_update)
     return qns
 end
 function step_solver!(mp::AbstractManoptProblem, qns::QuasiNewtonState, iter)
@@ -379,10 +381,14 @@ function step_solver!(mp::AbstractManoptProblem, qns::QuasiNewtonState, iter)
     if !(qns.nondescent_direction_behavior === :ignore)
         qns.nondescent_direction_value = real(inner(M, qns.p, qns.η, qns.X))
         if qns.nondescent_direction_value > 0
-            if qns.nondescent_direction_behavior === :step_towards_negative_gradient
+            if qns.nondescent_direction_behavior === :step_towards_negative_gradient ||
+                qns.nondescent_direction_behavior === :reinitialize_direction_update
                 copyto!(M, qns.η, qns.X)
                 qns.η .*= -1
             end
+            if qns.nondescent_direction_behavior === :reinitialize_direction_update
+                initialize_update!(qns.direction_update)
+            end
         end
     end
     α = qns.stepsize(mp, qns, iter, qns.η)
diff --git a/test/solvers/test_convex_bundle_method.jl b/test/solvers/test_convex_bundle_method.jl
index 90c6fa0c55..05e12862a0 100644
--- a/test/solvers/test_convex_bundle_method.jl
+++ b/test/solvers/test_convex_bundle_method.jl
@@ -166,7 +166,7 @@ using Manopt: estimate_sectional_curvature, ζ_1, ζ_2, close_point
         )
         q = get_solver_result(cbm_s)
         m = median(M, data)
-        @test distance(M, q, m) < 1.5e-2 #with default params this is not very precise
+        @test distance(M, q, m) < 2e-2 #with default params this is not very precise
         # test the other stopping criterion mode
         q2 = convex_bundle_method(
             M,
diff --git a/test/solvers/test_quasi_Newton.jl b/test/solvers/test_quasi_Newton.jl
index a0995c32d0..f6bb37e678 100644
--- a/test/solvers/test_quasi_Newton.jl
+++ b/test/solvers/test_quasi_Newton.jl
@@ -1,10 +1,15 @@
 using Manopt, Manifolds, Test
 using LinearAlgebra: I, eigvecs, tr, Diagonal
 
-struct QuasiNewtonGradientDirectionUpdate{VT<:AbstractVectorTransportMethod} <:
-       AbstractQuasiNewtonDirectionUpdate
+mutable struct QuasiNewtonGradientDirectionUpdate{VT<:AbstractVectorTransportMethod} <:
+               AbstractQuasiNewtonDirectionUpdate
     vector_transport_method::VT
+    num_times_init::Int
+end
+function QuasiNewtonGradientDirectionUpdate(vtm::AbstractVectorTransportMethod)
+    return QuasiNewtonGradientDirectionUpdate{typeof(vtm)}(vtm, 0)
 end
+
 function (d::QuasiNewtonGradientDirectionUpdate)(mp, st)
     return get_gradient(st)
 end
@@ -12,6 +17,15 @@ function (d::QuasiNewtonGradientDirectionUpdate)(r, mp, st)
     r .= get_gradient(st)
     return r
 end
+function Manopt.initialize_update!(d::QuasiNewtonGradientDirectionUpdate)
+    d.num_times_init += 1
+    return d
+end
+
+struct QuasiNewtonTestDirectionUpdate{VT<:AbstractVectorTransportMethod} <:
+       AbstractQuasiNewtonDirectionUpdate
+    vector_transport_method::VT
+end
 
 @testset "Riemannian quasi-Newton Methods" begin
     @testset "Show & Status" begin
@@ -141,6 +155,8 @@ end
                 @test norm(x_direction - x_solution) ≈ 0 atol = 10.0^(-14)
             end
         end
+        tdu = QuasiNewtonTestDirectionUpdate(ParallelTransport())
+        @test Manopt.initialize_update!(tdu) === tdu
     end
 
     @testset "Rayleigh Quotient Minimization" begin
@@ -347,9 +363,9 @@ end
         fc(::Euclidean, p) = real(p' * A * p)
         grad_fc(::Euclidean, p) = 2 * A * p
         p0 = [2.0, 1 + im]
-        @test_logs (:info,) set_manopt_parameter!(:Mode, "Tutorial")
+        @test_logs (:info,) Manopt.set_manopt_parameter!(:Mode, "Tutorial")
         p4 = quasi_Newton(M, fc, grad_fc, p0; stopoing_criterion=StopAfterIteration(3))
-        @test_logs (:info,) set_manopt_parameter!(:Mode, "")
+        @test_logs (:info,) Manopt.set_manopt_parameter!(:Mode, "")
         @test fc(M, p4) ≤ fc(M, p0)
     end
 
@@ -383,6 +399,7 @@ end
             M,
             copy(M, p);
             direction_update=QuasiNewtonGradientDirectionUpdate(ParallelTransport()),
+            nondescent_direction_behavior=:step_towards_negative_gradient,
         )
         dqns = DebugSolverState(qns, DebugMessages(:Warning, :Once))
         @test_logs (
@@ -395,12 +412,23 @@ end
 
         qns = QuasiNewtonState(
             M,
-            p;
+            copy(M, p);
             direction_update=QuasiNewtonGradientDirectionUpdate(ParallelTransport()),
             nondescent_direction_behavior=:step_towards_negative_gradient,
         )
 
         @test_nowarn solve!(mp, qns)
+        @test qns.direction_update.num_times_init == 1
+
+        qns = QuasiNewtonState(
+            M,
+            copy(M, p);
+            direction_update=QuasiNewtonGradientDirectionUpdate(ParallelTransport()),
+            nondescent_direction_behavior=:reinitialize_direction_update,
+        )
+
+        @test_nowarn solve!(mp, qns)
+        @test qns.direction_update.num_times_init == 1001
     end
 
     @testset "A Circle example" begin