New benchmark #339

Closed
wants to merge 42 commits into from

Commits (42)
5512168
New benchmark
mateuszbaran Dec 28, 2023
9d7e479
use HZ line search
mateuszbaran Dec 28, 2023
a23838c
Fix LineSearchesExt bug
mateuszbaran Dec 28, 2023
cecc2ca
minor updates
mateuszbaran Dec 28, 2023
a4bf77e
changelog
mateuszbaran Dec 28, 2023
99cb471
for testing
mateuszbaran Dec 28, 2023
4ef1dd7
somewhat fix L-BFGS memory
mateuszbaran Dec 28, 2023
7fcf166
fix L-BFGS memory
mateuszbaran Dec 28, 2023
9f26d83
minor updates
mateuszbaran Dec 29, 2023
ee551c2
fix test, minor benchmark updates
mateuszbaran Dec 29, 2023
d90fed0
would this help?
mateuszbaran Dec 29, 2023
3091c08
add debug to ALM
mateuszbaran Dec 29, 2023
546be06
fix ALM; test on Julia 1.10
mateuszbaran Dec 29, 2023
5060c84
tweaking stopping criteria of ALM
mateuszbaran Dec 29, 2023
0925a2b
Rayleigh quotient benchmarking
mateuszbaran Dec 29, 2023
7d7acea
Merge branch 'master' into mbaran/new-benchmark
mateuszbaran Jan 1, 2024
2a55909
bump version
mateuszbaran Jan 1, 2024
f7e1435
manoptuna
mateuszbaran Jan 3, 2024
6e334bc
VT for manoptuna
mateuszbaran Jan 3, 2024
77893eb
some work on manoptuna
mateuszbaran Jan 4, 2024
8fa2d42
Merge branch 'master' into mbaran/new-benchmark
mateuszbaran Jan 13, 2024
056b125
adding improved HZ
mateuszbaran Jan 15, 2024
7514294
Add basic test for HZ linesearch
mateuszbaran Jan 15, 2024
3d94b8e
tests for HZ
mateuszbaran Jan 15, 2024
0cc7148
tests
mateuszbaran Jan 15, 2024
d32dbc3
tests
mateuszbaran Jan 15, 2024
3645923
more robust test
mateuszbaran Jan 15, 2024
2a51798
test
mateuszbaran Jan 15, 2024
30a0b6d
HZ goes to a separate package
mateuszbaran Jan 16, 2024
377d242
not a descent direction?!?
mateuszbaran Jan 16, 2024
a0f6396
Merge branch 'master' into mbaran/new-benchmark
mateuszbaran Jan 17, 2024
07529fa
fix test, add a warning
mateuszbaran Jan 17, 2024
1d8bb8e
Merge branch 'master' into mbaran/new-benchmark
mateuszbaran Jan 19, 2024
4db49ce
use the new generalized gradient norm stopping criterion
mateuszbaran Jan 19, 2024
649bd55
Merge branch 'master' into mbaran/new-benchmark
mateuszbaran Jan 27, 2024
02764d5
some work on manoptuna
mateuszbaran Jan 28, 2024
93528a4
Merge branch 'master' into mbaran/new-benchmark
mateuszbaran Feb 5, 2024
96fb14f
generalize manoptuna tuner
mateuszbaran Feb 5, 2024
fc9b060
selecting manifold using Manoptuna
mateuszbaran Feb 5, 2024
dbf0bb2
suggesting float values, linesearch selection, conditional parameters
mateuszbaran Feb 7, 2024
565a9fb
pick sigma for HZ
mateuszbaran Feb 7, 2024
92db97e
use simple linear combination of time and objective losses
mateuszbaran Feb 8, 2024
10 changes: 10 additions & 0 deletions Changelog.md
@@ -5,6 +5,16 @@ All notable Changes to the Julia package `Manopt.jl` will be documented in this
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.4.46] unreleased

### Changed

* A message is printed when a line search from `LineSearches.jl` reports search failure.

### Fixed

* A bug in the `LineSearches.jl` extension leading to slower convergence.

## [0.4.45] December 28, 2023

### Added
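
Both entries concern Manopt's LineSearches.jl extension; a minimal usage sketch of the kind of call it affects (the toy objective and gradient are illustrative, not from this PR):

using Manopt, Manifolds, LineSearches

M = Euclidean(2)
f(M, p) = sum((p .- 1) .^ 2)       # toy objective
grad_f(M, p) = 2 .* (p .- 1)       # its Euclidean (= Riemannian) gradient

# Hager-Zhang line search from LineSearches.jl plugged into Manopt via the extension
p_opt = quasi_Newton(M, f, grad_f, [0.0, 0.0];
    stepsize=Manopt.LineSearchesStepsize(LineSearches.HagerZhang()),
)
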
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "Manopt"
uuid = "0fc0a36d-df90-57f3-8f93-d78a9fc72bb5"
authors = ["Ronny Bergmann <[email protected]>"]
version = "0.4.45"
version = "0.4.46"

[deps]
ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4"
250 changes: 250 additions & 0 deletions benchmarks/benchmark_comparison.jl
@@ -0,0 +1,250 @@
using Revise
using Optim, Manopt
using Manifolds
using LineSearches

using Profile
using ProfileView
using BenchmarkTools
using Plots

"""
StopWhenGradientInfNormLess <: StoppingCriterion

A stopping criterion based on the current gradient infinity norm in a basis arbitrarily
chosen for each manifold.

# Constructor

StopWhenGradientInfNormLess(ε::Float64)

Create a stopping criterion with threshold `ε` for the gradient, that is, this criterion
indicates to stop when [`get_gradient`](@ref) returns a gradient vector of norm less than `ε`.
"""
mutable struct StopWhenGradientInfNormLess <: StoppingCriterion
threshold::Float64
reason::String
at_iteration::Int
StopWhenGradientInfNormLess(ε::Float64) = new(ε, "", 0)
end
function (c::StopWhenGradientInfNormLess)(
mp::AbstractManoptProblem, s::AbstractManoptSolverState, i::Int
)
M = get_manifold(mp)
if i == 0 # reset on init
c.reason = ""
c.at_iteration = 0
end
if (norm(get_gradient(s), Inf) < c.threshold) && (i > 0)
c.reason = "The algorithm reached approximately critical point after $i iterations; the gradient norm ($(norm(M,get_iterate(s),get_gradient(s)))) is less than $(c.threshold).\n"
c.at_iteration = i
return true
end
return false
end
function Manopt.status_summary(c::StopWhenGradientInfNormLess)
has_stopped = length(c.reason) > 0
s = has_stopped ? "reached" : "not reached"
return "|grad f|ₒₒ < $(c.threshold): $s"
end
Manopt.indicates_convergence(c::StopWhenGradientInfNormLess) = true
function Base.show(io::IO, c::StopWhenGradientInfNormLess)
return print(
io, "StopWhenGradientInfNormLess($(c.threshold))\n $(status_summary(c))"
)
end

function f_rosenbrock(x)
result = 0.0
for i in 1:2:length(x)
result += (1.0 - x[i])^2 + 100.0 * (x[i + 1] - x[i]^2)^2
end
return result
end
function f_rosenbrock_manopt(::AbstractManifold, x)
return f_rosenbrock(x)
end

optimize(f_rosenbrock, [0.0, 0.0], Optim.NelderMead())

function g_rosenbrock!(storage, x)
for i in 1:2:length(x)
storage[i] = -2.0 * (1.0 - x[i]) - 400.0 * (x[i + 1] - x[i]^2) * x[i]
storage[i + 1] = 200.0 * (x[i + 1] - x[i]^2)
end
return storage
end

optimize(f_rosenbrock, g_rosenbrock!, [0.0, 0.0], LBFGS())
Member:
This is the Optim.jl run? One could mention that in a comment

Member Author:
Yes; this will be removed or commented when preparing final version.

function g_rosenbrock_manopt!(M::AbstractManifold, storage, x)
g_rosenbrock!(storage, x)
if isnan(x[1])
error("nan")
end
riemannian_gradient!(M, storage, x, storage)
return storage
end

M = Euclidean(2)
Manopt.NelderMead(M, f_rosenbrock_manopt)

qn_opts = quasi_Newton(
M,
f_rosenbrock_manopt,
g_rosenbrock_manopt!,
[0.0, 0.0];
evaluation=InplaceEvaluation(),
return_state=true,
)

function test_f(f_manopt, g_manopt!, x0, N::Int)
M = Euclidean(N)
return quasi_Newton(
M, f_manopt, g_manopt!, x0; evaluation=InplaceEvaluation(), return_state=true
)
end

function prof()
N = 32
x0 = zeros(N)
test_f(f_rosenbrock_manopt, g_rosenbrock_manopt!, x0, N)

Profile.clear()
@profile for i in 1:100000
test_f(f_rosenbrock_manopt, g_rosenbrock_manopt!, x0, N)
end
return ProfileView.view()
end

function manifold_maker(name::Symbol, N, lib::Symbol)
if lib === :Manopt
if name === :Euclidean
return Euclidean(N)
elseif name === :Sphere
return Manifolds.Sphere(N - 1)
end
elseif lib === :Optim
if name === :Euclidean
return Optim.Flat()
elseif name === :Sphere
return Optim.Sphere()
end
else
error("Unknown library: $lib")
end
end

function generate_cmp(f, g!, f_manopt, g_manopt!; mem_len::Int=2)
plt = plot()
xlabel!("dimension")
ylabel!("time [ms]")
title!("Optimization times for $f")

times_manopt = Float64[]
times_optim = Float64[]

N_vals = [2^n for n in 1:3:16]
ls_hz = LineSearches.HagerZhang()

gtol = 1e-6
for manifold_name in [:Euclidean, :Sphere]
println("Benchmarking $f for gtol=$gtol on $manifold_name")
for N in N_vals
println("Benchmarking for N=$N")
M = manifold_maker(manifold_name, N, :Manopt)
method_optim = LBFGS(;
m=mem_len,
linesearch=ls_hz,
manifold=manifold_maker(manifold_name, N, :Optim),
)

x0 = zeros(N)
x0[1] = 1
manopt_sc = StopWhenGradientInfNormLess(gtol) | StopAfterIteration(1000)
bench_manopt = @benchmark quasi_Newton(
$M,
$f_manopt,
$g_manopt!,
$x0;
stepsize=$(Manopt.LineSearchesStepsize(ls_hz)),
evaluation=$(InplaceEvaluation()),
memory_size=$mem_len,
stopping_criterion=$(manopt_sc),
)

manopt_state = quasi_Newton(
M,
f_manopt,
g_manopt!,
x0;
stepsize=Manopt.LineSearchesStepsize(ls_hz),
evaluation=InplaceEvaluation(),
return_state=true,
memory_size=mem_len,
stopping_criterion=manopt_sc,
)
manopt_iters = get_count(manopt_state, :Iterations)
push!(times_manopt, median(bench_manopt.times) / 1000)
println("Manopt.jl time: $(median(bench_manopt.times) / 1000) ms")
println("Manopt.jl iterations: $(manopt_iters)")

options_optim = Optim.Options(; g_tol=gtol)
bench_optim = @benchmark optimize($f, $g!, $x0, $method_optim, $options_optim)

optim_state = optimize(
f_rosenbrock, g_rosenbrock!, x0, method_optim, options_optim
)
println("Optim.jl time: $(median(bench_optim.times) / 1000) ms")
push!(times_optim, median(bench_optim.times) / 1000)
println("Optim.jl iterations: $(optim_state.iterations)")
end
plot!(
N_vals, times_manopt; label="Manopt.jl ($manifold_name)", xaxis=:log, yaxis=:log
)
plot!(
N_vals, times_optim; label="Optim.jl ($manifold_name)", xaxis=:log, yaxis=:log
)
end
xticks!(N_vals, string.(N_vals))

return plt
end

#generate_cmp(f_rosenbrock, g_rosenbrock!, f_rosenbrock_manopt, g_rosenbrock_manopt!)

function test_case_manopt()
N = 4
mem_len = 2
M = Manifolds.Euclidean(N)
ls_hz = LineSearches.HagerZhang()

x0 = zeros(N)
x0[1] = 1
manopt_sc = StopWhenGradientInfNormLess(1e-6) | StopAfterIteration(1000)

return quasi_Newton(
M,
f_rosenbrock_manopt,
g_rosenbrock_manopt!,
x0;
stepsize=Manopt.LineSearchesStepsize(ls_hz),
evaluation=InplaceEvaluation(),
return_state=true,
memory_size=mem_len,
stopping_criterion=manopt_sc,
)
end

function test_case_optim()
N = 4
mem_len = 2
ls_hz = LineSearches.HagerZhang()
method_optim = LBFGS(; m=mem_len, linesearch=ls_hz, manifold=Optim.Flat())
options_optim = Optim.Options(; g_tol=1e-6)

x0 = zeros(N)
x0[1] = 1
optim_state = optimize(f_rosenbrock, g_rosenbrock!, x0, method_optim, options_optim)
Member:
Similar to the previous call, this could also be one line: just the optimize call whose return value is returned?

return optim_state
end
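
A sketch of the one-line form suggested in the review above (hypothetical helper name; same setup as `test_case_optim`):

# Hypothetical condensed variant of test_case_optim, per the review suggestion:
# return the optimize(...) call directly instead of binding it to optim_state first.
function test_case_optim_oneline()
    N = 4
    mem_len = 2
    ls_hz = LineSearches.HagerZhang()
    method_optim = LBFGS(; m=mem_len, linesearch=ls_hz, manifold=Optim.Flat())
    options_optim = Optim.Options(; g_tol=1e-6)

    x0 = zeros(N)
    x0[1] = 1
    return optimize(f_rosenbrock, g_rosenbrock!, x0, method_optim, options_optim)
end
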
5 changes: 3 additions & 2 deletions ext/ManoptLineSearchesExt.jl
@@ -42,15 +42,15 @@ function (cs::Manopt.LineSearchesStepsize)(
retract!(M, p_tmp, p, η, α, cs.retraction_method)
get_gradient!(mp, X_tmp, p_tmp)
vector_transport_to!(M, Y_tmp, p, η, p_tmp, cs.vector_transport_method)
return real(inner(M, p_tmp, Y_tmp, Y_tmp))
return real(inner(M, p_tmp, X_tmp, Y_tmp))
end
function ϕdϕ(α)
# TODO: optimize?
retract!(M, p_tmp, p, η, α, cs.retraction_method)
get_gradient!(mp, X_tmp, p_tmp)
vector_transport_to!(M, Y_tmp, p, η, p_tmp, cs.vector_transport_method)
phi = f(M, p_tmp)
dphi = real(inner(M, p_tmp, Y_tmp, Y_tmp))
dphi = real(inner(M, p_tmp, X_tmp, Y_tmp))
return (phi, dphi)
end

@@ -59,6 +59,7 @@ function (cs::Manopt.LineSearchesStepsize)(
return α
catch ex
if isa(ex, LineSearches.LineSearchException)
println(ex)
Member:
I think I would prefer a re-throw of the error here; and maybe we should introduce such errors in our line searches as well.

Member Author:
OK, then the easiest solution is to just not catch the error. We could have that in our line searches too.

Member:
Yes.

# maybe indicate failure?
return zero(dphi_0)
else
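
A minimal sketch of the re-throw behavior agreed on in the conversation above (`run_linesearch` is a placeholder closure, not part of Manopt's or LineSearches.jl's API):

# Hypothetical helper: instead of converting a LineSearches.LineSearchException
# into a zero step size (the current `return zero(dphi_0)`), let it propagate.
function stepsize_or_rethrow(run_linesearch)
    try
        return run_linesearch()   # stands in for the extension's actual LineSearches.jl call
    catch ex
        rethrow(ex)               # or simply drop the try/catch entirely
    end
end
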
12 changes: 9 additions & 3 deletions src/plans/quasi_newton_plan.jl
@@ -531,6 +531,7 @@ function (d::QuasiNewtonLimitedMemoryDirectionUpdate{InverseBFGS})(r, mp, st)
r .*= -1
return r
end
# backward pass
for i in m:-1:1
# what if we divide by zero here? Setting to zero ignores this in the next step
# precompute in case inner is expensive
@@ -558,12 +559,17 @@ function (d::QuasiNewtonLimitedMemoryDirectionUpdate{InverseBFGS})(r, mp, st)
if (last_safe_index == -1)
d.message = "$(d.message)$(length(d.message)>0 ? :"\n" : "")"
d.message = "$(d.message) All memory yield zero inner products, falling back to a gradient step."
return -get_gradient(st)

r .*= -1
return r
end
r .*= 1 / (d.ρ[last_safe_index] * norm(M, p, d.memory_y[last_safe_index])^2)
# initial scaling guess
r ./= d.ρ[last_safe_index] * norm(M, p, d.memory_y[last_safe_index])^2
# forward pass
for i in eachindex(d.ρ)
if abs(d.ρ[i]) > 0
r .+= (d.ξ[i] - d.ρ[i] * inner(M, p, d.memory_y[i], r)) .* d.memory_s[i]
coeff = d.ξ[i] - d.ρ[i] * inner(M, p, d.memory_y[i], r)
r .+= coeff .* d.memory_s[i]
end
end
d.project && embed_project!(M, r, p, r)
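
For orientation, the backward/forward passes annotated above mirror the classical L-BFGS two-loop recursion; a plain Euclidean sketch (illustrative only, not Manopt's Riemannian implementation):

using LinearAlgebra

# Classical two-loop recursion: memory_s/memory_y hold the stored steps and
# gradient differences, grad is the current gradient; returns a descent direction.
function two_loop_direction(memory_s, memory_y, grad)
    m = length(memory_s)
    m == 0 && return -copy(grad)
    ρ = [1 / dot(memory_y[i], memory_s[i]) for i in 1:m]
    ξ = zeros(m)
    r = copy(grad)
    for i in m:-1:1                       # backward pass
        ξ[i] = ρ[i] * dot(memory_s[i], r)
        r .-= ξ[i] .* memory_y[i]
    end
    # initial Hessian scaling guess γ = sₘᵀyₘ / yₘᵀyₘ
    r .*= dot(memory_s[m], memory_y[m]) / dot(memory_y[m], memory_y[m])
    for i in 1:m                          # forward pass
        β = ρ[i] * dot(memory_y[i], r)
        r .+= (ξ[i] - β) .* memory_s[i]
    end
    return -r
end
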
6 changes: 3 additions & 3 deletions src/solvers/quasi_Newton.jl
@@ -119,7 +119,7 @@ function show(io::IO, qns::QuasiNewtonState)
## Parameters
* direction update: $(status_summary(qns.direction_update))
* retraction method: $(qns.retraction_method)
* vector trnasport method: $(qns.vector_transport_method)
* vector transport method: $(qns.vector_transport_method)

## Stepsize
$(qns.stepsize)
@@ -617,8 +617,8 @@ function update_hessian!(
end

# add newest
push!(d.memory_s, st.sk)
push!(d.memory_y, st.yk)
push!(d.memory_s, copy(M, st.sk))
push!(d.memory_y, copy(M, st.yk))
return d
end
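
The `copy(M, st.sk)` / `copy(M, st.yk)` change above avoids storing a reference that later in-place updates of the solver state would overwrite; a standalone sketch of that aliasing pitfall (not Manopt code):

# Pushing the array itself keeps a reference, so mutating it later silently
# rewrites the stored memory entry; pushing a copy preserves the history.
memory = Vector{Vector{Float64}}()
sk = [1.0, 2.0]

push!(memory, sk)        # stores a reference to sk
sk .= [3.0, 4.0]         # next iteration overwrites sk in place
memory[1]                # == [3.0, 4.0] -- history corrupted

push!(memory, copy(sk))  # stores an independent copy instead
sk .= [5.0, 6.0]
memory[2]                # still [3.0, 4.0]
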
