From 346d7f502297a2b903d44fa888d2c47dda6f850c Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Sat, 28 Dec 2024 14:57:49 +0100 Subject: [PATCH] Rework the remaining code to provide vgf fully and have the chain rule with smoothing implemented. --- src/plans/nonlinear_least_squares_plan.jl | 261 ++++++++++++++++------ src/plans/vectorial_plan.jl | 89 +++++--- src/solvers/LevenbergMarquardt.jl | 55 ++--- 3 files changed, 258 insertions(+), 147 deletions(-) diff --git a/src/plans/nonlinear_least_squares_plan.jl b/src/plans/nonlinear_least_squares_plan.jl index ea074c6d50..39dc35834b 100644 --- a/src/plans/nonlinear_least_squares_plan.jl +++ b/src/plans/nonlinear_least_squares_plan.jl @@ -11,7 +11,7 @@ Specify a nonlinear least squares problem # Fields * `objective`: a [`AbstractVectorGradientFunction`](@ref)`{E}` containing both the vector of cost functions ``f_i`` as well as their gradients ``$(_tex(:grad)) f_i``` -* `smoothing`: a [`ManifoldHessianObjective`](@ref) of a smoothing function ``ρ: ℝ → ℝ``, hence including its first and second derivatives ``ρ'`` and ``ρ''``. +* `smoothing`: a [`ManifoldHessianObjective`](@ref) or a [`Vector of a smoothing function ``ρ: ℝ → ℝ``, hence including its first and second derivatives ``ρ'`` and ``ρ''``. This `NonlinearLeastSquaresObjective` then has the same [`AbstractEvaluationType`](@ref) `T` as the (inner) `objective. @@ -36,99 +36,219 @@ struct NonlinearLeastSquaresObjective{ smoothing::R end -# TODO: Write the ease-of-use constructor. +function NonlinearLeastSquaresObjective( + vgf::F; smoothing=:Identity +) where {F<:AbstractVectorGradientFunction} + s = smoothing_factory(smoothing) + return NonlinearLeastSquaresObjective(vgf, s) +end # Cost -# (a) with ρ +# (a) with for a single smoothing function function get_cost( M::AbstractManifold, nlso::NonlinearLeastSquaresObjective{ - E, - AbstractVectorFunction{E,<:FunctionVectorialType}, - <:AbstractManifoldHessianObjective, + E,AbstractVectorFunction{E,<:ComponentVectorialType},H }, p; vector_space=Rn, + kwargs..., +) where {E<:AbstractEvaluationType,H<:AbstractManifoldHessianObjective} + v = 0.0 + for i in 1:length(nlso.objective) + v += get_cost(vector_space(1), nlso.smoothing, get_value(nlso.objective, p, i)^2) + end + return v +end +function get_cost( + M::AbstractManifold, + nlso::NonlinearLeastSquaresObjective{ + E,AbstractVectorFunction{E,<:FunctionVectorialType},H + }, + p; + vector_space=Rn, + value_cache=get_value(M, nlso.objective, p), +) where {E<:AbstractEvaluationType,H<:AbstractManifoldHessianObjective} + return sum( + get_cost(vector_space(1), nlso.smoothing, value_cache[i]) for + i in 1:length(value_cache) + ) +end +# (b) vectorial ρ +function get_cost( + M::AbstractManifold, + nlso::NonlinearLeastSquaresObjective{ + E,AbstractVectorFunction{E,<:ComponentVectorialType},<:AbstractVectorFunction + }, + p; + vector_space=Rn, + kwargs..., ) where {E<:AbstractEvaluationType} - return get_cost(vector_space(1), nlso.smoothing, get_value(nlso.objective, p)^2) + v = 0.0 + for i in 1:length(nlso.objective) + v += get_cost(vector_space(1), nlso.smoothing, get_value(nlso.objective, p, i)^2, i) + end + return v end -# (b) without ρ function get_cost( - M::AbstractManifold, nlso::NonlinearLeastSquaresObjective{InplaceEvaluation}, p -) - residual_values = zeros(nlso.num_components) - nlso.f(M, residual_values, p) - return 1//2 * norm(residual_values)^2 + M::AbstractManifold, + nlso::NonlinearLeastSquaresObjective{ + E,AbstractVectorFunction{E,<:FunctionVectorialType},<:AbstractVectorFunction + }, + p; + vector_space=Rn, + value_cache=get_value(M, nlso.objective, p), +) where {E<:AbstractEvaluationType} + return sum( + get_cost(vector_space(1), nlso.smoothing, value_cache[i], i) for + i in 1:length(value_cache) + ) end -function get_jacobian!( - dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective{AllocatingEvaluation}}, - J, - p, - basis_domain::AbstractBasis, +function get_jacobian( + dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective}, p; kwargs... ) where {mT} nlso = get_objective(dmp) - return copyto!(J, nlso.jacobian!!(get_manifold(dmp), p; basis_domain=basis_domain)) + M = get_manifold(dmp) + J = zeros(length(nlso.objective), manifold_dimension(M)) + get_jacobian!(M, J, nlso, p; kwargs...) + return J end function get_jacobian!( - dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective{InplaceEvaluation}}, - J, - p, - basis_domain::AbstractBasis, + dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective}, J, p; kwargs... ) where {mT} nlso = get_objective(dmp) - return nlso.jacobian!!(get_manifold(dmp), J, p; basis_domain=basis_domain) + M = get_manifold(dmp) + get_jacobian!(M, J, nlso, p; kwargs...) + return J end -# TODO: Replace once we have the Jacobian implemented -function get_gradient_from_Jacobian!( - M::AbstractManifold, - X, - nlso::NonlinearLeastSquaresObjective{InplaceEvaluation}, - p, - Jval=zeros(nlso.num_components, manifold_dimension(M)), +function get_jacobian( + M::AbstractManifold, nlso::NonlinearLeastSquaresObjective, p; kwargs... ) - basis_p = _maybe_get_basis(M, p, nlso.jacobian_tangent_basis) - nlso.jacobian!!(M, Jval, p; basis_domain=basis_p) - residual_values = zeros(nlso.num_components) - nlso.f(M, residual_values, p) - get_vector!(M, X, p, transpose(Jval) * residual_values, basis_p) - return X + J = zeros(length(nlso.objective), manifold_dimension(M)) + get_jacobian!(M, J, nlso, p; kwargs...) + return J end - -function get_gradient( - M::AbstractManifold, nlso::NonlinearLeastSquaresObjective{AllocatingEvaluation}, p -) - basis_x = _maybe_get_basis(M, p, nlso.jacobian_tangent_basis) - Jval = nlso.jacobian!!(M, p; basis_domain=basis_x) - residual_values = nlso.f(M, p) - return get_vector(M, p, transpose(Jval) * residual_values, basis_x) +# Cases: (a) single smoothing function +function get_jacobian!( + M::AbstractManifold, + J, + nlso::NonlinearLeastSquaresObjective{E,AHVF,<:AbstractManifoldGradientObjective}, + p; + value_cache=get_value(M, nlso.objective, p), + kwargs..., +) where {E,AHVF} + get_jacobian!(M, J, nlso.objective, p; kwargs...) + for i in 1:length(nlso.objective) # s'(f_i(p)) * f_i'(p) + J[i, :] .*= get_gradient(vector_space(1), nlso.smoothing, value_cache[i]) + end + return J +end +# Cases: (b) vectorial smoothing function +function get_jacobian!( + M::AbstractManifold, + J, + nlso::NonlinearLeastSquaresObjective{E,AHVF,<:AbstractVectorGradientFunction}, + p; + basis::AbstractBasis=get_jacobian_basis(nlso.objective), + value_cache=get_value(M, nlso.objective, p), +) where {E,AHVF} + get_jacobian!(M, J, nlso.objective, p; basis=basis) + for i in 1:length(nlso.objective) # s_i'(f_i(p)) * f_i'(p) + J[i, :] .*= get_gradient(vector_space(1), nlso.smoothing, value_cache[i], i) + end + return J end + function get_gradient( - M::AbstractManifold, nlso::NonlinearLeastSquaresObjective{InplaceEvaluation}, p + M::AbstractManifold, nlso::NonlinearLeastSquaresObjective, p; kwargs... ) - basis_x = _maybe_get_basis(M, p, nlso.jacobian_tangent_basis) - Jval = zeros(nlso.num_components, manifold_dimension(M)) - nlso.jacobian!!(M, Jval, p; basis_domain=basis_x) - residual_values = zeros(nlso.num_components) - nlso.f(M, residual_values, p) - return get_vector(M, p, transpose(Jval) * residual_values, basis_x) + X = zero_vector(M, p) + return get_gradient!(M, X, nlso, p; kwargs...) end - function get_gradient!( - M::AbstractManifold, X, nlso::NonlinearLeastSquaresObjective{AllocatingEvaluation}, p + M::AbstractManifold, + X, + nlso::NonlinearLeastSquaresObjective, + p; + basis=get_jacobian_basis(nlso.objective), + jacobian_cache=get_jacobian(M, nlso, p; basis=basis), + value_cache=get_residuals(M, nlso, p), ) - basis_x = _maybe_get_basis(M, p, nlso.jacobian_tangent_basis) - Jval = nlso.jacobian!!(M, p; basis_domain=basis_x) - residual_values = nlso.f(M, p) - return get_vector!(M, X, p, transpose(Jval) * residual_values, basis_x) + return get_vector!(M, X, p, transpose(jacobian_cache) * value_cache, basis) end -function get_gradient!( - M::AbstractManifold, X, nlso::NonlinearLeastSquaresObjective{InplaceEvaluation}, p +# Residuals +# (a) with for a single smoothing function +function get_residuals( + M::AbstractManifold, nlso::NonlinearLeastSquaresObjective, p; kwargs... ) - get_gradient_from_Jacobian!(M, X, nlso, p) - return X + V = zeros(length(nlso.objective)) + return get_residuals!(M, V, nlso, p; kwargs...) +end +function get_residuals!( + M::AbstractManifold, + V, + nlso::NonlinearLeastSquaresObjective{ + E,AbstractVectorFunction{E,<:ComponentVectorialType},H + }, + p; + vector_space=Rn, + kwargs..., +) where {E<:AbstractEvaluationType,H<:AbstractManifoldHessianObjective} + for i in 1:length(nlso.objective) + V[i] = get_cost(vector_space(1), nlso.smoothing, get_value(nlso.objective, p, i)^2) + end + return V +end +function get_residuals!( + M::AbstractManifold, + V, + nlso::NonlinearLeastSquaresObjective{ + E,AbstractVectorFunction{E,<:FunctionVectorialType},H + }, + p; + vector_space=Rn, + value_cache=get_value(M, nlso.objective, p), +) where {E<:AbstractEvaluationType,H<:AbstractManifoldHessianObjective} + for i in 1:length(value_cache) + V[i] = get_cost(vector_space(1), nlso.smoothing, value_cache[i]) + end + return V +end +# (b) vectorial ρ +function get_residuals!( + M::AbstractManifold, + V, + nlso::NonlinearLeastSquaresObjective{ + E,AbstractVectorFunction{E,<:ComponentVectorialType},<:AbstractVectorFunction + }, + p; + vector_space=Rn, + kwargs..., +) where {E<:AbstractEvaluationType} + for i in 1:length(nlso.objective) + V[i] = get_cost( + vector_space(1), nlso.smoothing, get_value(nlso.objective, p, i)^2, i + ) + end + return V +end +function get_residuals!( + M::AbstractManifold, + V, + nlso::NonlinearLeastSquaresObjective{ + E,AbstractVectorFunction{E,<:FunctionVectorialType},<:AbstractVectorFunction + }, + p; + vector_space=Rn, + value_cache=get_value(M, nlso.objective, p), +) where {E<:AbstractEvaluationType} + for i in 1:length(value_cache) + V[i] = get_cost(vector_space(1), nlso.smoothing, value_cache[i], i) + end + return V end @doc """ @@ -145,7 +265,7 @@ $(_var(:Field, :retraction_method)) * `residual_values`: value of ``F`` calculated in the solver setup or the previous iteration * `residual_values_temp`: value of ``F`` for the current proposal point $(_var(:Field, :stopping_criterion, "stop")) -* `jacF`: the current Jacobian of ``F`` +* `jacobian`: the current Jacobian of ``F`` * `gradient`: the current gradient of ``F`` * `step_vector`: the tangent vector at `x` that is used to move to the next point * `last_stepsize`: length of `step_vector` @@ -160,7 +280,7 @@ $(_var(:Field, :stopping_criterion, "stop")) # Constructor - LevenbergMarquardtState(M, initial_residual_values, initial_jacF; kwargs...) + LevenbergMarquardtState(M, initial_residual_values, initial_jacobian; kwargs...) Generate the Levenberg-Marquardt solver state. @@ -194,7 +314,7 @@ mutable struct LevenbergMarquardtState{ retraction_method::TRTM residual_values::Tresidual_values candidate_residual_values::Tresidual_values - jacF::TJac + jacobian::TJac X::TGrad step_vector::TGrad last_stepsize::Tparams @@ -207,7 +327,7 @@ mutable struct LevenbergMarquardtState{ function LevenbergMarquardtState( M::AbstractManifold, initial_residual_values::Tresidual_values, - initial_jacF::TJac; + initial_jacobian::TJac; p::P=rand(M), X::TGrad=zero_vector(M, p), stopping_criterion::StoppingCriterion=StopAfterIteration(200) | @@ -247,7 +367,7 @@ mutable struct LevenbergMarquardtState{ retraction_method, initial_residual_values, copy(initial_residual_values), - initial_jacF, + initial_jacobian, X, allocate(M, X), zero(Tparams), @@ -277,6 +397,7 @@ For a tuple `(s, α)`, the smoothing function is scaled by `α` as ``s_α(x) = which yields ``s_α'(x) = s'$(_tex(:bigl))($(_tex(:frac, "x", "α^2"))$(_tex(:bigr)))`` and ``s_α''(x)[X] = $(_tex(:bigl))($(_tex(:frac, "1", "α^2"))$(_tex(:bigr)))s''$(_tex(:bigl))($(_tex(:frac, "x", "α^2"))$(_tex(:bigr)))[X]``. For a tuple `(s, k)`, a [`VectorHessianFunction`](@ref) is returned, where every component is the smooting function indicated by `s` +If the argument already is a [`VectorHessianFunction`](@ref), it is returned unchanged. Finally for a tuple containing the above four cases, a [`VectorHessianFunction`](@ref) is returned, containing all smoothing functions with their repetitions mentioned @@ -284,9 +405,9 @@ containing all smoothing functions with their repetitions mentioned # Examples * `smoothing_factory(:Identity)`: returns the identity function as a single smoothing function -* `smoothing_factory(:Identity, 2)`: returns a `VectorHessianFunction` with two identity functions -* `smoothing_factory(mho, 0.5)`: returns a `ManifoldHessianObjective` with the scaled variant of the given `mho`, for example the one returned in the first example -* `smoothing_factory( ( (:Identity, 2), (:Huber, 3) ))`: returns a `VectorHessianFunction` with 5 components, the first 2 `:Identity` the last 3 `:Huber` +* `smoothing_factory(:Identity, 2)`: returns a [`VectorHessianFunction`](@ref) with two identity functions +* `smoothing_factory(mho, 0.5)`: returns a [`ManifoldHessianObjective`](@ref) with the scaled variant of the given `mho`, for example the one returned in the first example +* `smoothing_factory( ( (:Identity, 2), (:Huber, 3) ))`: returns a [`VectorHessianFunction`](@ref) with 5 components, the first 2 `:Identity` the last 3 `:Huber` # Currently available smoothing functions diff --git a/src/plans/vectorial_plan.jl b/src/plans/vectorial_plan.jl index 298e9dcc81..f9b2c03b6d 100644 --- a/src/plans/vectorial_plan.jl +++ b/src/plans/vectorial_plan.jl @@ -50,7 +50,7 @@ for example ``g_i(p) ∈ ℝ^m`` or ``\operatorname{grad} g_i(p) ∈ T_p\mathcal struct ComponentVectorialType <: AbstractVectorialType end @doc raw""" - FunctionVectorialType <: AbstractVectorialType + FunctionVectorialType{P<:AbstractPowerRepresentation} <: AbstractVectorialType A type to indicate that constraints are implemented one whole functions, for example ``g(p) ∈ ℝ^m`` or ``\operatorname{grad} g(p) ∈ (T_p\mathcal M)^m``. @@ -58,9 +58,14 @@ for example ``g(p) ∈ ℝ^m`` or ``\operatorname{grad} g(p) ∈ (T_p\mathcal M) This type internally stores the [`AbstractPowerRepresentation`](@ref), when it makes sense, especially for Hessian and gradient functions. """ -struct FunctionVectorialType <: AbstractVectorialType end +struct FunctionVectorialType{P<:AbstractPowerRepresentation} <: AbstractVectorialType + range::P +end + +get_range(vt::FunctionVectorialType) = vt.range +get_range(::AbstractVectorialType) = NestedPowerRepresentation() -# TODO store range type in this type instead of having a keyword argument in so many function. +FunctionVectorialType() = FunctionVectorialType(NestedPowerRepresentation()) @doc raw""" AbstractVectorFunction{E, FT} <: Function @@ -306,7 +311,7 @@ function get_hessian( p, X, i::Integer, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.hessian_type), ) Y = zero_vector(M, p) return get_hessian!(M, Y, vhf, p, X, i, range) @@ -318,7 +323,7 @@ function get_hessian( p, X, i=:, # as long as the length can be found it should work, see _vgf_index_to_length - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.hessian_type), ) n = _vgf_index_to_length(i, vhf.range_dimension) pM = PowerManifold(M, range, n) @@ -349,7 +354,7 @@ function get_hessian!( p, X, i, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.hessian_type), ) where {FT,JT} n = _vgf_index_to_length(i, vhf.range_dimension) pM = PowerManifold(M, range, n) @@ -368,7 +373,7 @@ function get_hessian!( p, X, i::Colon, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.hessian_type), ) where {FT,JT} n = _vgf_index_to_length(i, vgf.range_dimension) pM = PowerManifold(M, range, n) @@ -386,7 +391,7 @@ function get_hessian!( p, X, i, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.hessian_type), ) where {FT,JT} n = _vgf_index_to_length(i, vhf.range_dimension) mP = PowerManifold(M, range, n) @@ -400,7 +405,7 @@ function get_hessian!( p, X, i::Integer, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.hessian_type), ) where {FT,JT} mP = PowerManifold(M, range, vhf.range_dimension) copyto!(M, Y, p, vhf.hessians!!(M, p, X)[mP, i]) @@ -428,7 +433,7 @@ function get_hessian!( p, X, i, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.hessian_type), ) where {FT,JT} n = _vgf_index_to_length(i, vhf.range_dimension) pM = PowerManifold(M, range, n) @@ -448,7 +453,7 @@ function get_hessian!( p, X, i::Integer, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.hessian_type), ) where {FT,JT} pM = PowerManifold(M, range, vhf.range_dimension...) P = fill(p, pM) @@ -464,7 +469,7 @@ function get_hessian!( p, X, i, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.hessian_type), ) where {FT,JT} #Single access for function is a bit expensive n = _vgf_index_to_length(i, vhf.range_dimension) @@ -547,7 +552,7 @@ function get_jacobian( vgf::AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:FunctionVectorialType}, p; basis::B=DefaultOrthonormalBasis(), - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.jacobian_type), ) where {FT,B<:AbstractBasis} n = vgf.range_dimension d = manifold_dimension(M, p) @@ -603,7 +608,7 @@ function get_jacobian( vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:FunctionVectorialType}, p; basis::B=DefaultOrthonormalBasis(), - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT,B<:AbstractBasis} n = vgf.range_dimension d = manifold_dimension(M, p) @@ -665,20 +670,19 @@ get_jacobian!(M::AbstractManifold, JF, vgf::AbstractVectorGradientFunction, p) # (a) We have a single gradient function function get_jacobian!( M::AbstractManifold, - JF, + J, vgf::AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:FunctionVectorialType}, p; - basis::B=DefaultOrthonormalBasis(), - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + basis::B=get_jacobian_basis(vgf), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT,B<:AbstractBasis} n = vgf.range_dimension - d = manifold_dimension(M, p) gradients = vgf.jacobian!!(M, p) mP = PowerManifold(M, range, vgf.range_dimension) for i in 1:n - JF[i, :] .= get_coordinates(M, p, gradients[mP, i], basis) + J[i, :] .= get_coordinates(M, p, gradients[mP, i], basis) end - return JF + return J end # (b) We have a vector of gradient functions function get_jacobian!( @@ -755,7 +759,7 @@ function get_jacobian!( return JF end -get_jacobian_basis(vgf::AbstractVectorGradientFunction) = DefaultBasis() +get_jacobian_basis(vgf::AbstractVectorGradientFunction) = DefaultOrthonormalBasis() function get_jacobian_basis( vgf::AbstractVectorGradientFunction{F,G,<:CoordinateVectorialType} ) where {F,G} @@ -795,7 +799,7 @@ function get_gradient( vgf::AbstractVectorGradientFunction, p, i::Integer, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) X = zero_vector(M, p) return get_gradient!(M, X, vgf, p, i, range) @@ -806,7 +810,7 @@ function get_gradient( vgf::AbstractVectorGradientFunction, p, i=:, # as long as the length can be found it should work, see _vgf_index_to_length - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) n = _vgf_index_to_length(i, vgf.range_dimension) pM = PowerManifold(M, range, n) @@ -847,7 +851,7 @@ function get_gradient!( }, p, i::Integer, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} JF = vgf.jacobian!!(M, p) get_vector!(M, X, p, JF[i, :], vgf.jacobian_type.basis) #convert rows to gradients @@ -861,7 +865,7 @@ function get_gradient!( }, p, i=:, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} n = _vgf_index_to_length(i, vgf.range_dimension) pM = PowerManifold(M, range, n) @@ -889,7 +893,7 @@ function get_gradient!( vgf::AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:ComponentVectorialType}, p, i, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT} n = _vgf_index_to_length(i, vgf.range_dimension) pM = PowerManifold(M, range, n) @@ -907,7 +911,7 @@ function get_gradient!( vgf::AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:ComponentVectorialType}, p, i::Colon, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} n = _vgf_index_to_length(i, vgf.range_dimension) pM = PowerManifold(M, range, n) @@ -924,7 +928,7 @@ function get_gradient!( vgf::AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:FunctionVectorialType}, p, i, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} n = _vgf_index_to_length(i, vgf.range_dimension) mP = PowerManifold(M, range, n) @@ -937,7 +941,7 @@ function get_gradient!( vgf::AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:FunctionVectorialType}, p, i::Integer, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} mP = PowerManifold(M, range, vgf.range_dimension) copyto!(M, X, p, vgf.jacobian!!(M, p)[mP, i]) @@ -953,7 +957,7 @@ function get_gradient!( vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:CoordinateVectorialType}, p, i::Integer, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} # a type wise safe way to allocate what usually should yield a n-times-d matrix pM = PowerManifold(M, range, vgf.range_dimension...) @@ -974,7 +978,7 @@ function get_gradient!( vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:CoordinateVectorialType}, p, i, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} # a type wise safe way to allocate what usually should yield a n-times-d matrix pM = PowerManifold(M, range, vgf.range_dimension...) @@ -999,7 +1003,7 @@ function get_gradient!( vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:ComponentVectorialType}, p, i::Integer, - range::Union{AbstractPowerRepresentation,Nothing}=nothing, + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} return vgf.jacobian!![i](M, X, p) end @@ -1009,7 +1013,7 @@ function get_gradient!( vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:ComponentVectorialType}, p, i, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} n = _vgf_index_to_length(i, vgf.range_dimension) pM = PowerManifold(M, range, n) @@ -1028,7 +1032,7 @@ function get_gradient!( vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:FunctionVectorialType}, p, i::Integer, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} pM = PowerManifold(M, range, vgf.range_dimension...) P = fill(p, pM) @@ -1043,7 +1047,7 @@ function get_gradient!( vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:FunctionVectorialType}, p, i, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} #Single access for function is a bit expensive n = _vgf_index_to_length(i, vgf.range_dimension) @@ -1064,6 +1068,7 @@ get_gradient_function(vgf::VectorGradientFunction, recursive=false) = vgf.jacobi # ---- Value @doc raw""" get_value(M::AbstractManifold, vgf::AbstractVectorFunction, p[, i=:]) + get_value!(M::AbstractManifold, V, vgf::AbstractVectorFunction, p[, i=:]) Evaluate the vector function [`VectorGradientFunction`](@ref) `vgf` at `p`. The `range` can be used to specify a potential range, but is currently only present for consistency. @@ -1075,6 +1080,8 @@ The `i` can be a linear index, you can provide * a `BitVector` specifying a selection * a `AbstractVector{<:Integer}` to specify indices * `:` to return the vector of all gradients, which is also the default + +This function can perform the evalutation inplace of `V`. """ get_value(M::AbstractManifold, vgf::AbstractVectorFunction, p, i) function get_value( @@ -1100,7 +1107,17 @@ function get_value( ) where {E<:AbstractEvaluationType} return [f(M, p) for f in vgf.value!![i]] end - +function get_value!( + M::AbstractManifold, V, vgf::AbstractVectorFunction{E,<:FunctionVectorialType}, p, i=: +) where {E<:AbstractEvaluationType} + c = vgf.value!!(M, p) + if isa(c, Number) + V .= c + else + V .= c[i] + end + return V +end @doc raw""" get_value_function(vgf::VectorGradientFunction, recursive=false) diff --git a/src/solvers/LevenbergMarquardt.jl b/src/solvers/LevenbergMarquardt.jl index 830af00d3a..f5a83bd6a8 100644 --- a/src/solvers/LevenbergMarquardt.jl +++ b/src/solvers/LevenbergMarquardt.jl @@ -112,11 +112,10 @@ function LevenbergMarquardt( vgf::VectorGradientFunction, p; evaluation::AbstractEvaluationType=AllocatingEvaluation(), - smoothing=:Idenity, + smoothing=:Identity, kwargs..., ) - _smoothing = smoothing_factory(smoothing) - nlso = NonlinearLeastSquaresObjective(vgf, _smoothing; evaluation=evaluation) + nlso = NonlinearLeastSquaresObjective(vgf; smoothing=smoothing) return LevenbergMarquardt(M, nlso, p; evaluation=evaluation, kwargs...) end function LevenbergMarquardt( @@ -171,9 +170,9 @@ function LevenbergMarquardt!( β::Real=5.0, η::Real=0.2, damping_term_min::Real=0.1, - initial_residual_values=similar(p, get_objective(nlso).num_components), + initial_residual_values=similar(p, length(get_objective(nlso).objective)), initial_jacobian_f=similar( - p, get_objective(nlso).num_components, manifold_dimension(M) + p, length(get_objective(nlso).objective), manifold_dimension(M) ), kwargs..., #collect rest ) where {O<:Union{NonlinearLeastSquaresObjective,AbstractDecoratedManifoldObjective}} @@ -204,7 +203,7 @@ function initialize_solver!( lms::LevenbergMarquardtState, ) where {mT<:AbstractManifold} M = get_manifold(dmp) - lms.residual_values = get_objective(dmp).f(M, lms.p) + lms.residual_values = get_value(M, get_objective(dmp).objective, lms.p) lms.X = get_gradient(dmp, lms.p) return lms end @@ -213,37 +212,13 @@ function initialize_solver!( lms::LevenbergMarquardtState, ) where {mT<:AbstractManifold} M = get_manifold(dmp) - get_objective(dmp).f(M, lms.residual_values, lms.p) - # TODO: Replace with a call to the Jacobian - get_gradient_from_Jacobian!(M, lms.X, get_objective(dmp), lms.p, lms.jacF) + nlso = get_objective(dmp) + get_residuals!(M, lms.residual_values, nlso, lms.p) + get_jacobian!(M, lms.jacobian, nlso, lms.p) + get_gradient!(M, lms.X, nlso, lms.p; jacobian_cache=lms.jacobian) return lms end -function _maybe_get_basis(M::AbstractManifold, p, B::AbstractBasis) - if requires_caching(B) - return get_basis(M, p, B) - else - return B - end -end - -# TODO: Adapt to vectorial function call instead of .f - maybe even skip or rename? -# TODO: It will just ne “get_value” of the vgf - adapted by smoothing?. -function get_residuals!( - dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective{AllocatingEvaluation}}, - residuals, - p, -) where {mT} - return copyto!(residuals, get_objective(dmp).f(get_manifold(dmp), p)) -end -function get_residuals!( - dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective{InplaceEvaluation}}, - residuals, - p, -) where {mT} - return get_objective(dmp).f(get_manifold(dmp), residuals, p) -end - function step_solver!( dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective}, lms::LevenbergMarquardtState, @@ -252,18 +227,16 @@ function step_solver!( # `o.residual_values` is either initialized by `initialize_solver!` or taken from the previous iteration M = get_manifold(dmp) nlso = get_objective(dmp) - # TODO: Replace with obtaining a basis from the vectorial function - basis_ox = _maybe_get_basis(M, lms.p, nlso.jacobian_tangent_basis) # a new Jacobian is only needed if the last step was successful if lms.last_step_successful - get_jacobian!(dmp, lms.jacF, lms.p) + get_jacobian!(dmp, lms.jacobian, lms.p) end λk = lms.damping_term * norm(lms.residual_values)^2 - JJ = transpose(lms.jacF) * lms.jacF + λk * I + JJ = transpose(lms.jacobian) * lms.jacobian + λk * I # `cholesky` is technically not necessary but it's the fastest method to solve the # problem because JJ is symmetric positive definite - grad_f_c = transpose(lms.jacF) * lms.residual_values + grad_f_c = transpose(lms.jacobian) * lms.residual_values sk = cholesky(JJ) \ -grad_f_c get_vector!(M, lms.X, lms.p, grad_f_c, basis_ox) @@ -272,11 +245,11 @@ function step_solver!( temp_x = retract(M, lms.p, lms.step_vector, lms.retraction_method) normFk2 = norm(lms.residual_values)^2 - get_residuals!(dmp, lms.candidate_residual_values, temp_x) + get_value!(M, lms.candidate_residual_values, nlso.objective, temp_x) ρk = (normFk2 - norm(lms.candidate_residual_values)^2) / ( - -2 * inner(M, lms.p, lms.X, lms.step_vector) - norm(lms.jacF * sk)^2 - + -2 * inner(M, lms.p, lms.X, lms.step_vector) - norm(lms.jacobian * sk)^2 - λk * norm(sk)^2 ) if ρk >= lms.η