From 56873a96bf15977239377742111269b7b2aba5f2 Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Thu, 16 Nov 2023 20:06:45 +0100 Subject: [PATCH] First steps towards using vale (for now just fixing errors) --- .github/workflows/documenter.yml | 9 +- .gitignore | 1 + CONTRIBUTING.md | 6 +- Changelog.md | 114 +++++++++--------- docs/.vale.ini | 11 ++ docs/make.jl | 27 +++-- docs/src/about.md | 10 +- docs/src/extensions.md | 5 +- docs/src/functions/adjoint_differentials.md | 2 +- docs/src/functions/index.md | 2 +- docs/src/functions/proximal_maps.md | 6 +- docs/src/index.md | 6 +- docs/src/notation.md | 2 +- docs/src/plans/debug.md | 4 +- docs/src/plans/index.md | 26 ++-- docs/src/plans/objective.md | 40 +++--- docs/src/plans/problem.md | 6 +- docs/src/plans/record.md | 4 +- docs/src/plans/state.md | 14 +-- docs/src/plans/stepsize.md | 6 +- docs/src/plans/stopping_criteria.md | 16 +-- docs/src/solvers/ChambollePock.md | 7 +- docs/src/solvers/DouglasRachford.md | 4 +- docs/src/solvers/NelderMead.md | 4 +- .../adaptive-regularization-with-cubics.md | 10 +- docs/src/solvers/index.md | 10 +- .../solvers/primal_dual_semismooth_Newton.md | 5 +- docs/src/solvers/quasi_Newton.md | 4 +- .../truncated_conjugate_gradient_descent.md | 6 +- docs/src/solvers/trust_regions.md | 10 +- docs/src/tutorials/GeodesicRegression.md | 12 +- docs/src/tutorials/HowToDebug.md | 14 +-- docs/src/tutorials/InplaceGradient.md | 12 +- docs/styles/Vocab/Manopt/accept.txt | 68 +++++++++++ joss/paper.md | 2 +- src/plans/primal_dual_plan.jl | 2 +- src/solvers/ChambollePock.jl | 4 +- src/solvers/DouglasRachford.jl | 4 +- tutorials/AutomaticDifferentiation.qmd | 16 +-- tutorials/ConstrainedOptimization.qmd | 23 ++-- tutorials/CountAndCache.qmd | 12 +- tutorials/EmbeddingObjectives.qmd | 8 +- tutorials/GeodesicRegression.qmd | 12 +- tutorials/HowToDebug.qmd | 14 +-- tutorials/HowToRecord.qmd | 25 ++-- tutorials/ImplementASolver.qmd | 36 +++--- tutorials/ImplementOwnManifold.qmd | 22 ++-- tutorials/InplaceGradient.qmd | 6 +- tutorials/{Optimize!.qmd => Optimize.qmd} | 26 ++-- tutorials/StochasticGradientDescent.qmd | 6 +- 50 files changed, 395 insertions(+), 306 deletions(-) create mode 100644 docs/.vale.ini create mode 100644 docs/styles/Vocab/Manopt/accept.txt rename tutorials/{Optimize!.qmd => Optimize.qmd} (86%) diff --git a/.github/workflows/documenter.yml b/.github/workflows/documenter.yml index 9a60d03b47..2c1795093c 100644 --- a/.github/workflows/documenter.yml +++ b/.github/workflows/documenter.yml @@ -50,7 +50,14 @@ jobs: restore-keys: | ${{ runner.os }}-${{ env.cache-name }}- - name: "Documenter rendering (including Quarto)" - run: "docs/make.jl --quarto" + run: "docs/make.jl --quarto --prettyurls" + - name: "vale.sh spell check" + uses: errata-ai/vale-action@reviewdog + with: + files: docs/src + fail_on_error: true + filter_mode: nofilter + vale_flags: "--config=docs/.vale.ini" env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} diff --git a/.gitignore b/.gitignore index af0a1746a2..c1ce6ebd72 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,4 @@ docs/.CondaPkg docs/src/tutorials/Optimize!_files docs/src/tutorials/*.html docs/src/changelog.md +docs/styles/Google diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 81126b7757..79eb8537ed 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -32,7 +32,7 @@ If you found a bug or want to propose a feature, we track our issues within the ### Add a missing method There is still a lot of methods for within the optimization 
framework of `Manopt.jl`, may it be functions, gradients, differentials, proximal maps, step size rules or stopping criteria. -If you notice a method missing and can contribute an implementation, please do so! +If you notice a method missing and can contribute an implementation, please do so, we help with the necessary details. Even providing a single new method is a good contribution. ### Provide a new algorithm @@ -55,7 +55,7 @@ where also their reproducible Quarto-Markdown files are stored. ### Code style We try to follow the [documentation guidelines](https://docs.julialang.org/en/v1/manual/documentation/) from the Julia documentation as well as [Blue Style](https://github.com/invenia/BlueStyle). -We run [`JuliaFormatter.jl`](https://github.com/domluna/JuliaFormatter.jl) on the repo in the way set in the `.JuliaFormatter.toml` file, which enforces a number of conventions consistent with the Blue Style. +We run [`JuliaFormatter.jl`](https://github.com/domluna/JuliaFormatter.jl) on the repository in the way set in the `.JuliaFormatter.toml` file, which enforces a number of conventions consistent with the Blue Style. We also follow a few internal conventions: @@ -68,5 +68,5 @@ We also follow a few internal conventions: - There should be no dangling `=` signs. - Always add a newline between things of different types (struct/method/const). - Always add a newline between methods for different functions (including mutating/nonmutating variants). -- Prefer to have no newline between methods for the same function; when reasonable, merge the docstrings. +- Prefer to have no newline between methods for the same function; when reasonable, merge the documentation strings. - All `import`/`using`/`include` should be in the main module file. diff --git a/Changelog.md b/Changelog.md index 402fa9d8d3..aa4a5f32b7 100644 --- a/Changelog.md +++ b/Changelog.md @@ -5,27 +5,27 @@ All notable Changes to the Julia package `Manopt.jl` will be documented in this The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [0.4.42] – 06/11/2023 +## [0.4.42] - November 6, 2023 ### Added * add `Manopt.JuMP_Optimizer` implementing JuMP's solver interface -## [0.4.41] - 02/11/2023 +## [0.4.41] - November 2, 2023 ### Changed -– `trust_regions` is now more flexible and the sub solver (Steinhaug-Toint tCG by default) +* `trust_regions` is now more flexible and the sub solver (Steihaug-Toint tCG by default) can now be exchanged. -- `adaptive_regularization_with_cubics` is now more flexible as well, where it previously was a bit too +* `adaptive_regularization_with_cubics` is now more flexible as well, where it previously was a bit too much tightened to the Lanczos solver as well. -- Unified documentation notation and bumped dependencies to use DocumenterCitations 1.3 +* Unified documentation notation and bumped dependencies to use DocumenterCitations 1.3 -## [0.4.40] – 24/10/2023 +## [0.4.40] - October 24, 2023 ### Added -* add a `--help` argument to `docs/make.jl` to document all availabel command line arguments +* add a `--help` argument to `docs/make.jl` to document all available command line arguments * add a `--exclude-tutorials` argument to `docs/make.jl`. This way, when quarto is not available on a computer, the docs can still be build with the tutorials not being added to the menu such that documenter does not expect them to exist. 
@@ -36,14 +36,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * move the ARC CG subsolver to the main package, since `TangentSpace` is now already available from `ManifoldsBase`. -## [0.4.39] – 09/10/2023 +## [0.4.39] - October 9, 2023 ### Changes * also use the pair of a retraction and the inverse retraction (see last update) to perform the relaxation within the Douglas-Rachford algorithm. -## [0.4.38] – 08/10/2023 +## [0.4.38] - October 8, 2023 ### Changes @@ -53,7 +53,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Fix a lot of typos in the documentation -## [0.4.37] – 28/09/2023 +## [0.4.37] - September 28, 2023 ### Changes @@ -62,67 +62,66 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * generalize the internal reflection of Douglas-Rachford, such that is also works with an arbitrary pair of a reflection and an inverse reflection. -## [0.4.36] – 20/09/2023 +## [0.4.36] - September 20, 2023 ### Fixed -* Fixed a bug that caused non-matrix points and vectors to fail when working with approcimate +* Fixed a bug that caused non-matrix points and vectors to fail when working with approximate -## [0.4.35] – 14/09/2023 +## [0.4.35] - September 14, 2023 ### Added -* The access to functions of the objective is now unified and encapsulated in proper `get_` - functions. +* The access to functions of the objective is now unified and encapsulated in proper `get_` functions. -## [0.4.34] – 02/09/2023 +## [0.4.34] - September 02, 2023 ### Added -* an `ManifoldEuclideanGradientObjetive` to allow the cost, gradient, and Hessian and other +* an `ManifoldEuclideanGradientObjective` to allow the cost, gradient, and Hessian and other first or second derivative based elements to be Euclidean and converted when needed. * a keyword `objective_type=:Euclidean` for all solvers, that specifies that an Objective shall be created of the above type -## [0.4.33] - 24/08/2023 +## [0.4.33] - August 24, 2023 ### Added * `ConstantStepsize` and `DecreasingStepsize` now have an additional field `type::Symbol` to assess whether the step-size should be relatively (to the gradient norm) or absolutely constant. -## [0.4.32] - 23/08/2023 +## [0.4.32] - August 23, 2023 ### Added * The adaptive regularization with cubics (ARC) solver. -## [0.4.31] - 14/08/2023 +## [0.4.31] - August 14, 2023 ### Added * A `:Subsolver` keyword in the `debug=` keyword argument, that activates the new `DebugWhenActive`` to de/activate subsolver debug from the main solvers `DebugEvery`. -## [0.4.30] - 03/08/2023 +## [0.4.30] - August 3, 2023 ### Changed * References in the documentation are now rendered using [DocumenterCitations.jl](https://github.com/JuliaDocs/DocumenterCitations.jl) * Asymptote export now also accepts a size in pixel instead of its default `4cm` size and `render` can be deactivated setting it to `nothing`. -## [0.4.29] - 12/07/2023 +## [0.4.29] - July 12, 2023 ### Fixed * fixed a bug, where `cyclic_proximal_point` did not work with decorated objectives. -## [0.4.28] - 24/06/2023 +## [0.4.28] - June 24, 2023 ### Changed * `max_stepsize` was specialized for `FixedRankManifold` to follow Matlab Manopt. 
-## [0.4.27] - 15/06/2023 +## [0.4.27] - June 15, 2023 ### Added @@ -134,7 +133,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 `initial_jacobian_f` also as keyword arguments, such that their default initialisations can be adapted, if necessary -## [0.4.26] - 11/06/2023 +## [0.4.26] - June 11, 2023 ### Added @@ -142,13 +141,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * add a `get_state` function * document `indicates_convergence`. -## [0.4.25] - 05/06/2023 +## [0.4.25] - June 5, 2023 ### Fixed * Fixes an allocation bug in the difference of convex algorithm -## [0.4.24] - 04/06/2023 +## [0.4.24] - June 4, 2023 ### Added @@ -158,7 +157,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * bump dependencies since the extension between Manifolds.jl and ManifoldsDiff.jl has been moved to Manifolds.jl -## [0.4.23] - 04/06/2023 +## [0.4.23] - June 4, 2023 ### Added @@ -168,13 +167,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * loosen constraints slightly -## [0.4.22] - 31/05/2023 +## [0.4.22] - May 31, 2023 ### Added * A tutorial on how to implement a solver -## [0.4.21] - 22/05/2023 +## [0.4.21] - May 22, 2023 ### Added @@ -187,55 +186,56 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * change solvers on the mid level (form `solver(M, objective, p)`) to also accept decorated objectives ### Changed + * Switch all Requires weak dependencies to actual weak dependencies starting in Julia 1.9 -## [0.4.20] - 11/05/2023 +## [0.4.20] - May 11, 2023 ### Changed * the default tolerances for the numerical `check_` functions were loosened a bit, such that `check_vector` can also be changed in its tolerances. -## [0.4.19] - 07/05/2023 +## [0.4.19] - May 7, 2023 ### Added -* the sub solver for `trust_regions` is now customizable, i.e. can be exchanged. +* the sub solver for `trust_regions` is now customizable and can now be exchanged. ### Changed * slightly changed the definitions of the solver states for ALM and EPM to be type stable -## [0.4.18] - 04/05/2023 +## [0.4.18] - May 4, 2023 ### Added * A function `check_Hessian(M, f, grad_f, Hess_f)` to numerically check the (Riemannian) Hessian of a function `f` -## [0.4.17] - 28/04/2023 +## [0.4.17] - April 28, 2023 ### Added * A new interface of the form `alg(M, objective, p0)` to allow to reuse - objectives without creating `AbstractManoptSolverState`s and calling `solve!`. This especially still allows for any decoration of the objective and/or the state using e.g. `debug=`, or `record=`. + objectives without creating `AbstractManoptSolverState`s and calling `solve!`. This especially still allows for any decoration of the objective and/or the state using `debug=`, or `record=`. ### Changed -* All solvers now have the initial point `p` as an optional parameter making it more accessible to first time users, e.g. 
`gradient_descent(M, f, grad_f)` +* All solvers now have the initial point `p` as an optional parameter making it more accessible to first time users, `gradient_descent(M, f, grad_f)` is equivalent to `gradient_descent(M, f, grad_f, rand(M))` ### Fixed * Unified the framework to work on manifold where points are represented by numbers for several solvers -## [0.4.16] - 18/04/2023 +## [0.4.16] - April 18, 2023 ### Fixed * the inner products used in `truncated_gradient_descent` now also work thoroughly on complex matrix manifolds -## [0.4.15] - 13/04/2023 +## [0.4.15] - April 13, 2023 ### Changed @@ -249,7 +249,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * support for `ManifoldsBase.jl` 0.13.x, since with the definition of `copy(M,p::Number)`, in 0.14.4, we now use that instead of defining it ourselves. -## [0.4.14] - 06/04/2023 +## [0.4.14] - April 06, 2023 ### Changed * `particle_swarm` now uses much more in-place operations @@ -257,15 +257,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed * `particle_swarm` used quite a few `deepcopy(p)` commands still, which were replaced by `copy(M, p)` -## [0.4.13] - 09/04/2023 +## [0.4.13] - April 09, 2023 ### Added * `get_message` to obtain messages from sub steps of a solver * `DebugMessages` to display the new messages in debug -* safeguards in Armijo linesearch and L-BFGS against numerical over- and underflow that report in messages +* safeguards in Armijo line search and L-BFGS against numerical over- and underflow that report in messages -## [0.4.12] - 04/04/2023 +## [0.4.12] - April 4, 2023 ### Added @@ -275,19 +275,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 `difference_of_convex_proximal_point(M, prox_g, grad_h, p0)` * Introduce a `StopWhenGradientChangeLess` stopping criterion -## [0.4.11] - 27/04/2023 +## [0.4.11] - March 27, 2023 ### Changed * adapt tolerances in tests to the speed/accuracy optimized distance on the sphere in `Manifolds.jl` (part II) -## [0.4.10] - 26/04/2023 +## [0.4.10] - March 26, 2023 ### Changed * adapt tolerances in tests to the speed/accuracy optimized distance on the sphere in `Manifolds.jl` -## [0.4.9] – 03/03/2023 +## [0.4.9] - March 3, 2023 ### Added @@ -295,7 +295,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 to be used within Manopt.jl, introduce the [manoptjl.org/stable/extensions/](https://manoptjl.org/stable/extensions/) page to explain the details. -## [0.4.8] - 21/02/2023 +## [0.4.8] - February 21, 2023 ### Added @@ -308,26 +308,26 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * changed the `show` methods of `AbstractManoptSolverState`s to display their `state_summary * Move tutorials to be rendered with Quarto into the documentation. -## [0.4.7] - 14/02/2023 +## [0.4.7] - February 14, 2023 ### Changed -* Bump [compat] entry of ManifoldDiff to also include 0.3 +* Bump `[compat]` entry of ManifoldDiff to also include 0.3 -## [0.4.6] - 03/02/2023 +## [0.4.6] - February 3, 2023 ### Fixed * Fixed a few stopping criteria even indicated to stop before the algorithm started. 
-## [0.4.5] - 24/01/2023 +## [0.4.5] - January 24, 2023 ### Changed * the new default functions that include `p` are used where possible * a first step towards faster storage handling -## [0.4.4] - 20/01/2023 +## [0.4.4] - January 20, 2023 ### Added @@ -338,29 +338,29 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * fix a type in `HestenesStiefelCoefficient` -## [0.4.3] - 17/01/2023 +## [0.4.3] - January 17, 2023 ### Fixed * the CG coefficient `β` can now be complex * fix a bug in `grad_distance` -## [0.4.2] - 16/01/2023 +## [0.4.2] - January 16, 2023 ### Changed -* the usage of `inner` in linesearch methods, such that they work well with +* the usage of `inner` in line search methods, such that they work well with complex manifolds as well -## [0.4.1] - 15/01/2023 +## [0.4.1] - January 15, 2023 ### Fixed * a `max_stepsize` per manifold to avoid leaving the injectivity radius, which it also defaults to -## [0.4.0] - 10/01/2023 +## [0.4.0] - January 10, 2023 ### Added diff --git a/docs/.vale.ini b/docs/.vale.ini new file mode 100644 index 0000000000..f4db122493 --- /dev/null +++ b/docs/.vale.ini @@ -0,0 +1,11 @@ +StylesPath = styles +MinAlertLevel = error +Vocab = Manopt + +Packages = Google + +[*.md] +BasedOnStyles = Vale, Google +TokenIgnores = \ + \$.+?\$, \ + \]\(@(ref|id|cite).+?\), \ diff --git a/docs/make.jl b/docs/make.jl index 93fe503b35..1bc8163c57 100755 --- a/docs/make.jl +++ b/docs/make.jl @@ -14,6 +14,7 @@ Arguments this can be used if you do not have Quarto installed to still be able to render the docs locally on this machine. This option should not be set on CI. * `--help` - print this help and exit without rendering the documentation +* `--prettyurls` – toggle the prettyurls part to true (which is otherwise only true on CI) * `--quarto` – run the Quarto notebooks from the `tutorials/` folder before generating the documentation this has to be run locally at least once for the `tutorials/*.md` files to exist that are included in the documentation (see `--exclude-tutorials`) for the alternative. @@ -93,24 +94,24 @@ end ## Build titorials menu tutorials_menu = "How to..." => [ - "🏔️ Get started: Optimize!" => "tutorials/Optimize!.md", - "Speedup using Inplace computations" => "tutorials/InplaceGradient.md", - "Use Automatic Differentiation" => "tutorials/AutomaticDifferentiation.md", - "Define Objectives in the Embedding" => "tutorials/EmbeddingObjectives.md", - "Count and use a Cache" => "tutorials/CountAndCache.md", - "Print Debug Output" => "tutorials/HowToDebug.md", + "🏔️ Get started: optimize." 
=> "tutorials/Optimize.md", + "Speedup using in-place computations" => "tutorials/InplaceGradient.md", + "Use automatic differentiation" => "tutorials/AutomaticDifferentiation.md", + "Define objectives in the embedding" => "tutorials/EmbeddingObjectives.md", + "Count and use a cache" => "tutorials/CountAndCache.md", + "Print debug output" => "tutorials/HowToDebug.md", "Record values" => "tutorials/HowToRecord.md", - "Implement a Solver" => "tutorials/ImplementASolver.md", - "Optimize on your own Manifold" => "tutorials/ImplementOwnManifold.md", - "Do Constrained Optimization" => "tutorials/ConstrainedOptimization.md", - "Do Geodesic Regression" => "tutorials/GeodesicRegression.md", + "Implement a solver" => "tutorials/ImplementASolver.md", + "Optimize on your own manifold" => "tutorials/ImplementOwnManifold.md", + "Do constrained optimization" => "tutorials/ConstrainedOptimization.md", + "Do geodesic regression" => "tutorials/GeodesicRegression.md", ] # (e) ...finally! make docs bib = CitationBibliography(joinpath(@__DIR__, "src", "references.bib"); style=:alpha) makedocs(; format=Documenter.HTML(; - prettyurls = get(ENV, "CI", nothing) == "true" || ("--prettyurls" ∈ ARGS), - false, assets=["assets/favicon.ico", "assets/citations.css"] + prettyurls=(get(ENV, "CI", nothing) == "true") || ("--prettyurls" ∈ ARGS), + assets=["assets/favicon.ico", "assets/citations.css"], ), modules=[ Manopt, @@ -155,7 +156,7 @@ makedocs(; "Conjugate gradient descent" => "solvers/conjugate_gradient_descent.md", "Cyclic Proximal Point" => "solvers/cyclic_proximal_point.md", "Difference of Convex" => "solvers/difference_of_convex.md", - "Douglas–Rachford" => "solvers/DouglasRachford.md", + "Douglas—Rachford" => "solvers/DouglasRachford.md", "Exact Penalty Method" => "solvers/exact_penalty_method.md", "Frank-Wolfe" => "solvers/FrankWolfe.md", "Gradient Descent" => "solvers/gradient_descent.md", diff --git a/docs/src/about.md b/docs/src/about.md index e38f780134..312df00f22 100644 --- a/docs/src/about.md +++ b/docs/src/about.md @@ -9,7 +9,7 @@ The following people contributed * [Willem Diepeveen](https://www.maths.cam.ac.uk/person/wd292) implemented the [primal-dual Riemannian semismooth Newton](@ref PDRSSNSolver) solver. * Even Stephansen Kjemsås contributed to the implementation of the [Frank Wolfe Method](@ref FrankWolfe) solver * Mathias Ravn Munkvold contributed most of the implementation of the [Adaptive Regularization with Cubics](@ref ARSSection) solver -* [Tom-Christian Riemer](https://www.tu-chemnitz.de/mathematik/wire/mitarbeiter.php) Riemer implemented the [trust regions](@ref trust_regions) and [quasi Newton](solvers/quasi_Newton.md) solvers. +* [Tom-Christian Riemer](https://www.tu-chemnitz.de/mathematik/wire/mitarbeiter.php) implemented the [trust regions](@ref trust_regions) and [quasi Newton](solvers/quasi_Newton.md) solvers. * [Manuel Weiss](https://scoop.iwr.uni-heidelberg.de/author/manuel-weiß/) implemented most of the [conjugate gradient update rules](@ref cg-coeffs) ...as well as various [contributors](https://github.com/JuliaManifolds/Manopt.jl/graphs/contributors) providing small extensions, finding small bugs and mistakes and fixing them by opening [PR](https://github.com/JuliaManifolds/Manopt.jl/pulls)s. @@ -23,14 +23,14 @@ to clone/fork the repository or open an issue. 
`Manopt.jl` belongs to the Manopt family: -* [manopt.org](https://www.manopt.org) – The Matlab version of Manopt, see also their :octocat: [GitHub repository](https://github.com/NicolasBoumal/manopt) -* [pymanopt.org](https://www.pymanopt.org/) – The Python version of Manopt – providing also several AD backends, see also their :octocat: [GitHub repository](https://github.com/pymanopt/pymanopt) +* [manopt.org](https://www.manopt.org) The Matlab version of Manopt, see also their :octocat: [GitHub repository](https://github.com/NicolasBoumal/manopt) +* [pymanopt.org](https://www.pymanopt.org/) The Python version of Manopt providing also several AD backends, see also their :octocat: [GitHub repository](https://github.com/pymanopt/pymanopt) but there are also more packages providing tools on manifolds: * [Jax Geometry](https://bitbucket.org/stefansommer/jaxgeometry/src/main/) (Python/Jax) for differential geometry and stochastic dynamics with deep learning * [Geomstats](https://geomstats.github.io) (Python with several backends) focusing on statistics and machine learning :octocat: [GitHub repository](https://github.com/geomstats/geomstats) -* [Geoopt](https://geoopt.readthedocs.io/en/latest/) (Python & PyTorch) – Riemannian ADAM & SGD. :octocat: [GitHub repository](https://github.com/geoopt/geoopt) -* [McTorch](https://github.com/mctorch/mctorch) (Python & PyToch) – Riemannian SGD, Adagrad, ASA & CG. +* [Geoopt](https://geoopt.readthedocs.io/en/latest/) (Python & PyTorch) Riemannian ADAM & SGD. :octocat: [GitHub repository](https://github.com/geoopt/geoopt) +* [McTorch](https://github.com/mctorch/mctorch) (Python & PyToch) Riemannian SGD, Adagrad, ASA & CG. * [ROPTLIB](https://www.math.fsu.edu/~whuang2/papers/ROPTLIB.htm) (C++) a Riemannian OPTimization LIBrary :octocat: [GitHub repository](https://github.com/whuang08/ROPTLIB) * [TF Riemopt](https://github.com/master/tensorflow-riemopt) (Python & TensorFlow) Riemannian optimization using TensorFlow diff --git a/docs/src/extensions.md b/docs/src/extensions.md index 8ed1b36bab..9169d63766 100644 --- a/docs/src/extensions.md +++ b/docs/src/extensions.md @@ -53,7 +53,7 @@ Manopt.LineSearchesStepsize ## Manifolds.jl -When loading `Manifolds.jl` we introduce the following further functions +Loading `Manifolds.jl` introduces the following additional functions ```@docs mid_point @@ -64,7 +64,8 @@ Manopt.max_stepsize(::FixedRankMatrices, ::Any) ## JuMP.jl Manopt can be used using the [JuMP.jl](https://github.com/jump-dev/JuMP.jl) interface. -The manifold is provided in the `@variable` macro. Note that until now, only variables (points on manifolds) are supported, that are arrays, i.e. especially structs do not yet work. +The manifold is provided in the `@variable` macro. Note that until now, +only variables (points on manifolds) are supported, that are arrays, especially structs do not yet work. The algebraic expression of the objective function is specified in the `@objective` macro. The `descent_state_type` attribute specifies the solver. 
diff --git a/docs/src/functions/adjoint_differentials.md b/docs/src/functions/adjoint_differentials.md index 640d2e4457..19eee1e133 100644 --- a/docs/src/functions/adjoint_differentials.md +++ b/docs/src/functions/adjoint_differentials.md @@ -1,4 +1,4 @@ -# [Adjoint Differentials](@id adjointDifferentialFunctions) +# [Adjoint differentials](@id adjointDifferentialFunctions) ```@autodocs Modules = [Manopt] diff --git a/docs/src/functions/index.md b/docs/src/functions/index.md index a05a453087..4220b52aa9 100644 --- a/docs/src/functions/index.md +++ b/docs/src/functions/index.md @@ -1,7 +1,7 @@ # Functions There are several functions required within optimization, most prominently -[costFunctions](@ref CostFunctions) and [gradients](@ref GradientFunctions). This package includes +[cost functions](@ref CostFunctions) and [gradients](@ref GradientFunctions). This package includes several cost functions and corresponding gradients, but also corresponding [proximal maps](@ref proximalMapFunctions) for variational methods manifold-valued data. Most of these functions require the evaluation of diff --git a/docs/src/functions/proximal_maps.md b/docs/src/functions/proximal_maps.md index a3eb4912c1..83a46aeefd 100644 --- a/docs/src/functions/proximal_maps.md +++ b/docs/src/functions/proximal_maps.md @@ -14,9 +14,9 @@ the geodesic distance on ``\mathcal M``. While it might still be difficult to compute the minimizer, there are several proximal maps known (locally) in closed form. Furthermore if ``x^{\star} ∈ \mathcal M`` is a minimizer of ``\varphi``, then -``\displaystyle\operatorname{prox}_{λ\varphi}(x^\star) = x^\star,`` - -i.e. a minimizer is a fixed point of the proximal map. +```math +\operatorname{prox}_{λ\varphi}(x^\star) = x^\star. +``` This page lists all proximal maps available within Manopt. To add you own, just extend the `functions/proximal_maps.jl` file. diff --git a/docs/src/index.md b/docs/src/index.md index 631b1ca6c0..daf8f7db5d 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -20,7 +20,7 @@ or in other words: find the point ``p`` on the manifold, where ``f`` reaches its It belongs to the “Manopt family”, which includes [Manopt](https://manopt.org) (Matlab) and [pymanopt.org](https://www.pymanopt.org/) (Python). If you want to delve right into `Manopt.jl` check out the -[Get started: Optimize!](tutorials/Optimize!.md) tutorial. +[🏔️ Get started: optimize.](tutorials/Optimize.md) tutorial. `Manopt.jl` makes it easy to use an algorithm for your favourite manifold as well as a manifold for your favourite algorithm. It already provides @@ -79,8 +79,8 @@ Several functions are available, implemented on an arbitrary manifold, [cost fun ### Visualization -To visualize and interpret results, `Manopt.jl` aims to provide both easy plot functions as well as [exports](@ref Exports). Furthermore a system to get [debug](@ref DebugSection) during the iterations of an algorithms as well as [record](@ref RecordSection) capabilities, i.e. to record a specified tuple of values per iteration, most prominently [`RecordCost`](@ref) and -[`RecordIterate`](@ref). Take a look at the [Get Started: Optimize!](tutorials/Optimize!.md) tutorial on how to easily activate this. +To visualize and interpret results, `Manopt.jl` aims to provide both easy plot functions as well as [exports](@ref Exports). 
Furthermore a system to get [debug](@ref DebugSection) during the iterations of an algorithms as well as [record](@ref RecordSection) capabilities, for example to record a specified tuple of values per iteration, most prominently [`RecordCost`](@ref) and +[`RecordIterate`](@ref). Take a look at the [🏔️ Get started: optimize.](tutorials/Optimize.md) tutorial on how to easily activate this. ## Literature diff --git a/docs/src/notation.md b/docs/src/notation.md index 9d74aa127e..910ff44c06 100644 --- a/docs/src/notation.md +++ b/docs/src/notation.md @@ -1,6 +1,6 @@ # Notation -In this package, we follow the notation introduced in [Manifolds.jl – Notation](https://juliamanifolds.github.io/Manifolds.jl/latest/misc/notation.html) +In this package, we follow the notation introduced in [Manifolds.jl Notation](https://juliamanifolds.github.io/Manifolds.jl/latest/misc/notation.html) with the following additional notation diff --git a/docs/src/plans/debug.md b/docs/src/plans/debug.md index e248bca0de..e3c46eb2ad 100644 --- a/docs/src/plans/debug.md +++ b/docs/src/plans/debug.md @@ -1,4 +1,4 @@ -# [Debug Output](@id DebugSection) +# [Debug output](@id DebugSection) ```@meta CurrentModule = Manopt @@ -14,7 +14,7 @@ Order = [:type, :function] Private = true ``` -## Technical Details: The Debug Solver +## Technical details The decorator to print debug during the iterations can be activated by decorating the state of a solver and implementing diff --git a/docs/src/plans/index.md b/docs/src/plans/index.md index 195f1fc905..33c5489193 100644 --- a/docs/src/plans/index.md +++ b/docs/src/plans/index.md @@ -5,13 +5,13 @@ CurrentModule = Manopt ``` For any optimisation performed in `Manopt.jl` -we need information about both the optimisation task or “problem” at hand as well as the solver and all its parameters. +information is required about both the optimisation task or “problem” at hand as well as the solver and all its parameters. This together is called a __plan__ in `Manopt.jl` and it consists of two data structures: -* The [Manopt Problem](@ref ProblemSection) describes all _static_ data of our task, most prominently the manifold and the objective. -* The [Solver State](@ref SolverStateSection) describes all _varying_ data and parameters for the solver we aim to use. This also means that each solver has its own data structure for the state. +* The [Manopt Problem](@ref ProblemSection) describes all _static_ data of a task, most prominently the manifold and the objective. +* The [Solver State](@ref SolverStateSection) describes all _varying_ data and parameters for the solver that is used. This also means that each solver has its own data structure for the state. -By splitting these two parts, we can use one problem and solve it using different solvers. +By splitting these two parts, one problem can be define an then be solved using different solvers. Still there might be the need to set certain parameters within any of these structures. For that there is @@ -24,23 +24,23 @@ Manopt.status_summary Where the following Symbols are used The following symbols are used. -The column “generic” refers to a short hand that might be used – for readability if clear from context. +The column “generic” refers to a short hand that might be used for readability if clear from context. 
| Symbol | Used in | Description | generic | | :----------- | :------: | ;-------------------------------------------------------- | :------ | | `:active` | [`DebugWhenActive`](@ref) | activity of the debug action stored within | | | `:Basepoint` | [`TangentSpace`]() | the point the tangent space is at | `:p` | -| `:Cost` | generic |the cost function (e.g. within an objective, as pass down) | | +| `:Cost` | generic |the cost function (within an objective, as pass down) | | | `:Debug` | [`DebugSolverState`](@ref) | the stored `debugDictionary` | | -| `:Gradient` | generic |the gradient function (e.g. within an objective, as pass down) | | -| `:Iterate` | generic | the (current) iterate – similar to [`set_iterate!`](@ref) – within a state | | -| `:Manifold` | generic |the manifold (e.g. within a problem, as pass down) | | -| `:Objective` | generic | the objective (e.g. within a problem, as pass down) | | -| `:SubProblem` | generic | the sub problem (e.g. within a state, as pass down) | | -| `:SubState` | generic | the sub state (e.g. within a state, as pass down) | | +| `:Gradient` | generic |the gradient function (within an objective, as pass down) | | +| `:Iterate` | generic | the (current) iterate, similar to [`set_iterate!`](@ref), within a state | | +| `:Manifold` | generic |the manifold (within a problem, as pass down) | | +| `:Objective` | generic | the objective (within a problem, as pass down) | | +| `:SubProblem` | generic | the sub problem (within a state, as pass down) | | +| `:SubState` | generic | the sub state (within a state, as pass down) | | | `:λ` | [`ProximalDCCost`](@ref), [`ProximalDCGrad`](@ref) | set the proximal parameter within the proximal sub objective elements | | | `:p` | generic | a certain point | | | `:X` | generic | a certain tangent vector | | | `:TrustRegionRadius` | [`TrustRegionsState`](@ref) | the trust region radius | `:σ` | -| `:ρ`, `:u` | [`ExactPenaltyCost`](@ref), [`ExactPenaltyGrad`](@ref) | Parameters within the exact penalty objetive | | +| `:ρ`, `:u` | [`ExactPenaltyCost`](@ref), [`ExactPenaltyGrad`](@ref) | Parameters within the exact penalty objective | | | `:ρ`, `:μ`, `:λ` | [`AugmentedLagrangianCost`](@ref) and [`AugmentedLagrangianGrad`](@ref) | Parameters of the Lagrangian function | | diff --git a/docs/src/plans/objective.md b/docs/src/plans/objective.md index fdaa7aa39c..d4e3642a0c 100644 --- a/docs/src/plans/objective.md +++ b/docs/src/plans/objective.md @@ -1,4 +1,4 @@ -# [A Manifold Objective](@id ObjectiveSection) +# [A manifold objective](@id ObjectiveSection) ```@meta CurrentModule = Manopt @@ -11,7 +11,7 @@ AbstractManifoldObjective AbstractDecoratedManifoldObjective ``` -Which has two main different possibilities for its containing functions concerning the evaluation mode – not necessarily the cost, but for example gradient in an [`AbstractManifoldGradientObjective`](@ref). +Which has two main different possibilities for its containing functions concerning the evaluation mode, not necessarily the cost, but for example gradient in an [`AbstractManifoldGradientObjective`](@ref). ```@docs AbstractEvaluationType @@ -21,7 +21,7 @@ evaluation_type ``` -## Decorators for Objectives +## Decorators for objectives An objective can be decorated using the following trait and function to initialize @@ -31,15 +31,15 @@ is_objective_decorator decorate_objective! 
``` -### [Embedded Objectives](@id ManifoldEmbeddedObjective) +### [Embedded objectives](@id ManifoldEmbeddedObjective) ```@docs EmbeddedManifoldObjective ``` -### [Cache Objective](@id CacheSection) +### [Cache objective](@id CacheSection) -Since single function calls, e.g. to the cost or the gradient, might be expensive, +Since single function calls, for example to the cost or the gradient, might be expensive, a simple cache objective exists as a decorator, that caches one cost value or gradient. It can be activated/used with the `cache=` keyword argument available for every solver. @@ -57,7 +57,7 @@ A first generic cache is always available, but it only caches one gradient and o SimpleManifoldCachedObjective ``` -#### A Generic Cache +#### A generic cache For the more advanced cache, you need to implement some type of cache yourself, that provides a `get!` and implement [`init_caches`](@ref). @@ -68,13 +68,13 @@ ManifoldCachedObjective init_caches ``` -### [Count Objective](@id ManifoldCountObjective) +### [Count objective](@id ManifoldCountObjective) ```@docs ManifoldCountObjective ``` -### Internal Decorators +### Internal decorators ```@docs ReturnManifoldObjective @@ -82,7 +82,7 @@ ReturnManifoldObjective ## Specific Objective typed and their access functions -### Cost Objective +### Cost objective ```@docs AbstractManifoldCostObjective @@ -101,7 +101,7 @@ and internally get_cost_function ``` -### Gradient Objectives +### Gradient objectives ```@docs AbstractManifoldGradientObjective @@ -130,37 +130,37 @@ and internally get_gradient_function ``` -#### Internal Helpers +#### Internal helpers ```@docs get_gradient_from_Jacobian! ``` -### Subgradient Objective +### Subgradient objective ```@docs ManifoldSubgradientObjective ``` -#### Access Functions +#### Access functions ```@docs get_subgradient ``` -### Proximal Map Objective +### Proximal map objective ```@docs ManifoldProximalMapObjective ``` -#### Access Functions +#### Access functions ```@docs get_proximal_map ``` -### Hessian Objective +### Hessian objective ```@docs AbstractManifoldHessianObjective @@ -180,7 +180,7 @@ and internally get_hessian_function ``` -### Primal-Dual based Objectives +### Primal-dual based objectives ```@docs AbstractPrimalDualManifoldObjective @@ -200,7 +200,7 @@ get_primal_prox linearized_forward_operator ``` -### Constrained Objective +### Constrained objective Besides the [`AbstractEvaluationType`](@ref) there is one further property to distinguish among constraint functions, especially the gradients of the constraints. @@ -235,7 +235,7 @@ get_grad_inequality_constraints get_grad_inequality_constraints! ``` -### Subproblem Objective +### Subproblem objective This objective can be use when the objective of a sub problem solver still needs access to the (outer/main) objective. diff --git a/docs/src/plans/problem.md b/docs/src/plans/problem.md index 5a4678e394..21ac9bf28b 100644 --- a/docs/src/plans/problem.md +++ b/docs/src/plans/problem.md @@ -1,4 +1,4 @@ -# [A Manopt Problem](@id ProblemSection) +# [A Manopt problem](@id ProblemSection) ```@meta CurrentModule = Manopt @@ -12,13 +12,13 @@ get_objective get_manifold ``` -Usually, such a problem is determined by the manifold or domain of the optimisation and the objective with all its properties used within an algorithm – see [The Objective](@ref ObjectiveSection). 
For that we can just use +Usually, such a problem is determined by the manifold or domain of the optimisation and the objective with all its properties used within an algorithm, see [The Objective](@ref ObjectiveSection). For that one can just use ```@docs DefaultManoptProblem ``` -The exception to these are the primal dual-based solvers ([Chambolle-Pock](@ref ChambollePockSolver) and the [PD Semismooth Newton](@ref PDRSSNSolver)]), which both need two manifolds as their domain(s), hence there also exists a +The exception to these are the primal dual-based solvers ([Chambolle-Pock](@ref ChambollePockSolver) and the [PD Semi-smooth Newton](@ref PDRSSNSolver)), which both need two manifolds as their domains, hence there also exists a ```@docs TwoManifoldProblem diff --git a/docs/src/plans/record.md b/docs/src/plans/record.md index 96c7605958..19630f0b01 100644 --- a/docs/src/plans/record.md +++ b/docs/src/plans/record.md @@ -10,7 +10,7 @@ On the one hand, the high-level interfaces provide a `record=` keyword, that acc For example recording the gradient from the [`GradientDescentState`](@ref) is automatically available, as explained in the [`gradient_descent`](@ref) solver. -## [Record Solver States](@id RecordSolverState) +## [Record solver states](@id RecordSolverState) ```@autodocs Modules = [Manopt] @@ -23,7 +23,7 @@ see [recording values](@ref RecordSection) for details on the decorated solver. Further specific [`RecordAction`](@ref)s can be found when specific types of [`AbstractManoptSolverState`](@ref) define them on their corresponding site. -## Technical Details: The Record Solver +## Technical details ```@docs initialize_solver!(amp::AbstractManoptProblem, rss::RecordSolverState) diff --git a/docs/src/plans/state.md b/docs/src/plans/state.md index 29cb603e29..aa661ea736 100644 --- a/docs/src/plans/state.md +++ b/docs/src/plans/state.md @@ -1,4 +1,4 @@ -# [The Solver State](@id SolverStateSection) +# [The solver state](@id SolverStateSection) ```@meta CurrentModule = Manopt @@ -6,7 +6,7 @@ CurrentModule = Manopt Given an [`AbstractManoptProblem`](@ref), that is a certain optimisation task, the state specifies the solver to use. It contains the parameters of a solver and all -fields necessary during the algorithm, e.g. the current iterate, a [`StoppingCriterion`](@ref) +fields necessary during the algorithm, for example the current iterate, a [`StoppingCriterion`](@ref) or a [`Stepsize`](@ref). ```@docs @@ -17,9 +17,9 @@ Manopt.get_count Since every subtype of an [`AbstractManoptSolverState`](@ref) directly relate to a solver, the concrete states are documented together with the corresponding [solvers](@ref SolversSection). -This page documents the general functionality available for every state. +This page documents the general features available for every state. -A first example is to access, i.e. obtain or set, the current iterate. +A first example is to obtain or set, the current iterate. This might be useful to continue investigation at the current iterate, or to set up a solver for a next experiment, respectively. ```@docs @@ -42,7 +42,7 @@ Furthermore, to access the stopping criterion use get_stopping_criterion ``` -## Decorators for AbstractManoptSolverState +## Decorators for `AbstractManoptSolverState`s A solver state can be decorated using the following trait and function to initialize @@ -60,7 +60,7 @@ ReturnSolverState as well as [`DebugSolverState`](@ref) and [`RecordSolverState`](@ref). 
-## State Actions +## State actions A state action is a struct for callback functions that can be attached within for example the just mentioned debug decorator or the record decorator. @@ -88,7 +88,7 @@ _storage_copy_vector _storage_copy_point ``` -## Abstract States +## Abstract states In a few cases it is useful to have a hierarchy of types. These are diff --git a/docs/src/plans/stepsize.md b/docs/src/plans/stepsize.md index 7979393e2e..76bac209b8 100644 --- a/docs/src/plans/stepsize.md +++ b/docs/src/plans/stepsize.md @@ -1,13 +1,13 @@ -# [Stepsize and Linesearch](@id Stepsize) +# [Stepsize and line search](@id Stepsize) ```@meta CurrentModule = Manopt ``` -Most iterative algorithms determine a direction along which the algorithm will proceed and +Most iterative algorithms determine a direction along which the algorithm shall proceed and determine a step size to find the next iterate. How advanced the step size computation can be implemented depends (among others) on the properties the corresponding problem provides. -Within `Manopt.jl`, the step size determination is implemented as a `functor` which is a subtype of [`Stepsize`](@refbased on +Within `Manopt.jl`, the step size determination is implemented as a `functor` which is a subtype of [`Stepsize`](@ref) based on ```@docs Stepsize diff --git a/docs/src/plans/stopping_criteria.md b/docs/src/plans/stopping_criteria.md index 1e76a6cd1f..53d4173ebe 100644 --- a/docs/src/plans/stopping_criteria.md +++ b/docs/src/plans/stopping_criteria.md @@ -1,6 +1,6 @@ -# [Stopping Criteria](@id StoppingCriteria) +# [Stopping criteria](@id StoppingCriteria) -Stopping criteria are implemented as a `functor`, i.e. inherit from the base type +Stopping criteria are implemented as a `functor` and inherit from the base type ```@docs StoppingCriterion @@ -12,16 +12,16 @@ They can also be grouped, which is summarized in the type of a set of criteria StoppingCriterionSet ``` -Then the stopping criteria `s` might have certain internal values to check against, -and this is done when calling them as a function `s(amp::AbstractManoptProblem, ams::AbstractManoptSolverState)`, +The stopping criteria `s` might have certain internal values/fields it uses to verify against. +This is done when calling them as a function `s(amp::AbstractManoptProblem, ams::AbstractManoptSolverState)`, where the [`AbstractManoptProblem`](@ref) and the [`AbstractManoptSolverState`](@ref) together represent the current state of the solver. The functor returns either `false` when the stopping criterion is not fulfilled or `true` otherwise. One field all criteria should have is the `s.reason`, a string giving the reason to stop, see [`get_reason`](@ref). -## Stopping Criteria +## Generic stopping criteria The following generic stopping criteria are available. Some require that, for example, -the corresponding [`AbstractManoptSolverState`](@ref) have a field `gradient` when the criterion should check that. +the corresponding [`AbstractManoptSolverState`](@ref) have a field `gradient` when the criterion should access that. Further stopping criteria might be available for individual solvers. @@ -32,9 +32,9 @@ Order = [:type] Filter = t -> t != StoppingCriterion && t != StoppingCriterionSet ``` -## Functions for Stopping Criteria +## Functions for stopping criteria -There are a few functions to update, combine and modify stopping criteria, especially to update internal values even for stopping criteria already being used within an [`AbstractManoptSolverState`](@ref) structure. 
+There are a few functions to update, combine, and modify stopping criteria, especially to update internal values even for stopping criteria already being used within an [`AbstractManoptSolverState`](@ref) structure. ```@autodocs Modules = [Manopt] diff --git a/docs/src/solvers/ChambollePock.md b/docs/src/solvers/ChambollePock.md index 0fc28d8b95..930c22cddf 100644 --- a/docs/src/solvers/ChambollePock.md +++ b/docs/src/solvers/ChambollePock.md @@ -1,6 +1,6 @@ # [The Riemannian Chambolle-Pock Algorithm](@id ChambollePockSolver) -The Riemannian Chambolle–Pock is a generalization of the Chambolle–Pock algorithm [ChambollePock:2011](@citet*) +The Riemannian Chambolle—Pock is a generalization of the Chambolle—Pock algorithm [ChambollePock:2011](@citet*) It is also known as primal-dual hybrid gradient (PDHG) or primal-dual proximal splitting (PDPS) algorithm. In order to minimize over ``p∈\mathcal M`` the cost function consisting of @@ -14,14 +14,13 @@ F(p) + G(Λ(p)), where ``F:\mathcal M → \overline{ℝ}``, ``G:\mathcal N → \overline{ℝ}``, and ``Λ:\mathcal M →\mathcal N``. -If the manifolds ``\mathcal M`` or ``\mathcal N`` are not Hadamard, it has to be considered locally, -i.e. on geodesically convex sets ``\mathcal C \subset \mathcal M`` and ``\mathcal D \subset\mathcal N`` +If the manifolds ``\mathcal M`` or ``\mathcal N`` are not Hadamard, it has to be considered locally only, that is on geodesically convex sets ``\mathcal C \subset \mathcal M`` and ``\mathcal D \subset\mathcal N`` such that ``Λ(\mathcal C) \subset \mathcal D``. The algorithm is available in four variants: exact versus linearized (see `variant`) as well as with primal versus dual relaxation (see `relax`). For more details, see [BergmannHerzogSilvaLouzeiroTenbrinckVidalNunez:2021](@citet*). -In the following we note the case of the exact, primal relaxed Riemannian Chambolle–Pock algorithm. +In the following we note the case of the exact, primal relaxed Riemannian Chambolle—Pock algorithm. Given base points ``m∈\mathcal C``, ``n=Λ(m)∈\mathcal D``, initial primal and dual values ``p^{(0)} ∈\mathcal C``, ``ξ_n^{(0)} ∈T_n^*\mathcal N``, diff --git a/docs/src/solvers/DouglasRachford.md b/docs/src/solvers/DouglasRachford.md index 3a4dacf0f5..74a2c10f0d 100644 --- a/docs/src/solvers/DouglasRachford.md +++ b/docs/src/solvers/DouglasRachford.md @@ -1,6 +1,6 @@ -# [Douglas–Rachford Algorithm](@id DRSolver) +# [Douglas—Rachford Algorithm](@id DRSolver) -The (Parallel) Douglas–Rachford ((P)DR) Algorithm was generalized to Hadamard +The (Parallel) Douglas—Rachford ((P)DR) Algorithm was generalized to Hadamard manifolds in [BergmannPerschSteidl:2016](@cite). 
The aim is to minimize the sum diff --git a/docs/src/solvers/NelderMead.md b/docs/src/solvers/NelderMead.md index 9a0985c087..897aca2a5c 100644 --- a/docs/src/solvers/NelderMead.md +++ b/docs/src/solvers/NelderMead.md @@ -1,4 +1,4 @@ -# [Nelder Mead Method](@id NelderMeadSolver) +# [Nelder Mead method](@id NelderMeadSolver) ```@meta CurrentModule = Manopt @@ -21,7 +21,7 @@ CurrentModule = Manopt NelderMeadSimplex ``` -## Additional Stopping Criteria +## Additional stopping criteria ```@docs StopWhenPopulationConcentrated diff --git a/docs/src/solvers/adaptive-regularization-with-cubics.md b/docs/src/solvers/adaptive-regularization-with-cubics.md index ad6ab2cc9c..473f51b6cb 100644 --- a/docs/src/solvers/adaptive-regularization-with-cubics.md +++ b/docs/src/solvers/adaptive-regularization-with-cubics.md @@ -1,4 +1,4 @@ -# [Adaptive regularization with Cubics](@id ARSSection) +# [Adaptive regularization with cubics](@id ARSSection) @@ -21,13 +21,13 @@ AdaptiveRegularizationState There are several ways to approach the subsolver. The default is the first one. -## Lanczos Iteration +## Lanczos iteration ```@docs Manopt.LanczosState ``` -## (Conjugate) Gradient Descent +## (Conjugate) gradient descent There is a generic objective, that implements the sub problem @@ -42,12 +42,12 @@ arc_obj = AdaptiveRagularizationWithCubicsModelObjective(mho, σ) sub_problem = DefaultProblem(TangentSpaceAt(M,p), arc_obj) ``` -where `mho` is the hessian objective of `f` to solve. +where `mho` is the Hessian objective of `f` to solve. Then use this for the `sub_problem` keyword and use your favourite gradient based solver for the `sub_state` keyword, for example a [`ConjugateGradientDescentState`](@ref) -## Additional Stopping Criteria +## Additional stopping criteria ```@docs StopWhenAllLanczosVectorsUsed diff --git a/docs/src/solvers/index.md b/docs/src/solvers/index.md index 808bf7425f..820fe15ad2 100644 --- a/docs/src/solvers/index.md +++ b/docs/src/solvers/index.md @@ -18,9 +18,9 @@ The following algorithms are currently available [Chambolle-Pock](@ref ChambollePockSolver) | [`ChambollePock`](@ref), [`ChambollePockState`](@ref) (using [`TwoManifoldProblem`](@ref)) | ``f=F+G(Λ\cdot)``, ``\operatorname{prox}_{σ F}``, ``\operatorname{prox}_{τ G^*}``, ``Λ`` | [Conjugate Gradient Descent](@ref CGSolver) | [`conjugate_gradient_descent`](@ref), [`ConjugateGradientDescentState`](@ref) | ``f``, ``\operatorname{grad} f`` [Cyclic Proximal Point](@ref CPPSolver) | [`cyclic_proximal_point`](@ref), [`CyclicProximalPointState`](@ref) | ``f=\sum f_i``, ``\operatorname{prox}_{\lambda f_i}`` | -[Difference of Convex Algorithm](@ref DCASolver) | [`difference_of_convex_algorithm`](@ref), [`DifferenceOfConvexState`](@ref) | ``f=g-h``, ``∂h``, and e.g. ``g``, ``\operatorname{grad} g`` | -[Difference of Convex Proximal Point](@ref DCPPASolver) | [`difference_of_convex_proximal_point`](@ref), [`DifferenceOfConvexProximalState`](@ref) | ``f=g-h``, ``∂h``, and e.g. 
``g``, ``\operatorname{grad} g`` | -[Douglas–Rachford](@ref DRSolver) | [`DouglasRachford`](@ref), [`DouglasRachfordState`](@ref) | ``f=\sum f_i``, ``\operatorname{prox}_{\lambda f_i}`` | +[Difference of Convex Algorithm](@ref DCASolver) | [`difference_of_convex_algorithm`](@ref), [`DifferenceOfConvexState`](@ref) | ``f=g-h``, ``∂h``, and for example ``g``, ``\operatorname{grad} g`` | +[Difference of Convex Proximal Point](@ref DCPPASolver) | [`difference_of_convex_proximal_point`](@ref), [`DifferenceOfConvexProximalState`](@ref) | ``f=g-h``, ``∂h``, and for example ``g``, ``\operatorname{grad} g`` | +[Douglas—Rachford](@ref DRSolver) | [`DouglasRachford`](@ref), [`DouglasRachfordState`](@ref) | ``f=\sum f_i``, ``\operatorname{prox}_{\lambda f_i}`` | [Exact Penalty Method](@ref ExactPenaltySolver) | [`exact_penalty_method`](@ref), [`ExactPenaltyMethodState`](@ref) | ``f``, ``\operatorname{grad} f``, ``g``, ``\operatorname{grad} g_i``, ``h``, ``\operatorname{grad} h_j`` | [Frank-Wolfe algorithm](@ref FrankWolfe) | [`Frank_Wolfe_method`](@ref), [`FrankWolfeState`](@ref) | sub-problem solver | [Gradient Descent](@ref GradientDescentSolver) | [`gradient_descent`](@ref), [`GradientDescentState`](@ref) | ``f``, ``\operatorname{grad} f`` | @@ -80,7 +80,7 @@ If you provide an immutable point `p` or the `rand(M)` point is immutable, like The third variant works in place of `p`, so it is mandatory. -This first interface would set up the objective and pass all keywords on the the +This first interface would set up the objective and pass all keywords on the objective based call. ### The objective-based call @@ -90,7 +90,7 @@ new_solver(M, obj, p=rand(M); kwargs...) new_solver!(M, obj, p; kwargs...) ``` -Here the objective would be created beforehand, e.g. to compare different solvers on the +Here the objective would be created beforehand for example to compare different solvers on the same objective, and for the first variant the start point is optional. Keyword arguments include decorators like `debug=` or `record=` as well as algorithm specific ones. diff --git a/docs/src/solvers/primal_dual_semismooth_Newton.md b/docs/src/solvers/primal_dual_semismooth_Newton.md index e007783d05..dba9cec9fb 100644 --- a/docs/src/solvers/primal_dual_semismooth_Newton.md +++ b/docs/src/solvers/primal_dual_semismooth_Newton.md @@ -10,11 +10,10 @@ F(p) + G(Λ(p)), where ``F:\mathcal M → \overline{ℝ}``, ``G:\mathcal N → \overline{ℝ}``, and ``Λ:\mathcal M →\mathcal N``. -If the manifolds ``\mathcal M`` or ``\mathcal N`` are not Hadamard, it has to be considered locally, -i.e. on geodesically convex sets ``\mathcal C \subset \mathcal M`` and ``\mathcal D \subset\mathcal N`` +If the manifolds ``\mathcal M`` or ``\mathcal N`` are not Hadamard, it has to be considered locally only, that is on geodesically convex sets ``\mathcal C \subset \mathcal M`` and ``\mathcal D \subset\mathcal N`` such that ``Λ(\mathcal C) \subset \mathcal D``. -The algorithm comes down to applying the Riemannian semismooth Newton method to the rewritten primal-dual optimality conditions, i.e., we define the vector field ``X: \mathcal{M} \times \mathcal{T}_{n}^{*} \mathcal{N} \rightarrow \mathcal{T} \mathcal{M} \times \mathcal{T}_{n}^{*} \mathcal{N}`` as +The algorithm comes down to applying the Riemannian semismooth Newton method to the rewritten primal-dual optimality conditions. 
We define the vector field ``X: \mathcal{M} \times \mathcal{T}_{n}^{*} \mathcal{N} \rightarrow \mathcal{T} \mathcal{M} \times \mathcal{T}_{n}^{*} \mathcal{N}`` as ```math X\left(p, \xi_{n}\right):=\left(\begin{array}{c} diff --git a/docs/src/solvers/quasi_Newton.md b/docs/src/solvers/quasi_Newton.md index a14e992344..a306e09225 100644 --- a/docs/src/solvers/quasi_Newton.md +++ b/docs/src/solvers/quasi_Newton.md @@ -11,7 +11,7 @@ ## Background -The aim is to minimize a real-valued function on a Riemannian manifold, i.e. +The aim is to minimize a real-valued function on a Riemannian manifold, that is ```math \min f(x), \quad x ∈ \mathcal{M}. @@ -49,7 +49,7 @@ For specific quasi-Newton updates, the fulfillment of the Riemannian curvature c g_{x_{k+1}}(s_k, y_k) > 0 ``` -holds, is a requirement for the inheritance of the self-adjointness and positive definiteness of the ``\mathcal{H}_k`` or ``\mathcal{B}_k`` to the operator ``\mathcal{H}_{k+1}`` or ``\mathcal{B}_{k+1}``. Unfortunately, the fulfillment of the Riemannian curvature condition is not given by a step size ``\alpha_k > 0`` that satisfies the generalized Wolfe conditions. However, in order to create a positive definite operator ``\mathcal{H}_{k+1}`` or ``\mathcal{B}_{k+1}`` in each iteration, the so-called locking condition was introduced in [Huang, Gallican, Absil, SIAM J. Optim., 2015](@cite HuangGallivanAbsil:2015), which requires that the isometric vector transport ``T^S``, which is used in the update formula, and its associate retraction ``R`` fulfill +holds, is a requirement for the inheritance of the self-adjointness and positive definiteness of the ``\mathcal{H}_k`` or ``\mathcal{B}_k`` to the operator ``\mathcal{H}_{k+1}`` or ``\mathcal{B}_{k+1}``. Unfortunately, the fulfillment of the Riemannian curvature condition is not given by a step size ``\alpha_k > 0`` that satisfies the generalized Wolfe conditions. 
However, in order to create a positive definite operator ``\mathcal{H}_{k+1}`` or ``\mathcal{B}_{k+1}`` in each iteration, the so-called locking condition was introduced in [HuangGallivanAbsil:2015](@cite), which requires that the isometric vector transport ``T^S``, which is used in the update formula, and its associate retraction ``R`` fulfil ```math T^{S}{x, ξ_x}(ξ_x) = β T^{R}{x, ξ_x}(ξ_x), \quad β = \frac{\lVert ξ_x \rVert_x}{\lVert T^{R}{x, ξ_x}(ξ_x) \rVert_{R_{x}(ξ_x)}}, diff --git a/docs/src/solvers/truncated_conjugate_gradient_descent.md b/docs/src/solvers/truncated_conjugate_gradient_descent.md index 31da535568..f56594cc18 100644 --- a/docs/src/solvers/truncated_conjugate_gradient_descent.md +++ b/docs/src/solvers/truncated_conjugate_gradient_descent.md @@ -1,4 +1,4 @@ -# [Steihaug-Toint Truncated Conjugate-Gradient Method](@id tCG) +# [Steihaug-Toint truncated conjugate gradient method](@id tCG) Solve the constraint optimization problem on the tangent space @@ -27,7 +27,7 @@ Here ``\mathcal H_p`` is either the Hessian ``\operatorname{Hess} f(p)`` or a li TruncatedConjugateGradientState ``` -## Stopping Criteria +## Stopping criteria ```@docs StopWhenResidualIsReducedByFactorOrPower @@ -38,7 +38,7 @@ update_stopping_criterion!(::StopWhenResidualIsReducedByFactorOrPower, ::Val{:Re update_stopping_criterion!(::StopWhenResidualIsReducedByFactorOrPower, ::Val{:ResidualFactor}, ::Any) ``` -## Trust Region Model +## Trust region model ```@docs TrustRegionModelObjective diff --git a/docs/src/solvers/trust_regions.md b/docs/src/solvers/trust_regions.md index 8cf54a4c56..1291a259f9 100644 --- a/docs/src/solvers/trust_regions.md +++ b/docs/src/solvers/trust_regions.md @@ -1,4 +1,4 @@ -# [The Riemannian Trust-Regions Solver](@id trust_regions) +# [The Riemannian trust-regions solver](@id trust_regions) Minimize a function @@ -6,11 +6,11 @@ Minimize a function \operatorname*{\arg\,min}_{p ∈ \mathcal{M}}\ f(p) ``` -by using the Riemannian trust-regions solver following [AbsilBakerGallivan:2006](@cite), -i.e. by building a lifted model at the ``k``th iterate ``p_k`` by locally mapping the +by using the Riemannian trust-regions solver following [AbsilBakerGallivan:2006](@cite) a model is build by +lifting the objective at the ``k``th iterate ``p_k`` by locally mapping the cost function ``f`` to the tangent space as ``f_k: T_{p_k}\mathcal M → \mathbb R`` as ``f_k(X) = f(\operatorname{retr}_{p_k}(X))``. -We then define the trust region subproblem as +The trust region subproblem is then defined as ```math \operatorname*{arg\,min}_{X ∈ T_{p_k}\mathcal M}\ m_k(X), @@ -45,7 +45,7 @@ TrustRegionsState ## Approximation of the Hessian -We currently provide a few different methods to approximate the Hessian. +Several different methods to approximate the Hessian are available. ```@docs ApproxHessianFiniteDifference diff --git a/docs/src/tutorials/GeodesicRegression.md b/docs/src/tutorials/GeodesicRegression.md index 8681d2ce15..1856e10ed3 100644 --- a/docs/src/tutorials/GeodesicRegression.md +++ b/docs/src/tutorials/GeodesicRegression.md @@ -55,8 +55,8 @@ p^* = d^* - t^*X^* and hence the linear regression result is the line $γ_{p^*,X^*}(t) = p^* + tX^*$. -On a Riemannian manifold we can phrase this as an optimization problem on the [tangent bundle](https://en.wikipedia.org/wiki/Tangent_bundle), -i.e. 
the disjoint union of all tangent spaces, as
+On a Riemannian manifold we can phrase this as an optimization problem on the [tangent bundle](https://en.wikipedia.org/wiki/Tangent_bundle), which is
+the disjoint union of all tangent spaces, as

``` math
\operatorname*{arg\,min}_{(p,X) \in \mathrm{T}\mathcal M} F(p,X)
@@ -115,7 +115,7 @@ end
```

For the Euclidean case, the result is given by the first principal component of a principal component analysis,
-see [PCR](https://en.wikipedia.org/wiki/Principal_component_regression), i.e. with $p^* = \frac{1}{n}\displaystyle\sum_{i=1}^n d_i$
+see [PCR](https://en.wikipedia.org/wiki/Principal_component_regression), which is given by $p^* = \frac{1}{n}\displaystyle\sum_{i=1}^n d_i$
and the direction $X^*$ is obtained by defining the zero mean data matrix

``` math
@@ -218,7 +218,7 @@ inner(
0.002487393068917863

-But we also started with one of the best scenarios, i.e. equally spaced points on a geodesic obstructed by noise.
+But we also started with one of the best scenarios of equally spaced points on a geodesic obstructed by noise.

This gets worse if you start with less evenly distributed data

@@ -350,7 +350,7 @@ geo_conn_highlighted2 = shortest_geodesic(

## Unlabeled Data

-If we are not given time points $t_i$, then the optimization problem extends – informally speaking –
+If we are not given time points $t_i$, then the optimization problem extends, informally speaking,
to also finding the “best fitting” (in the sense of smallest error).
To formalize, the objective function here reads

@@ -431,7 +431,7 @@ end
```

Finally, we additionally look for a fixed point $x=(p,X) ∈ \mathrm{T}\mathcal M$ at
-the gradient with respect to $t∈\mathbb R^n$, i.e. the second component, which is given by
+the gradient with respect to $t∈\mathbb R^n$, the second component, which is given by

``` math
(\operatorname{grad}F_2(t))_i
diff --git a/docs/src/tutorials/HowToDebug.md b/docs/src/tutorials/HowToDebug.md
index fb5c1e3f6b..b5248f40b8 100644
--- a/docs/src/tutorials/HowToDebug.md
+++ b/docs/src/tutorials/HowToDebug.md
@@ -1,4 +1,4 @@
-# How to Print Debug Output
+# How to print debug output
Ronny Bergmann

This tutorial aims to illustrate how to perform debug output. For that we consider an
@@ -44,12 +44,12 @@ Any solver accepts the keyword `debug=`, which in the simplest case can be set t
- the last number in the array is used with [`DebugEvery`](@ref) to print the debug only every $i$th iteration.
- Any Symbol is converted into certain debug prints

-Certain symbols starting with a capital letter are mapped to certain prints, e.g. `:Cost` is mapped to [`DebugCost`](@ref)`()` to print the current cost function value. A full list is provided in the [`DebugActionFactory`](@ref).
+Certain symbols starting with a capital letter are mapped to certain prints, for example `:Cost` is mapped to [`DebugCost`](@ref)`()` to print the current cost function value. A full list is provided in the [`DebugActionFactory`](@ref).
A special keyword is `:Stop`, which is only added to the final debug hook to print the stopping criterion.

Any symbol with a small letter is mapped to fields of the [`AbstractManoptSolverState`](@ref) which is used. This way you can easily print internal data, if you know their names.

-Let’s look at an example first: If we want to print the current iteration number, the current cost function value as well as the value `ϵ` from the [`ExactPenaltyMethodState`](@ref). 
To keep the amount of print at a reasonable level, we want to only print the debug every 25th iteration.
+Let’s look at an example first: If we want to print the current iteration number, the current cost function value as well as the value `ϵ` from the [`ExactPenaltyMethodState`](@ref). To keep the amount of print at a reasonable level, we want to only print the debug every twenty-fifth iteration.

Then we can write

``` julia
p1 = exact_penalty_method(
@@ -68,13 +68,13 @@ p1 = exact_penalty_method(
The value of the variable (ϵ) is smaller than or equal to its threshold (1.0e-6).
The algorithm performed a step with a change (6.5347623783315016e-9) less than 1.0e-6.

-## Advanced Debug output
+## Advanced debug output

There are two more advanced variants that can be used. The first is a tuple of a symbol and a string, where the string is used as the format print, that most [`DebugAction`](@ref)s have. The second is, to directly provide a `DebugAction`.

We can for example change the way the `:ϵ` is printed by adding a format string and use [`DebugCost`](@ref)`()` which is equivalent to using `:Cost`.
-Especially with the format change, the lines are more coniststent in length.
+Especially with the format change, the lines are more consistent in length.

``` julia
p2 = exact_penalty_method(
@@ -91,7 +91,7 @@ p2 = exact_penalty_method(
The value of the variable (ϵ) is smaller than or equal to its threshold (1.0e-6).
The algorithm performed a step with a change (6.5347623783315016e-9) less than 1.0e-6.

-You can also write your own [`DebugAction`](@ref) functor, where the function to implement has the same signature as the `step` function, that is an [`AbstractManoptProblem`](@ref), an [`AbstractManoptSolverState`](@ref), as well as the current iterate. For example the already mentioned \[`DebugDivider](@ref)`(s)\` is given as
+You can also write your own [`DebugAction`](@ref) functor, where the function to implement has the same signature as the `step` function, that is an [`AbstractManoptProblem`](@ref), an [`AbstractManoptSolverState`](@ref), as well as the current iterate. For example the already mentioned [`DebugDivider`](@ref)`(s)` is given as

``` julia
mutable struct DebugDivider{TIO<:IO} <: DebugAction
@@ -107,7 +107,7 @@ end

or you could implement that of course just for your specific problem or state.

-## Subsolver Debug
+## Subsolver debug

Most subsolvers have a `sub_kwargs` keyword, such that you can pass keywords to the sub solver as well. This works well if you do not plan to change the subsolver. If you do you can wrap your own `solver_state=` argument in a [`decorate_state!`](@ref) and pass a `debug=` keyword to this function call. Keywords within such a keyword have to be passed as pairs (`:debug => [...]`).

diff --git a/docs/src/tutorials/InplaceGradient.md b/docs/src/tutorials/InplaceGradient.md
index e408fac188..9cc5872001 100644
--- a/docs/src/tutorials/InplaceGradient.md
+++ b/docs/src/tutorials/InplaceGradient.md
@@ -1,11 +1,11 @@
-# Speedup using Inplace Evaluation
+# Speedup using in-place evaluation
Ronny Bergmann

When it comes to time critical operations, a main ingredient in Julia is given by
-mutating functions, i.e. those that compute in place without additional memory
+mutating functions, that is those that compute in place without additional memory
allocations. In the following, we illustrate how to do this with `Manopt.jl`. 
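To make that distinction concrete before diving into the tutorial's benchmark, here is a minimal sketch of the two evaluation styles (not part of the diff above), assuming `Manifolds.jl` is loaded; the mutating variant follows Julia's bang (`!`) convention:

``` julia
using Manifolds
M = Sphere(2)
p, q = [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]
# allocating: every call creates a new tangent vector
X = log(M, p, q)
# mutating: write the result into preallocated memory instead
Y = zero_vector(M, p)
log!(M, Y, p, q)
```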
-Let’s start with the same function as in [Get Started: Optimize!](https://manoptjl.org/stable/tutorials/Optimize!.html) +Let’s start with the same function as in [🏔️ Get started: optimize.](https://manoptjl.org/stable/tutorials/Optimize.html) and compute the mean of some points, only that here we use the sphere $\mathbb S^{30}$ and $n=800$ points. @@ -62,7 +62,7 @@ We can also benchmark this as Time (median): 49.552 ms ┊ GC (median): 5.41% Time (mean ± σ): 50.151 ms ± 1.731 ms ┊ GC (mean ± σ): 5.56% ± 0.64% - ▂▃ █▃▃▆ ▂ + ▂▃ █▃▃▆ ▂ ▅████████▅█▇█▄▅▇▁▅█▅▇▄▇▅▁▅▄▄▄▁▄▁▁▁▄▄▁▁▁▁▁▁▄▁▁▁▁▁▁▄▁▄▁▁▁▁▁▁▄ ▄ 48.3 ms Histogram: frequency by time 56.6 ms < @@ -97,7 +97,7 @@ end For the actual call to the solver, we first have to generate an instance of `GradF!` and tell the solver, that the gradient is provided in an [`InplaceEvaluation`](https://manoptjl.org/stable/plans/objective/#Manopt.InplaceEvaluation). -We can further also use [`gradient_descent!`](https://manoptjl.org/stable/solvers/gradient_descent/#Manopt.gradient_descent!) to even work inplace of the initial point we pass. +We can further also use [`gradient_descent!`](https://manoptjl.org/stable/solvers/gradient_descent/#Manopt.gradient_descent!) to even work in-place of the initial point we pass. ``` julia grad_f2! = GradF!(data, similar(data[1])) @@ -120,7 +120,7 @@ We can again benchmark this Time (median): 28.001 ms ┊ GC (median): 0.00% Time (mean ± σ): 28.412 ms ± 1.079 ms ┊ GC (mean ± σ): 0.73% ± 2.24% - ▁▅▇█▅▂▄ ▁ + ▁▅▇█▅▂▄ ▁ ▄▁███████▆█▇█▄▆▃▃▃▃▁▁▃▁▁▃▁▃▃▁▄▁▁▃▃▁▁▄▁▁▃▅▃▃▃▁▃▃▁▁▁▁▁▁▁▁▃▁▁▃ ▃ 27.4 ms Histogram: frequency by time 31.9 ms < diff --git a/docs/styles/Vocab/Manopt/accept.txt b/docs/styles/Vocab/Manopt/accept.txt new file mode 100644 index 0000000000..daa3167ffb --- /dev/null +++ b/docs/styles/Vocab/Manopt/accept.txt @@ -0,0 +1,68 @@ +Absil +Adagrad +Adjoint +adjoint +Armijo +Bergmann +Chambolle +Constantin +Diepeveen +Dornig +Douglas +cubic +Frobenius +functor +geodesically +Geomstats +Geoopt +Grassmann +Hadamard +Hessian +injectivity +interim +Jax +JuMP.jl +Lanczos +LineSearches.jl +Manifolds.jl +ManifoldsBase.jl +manopt +manopt.org +Manopt +Manopt.jl +Munkvold +Mead +Nelder +parametrising +Parametrising +Pock +preconditioner +prox +pullback +pushforward +Rachford +Ravn +reimplement +representer +Riemannian +Riemer +Riemopt +Riesz +Rosenbrock +Steihaug +Stiefel +semismooth +Stephansen +stepsize +Stepsize +Subgradient +subgradient +Subsolver +subsolver +summand +supertype +th +Tom-Christian +Toint +Willem +vectorial \ No newline at end of file diff --git a/joss/paper.md b/joss/paper.md index 861cbb4249..fed7814785 100644 --- a/joss/paper.md +++ b/joss/paper.md @@ -67,7 +67,7 @@ In the current version 0.3.17 of `Manopt.jl` the following algorithms are availa * Conjugate Gradient Descent ([`conjugate_gradient_descent`](https://manoptjl.org/v0.3/solvers/conjugate_gradient_descent.html)), which includes eight direction update rules using the `coefficient` keyword: [`SteepestDirectionUpdateRule`](https://manoptjl.org/v0.3/solvers/conjugate_gradient_descent.html#Manopt.SteepestDirectionUpdateRule), [`ConjugateDescentCoefficient`](https://manoptjl.org/v0.3/solvers/conjugate_gradient_descent.html#Manopt.ConjugateDescentCoefficient). 
[`DaiYuanCoefficient`](https://manoptjl.org/v0.3/solvers/conjugate_gradient_descent.html#Manopt.DaiYuanCoefficient), [`FletcherReevesCoefficient`](https://manoptjl.org/v0.3/solvers/conjugate_gradient_descent.html#Manopt.FletcherReevesCoefficient), [`HagerZhangCoefficient`](https://manoptjl.org/v0.3/solvers/conjugate_gradient_descent.html#Manopt.HagerZhangCoefficient), [`HeestenesStiefelCoefficient`](https://manoptjl.org/v0.3/solvers/conjugate_gradient_descent.html#Manopt.HeestenesStiefelCoefficient), [`LiuStoreyCoefficient`](https://manoptjl.org/v0.3/solvers/conjugate_gradient_descent.html#Manopt.LiuStoreyCoefficient), and [`PolakRibiereCoefficient`](https://manoptjl.org/v0.3/solvers/conjugate_gradient_descent.html#Manopt.PolakRibiereCoefficient)
* Cyclic Proximal Point ([`cyclic_proximal_point`](https://manoptjl.org/v0.3/solvers/cyclic_proximal_point.html)) [@Bacak:2014:1]
-* (parallel) Douglas–Rachford ([`DouglasRachford`](https://manoptjl.org/v0.3/solvers/DouglasRachford.html)) [@BergmannPerschSteidl:2016:1]
+* (parallel) Douglas-Rachford ([`DouglasRachford`](https://manoptjl.org/v0.3/solvers/DouglasRachford.html)) [@BergmannPerschSteidl:2016:1]
* Gradient Descent ([`gradient_descent`](https://manoptjl.org/v0.3/solvers/gradient_descent.html)), including direction update rules ([`IdentityUpdateRule`](https://manoptjl.org/v0.3/solvers/gradient_descent.html#Manopt.IdentityUpdateRule) for the classical gradient descent) to perform [`MomentumGradient`](https://manoptjl.org/v0.3/solvers/gradient_descent.html#Manopt.MomentumGradient), [`AverageGradient`](https://manoptjl.org/v0.3/solvers/gradient_descent.html#Manopt.AverageGradient), and [`Nesterov`](https://manoptjl.org/v0.3/solvers/gradient_descent.html#Manopt.Nesterov) types
* Nelder-Mead ([`NelderMead`](https://manoptjl.org/v0.3/solvers/NelderMead.html))
* Particle-Swarm Optimization ([`particle_swarm`](https://manoptjl.org/v0.3/solvers/particle_swarm.html)) [@BorckmansIshtevaAbsil2010]
diff --git a/src/plans/primal_dual_plan.jl b/src/plans/primal_dual_plan.jl
index c14571a3de..6d1177f257 100644
--- a/src/plans/primal_dual_plan.jl
+++ b/src/plans/primal_dual_plan.jl
@@ -679,7 +679,7 @@ function dual_residual(
throw(
DomainError(
apds.variant,
-"Unknown Chambolle–Pock variant, allowed are `:exact` or `:linearized`.",
+"Unknown Chambolle-Pock variant, allowed are `:exact` or `:linearized`.",
),
)
end
diff --git a/src/solvers/ChambollePock.jl b/src/solvers/ChambollePock.jl
index 6ae6fb740e..006849d29a 100644
--- a/src/solvers/ChambollePock.jl
+++ b/src/solvers/ChambollePock.jl
@@ -173,7 +173,7 @@ end
evaluation=AllocatingEvaluation()
)

-Perform the Riemannian Chambolle–Pock algorithm.
+Perform the Riemannian Chambolle-Pock algorithm.

Given a `cost` function ``\mathcal E:\mathcal M → ℝ`` of the form
```math
@@ -260,7 +260,7 @@ end
@doc raw"""
ChambollePock(M, N, cost, x0, ξ0, m, n, prox_F, prox_G_dual, adjoint_linear_operator)

-Perform the Riemannian Chambolle–Pock algorithm in place of `x`, `ξ`, and potentially `m`,
+Perform the Riemannian Chambolle-Pock algorithm in place of `x`, `ξ`, and potentially `m`,
`n` if they are not fixed. See [`ChambollePock`](@ref) for details and optional parameters.
"""
function ChambollePock!(
diff --git a/src/solvers/DouglasRachford.jl b/src/solvers/DouglasRachford.jl
index d66bf0d10e..ab96485475 100644
--- a/src/solvers/DouglasRachford.jl
+++ b/src/solvers/DouglasRachford.jl
@@ -175,8 +175,8 @@ If you provide a [`ManifoldProximalMapObjective`](@ref) `mpo` instead, the proxi
a [`StoppingCriterion`](@ref). 
* `parallel` – (`false`) clarify that we are doing a parallel DR, i.e. on a
`PowerManifold` manifold with two proxes. This can be used to trigger
- parallel Douglas–Rachford if you enter with two proxes. Keep in mind, that a
- parallel Douglas–Rachford implicitly works on a `PowerManifold` manifold and
+ parallel Douglas-Rachford if you enter with two proxes. Keep in mind that a
+ parallel Douglas-Rachford implicitly works on a `PowerManifold` manifold and
its first argument is the result then (assuming all are equal after the second prox).

diff --git a/tutorials/AutomaticDifferentiation.qmd b/tutorials/AutomaticDifferentiation.qmd
index 2721ae2fb2..1e233667f6 100644
--- a/tutorials/AutomaticDifferentiation.qmd
+++ b/tutorials/AutomaticDifferentiation.qmd
@@ -10,8 +10,8 @@ While by default we use [FiniteDifferences.jl](https://juliadiff.org/FiniteDiffe

In this tutorial we will take a look at a few possibilities to approximate or derive the gradient of a function $f:\mathcal M \to ℝ$ on a Riemannian manifold, without computing it yourself. There are mainly two different philosophies:

-1. Working _instrinsically_, i.e. staying on the manifold and in the tangent spaces. Here, we will consider approximating the gradient by forward differences.
-2. Working in an embedding – there we can use all tools from functions on Euclidean spaces – finite differences or automatic differenciation – and then compute the corresponding Riemannian gradient from there.
+1. Working _intrinsically_, that is staying on the manifold and in the tangent spaces. Here, we will consider approximating the gradient by forward differences.
+2. Working in an embedding where all tools from functions on Euclidean spaces can be used, like finite differences or automatic differentiation, and then compute the corresponding Riemannian gradient from there.

```{julia}
#| echo: false
@@ -34,7 +34,7 @@ Random.seed!(42);

A first idea is to generalize (multivariate) finite differences to Riemannian manifolds. Let $X_1,\ldots,X_d ∈ T_p\mathcal M$ denote an orthonormal basis of the tangent space $T_p\mathcal M$ at the point $p∈\mathcal M$ on the Riemannian manifold.

-We can generalize the notion of a directional derivative, i.e. for the “direction” $Y∈T_p\mathcal M$. Let $c\colon [-ε,ε]$, $ε>0$, be a curve with $c(0) = p$, $\dot c(0) = Y$, e.g. $c(t)= \exp_p(tY)$. We obtain
+We can generalize the notion of a directional derivative to a “direction” $Y∈T_p\mathcal M$. Let $c\colon [-ε,ε] → \mathcal M$, $ε>0$, be a curve with $c(0) = p$, $\dot c(0) = Y$, for example $c(t)= \exp_p(tY)$. We obtain

```math
Df(p)[Y] = \left. \frac{d}{dt} \right|_{t=0} f(c(t)) = \lim_{t \to 0} \frac{1}{t}(f(\exp_p(tY))-f(p))
```

We can approximate $Df(p)[X]$ by a finite difference scheme for an $h>0$ as

```math
DF(p)[Y] ≈ G_h(Y) := \frac{1}{h}(f(\exp_p(hY))-f(p))
```

-Furthermore the gradient $\operatorname{grad}f$ is the Riesz representer of the differential, ie.
+Furthermore the gradient $\operatorname{grad}f$ is the Riesz representer of the differential:

```math
Df(p)[Y] = g_p(\operatorname{grad}f(p), Y),\qquad \text{ for all } Y ∈ T_p\mathcal M
```

@@ -141,7 +141,7 @@ or in words: we have to change the Riesz representer of the (restricted/projecte

### A Continued Example

-We continue with the Rayleigh Quotient from before, now just starting with the defintion of the Euclidean case in the embedding, the function $F$.
+We continue with the Rayleigh Quotient from before, now just starting with the definition of the Euclidean case in the embedding, the function $F$. 
```{julia}
F(x) = x' * A * x / (x' * x);
```

@@ -165,11 +165,11 @@ X3 = grad_f2_AD(M, p)
norm(M, p, X1 - X3)
```

-### An Example for a Nonisometrically Embedded Manifold
+### An Example for a Non-isometrically Embedded Manifold

on the manifold $\mathcal P(3)$ of symmetric positive definite matrices.

-The following function computes (half) the distance squared (with respect to the linear affine metric) on the manifold $\mathcal P(3)$ to the identity, i.e. $I_3$. Denoting the unit matrix we consider the function
+The following function computes (half) the distance squared (with respect to the linear affine metric) on the manifold $\mathcal P(3)$ to the identity matrix $I_3$. That is, we consider the function

```math
G(q)
@@ -214,7 +214,7 @@ end
G1 = grad_G_FD(N, q)
```

-Now, we can again compare this to the (known) solution of the gradient, namely the gradient of (half of) the distance squared, i.e. $G(q) = \frac{1}{2}d^2_{\mathcal P(3)}(q,I_3)$ is given by $\operatorname{grad} G(q) = -\operatorname{log}_q I_3$, where $\operatorname{log}$ is the [logarithmic map](https://juliamanifolds.github.io/Manifolds.jl/latest/manifolds/symmetricpositivedefinite.html#Base.log-Tuple{SymmetricPositiveDefinite,%20Vararg{Any,%20N}%20where%20N}) on the manifold.
+Now, we can again compare this to the (known) solution of the gradient, namely the gradient of (half of) the distance squared $G(q) = \frac{1}{2}d^2_{\mathcal P(3)}(q,I_3)$, which is given by $\operatorname{grad} G(q) = -\operatorname{log}_q I_3$, where $\operatorname{log}$ is the [logarithmic map](https://juliamanifolds.github.io/Manifolds.jl/latest/manifolds/symmetricpositivedefinite.html#Base.log-Tuple{SymmetricPositiveDefinite,%20Vararg{Any,%20N}%20where%20N}) on the manifold.

```{julia}
G2 = -log(N, q, Matrix{Float64}(I, 3, 3))
diff --git a/tutorials/ConstrainedOptimization.qmd b/tutorials/ConstrainedOptimization.qmd
index 339b2934f6..4cd4659397 100644
--- a/tutorials/ConstrainedOptimization.qmd
+++ b/tutorials/ConstrainedOptimization.qmd
@@ -16,7 +16,7 @@ A constraint optimisation problem is given by
&\quad h(p) = 0,\\
\end{align*}
```
-where $f\colon \mathcal M → ℝ$ is a cost function, and $g\colon \mathcal M → ℝ^m$ and $h\colon \mathcal M → ℝ^n$ are the inequality and equality constraints, respectively. The $\leq$ and $=$ in (P) are meant elementwise.
+where $f\colon \mathcal M → ℝ$ is a cost function, and $g\colon \mathcal M → ℝ^m$ and $h\colon \mathcal M → ℝ^n$ are the inequality and equality constraints, respectively. The $\leq$ and $=$ in (P) are meant element-wise.

This can be seen as a balance between moving constraints into the geometry of a manifold $\mathcal M$ and keeping some, since they can be handled well in algorithms, see [BergmannHerzog:2019](@cite), [LiuBoumal:2019](@cite) for details.

@@ -34,7 +34,7 @@ using Distributions, LinearAlgebra, Manifolds, Manopt, Random
Random.seed!(42);
```

-In this tutorial we want to look at different ways to specify the problem and its implications. We start with specifying an example problems to illustrayte the different available forms.
+In this tutorial we want to look at different ways to specify the problem and its implications. We start with specifying an example problem to illustrate the different available forms.

We will consider the problem of a Nonnegative PCA, cf. 
Section 5.1.2 in [LiuBoumal:2019](@cite)

let $v_0 ∈ ℝ^d$, $\lVert v_0 \rVert=1$ be given spike signal, that is a sign
Z = \sqrt{σ} v_0v_0^{\mathrm{T}}+N,
```

-where $\sigma$ is a signal-to-noise ratio and $N$ is a matrix with random entries, where the diagonal entries are distributed with zero mean and standard deviation $1/d$ on the off-diagonals and $2/d$ on the daigonal
+where $\sigma$ is a signal-to-noise ratio and $N$ is a matrix with random entries, where the entries are distributed with zero mean and standard deviation $1/d$ on the off-diagonals and $2/d$ on the diagonal

```{julia}
d = 150; # dimension of v0
@@ -71,7 +71,7 @@ or in the previous notation $f(p) = -p^{\mathrm{T}}Zp^{\mathrm{T}}$ and $g(p) =
M = Sphere(d - 1)
```

-## A first Augmented Lagrangian Run
+## A first augmented Lagrangian run

We first define $f$ and $g$ as usual functions

@@ -134,10 +134,9 @@ f(M, v1)
maximum( g(M, v1) )
```

-## A faster Augmented Lagrangian Run
+## A faster augmented Lagrangian run

-Now this is a little slow, so we can modify two things, that we will directly do both – but one could also just change one of these – :
+Now this is a little slow, so we can modify two things:

1. Gradients should be evaluated in place, so for example

@@ -183,9 +182,9 @@ maximum(g(M, v2))
```

These are very similar to the previous values but the solver took much less time and less memory allocations.

-## Exact Penalty Method
+## Exact penalty method

-As a second solver, we have the [Exact Penalty Method](https://manoptjl.org/stable/solvers/exact_penalty_method/), which currenlty is available with two smoothing variants, which make an inner solver for smooth optimisationm, that is by default again [quasi Newton] possible:
+As a second solver, we have the [Exact Penalty Method](https://manoptjl.org/stable/solvers/exact_penalty_method/), which currently is available with two smoothing variants. These make an inner solver for smooth optimization possible, which is by default again quasi-Newton:
[`LogarithmicSumOfExponentials`](https://manoptjl.org/stable/solvers/exact_penalty_method/#Manopt.LogarithmicSumOfExponentials) and [`LinearQuadraticHuber`](https://manoptjl.org/stable/solvers/exact_penalty_method/#Manopt.LinearQuadraticHuber). We compare both here as well. The first smoothing technique is the default, so we can just call

@@ -227,9 +226,9 @@ f(M, v4)
maximum(g(M, v4))
```

-## Comparing to the unconstraint solver
+## Comparing to the unconstrained solver

-We can compare this to the _global_ optimum on the sphere, which is the unconstraint optimisation problem; we can just use Quasi Newton.
+We can compare this to the _global_ optimum on the sphere, which is the unconstrained optimisation problem, where we can just use Quasi Newton.

Note that this is much faster, since every iteration of the algorithms above does a quasi-Newton call as well. 
@@ -243,7 +242,7 @@ Note that this is much faster, since every iteration of the algorithms above doe f(M, w1) ``` -But for sure here the constraints here are not fulfilled and we have veru positive entries in $g(w_1)$ +But for sure here the constraints here are not fulfilled and we have quite positive entries in $g(w_1)$ ```{julia} maximum(g(M, w1)) diff --git a/tutorials/CountAndCache.qmd b/tutorials/CountAndCache.qmd index 623ef40c2b..f1b29621a4 100644 --- a/tutorials/CountAndCache.qmd +++ b/tutorials/CountAndCache.qmd @@ -1,9 +1,9 @@ --- -title: "How to Count and Cache Function Calls" +title: "How to count and cache function calls" author: Ronny Bergmann --- -In this tutorial, we want to investigate the caching and counting (i.e. statistics) features +In this tutorial, we want to investigate the caching and counting (statistics) features of [Manopt.jl](https://manoptjl.org). We will reuse the optimization tasks from the introductory tutorial [Get Started: Optimize!](https://manoptjl.org/stable/tutorials/Optimize!.html). @@ -164,7 +164,7 @@ But since both the cost and the gradient require the computation of the matrix-v ### The [`ManifoldCostGradientObjective`](@ref) approach -The [`ManifoldCostGradientObjective`](@ref) uses a combined function to compute both the gradient and the cost at the same time. We define the inplace variant as +The [`ManifoldCostGradientObjective`](@ref) uses a combined function to compute both the gradient and the cost at the same time. We define the in-place variant as ```{julia} function g_grad_g!(M::AbstractManifold, X, p) @@ -222,7 +222,7 @@ An alternative to the previous approach is the usage of a functor that introduce of the result of computing `A*p`. We additionally have to store `p` though, since we have to check that we are still evaluating the cost and/or gradient at the same point at which the cached `A*p` was computed. -We again consider the (more efficient) inplace variant. +We again consider the (more efficient) in-place variant. This can be done as follows ```{julia} @@ -338,5 +338,5 @@ it is about the same effort both time and allocation-wise. ## Summary While the second approach of [`ManifoldCostGradientObjective`](@ref) is very easy to implement, both the storage and the (local) cache approach are more efficient. -All three are an improvement over the first implementation without sharing interms results. -The results with storage or cache have further advantage of being more flexible, i.e. the stored information could also be reused in a third function, for example when also computing the Hessian. \ No newline at end of file +All three are an improvement over the first implementation without sharing interims results. +The results with storage or cache have further advantage of being more flexible, since the stored information could also be reused in a third function, for example when also computing the Hessian. \ No newline at end of file diff --git a/tutorials/EmbeddingObjectives.qmd b/tutorials/EmbeddingObjectives.qmd index dada11ba4a..e4a0a17469 100644 --- a/tutorials/EmbeddingObjectives.qmd +++ b/tutorials/EmbeddingObjectives.qmd @@ -7,7 +7,7 @@ Specifying a cost function $f\colon \mathcal M \to \mathbb R$ on a manifold is usually the model one starts with. Specifying its gradient $\operatorname{grad} f\colon\mathcal M \to T\mathcal M$, or more precisely $\operatorname{grad}f(p) \in T_p\mathcal M$, and eventually a Hessian $\operatorname{Hess} f\colon T_p\mathcal M \to T_p\mathcal M$ are then necessary to perform optimization. 
Since these might be challenging to compute, especially when manifolds and differential geometry are not
-the main area of a user – easier to use methods might be welcome.
+the main area of a user, easier-to-use methods might be welcome.

This tutorial discusses how to specify $f$ in the embedding as $\tilde f$, maybe only locally around the manifold,
and use the Euclidean gradient $∇ \tilde f$ and Hessian $∇^2 \tilde f$ within `Manopt.jl`.

@@ -86,12 +86,12 @@ and the [`check_Hessian`](@ref), which requires a bit more tolerance in its line
check_Hessian(M, f, grad_f, Hess_f; plot=true, throw_error=true, atol=1e-15)
```

-While they look reasonable here and were already derived – for the general case this derivation
+While they look reasonable here and were already derived, for the general case this derivation
might be more complicated.
Luckily there exist two functions in [`ManifoldDiff.jl`](https://juliamanifolds.github.io/ManifoldDiff.jl/stable/) that are implemented for several
manifolds from [`Manifolds.jl`](https://github.com/JuliaManifolds/Manifolds.jl),
namely [`riemannian_gradient`](https://juliamanifolds.github.io/ManifoldDiff.jl/stable/library/#ManifoldDiff.riemannian_gradient-Tuple{AbstractManifold,%20Any,%20Any})`(M, p, eG)` that converts a Euclidean gradient
-`eG=`$\nabla \tilde f(p)$ into a the Riemannain one $\operatorname{grad} f(p)$
+`eG=`$\nabla \tilde f(p)$ into the Riemannian one $\operatorname{grad} f(p)$
and [`riemannian_Hessian`](https://juliamanifolds.github.io/ManifoldDiff.jl/stable/library/#ManifoldDiff.riemannian_Hessian-Tuple{AbstractManifold,%20Any,%20Any,%20Any,%20Any})`(M, p, eG, eH, X)` which converts the Euclidean Hessian `eH=`$\nabla^2 \tilde f(p)[X]$ into $\operatorname{Hess} f(p)[X]$, where we also require the Euclidean gradient `eG=`$\nabla \tilde f(p)$.

@@ -181,7 +181,7 @@ distance(M, q1, q2)

This conversion also works for the gradients of constraints, and is passed down to
-subsolvers by deault when these are created using the Euclidean objective $f$, $\nabla f$ and $\nabla^2 f$.
+subsolvers by default when these are created using the Euclidean objective $f$, $\nabla f$ and $\nabla^2 f$.

## Summary
diff --git a/tutorials/GeodesicRegression.qmd b/tutorials/GeodesicRegression.qmd
index 4f9906784e..e5baa82b02 100644
--- a/tutorials/GeodesicRegression.qmd
+++ b/tutorials/GeodesicRegression.qmd
@@ -98,8 +98,8 @@ p^* = d^* - t^*X^*
and hence the linear regression result is the line $γ_{p^*,X^*}(t) = p^* + tX^*$.

-On a Riemannian manifold we can phrase this as an optimization problem on the [tangent bundle](https://en.wikipedia.org/wiki/Tangent_bundle),
-i.e. the disjoint union of all tangent spaces, as
+On a Riemannian manifold we can phrase this as an optimization problem on the [tangent bundle](https://en.wikipedia.org/wiki/Tangent_bundle), which is
+the disjoint union of all tangent spaces, as

```math
\operatorname*{arg\,min}_{(p,X) \in \mathrm{T}\mathcal M} F(p,X)
@@ -158,7 +158,7 @@ end
```

For the Euclidean case, the result is given by the first principal component of a principal component analysis,
-see [PCR](https://en.wikipedia.org/wiki/Principal_component_regression), i.e. with $p^* = \frac{1}{n}\displaystyle\sum_{i=1}^n d_i$
+see [PCR](https://en.wikipedia.org/wiki/Principal_component_regression), which is given by $p^* = \frac{1}{n}\displaystyle\sum_{i=1}^n d_i$
and the direction $X^*$ is obtained by defining the zero mean data matrix

```math
@@ -261,7 +261,7 @@ inner(
)
```

-But we also started with one of the best scenarios, i.e. 
equally spaced points on a geodesic obstructed by noise.
+But we also started with one of the best scenarios of equally spaced points on a geodesic obstructed by noise.

This gets worse if you start with less evenly distributed data

@@ -336,7 +336,7 @@ render_asymptote(img_folder * "/regression_result2.asy"; render=render_size);

## Unlabeled Data

-If we are not given time points $t_i$, then the optimization problem extends – informally speaking –
+If we are not given time points $t_i$, then the optimization problem extends, informally speaking,
to also finding the “best fitting” (in the sense of smallest error).
To formalize, the objective function here reads

@@ -415,7 +415,7 @@ end
```

Finally, we additionally look for a fixed point $x=(p,X) ∈ \mathrm{T}\mathcal M$ at
-the gradient with respect to $t∈\mathbb R^n$, i.e. the second component, which is given by
+the gradient with respect to $t∈\mathbb R^n$, the second component, which is given by

```math
(\operatorname{grad}F_2(t))_i
diff --git a/tutorials/HowToDebug.qmd b/tutorials/HowToDebug.qmd
index 4d703c6b66..fd460df2e2 100644
--- a/tutorials/HowToDebug.qmd
+++ b/tutorials/HowToDebug.qmd
@@ -1,5 +1,5 @@
---
-title: "How to Print Debug Output"
+title: "How to print debug output"
author: Ronny Bergmann
---

@@ -56,12 +56,12 @@ Any solver accepts the keyword `debug=`, which in the simplest case can be set t
* the last number in the array is used with [`DebugEvery`](@ref) to print the debug only every $i$th iteration.
* Any Symbol is converted into certain debug prints

-Certain symbols starting with a capital letter are mapped to certain prints, e.g. `:Cost` is mapped to [`DebugCost`](@ref)`()` to print the current cost function value. A full list is provided in the [`DebugActionFactory`](@ref).
+Certain symbols starting with a capital letter are mapped to certain prints, for example `:Cost` is mapped to [`DebugCost`](@ref)`()` to print the current cost function value. A full list is provided in the [`DebugActionFactory`](@ref).
A special keyword is `:Stop`, which is only added to the final debug hook to print the stopping criterion.

Any symbol with a small letter is mapped to fields of the [`AbstractManoptSolverState`](@ref) which is used. This way you can easily print internal data, if you know their names.

-Let's look at an example first: If we want to print the current iteration number, the current cost function value as well as the value `ϵ` from the [`ExactPenaltyMethodState`](@ref). To keep the amount of print at a reasonable level, we want to only print the debug every 25th iteration.
+Let's look at an example first: If we want to print the current iteration number, the current cost function value as well as the value `ϵ` from the [`ExactPenaltyMethodState`](@ref). To keep the amount of print at a reasonable level, we want to only print the debug every twenty-fifth iteration.

Then we can write

```{julia}
p1 = exact_penalty_method(
@@ -72,13 +72,13 @@ p1 = exact_penalty_method(
);
```

-## Advanced Debug output
+## Advanced debug output

There are two more advanced variants that can be used. The first is a tuple of a symbol and a string, where the string is used as the format print, that most [`DebugAction`](@ref)s have. The second is, to directly provide a `DebugAction`.

We can for example change the way the `:ϵ` is printed by adding a format string and use [`DebugCost`](@ref)`()` which is equivalent to using `:Cost`.
-Especially with the format change, the lines are more coniststent in length.
+Especially with the format change, the lines are more consistent in length. 
```{julia}
p2 = exact_penalty_method(
@@ -88,7 +88,7 @@ p2 = exact_penalty_method(
);
```

-You can also write your own [`DebugAction`](@ref) functor, where the function to implement has the same signature as the `step` function, that is an [`AbstractManoptProblem`](@ref), an [`AbstractManoptSolverState`](@ref), as well as the current iterate. For example the already mentioned [`DebugDivider](@ref)`(s)` is given as
+You can also write your own [`DebugAction`](@ref) functor, where the function to implement has the same signature as the `step` function, that is an [`AbstractManoptProblem`](@ref), an [`AbstractManoptSolverState`](@ref), as well as the current iterate. For example the already mentioned [`DebugDivider`](@ref)`(s)` is given as

```{julia}
#| eval: false
mutable struct DebugDivider{TIO<:IO} <: DebugAction
@@ -105,7 +105,7 @@ end

or you could implement that of course just for your specific problem or state.

-## Subsolver Debug
+## Subsolver debug

Most subsolvers have a `sub_kwargs` keyword, such that you can pass keywords to the sub solver as well. This works well if you do not plan to change the subsolver. If you do you can wrap your own `solver_state=` argument in a [`decorate_state!`](@ref) and pass a `debug=` keyword to this function call. Keywords within such a keyword have to be passed as pairs (`:debug => [...]`).

diff --git a/tutorials/HowToRecord.qmd b/tutorials/HowToRecord.qmd
index b940739dd7..02a291c367 100644
--- a/tutorials/HowToRecord.qmd
+++ b/tutorials/HowToRecord.qmd
@@ -1,5 +1,5 @@
---
-title: "How to Record Data During the Iterations"
+title: "How to record data during the iterations"
author: Ronny Bergmann
---

@@ -80,8 +80,8 @@ To record more than one value, you can pass an array of a mix of symbols and [`R
R2 = gradient_descent(M, f, grad_f, data[1]; record=[:Iteration, :Cost], return_state=true)
```

-Here, the symbol `:Cost` is mapped to using the [`RecordCost`](https://manoptjl.org/stable/plans/record/#Manopt.RecordCost) action. The same holds for `:Iteration` obiously records the current iteration number `i`.
-To access these you can first extract the group of records (that is where the `:Iteration`s are recorded – note the plural) and then access the `:Cost`
+Here, the symbol `:Cost` is mapped to using the [`RecordCost`](https://manoptjl.org/stable/plans/record/#Manopt.RecordCost) action. The same holds for `:Iteration`, which obviously records the current iteration number `i`.
+To access these you can first extract the group of records (that is where the `:Iteration`s are recorded; note the plural) and then access the `:Cost`
"""

```{julia}
@@ -109,13 +109,14 @@ We can also pass a tuple as second argument to have our own order within the tup
get_record(R2, :Iteration, (:Iteration, :Cost))
```

-## A more Complex Example
+## A more complex example

To illustrate a complicated example let's record:
+
* the iteration number, cost and gradient field, but only every sixth iteration;
* the iteration at which we stop.

-We first generate the problem and the state, to also illustrate the low-level works when not using the high-level iterface [`gradient_descent`](https://manoptjl.org/stable/solvers/gradient_descent.html).
+We first generate the problem and the state, to also illustrate how the low-level machinery works when not using the high-level interface [`gradient_descent`](https://manoptjl.org/stable/solvers/gradient_descent.html). 
```{julia} p = DefaultManoptProblem(M, ManifoldGradientObjective(f, grad_f)) @@ -126,7 +127,7 @@ s = GradientDescentState( ) ``` -We now first build a [`RecordGroup`](https://manoptjl.org/stable/plans/record/#Manopt.RecordGroup) to group the three entries we want to record per iteration. We then put this into a [`RecordEvery`](https://manoptjl.org/stable/plans/record/#Manopt.RecordEvery) to only record this every 6th iteration +We now first build a [`RecordGroup`](https://manoptjl.org/stable/plans/record/#Manopt.RecordGroup) to group the three entries we want to record per iteration. We then put this into a [`RecordEvery`](https://manoptjl.org/stable/plans/record/#Manopt.RecordEvery) to only record this every sixth iteration ```{julia} rI = RecordEvery( @@ -139,13 +140,13 @@ rI = RecordEvery( ) ``` -and for recodring the final iteration number +and for recording the final iteration number ```{julia} sI = RecordIteration() ``` -We now combine both into the [`RecordSolverState`](https://manoptjl.org/stable/plans/record/#Manopt.RecordSolverState) decorator. It acts completely the same as any [`AbstractManoptSolverState`](https://manoptjl.org/stable/plans/state/#Manopt.AbstractManoptSolverState) but records something in every iteration additionally. This is stored in a dictionary of [`RecordAction`](https://manoptjl.org/stable/plans/record/#Manopt.RecordAction)s, where `:Iteration` is the action (here the only every 6th iteration group) and the `sI` which is executed at stop. +We now combine both into the [`RecordSolverState`](https://manoptjl.org/stable/plans/record/#Manopt.RecordSolverState) decorator. It acts completely the same as any [`AbstractManoptSolverState`](https://manoptjl.org/stable/plans/state/#Manopt.AbstractManoptSolverState) but records something in every iteration additionally. This is stored in a dictionary of [`RecordAction`](https://manoptjl.org/stable/plans/record/#Manopt.RecordAction)s, where `:Iteration` is the action (here the only every sixth iteration group) and the `sI` which is executed at stop. Note that the keyword `record=` in the high level interface `gradient_descent` only would fill the `:Iteration` symbol of said dictionary. @@ -189,7 +190,7 @@ function (c::MyCost)(M, x) end ``` -and we define an own, new [`RecordAction`](https://manoptjl.org/stable/plans/record/#Manopt.RecordAction), which is a functor, i.e. a struct that is also a function. The function we have to implement is similar to a single solver step in signature, since it might get called every iteration: +and we define an own, new [`RecordAction`](https://manoptjl.org/stable/plans/record/#Manopt.RecordAction), which is a functor, that is a struct that is also a function. The function we have to implement is similar to a single solver step in signature, since it might get called every iteration: ```{julia} mutable struct RecordCount <: RecordAction @@ -206,7 +207,7 @@ end ``` Now we can initialize the new cost and call the gradient descent. -Note that this illustrates also the last use case – you can pass symbol-action pairs into the `record=`array. +Note that this illustrates also the last use case since you can pass symbol-action pairs into the `record=`array. ```{julia} f2 = MyCost(data) @@ -244,7 +245,7 @@ R3[:Iteration, :Count] and we see that the cost function is called once per iteration. 
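As a small usage sketch, and only by analogy to the tuple access shown for `(:Iteration, :Cost)` earlier in this tutorial (so assuming the same access works for the `:Count` entry), the recorded pairs can be extracted for further processing:

```{julia}
# assumed analogous to get_record(R2, :Iteration, (:Iteration, :Cost)) above
iteration_counts = get_record(R3, :Iteration, (:Iteration, :Count))
```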
If we use this counting cost and run the default gradient descent with Armijo line search, we can infer how many Armijo line search backtracks are performed:

```{julia}
f3 = MyCost(data)
@@ -267,4 +268,4 @@ R4 = gradient_descent(
get_record(R4)
```

-We can see that the number of cost function calls varies, depending on how many linesearch backtrack steps were required to obtain a good stepsize.
+We can see that the number of cost function calls varies, depending on how many line search backtrack steps were required to obtain a good stepsize.
diff --git a/tutorials/ImplementASolver.qmd b/tutorials/ImplementASolver.qmd
index 228e47eaba..9664fe45b5 100644
--- a/tutorials/ImplementASolver.qmd
+++ b/tutorials/ImplementASolver.qmd
@@ -8,7 +8,7 @@ tutorial [Get Started: Optimize!](https://manoptjl.org/stable/tutorials/Optimize
you might come to the idea of implementing a solver yourself.

After a short introduction of the algorithm we will implement,
-this tutorial first discusses the structural details, i.e. what a solver consists of and “works with”.
+this tutorial first discusses the structural details, that is, what a solver consists of and “works with”.
Afterwards, we will show how to implement the algorithm.
Finally, we will discuss how to make the algorithm both nice for the user as well as initialized in a way, that it can benefit from features already available in `Manopt.jl`.

@@ -55,16 +55,16 @@ We can run the following steps of the algorithm

2. set our best point $q = p^{(0)}$
2. Repeat until a stopping criterion is fulfilled
    1. Choose a random tangent vector $X^{(k)} \in T_{p^{(k)}}\mathcal M$ of length $\lVert X^{(k)} \rVert = \sigma$
-    2. “Walk” along this direction, i.e. $p^{(k+1)} = \operatorname{retr}_{p^{(k)}}(X^{(k)})$
+    2. “Walk” along this direction, that is $p^{(k+1)} = \operatorname{retr}_{p^{(k)}}(X^{(k)})$
    3. If $f(p^{(k+1)}) < f(q)$ set $q = p^{(k+1)}$ as our new best visited point
4. Return $q$ as the resulting best point we visited

-## Preliminaries – Elements a Solver works on
+## Preliminaries: elements a solver works on

There are two main ingredients a solver needs: a problem to work on and the state of a solver, which “identifies” the solver and stores intermediate results.

-### The “Task” – An `AbstractManoptProblem`
+### The “task”: an `AbstractManoptProblem`

A problem in `Manopt.jl` usually consists of a manifold (an [`AbstractManifold`](https://juliamanifolds.github.io/ManifoldsBase.jl/stable/types.html#The-AbstractManifold)) and an [`AbstractManifoldObjective`](@ref) describing the function we have and its features.
In our case the objective is (just) a [`ManifoldCostObjective`](@ref) that store
or any other information we have about our task.

This is something independent of the solver itself, since it only identifies the problem we
-want to solve independent of how we want to solve it – or in other words, this type contains
+want to solve independent of how we want to solve it, or in other words, this type contains
all information that is static and independent of the specific solver at hand.

Usually the problem's variable is called `mp`. 
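As a minimal sketch of this first ingredient, assuming `Manopt.jl` and `Manifolds.jl` are loaded and using a toy cost that is only for illustration:

```{julia}
using Manopt, Manifolds
M = Sphere(2)
f(M, p) = 1 - p[1]  # a hypothetical toy cost, for illustration only
# wrap the cost into an objective and couple it with the manifold
mp = DefaultManoptProblem(M, ManifoldCostObjective(f))
```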
-### The Solver – An `AbstractManoptSolverState`
+### The solver: an `AbstractManoptSolverState`

Everything that is needed by a solver during the iterations, all its parameters, interim
values that are needed beyond just one iteration, is stored in a subtype of the
@@ -90,7 +90,7 @@ In our case we want to store five things

- the best visited point $q$
- the variable $\sigma > 0$
- the retraction $\operatorname{retr}$ to use (cf. [retractions and inverse retractions](https://juliamanifolds.github.io/ManifoldsBase.jl/stable/retractions.html))
-- a criterion, when to stop, i.e. a [`StoppingCriterion`](@ref)
+- a criterion, when to stop: a [`StoppingCriterion`](@ref)

We can define this as

@@ -131,7 +131,7 @@ in `Manopt.jl` and provide an easy way to construct this state now.
States usually have a shortened name as their variable, we will use `rws` for our state here.

-## Implementing the Your solver
+## Implementing your solver

There are basically only two methods we need to implement for our solver

@@ -158,9 +158,9 @@ If your choice is different, you need to reimplement
- `get_iterate(rws)` to access the current iterate

We recommend to follow the general scheme with the `stop` field. If you have specific criteria
-when to stop, consider implementing your own [stoping criterion](https://manoptjl.org/stable/plans/stopping_criteria/) instead.
+when to stop, consider implementing your own [stopping criterion](https://manoptjl.org/stable/plans/stopping_criteria/) instead.

-### Initialization & Iterate Access
+### Initialization and iterate access

For our solver, there is not so much to initialize; just to be safe we should copy over the
initial value in `p` we start with, to `q`. We do not have to care about remembering the iterate,
@@ -202,7 +202,7 @@ We could also store the cost of `q` in the state, but we will see how to easily
this solver to allow for [caching](https://manoptjl.org/stable/tutorials/CountAndCache/#How-to-Count-and-Cache-Function-Calls).
In practice, however, it is preferable to cache intermediate values like cost of `q` in the state when it can be easily achieved. This way we do not have to deal with overheads of an external cache.

-Now we can just run the solver already! We take the same example as for the other tutorials
+Now we can just run the solver already. We take the same example as for the other tutorials

We first define our task, the Riemannian Center of Mass from the [Get Started: Optimize!](https://manoptjl.org/stable/tutorials/Optimize!.html) tutorial.

@@ -240,7 +240,7 @@ solve!(mp, s2)
get_solver_result(s2)
```

-## Ease of Use I: The high level interface(s)
+## Ease of use I: a high level interface

`Manopt.jl` offers a few additional features for solvers in their high level interfaces, for example ``[`debug=` for debug](@ref DebugSection)``{=commonmark}, ``[`record=`](@ref RecordSection)``{=commonmark} keywords for debug and recording

@@ -256,11 +256,11 @@ using Manopt: get_solver_return, indicates_convergence, status_summary

### A high level interface using the objective

This could be considered as an interim step to the high-level interface:
-If we already have the objective – in our case a [`ManifoldCostObjective`](@ref) at hand, the high level interface consists of the steps
+If we already have the objective, in our case a [`ManifoldCostObjective`](@ref) at hand, the high level interface consists of the steps

1. possibly decorate the objective
2. generate the problem
3. generate and possibly decorate the state
4. 
call the solver
5. determine the return value

@@ -311,6 +311,7 @@ about the reason it stopped and whether this indicates convergence.
Here it would for example look like

```{julia}
+#| output: false
import Base: show
function show(io::IO, rws::RandomWalkState)
    i = get_count(rws, :Iterations)
@@ -330,8 +331,8 @@ function show(io::IO, rws::RandomWalkState)
    end
end
```

-Now the algorithm can be easily called and provides – if wanted – all features of a `Manopt.jl`
-algorithm. For example to see the summary, we could now just call
+Now the algorithm can be easily called and provides all features of a `Manopt.jl` algorithm.
+For example to see the summary, we could now just call

```{julia}
q = random_walk_algorithm!(M, f; return_state=true)
```

We saw in this tutorial how to implement a simple cost-based algorithm, to illustrate how optimization algorithms are covered in `Manopt.jl`.

-One feature we did not cover is that most algorithms allow for inplace and allocation functions, as soon as they work on more than just the cost, e.g. gradients, proximal maps or Hessians. This is usually a keyword argument of the objective and hence also part of the high-level interfaces.
+One feature we did not cover is that most algorithms allow for in-place and allocation functions, as soon as they work on more than just the cost, for example gradients, proximal maps or Hessians.
+This is usually a keyword argument of the objective and hence also part of the high-level interfaces.
diff --git a/tutorials/ImplementOwnManifold.qmd b/tutorials/ImplementOwnManifold.qmd
index 78777eecda..60671d5728 100644
--- a/tutorials/ImplementOwnManifold.qmd
+++ b/tutorials/ImplementOwnManifold.qmd
@@ -1,5 +1,5 @@
---
-title: "Optimize on your own Manifold"
+title: "Optimize on your own manifold"
author: Ronny Bergmann
---

@@ -22,7 +22,7 @@ is maybe not completely necessary. This tutorial aims to help you through these
steps to implement necessary parts of a manifold to get started with the `[solver](@ref SolversSection)`{=commonmark} you have in mind.

-## Our Example Problem
+## An example problem

We get started by loading the packages we need.

@@ -72,8 +72,8 @@ struct ScaledSphere <: AbstractManifold{ℝ}
end
```

-And we would like to compute a mean and/or median similar to [🏔️ Get Started: Optimize!](https://manoptjl.org/stable/tutorials/Optimize!.html),
-i.e. given a set of points $q_1,\ldots,q_n$ we want to compute [Karcher:1977](@cite)
+We would like to compute a mean and/or median similar to [🏔️ Get Started: Optimize!](https://manoptjl.org/stable/tutorials/Optimize!.html).
+For a given set of points $q_1,\ldots,q_n$ we want to compute [Karcher:1977](@cite)

```math
\operatorname*{arg\,min}_{p\in\mathcal M}
@@ -102,9 +102,9 @@ pts = [ [zeros(d)..., M.radius] .+ 0.5.*([rand(d)...,0.5] .- 0.5) for _=1:N]
pts = [ r/norm(p) .* p for p in pts]
```

-Then – before starting with optimization, we need the distance on the manifold,
+Then, before starting with optimization, we need the distance on the manifold,
to define the cost function, as well as the logarithmic map to define the gradient. 
-For both, we here use the “lazy” approach of using the [Sphere]() as a fallback
+For both, we here use the “lazy” approach of using the [Sphere](https://juliamanifolds.github.io/Manifolds.jl/stable/manifolds/sphere.html) as a fallback

```{julia}
#| output : false
@@ -133,9 +133,9 @@ for our case that is [The gradient descent's Technical Details](https://manoptjl
They list all details, but we can start even step by step here if we are a bit careful.

-### A Retraction
+### A retraction

-We first implement a [retract](https://juliamanifolds.github.io/ManifoldsBase.jl/stable/retractions/)ion. Informally – given a current point and a direction to “walk into” we need a function that performs that walk.
+We first implement a [retract](https://juliamanifolds.github.io/ManifoldsBase.jl/stable/retractions/)ion. Informally, given a current point and a direction to “walk into” we need a function that performs that walk.
Since we take an easy one that just projects onto the sphere, we use the [`ProjectionRetraction`](https://juliamanifolds.github.io/ManifoldsBase.jl/stable/retractions/#ManifoldsBase.ProjectionRetraction) type.
To be precise, we have to implement the [in-place variant](https://juliamanifolds.github.io/ManifoldsBase.jl/stable/design/#inplace-and-noninplace) [`retract_project!`](https://juliamanifolds.github.io/ManifoldsBase.jl/stable/retractions/#ManifoldsBase.retract_project!-Tuple{AbstractManifold,%20Vararg{Any,%204}})

@@ -163,7 +163,7 @@ p0 = [zeros(d)...,1.0]
f(M,p0)
```

-Then we can run our first solver – where we have to overwrite a few
+Then we can run our first solver, where we have to overwrite a few
defaults, which would use functions we do not (yet) have.
We will discuss these in the next steps.

```{julia}
q1 = gradient_descent(M, f, grad_f, p0;
@@ -177,7 +177,7 @@ q1 = gradient_descent(M, f, grad_f, p0;
f(M,q1)
```

-We at least see, that the function value descreased.
+We at least see that the function value decreased.

### Norm and maximal step size.

@@ -225,7 +225,7 @@ q3 = gradient_descent(M, f, grad_f, p0)
f(M, q3)
```

-But we for example automaticaly also get the possibility to obtain debug information like
+But we for example automatically also get the possibility to obtain debug information like

```{julia}
gradient_descent(M, f, grad_f, p0; debug = [:Iteration, :Cost, :Stepsize, 25, :GradientNorm, :Stop, "\n"]);
```
diff --git a/tutorials/InplaceGradient.qmd b/tutorials/InplaceGradient.qmd
index 63a0118ed2..3ddee833c5 100644
--- a/tutorials/InplaceGradient.qmd
+++ b/tutorials/InplaceGradient.qmd
@@ -1,10 +1,10 @@
---
-title: "Speedup using Inplace Evaluation"
+title: "Speedup using in-place evaluation"
author: Ronny Bergmann
---

When it comes to time critical operations, a main ingredient in Julia is given by
-mutating functions, i.e. those that compute in place without additional memory
+mutating functions, that is those that compute in place without additional memory
allocations. In the following, we illustrate how to do this with `Manopt.jl`.

Let's start with the same function as in [Get Started: Optimize!](https://manoptjl.org/stable/tutorials/Optimize!.html)
@@ -95,7 +95,7 @@ end

For the actual call to the solver, we first have to generate an instance of `GradF!`
and tell the solver, that the gradient is provided in an [`InplaceEvaluation`](https://manoptjl.org/stable/plans/objective/#Manopt.InplaceEvaluation).
-We can further also use [`gradient_descent!`](https://manoptjl.org/stable/solvers/gradient_descent/#Manopt.gradient_descent!) to even work inplace of the initial point we pass. 
+We can further also use [`gradient_descent!`](https://manoptjl.org/stable/solvers/gradient_descent/#Manopt.gradient_descent!) to even work in-place of the initial point we pass. ```{julia} grad_f2! = GradF!(data, similar(data[1])) m2 = deepcopy(p0) diff --git a/tutorials/Optimize!.qmd b/tutorials/Optimize.qmd similarity index 86% rename from tutorials/Optimize!.qmd rename to tutorials/Optimize.qmd index 2bc9e0fbfe..7e257752aa 100644 --- a/tutorials/Optimize!.qmd +++ b/tutorials/Optimize.qmd @@ -1,12 +1,12 @@ --- -title: "🏔️ Get Started: Optimize!" +title: "🏔️ Get started: optimize." author: Ronny Bergmann --- In this tutorial, we will both introduce the basics of optimisation on manifolds as well as how to use [`Manopt.jl`](https://manoptjl.org) to perform optimisation on manifolds in [Julia](https://julialang.org). -For more theoretical background, see e.g. [doCarmo:1992](@cite) for an introduction to Riemannian manifolds +For more theoretical background, see for example [doCarmo:1992](@cite) for an introduction to Riemannian manifolds and [AbsilMahonySepulchre:2008](@cite) or [Boumal:2023](@cite) to read more about optimisation thereon. Let $\mathcal M$ denote a [Riemannian manifold](https://juliamanifolds.github.io/ManifoldsBase.jl/stable/#ManifoldsBase.Manifold) @@ -29,7 +29,7 @@ In the Euclidean case with$d\in\mathbb N$, that is for $n\in \mathbb N$ data poi can not be directly generalised to data $q_1,\ldots,q_n$, since on a manifold we do not have an addition. -But the mean can also be charcterised as +But the mean can also be characterised as ```math \operatorname*{arg\,min}_{x\in\mathbb R^d} \frac{1}{2n}\sum_{i=1}^n \lVert x - y_i\rVert^2 @@ -59,8 +59,8 @@ cd(@__DIR__) Pkg.activate("."); # for reproducibility use the local tutorial environment. ``` -Let's assume you have already installed both Manotp and Manifolds in Julia (using e.g. `using Pkg; Pkg.add(["Manopt", "Manifolds"])`). -Then we can get started by loading both packages – and `Random` for persistency in this tutorial. +Let's assume you have already installed both `Manopt.jl` and `Manifolds.jl` in Julia (using for example `using Pkg; Pkg.add(["Manopt", "Manifolds"])`). +Then we can get started by loading both packages as well as `Random.jl` for persistency in this tutorial. ```{julia} using Manopt, Manifolds, Random, LinearAlgebra @@ -88,7 +88,7 @@ grad_f(M, p) = sum(1 / n * grad_distance.(Ref(M), data, Ref(p))); and just call [`gradient_descent`](https://manoptjl.org/stable/solvers/gradient_descent/). For a first start, we do not have to provide more than the manifold, the cost, the gradient, -and a startig point, which we just set to the first data point +and a starting point, which we just set to the first data point ```{julia} m1 = gradient_descent(M, f, grad_f, data[1]) @@ -107,8 +107,8 @@ The goal is to get an output of the form but where we also want to fix the display format for the change and the cost numbers (the `[...]`) to have a certain format. Furthermore, the reason why the solver stopped should be printed at the end -These can easily be specified using either a Symbol – using the default format for numbers – or a tuple of a symbol and a format-string in the `debug=` keyword that is avaiable for every solver. 
-We can also – for illustration reasons – just look at the first 6 steps by setting a [`stopping_criterion=`](https://manoptjl.org/stable/plans/stopping_criteria/) +These can easily be specified using either a Symbol when using the default format for numbers, or a tuple of a symbol and a format-string in the `debug=` keyword that is available for every solver. +We can also, for illustration reasons, just look at the first 6 steps by setting a [`stopping_criterion=`](https://manoptjl.org/stable/plans/stopping_criteria/) ```{julia} m2 = gradient_descent(M, f, grad_f, data[1]; @@ -125,7 +125,7 @@ See [here](https://manoptjl.org/stable/plans/debug/#Manopt.DebugActionFactory-Tu The `debug=` keyword is actually a list of [`DebugActions`](https://manoptjl.org/stable/plans/debug/#Manopt.DebugAction) added to every iteration, allowing you to write your own ones even. Additionally, `:Stop` is an action added to the end of the solver to display the reason why the solver stopped. ``` -The default stopping criterion for [`gradient_descent`](https://manoptjl.org/stable/solvers/gradient_descent/) is, to either stopwhen the gradient is small (`<1e-9`) or a max number of iterations is reached (as a fallback. +The default stopping criterion for [`gradient_descent`](https://manoptjl.org/stable/solvers/gradient_descent/) is, to either stop when the gradient is small (`<1e-9`) or a max number of iterations is reached (as a fallback). Combining stopping-criteria can be done by `|` or `&`. We further pass a number `25` to `debug=` to only an output every `25`th iteration: @@ -167,7 +167,7 @@ data2 = [exp(N, q, σ * rand(N; vector_at=q)) for i in 1:m]; ``` Instead of the mean, let's consider a non-smooth optimisation task: -The median can be generalized to Manifolds as the minimiser of the sum of distances, see e.g. [Bacak:2014](@cite). We define +The median can be generalized to Manifolds as the minimiser of the sum of distances, see [Bacak:2014](@cite). We define ```{julia} g(N, q) = sum(1 / (2 * m) * distance.(Ref(N), Ref(q), data2)) @@ -209,8 +209,8 @@ at the recorded values at iteration 42 get_record(s)[42] ``` -But we can also access whole serieses and see that the cost does not decrease that fast; actually, the CPPA might converge relatively slow. For that we can for -example access the `:Cost` that was recorded every `:Iterate` as well as the (maybe a little boring) `:Iteration`-number in a semilogplot. +But we can also access whole series and see that the cost does not decrease that fast; actually, the CPPA might converge relatively slow. For that we can for +example access the `:Cost` that was recorded every `:Iterate` as well as the (maybe a little boring) `:Iteration`-number in a semi-log-plot. 
```{julia}
x = get_record(s, :Iteration, :Iteration)
@@ -223,7 +223,7 @@ plot(x,y,xaxis=:log, label="CPPA Cost")
````{=commonmark}
```@bibliography
-Pages = ["Optimize!.md"]
+Pages = ["Optimize.md"]
Canonical=false
```
```` \ No newline at end of file
diff --git a/tutorials/StochasticGradientDescent.qmd b/tutorials/StochasticGradientDescent.qmd
index cbd8e49116..e42d5d6050 100644
--- a/tutorials/StochasticGradientDescent.qmd
+++ b/tutorials/StochasticGradientDescent.qmd
@@ -16,7 +16,7 @@ for given points $p_i ∈\mathcal M$, $i=1,…,N$ this optimization problem read
\operatorname{d}^2_{\mathcal M}(x,p_i),
```

-which of course can be (and is) solved by a gradient descent, see the introductionary
+which of course can be (and is) solved by a gradient descent, see the introductory
tutorial or [Statistics in Manifolds.jl](https://juliamanifolds.github.io/Manifolds.jl/stable/features/statistics.html).
If $N$ is very large, evaluating the complete gradient might be quite expensive.
A remedy is to evaluate only one of the terms at a time and choose a random order for these.
@@ -63,7 +63,7 @@ For the mean, the gradient is
```

which we define in `Manopt.jl` in two different ways:
-either as one function returning all gradients as a vector (see `gradF`), or – maybe more fitting for a large scale problem, as a vector of small gradient functions (see `gradf`)
+either as one function returning all gradients as a vector (see `gradF`), or, maybe more fitting for a large-scale problem, as a vector of small gradient functions (see `gradf`)


```{julia}
gradF(M, p) = [grad_distance(M, q, p) for q in data];
gradf = [(M, p) -> grad_distance(M, q, p) for q in data];

p0 = 1 / sqrt(3) * [1.0, 1.0, 1.0]
```

-The calls are only slightly different, but notice that accessing the 2nd gradient element
+The calls are only slightly different, but notice that accessing the second gradient element
requires evaluating all logs in the first function, while we only call _one_ of the functions
in the second array of functions.
So while you can use both `gradF` and `gradf` in the following call, the second one is (much) faster.
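+
+As a small illustration of this difference, here is a hypothetical sketch. It only uses `gradF`, `gradf`, and `p0` as defined above, together with the manifold `M` defined earlier in the tutorial; the variable names `second_full` and `second_single` are chosen for illustration.
+
+```{julia}
+#| eval: false
+# a sketch: indexing into the full gradient first evaluates all N logarithmic maps ...
+second_full = gradF(M, p0)[2]
+# ... while calling the second component function evaluates only one of them
+second_single = gradf[2](M, p0)
+```
+
+Both expressions return the same tangent vector; the second simply avoids the unused evaluations.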