feat: Adapt the trust region algorithm for function optimization
pnevyk committed May 13, 2023
1 parent dd6524a commit 96fe08f
Showing 3 changed files with 760 additions and 7 deletions.
329 changes: 325 additions & 4 deletions src/derivatives.rs
@@ -6,17 +6,22 @@ use nalgebra::{
allocator::Allocator,
convert,
storage::{Storage, StorageMut},
- ComplexField, DefaultAllocator, IsContiguous, OMatrix, RealField, Vector,
+ ComplexField, DefaultAllocator, Dim, DimName, IsContiguous, OMatrix, OVector, RealField,
+ Vector, U1,
};
use num_traits::{One, Zero};
use thiserror::Error;

- use crate::core::{Problem, ProblemError, System};
+ use crate::core::{Function, Problem, ProblemError, System};

/// Square root of double precision machine epsilon. This value is a standard
- /// constant for epsilons in approximating derivative-based concepts.
+ /// constant for epsilons in approximating first-order derivative-based concepts.
pub const EPSILON_SQRT: f64 = 0.000000014901161193847656;

/// Cubic root of double precision machine epsilon. This value is a standard
/// constant for epsilons in approximating second-order derivative-based concepts.
pub const EPSILON_CBRT: f64 = 0.0000060554544523933395;
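
// These constants are approximately f64::EPSILON.sqrt() and f64::EPSILON.cbrt().
// For a first-order forward difference, the combined truncation and rounding
// error is minimized by a step proportional to eps^(1/2); for the second-order
// differences used below, the optimum shifts to roughly eps^(1/3).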

/// Error when computing the Jacobian matrix.
#[derive(Debug, Error)]
pub enum JacobianError {
@@ -135,13 +140,297 @@ where
}
}

/// Error when computing the gradient vector.
#[derive(Debug, Error)]
pub enum GradientError {
/// Error that occurred when evaluating the function.
#[error("{0}")]
Problem(#[from] ProblemError),
}

/// Gradient vector of a function.
#[derive(Debug)]
pub struct Gradient<F: Problem>
where
DefaultAllocator: Allocator<F::Scalar, F::Dim>,
{
grad: OVector<F::Scalar, F::Dim>,
}

impl<F: Problem> Gradient<F>
where
DefaultAllocator: Allocator<F::Scalar, F::Dim>,
{
/// Initializes the gradient vector with zeros.
pub fn zeros(f: &F) -> Self {
Self {
grad: OVector::zeros_generic(f.dim(), U1::name()),
}
}
}

impl<F: Function> Gradient<F>
where
DefaultAllocator: Allocator<F::Scalar, F::Dim>,
{
/// Computes the gradient vector of the function at a given point with a
/// given scale of variables. See [`compute`](Gradient::compute) for more
/// details.
pub fn new<Sx, Sscale>(
f: &F,
x: &mut Vector<F::Scalar, F::Dim, Sx>,
scale: &Vector<F::Scalar, F::Dim, Sscale>,
fx: F::Scalar,
) -> Result<Self, GradientError>
where
Sx: StorageMut<F::Scalar, F::Dim> + IsContiguous,
Sscale: Storage<F::Scalar, F::Dim>,
{
let mut grad = Self::zeros(f);
grad.compute(f, x, scale, fx)?;
Ok(grad)
}

/// Computes the gradient vector of the function at a given point with a
/// given scale of variables.
///
/// The parameter `x` is mutable to allow temporary mutations that avoid
/// unnecessary allocations; after this method returns, the content of the
/// vector is exactly the same as before.
///
/// Information about variable scale is useful for problematic cases of
/// finite differentiation (e.g., when the value is near zero).
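///
/// A minimal usage sketch (assuming some type `MyFunc` implementing
/// [`Function`] over `f64` with dynamic dimension):
///
/// ```ignore
/// let f = MyFunc::new();
/// let mut x = nalgebra::dvector![3.0, -3.0];
/// let scale = nalgebra::dvector![1.0, 1.0];
/// let fx = f.apply(&x)?;
/// let mut grad = Gradient::zeros(&f);
/// grad.compute(&f, &mut x, &scale, fx)?;
/// ```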
pub fn compute<Sx, Sscale>(
&mut self,
f: &F,
x: &mut Vector<F::Scalar, F::Dim, Sx>,
scale: &Vector<F::Scalar, F::Dim, Sscale>,
fx: F::Scalar,
) -> Result<&mut Self, GradientError>
where
Sx: StorageMut<F::Scalar, F::Dim> + IsContiguous,
Sscale: Storage<F::Scalar, F::Dim>,
{
let eps: F::Scalar = convert(EPSILON_SQRT);

for i in 0..f.dim().value() {
let xi = x[i];

// See the implementation of Jacobian for details on computing step size.
let magnitude = F::Scalar::one() / scale[i];
let step = eps * xi.abs().max(magnitude) * F::Scalar::one().copysign(xi);
let step = if step == F::Scalar::zero() { eps } else { step };
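
// The step carries the sign of x[i] so the perturbation is taken away
// from zero, and the guard falls back to a bare eps if the computed step
// ends up being exactly zero.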

// Update the point.
x[i] = xi + step;
let fxi = f.apply(x)?;

// Compute the derivative approximation: grad[i] = (F(x + e_i * step_i) - F(x)) / step_i.
self.grad[i] = (fxi - fx) / step;

// Restore the original value.
x[i] = xi;
}

Ok(self)
}
}

impl<F: Problem> Deref for Gradient<F>
where
DefaultAllocator: Allocator<F::Scalar, F::Dim>,
{
type Target = OVector<F::Scalar, F::Dim>;

fn deref(&self) -> &Self::Target {
&self.grad
}
}

/// Error when computing the Hessian matrix.
#[derive(Debug, Error)]
pub enum HessianError {
/// Error that occurred when evaluating the function.
#[error("{0}")]
Problem(#[from] ProblemError),
}

/// Hessian matrix of a function.
#[derive(Debug)]
pub struct Hessian<F: Problem>
where
DefaultAllocator: Allocator<F::Scalar, F::Dim, F::Dim>,
DefaultAllocator: Allocator<F::Scalar, F::Dim>,
{
hes: OMatrix<F::Scalar, F::Dim, F::Dim>,
steps: OVector<F::Scalar, F::Dim>,
neighbors: OVector<F::Scalar, F::Dim>,
}

impl<F: Problem> Hessian<F>
where
DefaultAllocator: Allocator<F::Scalar, F::Dim, F::Dim>,
DefaultAllocator: Allocator<F::Scalar, F::Dim>,
{
/// Initializes the Hessian matrix with zeros.
pub fn zeros(f: &F) -> Self {
Self {
hes: OMatrix::zeros_generic(f.dim(), f.dim()),
steps: OVector::zeros_generic(f.dim(), U1::name()),
neighbors: OVector::zeros_generic(f.dim(), U1::name()),
}
}
}

impl<F: Function> Hessian<F>
where
DefaultAllocator: Allocator<F::Scalar, F::Dim, F::Dim>,
DefaultAllocator: Allocator<F::Scalar, F::Dim>,
{
/// Computes the Hessian matrix of the function at a given point with a
/// given scale of variables. See [`compute`](Hessian::compute) for more
/// details.
pub fn new<Sx, Sscale>(
f: &F,
x: &mut Vector<F::Scalar, F::Dim, Sx>,
scale: &Vector<F::Scalar, F::Dim, Sscale>,
fx: F::Scalar,
) -> Result<Self, HessianError>
where
Sx: StorageMut<F::Scalar, F::Dim> + IsContiguous,
Sscale: Storage<F::Scalar, F::Dim>,
{
let mut hes = Self::zeros(f);
hes.compute(f, x, scale, fx)?;
Ok(hes)
}

/// Computes the Hessian matrix of the function at a given point with a
/// given scale of variables.
///
/// The parameter `x` is mutable to allow temporary mutations that avoid
/// unnecessary allocations; after this method returns, the content of the
/// vector is exactly the same as before.
///
/// Information about variable scale is useful for problematic cases of
/// finite differentiation (e.g., when the value is near zero).
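///
/// Beyond the caller-supplied value `fx`, this performs 2n + n(n - 1)/2
/// evaluations of `f`, where n is the problem dimension.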
pub fn compute<Sx, Sscale>(
&mut self,
f: &F,
x: &mut Vector<F::Scalar, F::Dim, Sx>,
scale: &Vector<F::Scalar, F::Dim, Sscale>,
fx: F::Scalar,
) -> Result<&mut Self, HessianError>
where
Sx: StorageMut<F::Scalar, F::Dim> + IsContiguous,
Sscale: Storage<F::Scalar, F::Dim>,
{
let eps: F::Scalar = convert(EPSILON_CBRT);

for i in 0..f.dim().value() {
let xi = x[i];

// See the implementation of Jacobian for details on computing step size.
let magnitude = F::Scalar::one() / scale[i];
let step = eps * xi.abs().max(magnitude) * F::Scalar::one().copysign(xi);
let step = if step == F::Scalar::zero() { eps } else { step };

// Store the step for Hessian calculation.
self.steps[i] = step;

// Update the point and store the function output.
x[i] = xi + step;
let fxi = f.apply(x)?;
self.neighbors[i] = fxi;

// Restore the original value.
x[i] = xi;
}

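// Second pass: assemble second-order forward differences from the stored
// neighbor values:
//
//   H[i][i] ~ (f(x + 2 h_i e_i) - 2 f(x + h_i e_i) + f(x)) / h_i^2
//   H[i][j] ~ (f(x + h_i e_i + h_j e_j) - f(x + h_i e_i)
//              - f(x + h_j e_j) + f(x)) / (h_i h_j)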
for i in 0..f.dim().value() {
let xi = x[i];
let stepi = self.steps[i];

// Prepare x_i + 2 * e_i.
x[i] = xi + stepi + stepi;

let fxi = f.apply(x)?;
let fni = self.neighbors[i];

x[i] = xi + stepi;

self.hes[(i, i)] = ((fx - fni) + (fxi - fni)) / (stepi * stepi);

for j in (i + 1)..f.dim().value() {
let xj = x[j];
let stepj = self.steps[j];

x[j] = xj + stepj;

let fxj = f.apply(x)?;
let fnj = self.neighbors[j];

let hij = ((fx - fni) + (fxj - fnj)) / (stepi * stepj);
self.hes[(i, j)] = hij;
self.hes[(j, i)] = hij;

x[j] = xj;
}

x[i] = xi;
}

Ok(self)
}
}

impl<F: Problem> Deref for Hessian<F>
where
DefaultAllocator: Allocator<F::Scalar, F::Dim, F::Dim>,
DefaultAllocator: Allocator<F::Scalar, F::Dim>,
{
type Target = OMatrix<F::Scalar, F::Dim, F::Dim>;

fn deref(&self) -> &Self::Target {
&self.hes
}
}
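
// A minimal sketch of how the pieces above can feed a Newton-like step for
// optimization. This is illustration only, not an API defined in this file;
// `f`, `x` and `scale` are assumed to exist, and the LU solve comes from
// nalgebra.
//
//     let fx = f.apply(&x)?;
//     let grad = Gradient::new(&f, &mut x, &scale, fx)?;
//     let hes = Hessian::new(&f, &mut x, &scale, fx)?;
//     // Solve H p = -g for the Newton step p and move the point.
//     if let Some(p) = hes.clone_owned().lu().solve(&(-grad.clone_owned())) {
//         x += p;
//     }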

#[cfg(test)]
mod tests {
use super::*;
use crate::testing::{ExtendedPowell, ExtendedRosenbrock};

use approx::assert_abs_diff_eq;
- use nalgebra::{dmatrix, dvector};
+ use nalgebra::{dmatrix, dvector, Dynamic};

struct MixedVars;

impl Problem for MixedVars {
type Scalar = f64;
type Dim = Dynamic;

fn dim(&self) -> Self::Dim {
Dynamic::new(2)
}
}

impl Function for MixedVars {
fn apply<Sx>(
&self,
x: &Vector<Self::Scalar, Self::Dim, Sx>,
) -> Result<Self::Scalar, ProblemError>
where
Sx: Storage<Self::Scalar, Self::Dim> + IsContiguous,
{
// A simple, arbitrary function whose Hessian matrix has non-zero
// off-diagonal elements.
let x1 = x[0];
let x2 = x[1];

Ok(x1.powi(2) + x1 * x2 + x2.powi(3))
}
}
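
// For f(x1, x2) = x1^2 + x1 x2 + x2^3, the analytic derivatives are
// grad f = (2 x1 + x2, x1 + 3 x2^2) and H = [[2, 1], [1, 6 x2]], which at
// (3, -3) give (3, 30) and [[2, 1], [1, -18]], the values expected by the
// tests below.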

#[test]
fn rosenbrock_jacobian() {
@@ -181,4 +470,36 @@ mod tests {
];
assert_abs_diff_eq!(&*jac, &expected, epsilon = 10e-6);
}

#[test]
fn mixed_vars_gradient() {
let mut x = dvector![3.0, -3.0];
let scale = dvector![1.0, 1.0];

let func = MixedVars;
let fx = func.apply(&x).unwrap();
let grad = Gradient::new(&func, &mut x, &scale, fx);

assert!(grad.is_ok());
let grad = grad.unwrap();

let expected = dvector![3.0, 30.0];
assert_abs_diff_eq!(&*grad, &expected, epsilon = 10e-6);
}

#[test]
fn mixed_vars_hessian() {
let mut x = dvector![3.0, -3.0];
let scale = dvector![1.0, 1.0];

let func = MixedVars;
let fx = func.apply(&x).unwrap();
let hes = Hessian::new(&func, &mut x, &scale, fx);

assert!(hes.is_ok());
let hes = hes.unwrap();

let expected = dmatrix![2.0, 1.0; 1.0, -18.0];
assert_abs_diff_eq!(&*hes, &expected, epsilon = 10e-3);
}
}