feat: Adapt the trust region algorithm for function optimization
pnevyk committed May 13, 2023
1 parent dd6524a commit 96fe08f
Showing 3 changed files with 760 additions and 7 deletions.
329 changes: 325 additions & 4 deletions src/derivatives.rs
@@ -6,17 +6,22 @@ use nalgebra::{
allocator::Allocator,
convert,
storage::{Storage, StorageMut},
- ComplexField, DefaultAllocator, IsContiguous, OMatrix, RealField, Vector,
+ ComplexField, DefaultAllocator, Dim, DimName, IsContiguous, OMatrix, OVector, RealField,
+ Vector, U1,
};
use num_traits::{One, Zero};
use thiserror::Error;

- use crate::core::{Problem, ProblemError, System};
+ use crate::core::{Function, Problem, ProblemError, System};

/// Square root of double precision machine epsilon. This value is a standard
- /// constant for epsilons in approximating derivative-based concepts.
+ /// constant for epsilons in approximating first-order derivative-based concepts.
pub const EPSILON_SQRT: f64 = 0.000000014901161193847656;

/// Cubic root of double precision machine epsilon. This value is a standard
/// constant for epsilons in approximating second-order derivative-based concepts.
pub const EPSILON_CBRT: f64 = 0.0000060554544523933395;
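
// These constants are approximately f64::EPSILON.sqrt() and f64::EPSILON.cbrt().
// For a first-order forward difference, the combined truncation and rounding
// error is minimized by a step proportional to eps^(1/2); for the second-order
// differences used below, the optimum shifts to roughly eps^(1/3).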

/// Error when computing the Jacobian matrix.
#[derive(Debug, Error)]
pub enum JacobianError {
@@ -135,13 +140,297 @@ where
}
}

/// Error when computing the gradient vector.
#[derive(Debug, Error)]
pub enum GradientError {
/// Error that occurred when evaluating the function.
#[error("{0}")]
Problem(#[from] ProblemError),
}

/// Gradient vector of a function.
#[derive(Debug)]
pub struct Gradient<F: Problem>
where
DefaultAllocator: Allocator<F::Scalar, F::Dim>,
{
grad: OVector<F::Scalar, F::Dim>,
}

impl<F: Problem> Gradient<F>
where
DefaultAllocator: Allocator<F::Scalar, F::Dim>,
{
/// Initializes the gradient vector with zeros.
pub fn zeros(f: &F) -> Self {
Self {
grad: OVector::zeros_generic(f.dim(), U1::name()),
}
}
}

impl<F: Function> Gradient<F>
where
DefaultAllocator: Allocator<F::Scalar, F::Dim>,
{
/// Computes the gradient vector of the function at a given point with a
/// given scale of variables. See [`compute`](Gradient::compute) for more
/// details.
pub fn new<Sx, Sscale>(
f: &F,
x: &mut Vector<F::Scalar, F::Dim, Sx>,
scale: &Vector<F::Scalar, F::Dim, Sscale>,
fx: F::Scalar,
) -> Result<Self, GradientError>
where
Sx: StorageMut<F::Scalar, F::Dim> + IsContiguous,
Sscale: Storage<F::Scalar, F::Dim>,
{
let mut grad = Self::zeros(f);
grad.compute(f, x, scale, fx)?;
Ok(grad)
}

/// Computes the gradient vector of the function at a given point with a
/// given scale of variables.
///
/// The parameter `x` is mutable to allow temporary mutations that avoid
/// unnecessary allocations; after this method returns, the content of the
/// vector is exactly the same as before.
///
/// Information about variable scale is useful for problematic cases of
/// finite differentiation (e.g., when the value is near zero).
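///
/// A minimal usage sketch (assuming some type `MyFunc` implementing
/// [`Function`] over `f64` with dynamic dimension):
///
/// ```ignore
/// let f = MyFunc::new();
/// let mut x = nalgebra::dvector![3.0, -3.0];
/// let scale = nalgebra::dvector![1.0, 1.0];
/// let fx = f.apply(&x)?;
/// let mut grad = Gradient::zeros(&f);
/// grad.compute(&f, &mut x, &scale, fx)?;
/// ```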
pub fn compute<Sx, Sscale>(
&mut self,
f: &F,
x: &mut Vector<F::Scalar, F::Dim, Sx>,
scale: &Vector<F::Scalar, F::Dim, Sscale>,
fx: F::Scalar,
) -> Result<&mut Self, GradientError>
where
Sx: StorageMut<F::Scalar, F::Dim> + IsContiguous,
Sscale: Storage<F::Scalar, F::Dim>,
{
let eps: F::Scalar = convert(EPSILON_SQRT);

for i in 0..f.dim().value() {
let xi = x[i];

// See the implementation of Jacobian for details on computing step size.
let magnitude = F::Scalar::one() / scale[i];
let step = eps * xi.abs().max(magnitude) * F::Scalar::one().copysign(xi);
let step = if step == F::Scalar::zero() { eps } else { step };
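
// The step carries the sign of x[i] so the perturbation is taken away
// from zero, and the guard falls back to a bare eps if the computed step
// ends up being exactly zero.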

// Update the point.
x[i] = xi + step;
let fxi = f.apply(x)?;

// Compute the derivative approximation: grad[i] = (F(x + e_i * step_i) - F(x)) / step_i.
self.grad[i] = (fxi - fx) / step;

// Restore the original value.
x[i] = xi;
}

Ok(self)
}
}

impl<F: Problem> Deref for Gradient<F>
where
DefaultAllocator: Allocator<F::Scalar, F::Dim>,
{
type Target = OVector<F::Scalar, F::Dim>;

fn deref(&self) -> &Self::Target {
&self.grad
}
}

/// Error when computing the Hessian matrix.
#[derive(Debug, Error)]
pub enum HessianError {
/// Error that occurred when evaluating the function.
#[error("{0}")]
Problem(#[from] ProblemError),
}

/// Hessian matrix of a function.
#[derive(Debug)]
pub struct Hessian<F: Problem>
where
DefaultAllocator: Allocator<F::Scalar, F::Dim, F::Dim>,
DefaultAllocator: Allocator<F::Scalar, F::Dim>,
{
hes: OMatrix<F::Scalar, F::Dim, F::Dim>,
steps: OVector<F::Scalar, F::Dim>,
neighbors: OVector<F::Scalar, F::Dim>,
}

impl<F: Problem> Hessian<F>
where
DefaultAllocator: Allocator<F::Scalar, F::Dim, F::Dim>,
DefaultAllocator: Allocator<F::Scalar, F::Dim>,
{
/// Initializes the Hessian matrix with zeros.
pub fn zeros(f: &F) -> Self {
Self {
hes: OMatrix::zeros_generic(f.dim(), f.dim()),
steps: OVector::zeros_generic(f.dim(), U1::name()),
neighbors: OVector::zeros_generic(f.dim(), U1::name()),
}
}
}

impl<F: Function> Hessian<F>
where
DefaultAllocator: Allocator<F::Scalar, F::Dim, F::Dim>,
DefaultAllocator: Allocator<F::Scalar, F::Dim>,
{
/// Computes the Hessian matrix of the function at a given point with a
/// given scale of variables. See [`compute`](Hessian::compute) for more
/// details.
pub fn new<Sx, Sscale>(
f: &F,
x: &mut Vector<F::Scalar, F::Dim, Sx>,
scale: &Vector<F::Scalar, F::Dim, Sscale>,
fx: F::Scalar,
) -> Result<Self, HessianError>
where
Sx: StorageMut<F::Scalar, F::Dim> + IsContiguous,
Sscale: Storage<F::Scalar, F::Dim>,
{
let mut hes = Self::zeros(f);
hes.compute(f, x, scale, fx)?;
Ok(hes)
}

/// Computes the Hessian matrix of the function at a given point with a
/// given scale of variables.
///
/// The parameter `x` is mutable to allow temporary mutations that avoid
/// unnecessary allocations; after this method returns, the content of the
/// vector is exactly the same as before.
///
/// Information about variable scale is useful for problematic cases of
/// finite differentiation (e.g., when the value is near zero).
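///
/// Beyond the caller-supplied value `fx`, this performs 2n + n(n - 1)/2
/// evaluations of `f`, where n is the problem dimension.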
pub fn compute<Sx, Sscale>(
&mut self,
f: &F,
x: &mut Vector<F::Scalar, F::Dim, Sx>,
scale: &Vector<F::Scalar, F::Dim, Sscale>,
fx: F::Scalar,
) -> Result<&mut Self, HessianError>
where
Sx: StorageMut<F::Scalar, F::Dim> + IsContiguous,
Sscale: Storage<F::Scalar, F::Dim>,
{
let eps: F::Scalar = convert(EPSILON_CBRT);

for i in 0..f.dim().value() {
let xi = x[i];

// See the implementation of Jacobian for details on computing step size.
let magnitude = F::Scalar::one() / scale[i];
let step = eps * xi.abs().max(magnitude) * F::Scalar::one().copysign(xi);
let step = if step == F::Scalar::zero() { eps } else { step };

// Store the step for Hessian calculation.
self.steps[i] = step;

// Update the point and store the function output.
x[i] = xi + step;
let fxi = f.apply(x)?;
self.neighbors[i] = fxi;

// Restore the original value.
x[i] = xi;
}

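// Second pass: assemble second-order forward differences from the stored
// neighbor values:
//
//   H[i][i] ~ (f(x + 2 h_i e_i) - 2 f(x + h_i e_i) + f(x)) / h_i^2
//   H[i][j] ~ (f(x + h_i e_i + h_j e_j) - f(x + h_i e_i)
//              - f(x + h_j e_j) + f(x)) / (h_i h_j)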
for i in 0..f.dim().value() {
let xi = x[i];
let stepi = self.steps[i];

// Prepare x_i + 2 * e_i.
x[i] = xi + stepi + stepi;

let fxi = f.apply(x)?;
let fni = self.neighbors[i];

x[i] = xi + stepi;

self.hes[(i, i)] = ((fx - fni) + (fxi - fni)) / (stepi * stepi);

for j in (i + 1)..f.dim().value() {
let xj = x[j];
let stepj = self.steps[j];

x[j] = xj + stepj;

let fxj = f.apply(x)?;
let fnj = self.neighbors[j];

let hij = ((fx - fni) + (fxj - fnj)) / (stepi * stepj);
self.hes[(i, j)] = hij;
self.hes[(j, i)] = hij;

x[j] = xj;
}

x[i] = xi;
}

Ok(self)
}
}

impl<F: Problem> Deref for Hessian<F>
where
DefaultAllocator: Allocator<F::Scalar, F::Dim, F::Dim>,
DefaultAllocator: Allocator<F::Scalar, F::Dim>,
{
type Target = OMatrix<F::Scalar, F::Dim, F::Dim>;

fn deref(&self) -> &Self::Target {
&self.hes
}
}
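
// A minimal sketch of how the pieces above can feed a Newton-like step for
// optimization. This is illustration only, not an API defined in this file;
// `f`, `x` and `scale` are assumed to exist, and the LU solve comes from
// nalgebra.
//
//     let fx = f.apply(&x)?;
//     let grad = Gradient::new(&f, &mut x, &scale, fx)?;
//     let hes = Hessian::new(&f, &mut x, &scale, fx)?;
//     // Solve H p = -g for the Newton step p and move the point.
//     if let Some(p) = hes.clone_owned().lu().solve(&(-grad.clone_owned())) {
//         x += p;
//     }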

#[cfg(test)]
mod tests {
use super::*;
use crate::testing::{ExtendedPowell, ExtendedRosenbrock};

use approx::assert_abs_diff_eq;
- use nalgebra::{dmatrix, dvector};
+ use nalgebra::{dmatrix, dvector, Dynamic};

struct MixedVars;

impl Problem for MixedVars {
type Scalar = f64;
type Dim = Dynamic;

fn dim(&self) -> Self::Dim {
Dynamic::new(2)
}
}

impl Function for MixedVars {
fn apply<Sx>(
&self,
x: &Vector<Self::Scalar, Self::Dim, Sx>,
) -> Result<Self::Scalar, ProblemError>
where
Sx: Storage<Self::Scalar, Self::Dim> + IsContiguous,
{
// A simple, arbitrary function whose Hessian matrix has non-zero
// off-diagonal elements.
let x1 = x[0];
let x2 = x[1];

Ok(x1.powi(2) + x1 * x2 + x2.powi(3))
}
}
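
// For f(x1, x2) = x1^2 + x1 x2 + x2^3, the analytic derivatives are
// grad f = (2 x1 + x2, x1 + 3 x2^2) and H = [[2, 1], [1, 6 x2]], which at
// (3, -3) give (3, 30) and [[2, 1], [1, -18]], the values expected by the
// tests below.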

#[test]
fn rosenbrock_jacobian() {
@@ -181,4 +470,36 @@ mod tests {
];
assert_abs_diff_eq!(&*jac, &expected, epsilon = 10e-6);
}

#[test]
fn mixed_vars_gradient() {
let mut x = dvector![3.0, -3.0];
let scale = dvector![1.0, 1.0];

let func = MixedVars;
let fx = func.apply(&x).unwrap();
let grad = Gradient::new(&func, &mut x, &scale, fx);

assert!(grad.is_ok());
let grad = grad.unwrap();

let expected = dvector![3.0, 30.0];
assert_abs_diff_eq!(&*grad, &expected, epsilon = 10e-6);
}

#[test]
fn mixed_vars_hessian() {
let mut x = dvector![3.0, -3.0];
let scale = dvector![1.0, 1.0];

let func = MixedVars;
let fx = func.apply(&x).unwrap();
let hes = Hessian::new(&func, &mut x, &scale, fx);

assert!(hes.is_ok());
let hes = hes.unwrap();

let expected = dmatrix![2.0, 1.0; 1.0, -18.0];
assert_abs_diff_eq!(&*hes, &expected, epsilon = 10e-3);
}
}