Improve GP/SGP API #132

Merged (26 commits, Feb 1, 2024)

Changes from all commits
92f0c33
Add experts getter
relf Jan 25, 2024
01230ca
Add experts API
relf Jan 25, 2024
acc5644
Add sparse method choice in py API
relf Jan 26, 2024
3a689f6
Manage traces, Add initial_theta and sparse_method
relf Jan 26, 2024
47fb019
Make GP n_start configurable
relf Jan 27, 2024
54bb51c
Add n_start argument to control hyperparams optimization restarts
relf Jan 27, 2024
4ec0e22
Add theta tuning interface for SGP
relf Jan 29, 2024
8e1a754
Fix theta tuning initialization, refactor cobyla params
relf Jan 30, 2024
4c118cd
Renaming sparse_algorithm|parameters
relf Jan 30, 2024
b36c359
Renaming theta_init
relf Jan 30, 2024
bff20f5
Rename guess in init
relf Jan 30, 2024
f266045
Add theta_tuning in GP API
relf Jan 30, 2024
8ce0dc4
Add theta_tuning to GP
relf Jan 30, 2024
79299cd
Fix cobyla maxeval parameter
relf Jan 31, 2024
b4991ea
Parallelize multistart optimizations for SGP
relf Jan 31, 2024
3029bc5
Add SparseGpx basic tutorial
relf Jan 31, 2024
8894fb6
Trained GP model stores reduced likelihood value
relf Jan 31, 2024
d6da0df
Improve display of trained model info
relf Jan 31, 2024
f139e01
Fix moe display test
relf Jan 31, 2024
4c9ea54
Make GP/SGP computation interruptible
relf Feb 1, 2024
a5c3084
Fix double import
relf Feb 1, 2024
36ee40a
Cleanup
relf Feb 1, 2024
726c087
Remove fallible assertion in moe display test
relf Feb 1, 2024
f826103
Avoid ctrlc multiple handlers errors
relf Feb 1, 2024
cce24f7
Relax sgp noise test tolerance
relf Feb 1, 2024
ae62ce6
Add parallel multistart to GP
relf Feb 1, 2024
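Taken together, these commits rename `initial_theta` to `theta_init`, make the number of hyperparameter optimization restarts configurable through `n_start`, and have trained models carry their reduced likelihood (now shown by the `Display` impl). A minimal sketch of the reworked Rust builder API, pieced together from the updated bench and tests below; the toy data, the exact import paths and the `n_start` setter name are assumptions, not verbatim from the PR:

use egobox_gp::{correlation_models::SquaredExponentialCorr, mean_models::ConstantMean, GaussianProcess};
use linfa::prelude::*;
use ndarray::array;

fn main() {
    // Illustrative training data: 1-D inputs, single-column targets.
    let xt = array![[0.0], [1.0], [2.0], [3.0], [4.0]];
    let yt = array![[0.0], [0.9], [1.6], [1.1], [1.0]];

    let gp = GaussianProcess::<f64, ConstantMean, SquaredExponentialCorr>::params(
        ConstantMean::default(),
        SquaredExponentialCorr::default(),
    )
    .theta_init(vec![0.1]) // replaces `.initial_theta(Some(vec![0.1]))`
    .n_start(10)           // multistart restarts (setter name assumed from the `n_start()` getter)
    .fit(&Dataset::new(xt, yt))
    .expect("GP fit error");

    // The Display impl now reports mean, corr, theta, variance and likelihood.
    println!("{gp}");
}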
1 change: 1 addition & 0 deletions Cargo.toml
@@ -52,6 +52,7 @@ pyo3 = { version = "0.20.0", features = ["extension-module"] }
pyo3-log = "0.9.0"
serde = "1"
serde_json = "1"
ctrlc = "3.4"

[dev-dependencies]
criterion = "0.4"
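The new `ctrlc` dependency backs the "Make GP/SGP computation interruptible" commit. The actual wiring lives in hunks not shown here; the following is only a sketch of the usual pattern, assuming an atomic flag polled by the training loop (the flag name and polling site are hypothetical), with the registration error ignored so that installing the handler twice, the failure mode addressed by "Avoid ctrlc multiple handlers errors", does not abort:

use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;

fn main() {
    // Hypothetical interruption flag checked by the optimization loop.
    let interrupted = Arc::new(AtomicBool::new(false));
    let flag = interrupted.clone();

    // Ignore the Err returned when a handler is already installed.
    let _ = ctrlc::set_handler(move || flag.store(true, Ordering::SeqCst));

    for iter in 0..1_000 {
        if interrupted.load(Ordering::SeqCst) {
            eprintln!("interrupted at iteration {iter}");
            break;
        }
        // ... one multistart optimization step would run here ...
    }
}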
329 changes: 329 additions & 0 deletions doc/SparseGpx_Tutorial.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions gp/Cargo.toml
@@ -37,6 +37,7 @@ paste = "1.0"
num-traits = "0.2"
thiserror = "1"
log = "0.4"
rayon = "1"

serde = { version = "1", features = ["derive"], optional = true }
serde_json = { version = "1", optional = true }
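The new `rayon` dependency supports the parallelized multistart hyperparameter optimization introduced in `gp/src/algorithm.rs` below.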
2 changes: 1 addition & 1 deletion gp/benches/gp.rs
@@ -58,7 +58,7 @@ fn criterion_gp(c: &mut Criterion) {
SquaredExponentialCorr::default(),
)
.kpls_dim(Some(1))
.initial_theta(Some(vec![1.0]))
.theta_init(vec![1.0])
.fit(&Dataset::new(xt.to_owned(), yt.to_owned()))
.expect("GP fit error"),
)
201 changes: 147 additions & 54 deletions gp/src/algorithm.rs
@@ -15,19 +15,37 @@ use ndarray::{arr1, s, Array, Array1, Array2, ArrayBase, Axis, Data, Ix1, Ix2, Z
use ndarray_einsum_beta::*;
#[cfg(feature = "blas")]
use ndarray_linalg::{cholesky::*, eigh::*, qr::*, svd::*, triangular::*};
use ndarray_rand::rand::SeedableRng;
use ndarray_rand::rand::{Rng, SeedableRng};
use ndarray_rand::rand_distr::Normal;
use ndarray_rand::RandomExt;
use ndarray_stats::QuantileExt;

use log::debug;
use rand_xoshiro::Xoshiro256Plus;
use rayon::prelude::*;
#[cfg(feature = "serializable")]
use serde::{Deserialize, Serialize};
use std::fmt;
use std::time::Instant;

use ndarray_rand::rand_distr::Normal;
use ndarray_rand::RandomExt;
pub(crate) struct CobylaParams {
/// Initial change to the variables (COBYLA initial step size)
pub rhobeg: f64,
/// Relative tolerance on the objective value used as stopping criterion
pub ftol_rel: f64,
/// Maximum number of objective function evaluations
pub maxeval: usize,
}

impl Default for CobylaParams {
fn default() -> Self {
CobylaParams {
rhobeg: 0.5,
ftol_rel: 1e-4,
maxeval: 25,
}
}
}

// const LOG10_20: f64 = 1.301_029_995_663_981_3; //f64::log10(20.);
const N_START: usize = 10; // number of optimization restart (aka multistart)
//const N_START: usize = 0; // number of optimization restart (aka multistart)

/// Internal parameters computed Gp during training
/// used later on in prediction computations
@@ -160,6 +178,9 @@ impl<F: Float> Clone for GpInnerParams<F> {
pub struct GaussianProcess<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>> {
/// Parameter of the autocorrelation model
theta: Array1<F>,
/// Reduced likelihood value (result from internal optimization)
/// May be used to compare different trained models
likelihood: F,
/// Regression model
#[cfg_attr(
feature = "serializable",
@@ -202,6 +223,7 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>> Clone
fn clone(&self) -> Self {
Self {
theta: self.theta.to_owned(),
likelihood: self.likelihood,
mean: self.mean,
corr: self.corr,
inner_params: self.inner_params.clone(),
@@ -216,7 +238,11 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>> fmt::Display
for GaussianProcess<F, Mean, Corr>
{
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "GP({}, {})", self.mean, self.corr)
write!(
f,
"GP(mean={}, corr={}, theta={}, variance={}, likelihood={})",
self.mean, self.corr, self.theta, self.inner_params.sigma2, self.likelihood,
)
}
}

@@ -751,7 +777,7 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>, D: Data<Elem
let y = dataset.targets();
if let Some(d) = self.kpls_dim() {
if *d > x.ncols() {
return Err(GpError::InvalidValue(format!(
return Err(GpError::InvalidValueError(format!(
"Dimension reduction {} should be smaller than actual \
training input dimensions {}",
d,
@@ -786,14 +812,18 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>, D: Data<Elem
"Warning: multiple x input features have the same value (at least same row twice)."
);
}
let theta0 = self
.initial_theta()
.clone()
.map_or(Array1::from_elem(w_star.ncols(), F::cast(1e-2)), |v| {
Array::from_vec(v)
});

// Initial guess for theta
let theta0_dim = self.theta_tuning().theta0().len();
let theta0 = if theta0_dim == 1 {
Array1::from_elem(w_star.ncols(), self.theta_tuning().theta0()[0])
} else if theta0_dim == w_star.ncols() {
Array::from_vec(self.theta_tuning().theta0().to_vec())
} else {
panic!("Initial guess for theta should be either 1-dim or dim of xtrain (w_star.ncols()), got {}", theta0_dim)
};

let fx = self.mean().value(&xtrain.data);
let y_t = ytrain.clone();
let base: f64 = 10.;
let objfn = |x: &[f64], _gradient: Option<&mut [f64]>, _params: &mut ()| -> f64 {
let theta =
@@ -808,42 +838,60 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>, D: Data<Elem
}
let theta = theta.mapv(F::cast);
let rxx = self.corr().value(&x_distances.d, &theta, &w_star);
match reduced_likelihood(&fx, rxx, &x_distances, &y_t, self.nugget()) {
match reduced_likelihood(&fx, rxx, &x_distances, &ytrain, self.nugget()) {
Ok(r) => unsafe { -(*(&r.0 as *const F as *const f64)) },
Err(_) => f64::INFINITY,
}
};

// Multistart: user theta0 + 1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1., 10.
let mut theta0s = Array2::zeros((N_START + 1, theta0.len()));
theta0s.row_mut(0).assign(&theta0.mapv(|v| F::log10(v)));
let mut xlimits: Array2<F> = Array2::zeros((theta0.len(), 2));
for mut row in xlimits.rows_mut() {
row.assign(&arr1(&[F::cast(-6), F::cast(2)]));
}
// Use a seed here for reproducibility. Do we need to make it truly random
// Probably no, as it is just to get init values spread over
// [1e-6, 20] for multistart thanks to LHS method.
let seeds = Lhs::new(&xlimits)
.kind(egobox_doe::LhsKind::Maximin)
.with_rng(Xoshiro256Plus::seed_from_u64(42))
.sample(N_START);
Zip::from(theta0s.slice_mut(s![1.., ..]).rows_mut())
.and(seeds.rows())
.par_for_each(|mut theta, row| theta.assign(&row));

let bounds = vec![(F::cast(-6.), F::cast(2.)); theta0.len()];

let opt_thetas = theta0s.map_axis(Axis(1), |theta| {
optimize_params(objfn, &theta.to_owned(), &bounds)
});
let opt_index = opt_thetas.map(|(_, opt_f)| opt_f).argmin().unwrap();
let opt_theta = &(opt_thetas[opt_index]).0.mapv(|v| F::cast(base.powf(v)));
// println!("opt_theta={}", opt_theta);
let rxx = self.corr().value(&x_distances.d, opt_theta, &w_star);
let (_, inner_params) = reduced_likelihood(&fx, rxx, &x_distances, &ytrain, self.nugget())?;
// let bounds = vec![(F::cast(-6.), F::cast(2.)); theta0.len()];
let bounds_dim = self.theta_tuning().bounds().len();
let bounds = if bounds_dim == 1 {
vec![self.theta_tuning().bounds()[0]; w_star.ncols()]
} else if bounds_dim == w_star.ncols() {
self.theta_tuning().bounds().to_vec()
} else {
panic!(
"Bounds for theta should be either 1-dim or dim of xtrain ({}), got {}",
w_star.ncols(),
bounds_dim
)
};

let (params, bounds) = prepare_multistart(self.n_start(), &theta0, &bounds);
debug!(
"Optimize with multistart theta = {:?} and bounds = {:?}",
params, bounds
);
let now = Instant::now();
let opt_params = (0..params.nrows())
.into_par_iter()
.map(|i| {
let opt_res = optimize_params(
objfn,
&params.row(i).to_owned(),
&bounds,
CobylaParams {
maxeval: (10 * theta0_dim).max(CobylaParams::default().maxeval),
..CobylaParams::default()
},
);

opt_res
})
.reduce(
|| (Array::ones((params.ncols(),)), f64::INFINITY),
|a, b| if b.1 < a.1 { b } else { a },
);
debug!("elapsed optim = {:?}", now.elapsed().as_millis());
let opt_params = opt_params.0.mapv(|v| F::cast(base.powf(v)));
let rxx = self.corr().value(&x_distances.d, &opt_params, &w_star);
let (lkh, inner_params) =
reduced_likelihood(&fx, rxx, &x_distances, &ytrain, self.nugget())?;
Ok(GaussianProcess {
theta: opt_theta.to_owned(),
theta: opt_params,
likelihood: lkh,
mean: *self.mean(),
corr: *self.corr(),
inner_params,
@@ -854,12 +902,60 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>, D: Data<Elem
}
}

pub(crate) fn prepare_multistart<F: Float>(
n_start: usize,
theta0: &Array1<F>,
bounds: &[(F, F)],
) -> (Array2<F>, Vec<(F, F)>) {
// Use log10 theta as optimization parameter
let bounds: Vec<(F, F)> = bounds
.iter()
.map(|(lo, up)| (lo.log10(), up.log10()))
.collect();

// Multistart: user theta0 + 1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1., 10.
let mut theta0s = Array2::zeros((n_start + 1, theta0.len()));
theta0s.row_mut(0).assign(&theta0.mapv(|v| F::log10(v)));

match n_start.cmp(&1) {
std::cmp::Ordering::Equal => {
//let mut rng = Xoshiro256Plus::seed_from_u64(42);
let mut rng = Xoshiro256Plus::from_entropy();
let vals = bounds.iter().map(|(a, b)| rng.gen_range(*a..*b)).collect();
theta0s.row_mut(1).assign(&Array::from_vec(vals))
}
std::cmp::Ordering::Greater => {
let mut xlimits: Array2<F> = Array2::zeros((bounds.len(), 2));
// for mut row in xlimits.rows_mut() {
// row.assign(&arr1(&[limits.0, limits.1]));
// }
Zip::from(xlimits.rows_mut())
.and(&bounds)
.for_each(|mut row, limits| row.assign(&arr1(&[limits.0, limits.1])));
// Use a seed here for reproducibility. Do we need to make it truly random
// Probably no, as it is just to get init values spread over
// [1e-6, 20] for multistart thanks to LHS method.

let seeds = Lhs::new(&xlimits)
.kind(egobox_doe::LhsKind::Maximin)
.with_rng(Xoshiro256Plus::seed_from_u64(42))
.sample(n_start);
Zip::from(theta0s.slice_mut(s![1.., ..]).rows_mut())
.and(seeds.rows())
.par_for_each(|mut theta, row| theta.assign(&row));
}
std::cmp::Ordering::Less => (),
};
(theta0s, bounds)
}

/// Optimize gp hyper parameters given an initial guess and bounds with NLOPT::Cobyla
#[cfg(feature = "nlopt")]
pub(crate) fn optimize_params<ObjF, F>(
objfn: ObjF,
param0: &Array1<F>,
bounds: &[(F, F)],
cobyla: CobylaParams,
) -> (Array1<f64>, f64)
where
ObjF: Fn(&[f64], Option<&mut [f64]>, &mut ()) -> f64,
@@ -879,9 +975,9 @@ where
let upper_bounds = bounds.iter().map(|b| into_f64(&b.1)).collect::<Vec<_>>();
optimizer.set_upper_bounds(&upper_bounds).unwrap();

optimizer.set_initial_step1(0.5).unwrap();
optimizer.set_maxeval(15 * param0.len() as u32).unwrap();
optimizer.set_ftol_rel(1e-4).unwrap();
optimizer.set_initial_step1(cobyla.rhobeg).unwrap();
optimizer.set_maxeval(cobyla.maxeval as u32).unwrap();
optimizer.set_ftol_rel(cobyla.ftol_rel).unwrap();

match optimizer.optimize(&mut param) {
Ok((_, fmin)) => {
Expand All @@ -906,6 +1002,7 @@ pub(crate) fn optimize_params<ObjF, F>(
objfn: ObjF,
param0: &Array1<F>,
bounds: &[(F, F)],
cobyla: CobylaParams,
) -> (Array1<f64>, f64)
where
ObjF: Fn(&[f64], Option<&mut [f64]>, &mut ()) -> f64,
@@ -917,10 +1014,6 @@ where
let cons: Vec<&dyn Func<()>> = vec![];
let param0 = param0.map(|v| into_f64(v)).into_raw_vec();

let initial_step = 0.5;
let ftol_rel = 1e-4;
let maxeval = 15 * param0.len();

let bounds: Vec<_> = bounds
.iter()
.map(|(lo, up)| (into_f64(lo), into_f64(up)))
@@ -932,10 +1025,10 @@ where
&bounds,
&cons,
(),
maxeval,
cobyla::RhoBeg::All(initial_step),
cobyla.maxeval,
cobyla::RhoBeg::All(cobyla.rhobeg),
Some(StopTols {
ftol_rel,
ftol_rel: cobyla.ftol_rel,
..StopTols::default()
}),
) {
@@ -1158,7 +1251,7 @@ mod tests {
ConstantMean::default(),
SquaredExponentialCorr::default(),
)
.initial_theta(Some(vec![0.1]))
.theta_init(vec![0.1])
.kpls_dim(Some(1))
.fit(&Dataset::new(xt, yt))
.expect("GP fit error");
@@ -1181,7 +1274,7 @@ mod tests {
[<$regr Mean>]::default(),
[<$corr Corr>]::default(),
)
.initial_theta(Some(vec![0.1]))
.theta_init(vec![0.1])
.fit(&Dataset::new(xt, yt))
.expect("GP fit error");
let yvals = gp
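The reworked fit replaces the fixed `N_START` loop with `prepare_multistart` plus a rayon `into_par_iter().map(...).reduce(...)` over the candidate starting points, keeping the result with the lowest objective value. A self-contained sketch of that pattern, with a toy quadratic standing in for the negative reduced likelihood (the names and the objective are illustrative, not the crate's code):

use ndarray::{Array1, Array2};
use rayon::prelude::*;

/// Toy objective standing in for the negative reduced likelihood.
fn objective(theta_log10: &Array1<f64>) -> f64 {
    theta_log10.iter().map(|t| (t + 1.0).powi(2)).sum()
}

fn main() {
    // One row per starting point (log10 of theta), as built by `prepare_multistart`.
    let starts = Array2::from_shape_vec(
        (4, 2),
        vec![-2.0, -2.0, -5.0, 0.0, 0.0, -4.0, 1.0, 1.0],
    )
    .unwrap();

    // Optimize each start in parallel (here: just evaluate it), keep the best.
    let best = (0..starts.nrows())
        .into_par_iter()
        .map(|i| {
            let theta = starts.row(i).to_owned();
            let f = objective(&theta); // a real run would call COBYLA here
            (theta, f)
        })
        .reduce(
            || (Array1::ones(starts.ncols()), f64::INFINITY),
            |a, b| if b.1 < a.1 { b } else { a },
        );

    // Back to the natural scale, as done with `base.powf(v)` in the fit.
    let best_theta = best.0.mapv(|v| 10f64.powf(v));
    println!("best theta = {best_theta}, objective = {}", best.1);
}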
2 changes: 1 addition & 1 deletion gp/src/correlation_models.rs
@@ -16,7 +16,7 @@ use std::convert::TryFrom;
use std::fmt;

/// A trait for using a correlation model in GP regression
pub trait CorrelationModel<F: Float>: Clone + Copy + Default + fmt::Display {
pub trait CorrelationModel<F: Float>: Clone + Copy + Default + fmt::Display + Sync {
/// Compute correlation function matrix r(x, x') given distances `d` between x and x',
/// `theta` parameters, and PLS `weights`, where:
/// `theta` : hyperparameters (1xd)
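The added `Sync` bound is what allows a correlation model captured by the objective closure in `algorithm.rs` to be shared across the rayon worker threads used for the parallel multistart; without it the `into_par_iter()` optimization above would not compile.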
5 changes: 1 addition & 4 deletions gp/src/errors.rs
@@ -21,9 +21,6 @@ pub enum GpError {
/// When PLS fails
#[error("PLS error: {0}")]
PlsError(#[from] linfa_pls::PlsError),
/// When a value is invalid
#[error("PLS error: {0}")]
InvalidValue(String),
/// When a linfa error occurs
#[error(transparent)]
LinfaError(#[from] linfa::error::Error),
@@ -36,7 +33,7 @@ pub enum GpError {
/// When error during loading
#[error("Load error: {0}")]
LoadError(String),
/// When error during loading
/// When an error is due to a bad value
#[error("InvalidValue error: {0}")]
InvalidValueError(String),
}
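This removal pairs with the `InvalidValue` to `InvalidValueError` switch in `algorithm.rs` above: the two variants duplicated each other (the removed one even reused the "PLS error: {0}" message), so only `InvalidValueError` is kept.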
8 changes: 4 additions & 4 deletions gp/src/lib.rs
@@ -14,14 +14,14 @@ mod algorithm;
pub mod correlation_models;
mod errors;
pub mod mean_models;
mod sgp_algorithm;
mod sparse_algorithm;

mod parameters;
mod sgp_parameters;
mod sparse_parameters;
mod utils;

pub use algorithm::*;
pub use errors::*;
pub use parameters::*;
pub use sgp_algorithm::*;
pub use sgp_parameters::*;
pub use sparse_algorithm::*;
pub use sparse_parameters::*;
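Since these modules remain private and their contents are re-exported with `pub use`, the `sgp_*` to `sparse_*` file renames should not change any downstream `use egobox_gp::...` paths; only the internal module names move.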