diff --git a/docs/source/tutorials.rst b/docs/source/tutorials.rst index 3506da7..d146376 100644 --- a/docs/source/tutorials.rst +++ b/docs/source/tutorials.rst @@ -4,6 +4,7 @@ Tutorials .. toctree:: :maxdepth: 2 - tutorials/himmelblau.ipynb + tutorials/introduction.ipynb tutorials/hyperparameters.ipynb + tutorials/pareto-fronts.ipynb tutorials/passive-dofs.ipynb diff --git a/docs/source/tutorials/himmelblau.ipynb b/docs/source/tutorials/introduction.ipynb similarity index 98% rename from docs/source/tutorials/himmelblau.ipynb rename to docs/source/tutorials/introduction.ipynb index 7925a58..143fb6a 100644 --- a/docs/source/tutorials/himmelblau.ipynb +++ b/docs/source/tutorials/introduction.ipynb @@ -165,7 +165,7 @@ "metadata": {}, "outputs": [], "source": [ - "RE(agent.learn(\"quasi-random\", n=32))\n", + "RE(agent.learn(\"quasi-random\", n=36))\n", "agent.plot_objectives()" ] }, @@ -294,7 +294,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.11.5 64-bit", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -308,7 +308,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.10.0" }, "vscode": { "interpreter": { diff --git a/docs/source/tutorials/pareto-fronts.ipynb b/docs/source/tutorials/pareto-fronts.ipynb new file mode 100644 index 0000000..4b8a13c --- /dev/null +++ b/docs/source/tutorials/pareto-fronts.ipynb @@ -0,0 +1,159 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "e7b5e13a-c059-441d-8d4f-fff080d52054", + "metadata": {}, + "source": [ + "# Multiobjective optimization with Pareto front mapping\n", + "\n", + "One way to do multiobjective optimization is Pareto optimization, which explores the set of Pareto-efficient points. A point is Pareto-efficient if no other valid point is better at every objective; taken together, the Pareto-efficient points describe the \"trade-off\" between the objectives. For example, when minimizing two fitnesses f1 and f2, a point is Pareto-efficient if no other feasible point has both a smaller f1 and a smaller f2. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cac0177b-576c-4f01-b306-2e9e8544a05c", + "metadata": {}, + "outputs": [], + "source": [ + "from blop.utils import prepare_re_env\n", + "\n", + "%run -i $prepare_re_env.__file__ --db-type=temp" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "120812d8-5de2-4efa-8fc6-2d8e4cd0693e", + "metadata": {}, + "outputs": [], + "source": [ + "from blop import DOF, Objective, Agent\n", + "from blop.utils import functions\n", + "from blop.dofs import BrownianMotion\n", + "\n", + "import numpy as np\n", + "\n", + "\n", + "def digestion(db, uid):\n", + " products = db[uid].table()\n", + "\n", + " for index, entry in products.iterrows():\n", + " x1, x2 = entry.x1, entry.x2\n", + "\n", + " products.loc[index, \"f1\"] = (x1 - 2) ** 2 + (x2 - 1) + 2\n", + " products.loc[index, \"f2\"] = 9 * x1 - (x2 - 1) + 2\n", + " products.loc[index, \"c1\"] = x1**2 + x2**2\n", + " products.loc[index, \"c2\"] = x1 - 3 * x2 + 10\n", + "\n", + " return products\n", + "\n", + "\n", + "dofs = [\n", + " DOF(name=\"x1\", search_domain=(-20, 20)),\n", + " DOF(name=\"x2\", search_domain=(-20, 20)),\n", + "]\n", + "\n", + "\n", + "objectives = [\n", + " Objective(name=\"f1\", target=\"min\"),\n", + " Objective(name=\"f2\", target=\"min\"),\n", + " Objective(name=\"c1\", target=(-np.inf, 225)),\n", + " Objective(name=\"c2\", target=(-np.inf, 0)),\n", + "]\n", + "\n", + "agent = Agent(\n", + " dofs=dofs,\n", + " objectives=objectives,\n", + " digestion=digestion,\n", + " db=db,\n", + ")\n", + "\n", + "(uid,) = RE(agent.learn(\"qr\", n=64))" + ] + }, + { + "cell_type": "markdown", + "id": "d81c6af2-0a4c-4d31-9b7c-cc3065550b98", + "metadata": {}, + "source": [ + "We can plot our fitness and constraint objectives to see their models:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6c08f53-e96b-4987-ba3d-a93c84468b0b", + "metadata": {}, + "outputs": [], + "source": [ + "agent.plot_objectives()" + ] + }, + { + "cell_type": "markdown", + "id": "48b976e6-048a-4e16-9a1c-500203a2e195", + "metadata": {}, + "source": [ + "We can plot the Pareto front (the set of all Pareto-efficient points), which shows the trade-off between the two fitnesses. The points in blue comprise the Pareto front, while the points in red are either not Pareto efficient or are invalidated by one of the constraints." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "990a877e-f533-419c-bf5d-569ad7e72c6b", + "metadata": {}, + "outputs": [], + "source": [ + "agent.plot_pareto_front()" + ] + }, + { + "cell_type": "markdown", + "id": "29d7f4fb-8f25-4b57-982f-28737fad2a7c", + "metadata": {}, + "source": [ + "We can explore the Pareto front by choosing a random point on the Pareto front and computing the expected improvement in the hypervolume of all fitness objectives with respect to that point (called the \"reference point\"). 
All this is done automatically with the `qnehvi` acquisition function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b49e6233-b228-43a3-9d8a-722a82e93443", + "metadata": {}, + "outputs": [], + "source": [ + "# this is broken now but is fixed in the next PR\n", + "# RE(agent.learn(\"qnehvi\", n=4))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + }, + "vscode": { + "interpreter": { + "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/source/tutorials/passive-dofs.ipynb b/docs/source/tutorials/passive-dofs.ipynb index 8e53358..a79ae5b 100644 --- a/docs/source/tutorials/passive-dofs.ipynb +++ b/docs/source/tutorials/passive-dofs.ipynb @@ -94,7 +94,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.10.0" }, "vscode": { "interpreter": { diff --git a/src/blop/agent.py b/src/blop/agent.py index 3b9d4c8..cf57290 100644 --- a/src/blop/agent.py +++ b/src/blop/agent.py @@ -17,15 +17,19 @@ import pandas as pd import scipy as sp import torch + +# from botorch.utils.transforms import normalize +from botorch.acquisition.objective import ScalarizedPosteriorTransform from botorch.models.deterministic import GenericDeterministicModel from botorch.models.model_list_gp_regression import ModelListGP from botorch.models.transforms.input import AffineInputTransform, ChainedInputTransform, Log10, Normalize from databroker import Broker from ophyd import Signal -from . import utils -from .bayesian import acquisition, models, plotting -from .bayesian.transforms import TargetingPosteriorTransform +from . import plotting, utils +from .bayesian import acquisition, models + +# from .bayesian.transforms import TargetingPosteriorTransform from .digestion import default_digestion_function from .dofs import DOF, DOFList from .objectives import Objective, ObjectiveList @@ -35,7 +39,7 @@ mpl.rc("image", cmap="coolwarm") -DEFAULT_MAX_SAMPLES = 2**11 +DEFAULT_MAX_SAMPLES = 3200 def _validate_dofs_and_objs(dofs: DOFList, objs: ObjectiveList): @@ -532,7 +536,7 @@ def benchmark( @property def model(self): - """A model encompassing all the objectives. 
A single GP in the single-objective case, or a model list.""" + """A model encompassing all the fitnesses and constraints.""" return ( ModelListGP(*[obj.model for obj in self.active_objs]) if len(self.active_objs) > 1 else self.active_objs[0].model ) @@ -542,32 +546,93 @@ def posterior(self, x): return self.model.posterior(torch.tensor(x)) @property - def targeting_transform(self): - return TargetingPosteriorTransform( - weights=torch.tensor(self.active_objs.weights, dtype=torch.double), targets=self.active_objs.targets + def fitness_model(self): + return ( + ModelListGP(*[obj.model for obj in self.active_objs if obj.type == "fitness"]) + if len(self.active_objs) > 1 + else self.active_objs[0].model ) @property - def scalarized_objectives(self): - """Returns a (n_obs,) array of scalarized objectives""" - return self.targeting_transform.evaluate(self.train_targets(active=True)).sum(axis=-1) + def fitness_weights(self): + return torch.tensor([obj.weight for obj in self.objectives if obj.type == "fitness"], dtype=torch.double) + + @property + def fitness_scalarization(self): + return ScalarizedPosteriorTransform(weights=self.fitness_weights) + + @property + def evaluated_fitnesses(self): + return self.train_targets()[:, self.objectives.type == "fitness"] @property - def max_scalarized_objective(self): - """Returns the value of the best scalarized objective seen so far.""" - f = self.scalarized_objectives - return np.max(np.where(np.isnan(f), -np.inf, f)) + def scalarized_fitnesses(self): + return self.fitness_scalarization.evaluate(self.evaluated_fitnesses) + + @property + def evaluated_constraints(self): + if sum(self.objectives.type == "constraint"): + y = self.train_targets() + return torch.cat( + [ + ((y[:, i] >= obj.target[0]) & (y[:, i] <= obj.target[1])).unsqueeze(0) + for i, obj in enumerate(self.objectives) + if obj.type == "constraint" + ], + dim=0, + ).T + else: + return torch.ones(size=(len(self.table), 0), dtype=torch.bool) + + @property + def argmax_best_f(self): + f = self.scalarized_fitnesses + c = self.evaluated_constraints.all(axis=-1) + mask = (~f.isnan()) & c + + if not mask.sum(): + raise ValueError("There are no valid points that satisfy the constraints!") + + return torch.where(mask)[0][f[mask].argmax()] @property - def argmax_scalarized_objective(self): - """Returns the index of the best scalarized objective seen so far.""" - f = self.scalarized_objectives - return np.argmax(np.where(np.isnan(f), -np.inf, f)) + def best_f(self): + return self.scalarized_fitnesses[self.argmax_best_f] + + @property + def pareto_front_mask(self): + """ + A mask for all data points that is true when the point is on the Pareto front. + A point is on the Pareto front if it is Pareto-efficient, i.e. no other point is strictly better at every fitness. + Points that violate any constraint are excluded. + """ + y = self.train_targets()[:, self.objectives.type == "fitness"] + in_pareto_front = ~(y.unsqueeze(1) > y.unsqueeze(0)).all(axis=-1).any(axis=0) + all_constraints_satisfied = self.evaluated_constraints.all(axis=-1) + return in_pareto_front & all_constraints_satisfied + + @property + def pareto_front(self): + """ + A subset of the data table containing only points on the Pareto front. 
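+        For example, with the two fitness objectives from the Pareto-front tutorial above (hypothetical column names), `agent.pareto_front[["f1", "f2"]]` gives the fitnesses of the Pareto-efficient points.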
+ """ + return self.table.loc[self.pareto_front_mask.numpy()] + + @property + def min_ref_point(self): + y = self.train_targets()[:, self.objectives.type == "fitness"] + return y[y.argmax(axis=0)].min(axis=0).values + + @property + def random_ref_point(self): + pareto_front_index = torch.where(self.pareto_front_mask)[0] + return self.evaluated_fitnesses[np.random.choice(pareto_front_index)] @property def all_objectives_valid(self): """A mask of whether all objectives are valid for each data point.""" - return ~torch.isnan(self.scalarized_objectives) + return ~torch.isnan(self.scalarized_fitnesses) def _train_model(self, model, hypers=None, **kwargs): """Fit all of the agent's models. All kwargs are passed to `botorch.fit.fit_gpytorch_mll`.""" @@ -621,7 +686,7 @@ def _construct_model(self, obj, skew_dims=None): trusted.long(), learn_additional_noise=True ) - obj.validity_conjugate_model = models.LatentDirichletModel( + obj.validity_conjugate_model = models.LatentDirichletClassifier( train_inputs=train_inputs[inputs_are_trusted], train_targets=dirichlet_likelihood.transformed_targets.transpose(-1, -2)[inputs_are_trusted].double(), skew_dims=skew_dims, @@ -760,12 +825,12 @@ def constraint(self, x): p = torch.ones(x.shape[:-1]) for obj in self.active_objs: # if the targeting constraint is non-trivial - if obj.use_as_constraint: + if obj.type == "constraint": p *= obj.targeting_constraint(x) # if the validity constaint is non-trivial if obj.validity_conjugate_model is not None: p *= obj.validity_constraint(x) - return p + return p # + 1e-6 * normalize(x, self._sample_domain).square().sum(axis=-1) @property def hypers(self) -> dict: @@ -848,22 +913,20 @@ def train_targets(self, index=None, **subset_kwargs): return torch.cat([self.train_targets(obj.name) for obj in self.objectives], dim=-1) obj = self.objectives[index] - targets = self.table.loc[:, obj.name].values.copy() + values = self.table.loc[:, obj.name].values.copy() # check that targets values are inside acceptable values - valid = (targets >= obj._trust_domain[0]) & (targets <= obj._trust_domain[1]) - targets = np.where(valid, targets, np.nan) + valid = (values >= obj._trust_domain[0]) & (values <= obj._trust_domain[1]) + values = np.where(valid, values, np.nan) - # transform if needed - if obj.log: - targets = np.where(targets > 0, np.log(targets), np.nan) + targets = obj.fitness_forward(values) return torch.tensor(targets, dtype=torch.double).unsqueeze(-1) @property def best(self): """Returns all data for the best point.""" - return self.table.loc[self.argmax_scalarized_objective] + return self.table.loc[self.argmax_best_f.item()] @property def best_inputs(self): @@ -941,3 +1004,7 @@ def plot_history(self, **kwargs): @property def latent_transforms(self): return {obj.name: obj.model.covar_module.latent_transform for obj in self.active_objs} + + def plot_pareto_front(self, **kwargs): + """Plot the improvement of the agent over time.""" + plotting._plot_pareto_front(self, **kwargs) diff --git a/src/blop/bayesian/acquisition/__init__.py b/src/blop/bayesian/acquisition/__init__.py index 24bdacf..9d807d7 100644 --- a/src/blop/bayesian/acquisition/__init__.py +++ b/src/blop/bayesian/acquisition/__init__.py @@ -1,6 +1,7 @@ import os import yaml +from botorch.utils.transforms import normalize from . 
import analytic, monte_carlo from .analytic import * # noqa F401 @@ -42,53 +43,53 @@ def get_acquisition_function(agent, identifier="qei", return_metadata=True, verb if acq_func_name == "expected_improvement": acq_func = analytic.ConstrainedLogExpectedImprovement( constraint=agent.constraint, - model=agent.model, - best_f=agent.max_scalarized_objective, - posterior_transform=agent.targeting_transform, + model=agent.fitness_model, + best_f=agent.best_f, + posterior_transform=agent.fitness_scalarization, ) acq_func_meta = {"name": acq_func_name, "args": {}} elif acq_func_name == "monte_carlo_expected_improvement": acq_func = monte_carlo.qConstrainedExpectedImprovement( constraint=agent.constraint, - model=agent.model, - best_f=agent.max_scalarized_objective, - posterior_transform=agent.targeting_transform, + model=agent.fitness_model, + best_f=agent.best_f, + posterior_transform=agent.fitness_scalarization, ) acq_func_meta = {"name": acq_func_name, "args": {}} elif acq_func_name == "probability_of_improvement": acq_func = analytic.ConstrainedLogProbabilityOfImprovement( constraint=agent.constraint, - model=agent.model, - best_f=agent.max_scalarized_objective, - posterior_transform=agent.targeting_transform, + model=agent.fitness_model, + best_f=agent.best_f, + posterior_transform=agent.fitness_scalarization, ) acq_func_meta = {"name": acq_func_name, "args": {}} elif acq_func_name == "monte_carlo_probability_of_improvement": acq_func = monte_carlo.qConstrainedProbabilityOfImprovement( constraint=agent.constraint, - model=agent.model, - best_f=agent.max_scalarized_objective, - posterior_transform=agent.targeting_transform, + model=agent.fitness_model, + best_f=agent.best_f, + posterior_transform=agent.fitness_scalarization, ) acq_func_meta = {"name": acq_func_name, "args": {}} elif acq_func_name == "lower_bound_max_value_entropy": acq_func = monte_carlo.qConstrainedLowerBoundMaxValueEntropy( constraint=agent.constraint, - model=agent.model, + model=agent.fitness_model, candidate_set=agent.test_inputs(n=1024).squeeze(1), ) acq_func_meta = {"name": acq_func_name, "args": {}} - elif acq_func_name == "noisy_expected_hypervolume_improvement": + elif acq_func_name == "monte_carlo_noisy_expected_hypervolume_improvement": acq_func = monte_carlo.qConstrainedNoisyExpectedHypervolumeImprovement( constraint=agent.constraint, - model=agent.model, - ref_point=agent.train_targets.min(dim=0).values, - X_baseline=agent.train_inputs, + model=agent.fitness_model, + ref_point=agent.random_ref_point, + X_baseline=normalize(agent.train_inputs(), agent._sample_domain), prune_baseline=True, ) acq_func_meta = {"name": acq_func_name, "args": {}} @@ -98,9 +99,9 @@ def get_acquisition_function(agent, identifier="qei", return_metadata=True, verb acq_func = analytic.ConstrainedUpperConfidenceBound( constraint=agent.constraint, - model=agent.model, + model=agent.fitness_model, beta=beta, - posterior_transform=agent.targeting_transform, + posterior_transform=agent.fitness_scalarization, ) acq_func_meta = {"name": acq_func_name, "args": {"beta": beta}} @@ -109,9 +110,9 @@ def get_acquisition_function(agent, identifier="qei", return_metadata=True, verb acq_func = monte_carlo.qConstrainedUpperConfidenceBound( constraint=agent.constraint, - model=agent.model, + model=agent.fitness_model, beta=beta, - posterior_transform=agent.targeting_transform, + posterior_transform=agent.fitness_scalarization, ) acq_func_meta = {"name": acq_func_name, "args": {"beta": beta}} diff --git a/src/blop/bayesian/acquisition/config.yml 
b/src/blop/bayesian/acquisition/config.yml index 2fac116..512470a 100644 --- a/src/blop/bayesian/acquisition/config.yml +++ b/src/blop/bayesian/acquisition/config.yml @@ -48,6 +48,14 @@ noisy_expected_hypervolume_improvement: pretty_name: Noisy expected hypervolume improvement type: analytic +monte_carlo_noisy_expected_hypervolume_improvement: + description: The expected improvement in the hypervolume dominated by the Pareto front, relative to a reference point and averaged over posterior samples. + identifiers: + - qnehvi + multitask_only: true + pretty_name: Noisy expected hypervolume improvement + type: monte_carlo + probability_of_improvement: description: The probability that this input improves on the current maximum. identifiers: diff --git a/src/blop/bayesian/models.py b/src/blop/bayesian/models.py index 05c7268..59ca6e9 100644 --- a/src/blop/bayesian/models.py +++ b/src/blop/bayesian/models.py @@ -1,11 +1,11 @@ -import botorch import gpytorch import torch +from botorch.models.gp_regression import SingleTaskGP from . import kernels -class LatentGP(botorch.models.gp_regression.SingleTaskGP): +class LatentGP(SingleTaskGP): def __init__(self, train_inputs, train_targets, skew_dims=True, *args, **kwargs): super().__init__(train_inputs, train_targets, *args, **kwargs) @@ -23,7 +23,22 @@ def __init__(self, train_inputs, train_targets, skew_dims=True, *args, **kwargs) self.trained = False -class LatentDirichletModel(LatentGP): +class LatentConstraintModel(LatentGP): + def __init__(self, train_inputs, train_targets, skew_dims=True, *args, **kwargs): + super().__init__(train_inputs, train_targets, skew_dims, *args, **kwargs) + + self.trained = False + + def fitness(self, x, n_samples=1024): + """ + Takes a tensor of shape (..., m) and returns a tensor of shape (..., n_classes) + """ + *input_shape, n_dim = x.shape + samples = self.posterior(x.reshape(-1, n_dim)).sample(torch.Size((n_samples,))).exp() + return (samples / samples.sum(-1, keepdim=True)).mean(0).reshape(*input_shape, -1) + + +class LatentDirichletClassifier(LatentGP): def __init__(self, train_inputs, train_targets, skew_dims=True, *args, **kwargs): super().__init__(train_inputs, train_targets, skew_dims, *args, **kwargs) diff --git a/src/blop/bayesian/transforms.py b/src/blop/bayesian/transforms.py deleted file mode 100644 index 82bdb9d..0000000 --- a/src/blop/bayesian/transforms.py +++ /dev/null @@ -1,77 +0,0 @@ -from typing import Union - -import botorch -from botorch.acquisition.objective import PosteriorTransform -from botorch.posteriors.gpytorch import GPyTorchPosterior -from botorch.posteriors.posterior_list import PosteriorList -from torch import Tensor - - -def targeting_transform(y, target): - if target == "max": - return y - if target == "min": - return -y - elif not isinstance(target, tuple): - return -(y - target).abs() - else: - return -((y - 0.5 * (target[1] + target[0])).abs() - 0.5 * (target[1] - target[0])).clamp(min=0) - - -class TargetingPosteriorTransform(PosteriorTransform): - r"""An affine posterior transform for scalarizing multi-output posteriors.""" - - scalarize: bool = True - - def __init__(self, weights: Tensor, targets: Tensor) -> None: - r""" - Args: - weights: A one-dimensional tensor with `m` elements representing the - linear weights on the outputs. - offset: An offset to be added to posterior mean. 
- """ - super().__init__() - self.targets = targets - self.register_buffer("weights", weights) - - def sample_transform(self, y): - for i, target in enumerate(self.targets): - y[..., i] = targeting_transform(y[..., i], target) - return y @ self.weights.unsqueeze(-1) - - def mean_transform(self, mean, var): - for i, target in enumerate(self.targets): - mean[..., i] = targeting_transform(mean[..., i], target) - return mean @ self.weights.unsqueeze(-1) - - def variance_transform(self, mean, var): - return var @ self.weights.unsqueeze(-1) - - def evaluate(self, Y: Tensor) -> Tensor: - r"""Evaluate the transform on a set of outcomes. - - Args: - Y: A `batch_shape x q x m`-dim tensor of outcomes. - - Returns: - A `batch_shape x q`-dim tensor of transformed outcomes. - """ - return self.sample_transform(Y) - - def forward(self, posterior: Union[GPyTorchPosterior, PosteriorList]) -> GPyTorchPosterior: - r"""Compute the posterior of the affine transformation. - - Args: - posterior: A posterior with the same number of outputs as the - elements in `self.weights`. - - Returns: - A single-output posterior. - """ - - return botorch.posteriors.transformed.TransformedPosterior( - posterior, - sample_transform=self.sample_transform, - mean_transform=self.mean_transform, - variance_transform=self.variance_transform, - ) diff --git a/src/blop/dofs.py b/src/blop/dofs.py index a78a8f0..af23fcc 100644 --- a/src/blop/dofs.py +++ b/src/blop/dofs.py @@ -105,14 +105,16 @@ def __post_init__(self): if not self.read_only: raise ValueError("You must specify search_domain if the device is not read-only.") else: - self.search_domain = tuple(self.search_domain) if len(self.search_domain) != 2: - raise ValueError("'search_domain' must be a 2-tuple of floats.") + raise ValueError("'search_domain' must have length 2.") + self.search_domain = tuple([float(self.search_domain[0]), float(self.search_domain[1])]) if self.search_domain[0] > self.search_domain[1]: raise ValueError("The lower search bound must be less than the upper search bound.") if self.trust_domain is not None: - self.trust_domain = tuple(self.trust_domain) + if len(self.trust_domain) != 2: + raise ValueError("'trust_domain' must have length 2.") + self.trust_domain = tuple([float(self.trust_domain[0]), float(self.trust_domain[1])]) if not self.read_only: if (self.search_domain[0] < self.trust_domain[0]) or (self.search_domain[1] > self.trust_domain[1]): raise ValueError("Trust domain must be larger than search domain.") diff --git a/src/blop/objectives.py b/src/blop/objectives.py index 97aa1a4..71b7f38 100644 --- a/src/blop/objectives.py +++ b/src/blop/objectives.py @@ -5,7 +5,8 @@ import numpy as np import pandas as pd import torch -from torch.special import erf + +from .utils.functions import approximate_erf DEFAULT_MIN_NOISE_LEVEL = 1e-6 DEFAULT_MAX_NOISE_LEVEL = 1e0 @@ -76,7 +77,7 @@ class Objective: name: str description: str = "" - type: str = "continuous" + type: str = None target: Union[Tuple[float, float], float, str] = "max" log: bool = False weight: float = 1.0 @@ -96,7 +97,7 @@ def __post_init__(self): if self.log and not self.target > 0: return ValueError("'target' must strictly positive if log=True.") - self.use_as_constraint = True if isinstance(self.target, tuple) else False + self.type = "fitness" if self.target in ["min", "max"] else "constraint" @property def _trust_domain(self): @@ -152,7 +153,45 @@ def targeting_constraint(self, x: torch.Tensor) -> torch.Tensor: m = p.mean s = p.variance.sqrt() - return 0.5 * (erf((b - m) / (np.sqrt(2) * 
s)) - erf((a - m) / (np.sqrt(2) * s)))[..., -1] + return 0.5 * (approximate_erf((b - m) / (np.sqrt(2) * s)) - approximate_erf((a - m) / (np.sqrt(2) * s)))[..., -1] + + def fitness_forward(self, y): + f = y + if self.log: + f = np.log(f) + if self.target == "min": + f = -f + return f + + def fitness_inverse(self, f): + y = f + if self.target == "min": + y = -y + if self.log: + y = np.exp(y) + return y + + @property + def is_fitness(self): + return self.target in ["min", "max"] + + def value_prediction(self, X): + p = self.model.posterior(X) + + if self.is_fitness: + return self.fitness_inverse(p.mean) + + if isinstance(self.target, tuple): + return p.mean + + def fitness_prediction(self, X): + p = self.model.posterior(X) + + if self.is_fitness: + return self.fitness_inverse(p.mean) + + if isinstance(self.target, tuple): + return self.targeting_constraint(X).log().clamp(min=-16) class ObjectiveList(Sequence): @@ -197,13 +236,13 @@ def summary(self) -> pd.DataFrame: for attr, dtype in OBJ_FIELD_TYPES.items(): table[attr] = table[attr].astype(dtype) - return table + return table.T def __repr__(self): return self.summary.__repr__() - def __repr_html__(self): - return self.summary.__repr_html__() + def _repr_html_(self): + return self.summary._repr_html_() @property def descriptions(self) -> list: @@ -233,6 +272,13 @@ def weights(self) -> np.array: """ return np.array([obj.weight for obj in self.objectives]) + @property + def is_fitness(self) -> np.array: + """ + Returns a boolean array of which objectives are fitnesses (i.e. have a target of "min" or "max"). + """ + return np.array([obj.target in ["min", "max"] for obj in self.objectives]) + @property def signed_weights(self) -> np.array: """ diff --git a/src/blop/bayesian/plotting.py b/src/blop/plotting.py similarity index 69% rename from src/blop/bayesian/plotting.py rename to src/blop/plotting.py index 568ad6e..7535aa7 100644 --- a/src/blop/bayesian/plotting.py +++ b/src/blop/plotting.py @@ -3,10 +3,11 @@ from matplotlib import pyplot as plt from matplotlib.patches import Patch -from . import acquisition +from .bayesian import acquisition DEFAULT_COLOR_LIST = ["dodgerblue", "tomato", "mediumseagreen", "goldenrod"] -DEFAULT_COLORMAP = "magma" +# Note: with magma, values near 1 are hard to see on a white background. Turbo runs from blue to red and isn't white in the middle. 
+DEFAULT_COLORMAP = "turbo" DEFAULT_SCATTER_SIZE = 16 MAX_TEST_INPUTS = 2**11 @@ -89,77 +90,131 @@ def _plot_objs_many_dofs(agent, axes=(0, 1), shading="nearest", cmap=DEFAULT_COL for obj_index, obj in enumerate(agent.objectives): targets = agent.train_targets(obj.name).squeeze(-1).numpy() - obj_vmin, obj_vmax = np.nanpercentile(targets, q=[1, 99]) + values = obj.fitness_inverse(targets) + + val_vmin, val_vmax = np.quantile(values, q=[0.01, 0.99]) + val_norm = mpl.colors.LogNorm(val_vmin, val_vmax) if obj.log else mpl.colors.Normalize(val_vmin, val_vmax) + + obj_vmin, obj_vmax = np.quantile(targets, q=[0.01, 0.99]) obj_norm = mpl.colors.Normalize(obj_vmin, obj_vmax) - data_ax = agent.obj_axes[obj_index, 0].scatter(x_values, y_values, c=targets, s=size, norm=obj_norm, cmap=cmap) + val_ax = agent.obj_axes[obj_index, 0].scatter(x_values, y_values, c=values, s=size, norm=val_norm, cmap=cmap) # mean and sigma will have shape (*input_shape,) test_posterior = obj.model.posterior(test_inputs) test_mean = test_posterior.mean[..., 0, 0].detach().squeeze().numpy() test_sigma = test_posterior.variance.sqrt()[..., 0, 0].detach().squeeze().numpy() + # test_values = obj.fitness_inverse(test_mean) if obj.is_fitness else test_mean + + test_constraint = None + if not obj.is_fitness: + test_constraint = obj.targeting_constraint(test_inputs).detach().squeeze().numpy() + if gridded: - _ = agent.obj_axes[obj_index, 1].pcolormesh( - test_x, - test_y, - test_mean, - shading=shading, - cmap=cmap, - norm=obj_norm, - ) - sigma_ax = agent.obj_axes[obj_index, 2].pcolormesh( - test_x, - test_y, - test_sigma, - shading=shading, - cmap=cmap, - norm=mpl.colors.LogNorm(), - ) + # _ = agent.obj_axes[obj_index, 1].pcolormesh( + # test_x, + # test_y, + # test_values, + # shading=shading, + # cmap=cmap, + # norm=val_norm, + # ) + if obj.is_fitness: + fitness_ax = agent.obj_axes[obj_index, 1].pcolormesh( + test_x, + test_y, + test_mean, + shading=shading, + cmap=cmap, + norm=obj_norm, + ) + fit_err_ax = agent.obj_axes[obj_index, 2].pcolormesh( + test_x, + test_y, + test_sigma, + shading=shading, + cmap=cmap, + norm=mpl.colors.LogNorm(), + ) + + if test_constraint is not None: + constraint_ax = agent.obj_axes[obj_index, 3].pcolormesh( + test_x, + test_y, + test_constraint, + shading=shading, + cmap=cmap, + # norm=mpl.colors.LogNorm(), + ) else: - _ = agent.obj_axes[obj_index, 1].scatter( - test_x, - test_y, - c=test_mean, - s=size, - norm=obj_norm, - cmap=cmap, - ) - sigma_ax = agent.obj_axes[obj_index, 2].scatter( - test_x, - test_y, - c=test_sigma, - s=size, - cmap=cmap, - norm=mpl.colors.LogNorm(), + # _ = agent.obj_axes[obj_index, 1].scatter( + # test_x, + # test_y, + # c=test_values, + # s=size, + # norm=val_norm, + # cmap=cmap, + # ) + if obj.is_fitness: + fitness_ax = agent.obj_axes[obj_index, 1].scatter( + test_x, + test_y, + c=test_mean, + s=size, + norm=obj_norm, + cmap=cmap, + ) + fit_err_ax = agent.obj_axes[obj_index, 2].scatter( + test_x, + test_y, + c=test_sigma, + s=size, + cmap=cmap, + norm=mpl.colors.LogNorm(), + ) + + if test_constraint is not None: + constraint_ax = agent.obj_axes[obj_index, 3].scatter( + test_x, + test_y, + c=test_constraint, + s=size, + cmap=cmap, + norm=mpl.colors.LogNorm(), + ) + + val_cbar = agent.obj_fig.colorbar(val_ax, ax=agent.obj_axes[obj_index, 0], location="bottom", aspect=32, shrink=0.8) + val_cbar.set_label(f"{obj.units or ''}") + + if obj.is_fitness: + _ = agent.obj_fig.colorbar(fitness_ax, ax=agent.obj_axes[obj_index, 1], location="bottom", aspect=32, shrink=0.8) + _ = 
agent.obj_fig.colorbar(fit_err_ax, ax=agent.obj_axes[obj_index, 2], location="bottom", aspect=32, shrink=0.8) + + # obj_cbar.set_label(f"{obj.label}") + # err_cbar.set_label(f"{obj.label}") + + if test_constraint is not None: + constraint_cbar = agent.obj_fig.colorbar( + constraint_ax, ax=agent.obj_axes[obj_index, 3], location="bottom", aspect=32, shrink=0.8 ) - obj_cbar = agent.obj_fig.colorbar( - data_ax, ax=agent.obj_axes[obj_index, :2], location="bottom", aspect=32, shrink=0.8 - ) - err_cbar = agent.obj_fig.colorbar( - sigma_ax, ax=agent.obj_axes[obj_index, 2], location="bottom", aspect=32, shrink=0.8 - ) - obj_cbar.set_label(obj.label) - err_cbar.set_label(f"{obj.label} error") - - col_names = ["samples", "posterior mean", "posterior std. dev.", "fitness"] - - pad = 5 - - for column_index, ax in enumerate(agent.obj_axes[0]): - ax.annotate( - col_names[column_index], - xy=(0.5, 1), - xytext=(0, pad), - color="k", - xycoords="axes fraction", - textcoords="offset points", - size="large", - ha="center", - va="baseline", - ) + constraint_cbar.set_label(f"{obj.label} constraint") + + col_names = [ + f"{obj.description} samples", + f"pred. {obj.description} fitness", + f"pred. {obj.description} fitness error", + f"{obj.description} constraint", + ] + + pad = 5 + + for column_index, ax in enumerate(agent.obj_axes[obj_index]): + ax.set_title( + col_names[column_index], + ) if len(agent.objectives) > 1: for row_index, ax in enumerate(agent.obj_axes[:, 0]): @@ -370,7 +425,7 @@ def _plot_history(agent, x_key="index", show_all_objs=False): hist_axes[obj_index].plot(x, y, lw=5e-1, c="k") hist_axes[obj_index].set_ylabel(obj.key) - y = agent.scalarized_objectives + y = agent.scalarized_fitnesses cummax_y = np.array([np.nanmax(y[: i + 1]) for i in range(len(y))]) @@ -406,3 +461,25 @@ def inspect_beam(agent, index, border=None): if border is not None: plt.xlim(x_min - border * width_x, x_min + border * width_x) plt.ylim(y_min - border * width_y, y_min + border * width_y) + + +def _plot_pareto_front(agent, obj_indices=(0, 1)): + f_objs = agent.objectives[agent.objectives.type == "fitness"] + (i, j) = obj_indices + + if len(f_objs) < 2: + raise ValueError("Cannot plot Pareto front for agents with fewer than two fitness objectives") + + fig, ax = plt.subplots(1, 1, figsize=(6, 6)) + + y = agent.train_targets()[:, agent.objectives.type == "fitness"] + + front_mask = agent.pareto_front_mask + + ax.scatter(y[front_mask, i], y[front_mask, j], c="b", label="Pareto front") + ax.scatter(y[~front_mask, i], y[~front_mask, j], c="r") + + ax.set_xlabel(f"{f_objs[i].name} fitness") + ax.set_ylabel(f"{f_objs[j].name} fitness") + + ax.legend() diff --git a/src/blop/tests/conftest.py b/src/blop/tests/conftest.py index 99558ea..43196a9 100644 --- a/src/blop/tests/conftest.py +++ b/src/blop/tests/conftest.py @@ -70,7 +70,7 @@ def agent(db): @pytest.fixture(scope="function") -def agent_with_multiple_objs(db): +def mo_agent(db): """ A simple agent minimizing two Himmelblau's functions """ diff --git a/src/blop/tests/test_acq_funcs.py b/src/blop/tests/test_acq_funcs.py index be376b7..2a9d89d 100644 --- a/src/blop/tests/test_acq_funcs.py +++ b/src/blop/tests/test_acq_funcs.py @@ -14,12 +14,12 @@ def test_monte_carlo_acq_funcs_single_objective(agent, RE, acq_func): @pytest.mark.parametrize("acq_func", ["ei", "pi", "em", "ucb"]) -def test_analytic_acq_funcs_multi_objective(agent_with_multiple_objs, RE, acq_func): - RE(agent_with_multiple_objs.learn("qr", n=16)) - RE(agent_with_multiple_objs.learn(acq_func, n=1)) +def 
test_analytic_acq_funcs_multi_objective(mo_agent, RE, acq_func): + RE(mo_agent.learn("qr", n=16)) + RE(mo_agent.learn(acq_func, n=1)) @pytest.mark.parametrize("acq_func", ["qei", "qpi", "qem", "qucb"]) -def test_monte_carlo_acq_funcs_multi_objective(agent_with_multiple_objs, RE, acq_func): - RE(agent_with_multiple_objs.learn("qr", n=16)) - RE(agent_with_multiple_objs.learn(acq_func, n=4)) +def test_monte_carlo_acq_funcs_multi_objective(mo_agent, RE, acq_func): + RE(mo_agent.learn("qr", n=16)) + RE(mo_agent.learn(acq_func, n=4)) diff --git a/src/blop/tests/test_agent.py b/src/blop/tests/test_agent.py index 9789635..a04b432 100644 --- a/src/blop/tests/test_agent.py +++ b/src/blop/tests/test_agent.py @@ -4,6 +4,9 @@ def test_agent(agent, RE): RE(agent.learn("qr", n=4)) + assert all(dof.name in agent.best for dof in agent.dofs) + assert all(obj.name in agent.best for obj in agent.objectives) + def test_forget(agent, RE): RE(agent.learn("qr", n=4)) diff --git a/src/blop/tests/test_pareto.py b/src/blop/tests/test_pareto.py new file mode 100644 index 0000000..ddcc482 --- /dev/null +++ b/src/blop/tests/test_pareto.py @@ -0,0 +1,14 @@ +import pytest + + +@pytest.mark.test_func +def test_pareto(mo_agent, RE): + RE(mo_agent.learn("qr", n=16)) + + mo_agent.plot_pareto_front() + + +@pytest.mark.parametrize("acq_func", ["qnehvi"]) +def test_monte_carlo_pareto_acq_funcs(mo_agent, RE, acq_func): + RE(mo_agent.learn("qr", n=4)) + RE(mo_agent.learn(acq_func, n=2)) diff --git a/src/blop/tests/test_plots.py b/src/blop/tests/test_plots.py index f28758a..e67836c 100644 --- a/src/blop/tests/test_plots.py +++ b/src/blop/tests/test_plots.py @@ -10,13 +10,13 @@ def test_plots(RE, agent): agent.plot_history() -def test_plots_multiple_objs(RE, agent_with_multiple_objs): - RE(agent_with_multiple_objs.learn("qr", n=16)) +def test_plots_multiple_objs(RE, mo_agent): + RE(mo_agent.learn("qr", n=16)) - agent_with_multiple_objs.plot_objectives() - agent_with_multiple_objs.plot_acquisition() - agent_with_multiple_objs.plot_validity() - agent_with_multiple_objs.plot_history() + mo_agent.plot_objectives() + mo_agent.plot_acquisition() + mo_agent.plot_validity() + mo_agent.plot_history() def test_plots_passive_dofs(RE, agent_with_passive_dofs): diff --git a/src/blop/utils/functions.py b/src/blop/utils/functions.py index 0a2e949..74f6f42 100644 --- a/src/blop/utils/functions.py +++ b/src/blop/utils/functions.py @@ -1,4 +1,13 @@ import numpy as np +import torch + + +def approximate_erf(x): + """ + An approximation of erf(x), used to compute the definite integral of the Gaussian PDF. + It is faster and better-conditioned near +/- infinity than the exact erf. + """ + return torch.tanh(1.20278247 * x) def sigmoid(x):
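The dominance test in `Agent.pareto_front_mask` is terse, so here is a minimal, self-contained sketch of the same logic in plain torch; the numbers are made up for illustration, and `y` holds fitnesses in the higher-is-better convention that `fitness_forward` produces:

    import torch

    # (n_points, n_fitnesses) table of fitnesses, where higher is better
    y = torch.tensor([[1.0, 4.0], [2.0, 3.0], [1.5, 3.5], [0.5, 0.5]])

    # dominated[j] is True when some point i is strictly better than j at every fitness
    dominated = (y.unsqueeze(1) > y.unsqueeze(0)).all(axis=-1).any(axis=0)

    # per-point constraint satisfaction, as in agent.evaluated_constraints.all(-1)
    feasible = torch.tensor([True, True, False, True])

    print(~dominated & feasible)  # tensor([ True,  True, False, False])

The last point is dominated by the first and so is excluded; the third is Pareto-efficient but violates a constraint, which is why `plot_pareto_front` would draw both in red.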