From ed3f1d8dd61a55b9383e86290c1a70399fddb9e2 Mon Sep 17 00:00:00 2001 From: Thomas Morris Date: Sat, 15 Jul 2023 11:35:23 -0400 Subject: [PATCH 1/8] specify which dims are latent in the kernel --- .isort.cfg | 2 +- bloptools/bayesian/__init__.py | 73 ++++++++++++------- bloptools/bayesian/kernels.py | 71 +++++++++++------- bloptools/bayesian/models.py | 4 +- bloptools/tasks.py | 2 +- .../tutorials/latent-toroid-dimensions.ipynb | 11 +++ 6 files changed, 105 insertions(+), 58 deletions(-) diff --git a/.isort.cfg b/.isort.cfg index e0926f4..43f177b 100644 --- a/.isort.cfg +++ b/.isort.cfg @@ -1,4 +1,4 @@ [settings] -line_length = 115 +line_length = 125 multi_line_output = 3 include_trailing_comma = True diff --git a/bloptools/bayesian/__init__.py b/bloptools/bayesian/__init__.py index c43e64c..5166a3d 100644 --- a/bloptools/bayesian/__init__.py +++ b/bloptools/bayesian/__init__.py @@ -12,7 +12,7 @@ import torch from matplotlib import pyplot as plt -from .. import utils +from .. import devices, utils from . import acquisition, models mpl.rc("image", cmap="coolwarm") @@ -57,7 +57,7 @@ def __init__( dofs : iterable of ophyd objects The degrees of freedom that the agent can control, which determine the output of the model. bounds : iterable of lower and upper bounds - The bounds on each degree of freedom. This should be an array of shape (n_dof, 2). + The bounds on each degree of freedom. This should be an array of shape (n_dofs, 2). tasks : iterable of tasks The tasks which the agent will try to optimize. acquisition : Bluesky plan generator that takes arguments (dofs, inputs, dets) @@ -82,6 +82,8 @@ def __init__( self.acquisition_plan = kwargs.get("acquisition_plan", default_acquisition_plan) self.digestion = kwargs.get("digestion", default_digestion_plan) + self.decoherence = kwargs.get("decoherence", False) + self.tolerate_acquisition_errors = kwargs.get("tolerate_acquisition_errors", True) self.acquisition = acquisition.Acquisition() @@ -89,16 +91,12 @@ def __init__( self.dets = np.atleast_1d(kwargs.get("detectors", [])) self.passive_dofs = np.atleast_1d(kwargs.get("passive_dofs", [])) + if self.decoherence: + self.passive_dofs = np.append(self.passive_dofs, devices.TimeReadback(name="timestamp")) + for i, task in enumerate(self.tasks): task.index = i - self.n_active_dof = self.active_dofs.size - self.n_passive_dof = self.passive_dofs.size - - self.dofs = np.r_[self.active_dofs, self.passive_dofs] - - self.n_dof = self.n_active_dof + self.n_passive_dof - self.n_tasks = len(self.tasks) self.training_iter = kwargs.get("training_iter", 256) @@ -107,12 +105,14 @@ def __init__( # make some test points for sampling - self.normalized_test_active_inputs = utils.normalized_sobol_sampler(n=MAX_TEST_INPUTS, d=self.n_active_dof) + self.normalized_test_active_inputs = utils.normalized_sobol_sampler( + n=MAX_TEST_INPUTS, d=self.n_active_dofs + ) - n_per_active_dim = int(np.power(MAX_TEST_INPUTS, 1 / self.n_active_dof)) + n_per_active_dim = int(np.power(MAX_TEST_INPUTS, 1 / self.n_active_dofs)) self.normalized_test_active_inputs_grid = np.swapaxes( - np.r_[np.meshgrid(*self.n_active_dof * [np.linspace(0, 1, n_per_active_dim)])], 0, -1 + np.r_[np.meshgrid(*self.n_active_dofs * [np.linspace(0, 1, n_per_active_dim)])], 0, -1 ) self.table = pd.DataFrame() @@ -129,7 +129,23 @@ def active_inputs_sampler(self, n=MAX_TEST_INPUTS): """ Returns $n$ quasi-randomly sampled inputs in the bounded parameter space """ - return self.unnormalize_active_inputs(utils.normalized_sobol_sampler(n, self.n_active_dof)) + return 
self.unnormalize_active_inputs(utils.normalized_sobol_sampler(n, self.n_active_dofs)) + + @property + def dofs(self): + return np.append(self.active_dofs, self.passive_dofs) + + @property + def n_active_dofs(self): + return self.active_dofs.size + + @property + def n_passive_dofs(self): + return self.passive_dofs.size + + @property + def n_dofs(self): + return self.n_active_dofs + self.n_passive_dofs @property def test_active_inputs(self): @@ -155,15 +171,20 @@ def test_active_inputs_grid(self): @property def input_transform(self): - return botorch.models.transforms.input.Normalize(d=self.n_dof) + return botorch.models.transforms.input.Normalize(d=self.n_dofs) def save(self, filepath="./agent_data.h5"): """ Save the sampled inputs and targets of the agent to a file, which can be used to initialize a future agent. """ + with h5py.File(filepath, "w") as f: - f.create_dataset("inputs", data=self.inputs) + for task in self.tasks: + f.create_group(task.name) + for key, value in task.regressor.state_dict().items(): + f[task.name].create_dataset(key, data=value) + self.table.to_hdf(filepath, key="table") def forget(self, index): @@ -175,7 +196,7 @@ def sampler(self, n): """ min_power_of_two = 2 ** int(np.ceil(np.log(n) / np.log(2))) subset = np.random.choice(min_power_of_two, size=n, replace=False) - return sp.stats.qmc.Sobol(d=self.n_active_dof, scramble=True).random(n=min_power_of_two)[subset] + return sp.stats.qmc.Sobol(d=self.n_active_dofs, scramble=True).random(n=min_power_of_two)[subset] def initialize( self, @@ -479,7 +500,7 @@ def test_inputs(self): @property def test_inputs_grid(self): test_passive_inputs_grid = self.passive_inputs.values[-1] * np.ones( - (*self.test_active_inputs_grid.shape[:-1], self.n_passive_dof) + (*self.test_active_inputs_grid.shape[:-1], self.n_passive_dofs) ) return np.concatenate([self.test_active_inputs_grid, test_passive_inputs_grid], axis=-1) @@ -541,7 +562,7 @@ def passive_dof_bounds(self): @property def dof_is_active_mask(self): - return np.r_[np.ones(self.n_active_dof), np.zeros(self.n_passive_dof)].astype(bool) + return np.r_[np.ones(self.n_active_dofs), np.zeros(self.n_passive_dofs)].astype(bool) @property def dof_bounds(self): @@ -604,21 +625,21 @@ def go_to_best_sum_of_tasks(self): yield from self.go_to(self.best_sum_of_tasks_inputs) def plot_tasks(self, **kwargs): - if self.n_active_dof == 1: + if self.n_active_dofs == 1: self._plot_tasks_one_dof(**kwargs) else: self._plot_tasks_many_dofs(**kwargs) def plot_feasibility(self, **kwargs): - if self.n_active_dof == 1: + if self.n_active_dofs == 1: self._plot_feas_one_dof(**kwargs) else: self._plot_feas_many_dofs(**kwargs) def plot_acquisition(self, **kwargs): - if self.n_active_dof == 1: + if self.n_active_dofs == 1: self._plot_acq_one_dof(**kwargs) else: @@ -629,7 +650,7 @@ def _plot_feas_one_dof(self, size=32): self.class_ax.scatter(self.inputs.values, self.all_targets_valid.astype(int), s=size) - x = torch.tensor(self.test_inputs_grid.reshape(-1, self.n_dof)).double() + x = torch.tensor(self.test_inputs_grid.reshape(-1, self.n_dofs)).double() log_prob = self.dirichlet_classifier.log_prob(x).detach().numpy().reshape(self.test_inputs_grid.shape[:-1]) self.class_ax.plot(self.test_inputs_grid.ravel(), np.exp(log_prob)) @@ -638,7 +659,7 @@ def _plot_feas_one_dof(self, size=32): def _plot_feas_many_dofs(self, axes=[0, 1], shading="nearest", cmap=DEFAULT_COLORMAP, size=32, gridded=None): if gridded is None: - gridded = self.n_dof == 2 + gridded = self.n_dofs == 2 self.class_fig, self.class_axes = 
plt.subplots( 1, 2, figsize=(8, 4), sharex=True, sharey=True, constrained_layout=True @@ -653,7 +674,7 @@ def _plot_feas_many_dofs(self, axes=[0, 1], shading="nearest", cmap=DEFAULT_COLO ) if gridded: - x = torch.tensor(self.test_inputs_grid.reshape(-1, self.n_dof)).double() + x = torch.tensor(self.test_inputs_grid.reshape(-1, self.n_dofs)).double() log_prob = ( self.dirichlet_classifier.log_prob(x).detach().numpy().reshape(self.test_inputs_grid.shape[:-1]) ) @@ -718,7 +739,7 @@ def _plot_tasks_one_dof(self, size=32, lw=1e0): def _plot_tasks_many_dofs(self, axes=[0, 1], shading="nearest", cmap=DEFAULT_COLORMAP, gridded=None, size=32): if gridded is None: - gridded = self.n_dof == 2 + gridded = self.n_dofs == 2 self.task_fig, self.task_axes = plt.subplots( self.n_tasks, @@ -832,7 +853,7 @@ def _plot_acq_many_dofs( ) if gridded is None: - gridded = self.n_active_dof == 2 + gridded = self.n_active_dofs == 2 self.acq_axes = np.atleast_1d(self.acq_axes) self.acq_fig.suptitle(f"(x,y)=({self.dofs[axes[0]].name},{self.dofs[axes[1]].name})") diff --git a/bloptools/bayesian/kernels.py b/bloptools/bayesian/kernels.py index 2fe6f0b..a5bc98f 100644 --- a/bloptools/bayesian/kernels.py +++ b/bloptools/bayesian/kernels.py @@ -11,22 +11,33 @@ class LatentKernel(gpytorch.kernels.Kernel): def __init__( self, num_inputs=1, - off_diag=True, + skew_dims=True, diag_prior=True, - scale_kernel=True, + scale_output=True, **kwargs, ): super(LatentKernel, self).__init__() self.num_inputs = num_inputs - self.n_off_diag = int(num_inputs * (num_inputs - 1) / 2) - self.off_diag = off_diag - self.scale_kernel = scale_kernel + # self.active_dimensions = active_dimensions if active_dimensions is not None else np.arange(self.num_inputs) + + if type(skew_dims) is bool: + if skew_dims: + self.skew_dims = torch.arange(self.num_inputs) + if skew_dims: + self.skew_dims = torch.tensor([]) + elif type(skew_dims) is torch.Tensor: + self.skew_dims = skew_dims + else: + raise ValueError() + + self.off_diag = True if skew_dims else False + self.scale_output = scale_output self.nu = kwargs.get("nu", 1.5) - # kernel_scale_constraint = gpytorch.constraints.Positive() + # output_scale_constraint = gpytorch.constraints.Positive() diag_entries_constraint = gpytorch.constraints.Positive() # gpytorch.constraints.Interval(5e-1, 1e2) skew_entries_constraint = gpytorch.constraints.Interval(-1e0, 1e0) @@ -57,24 +68,32 @@ def __init__( ) self.register_constraint("raw_skew_entries", skew_entries_constraint) - if self.scale_kernel: - kernel_scale_constraint = gpytorch.constraints.Positive() - kernel_scale_prior = gpytorch.priors.GammaPrior(concentration=2, rate=0.15) + if self.scale_output: + output_scale_constraint = gpytorch.constraints.Positive() + output_scale_prior = gpytorch.priors.GammaPrior(concentration=2, rate=0.15) self.register_parameter( - name="raw_kernel_scale", + name="raw_output_scale", parameter=torch.nn.Parameter(torch.ones(1)), ) - self.register_constraint("raw_kernel_scale", constraint=kernel_scale_constraint) + self.register_constraint("raw_output_scale", constraint=output_scale_constraint) self.register_prior( - name="kernel_scale_prior", - prior=kernel_scale_prior, - param_or_closure=lambda m: m.kernel_scale, - setting_closure=lambda m, v: m._set_kernel_scale(v), + name="output_scale_prior", + prior=output_scale_prior, + param_or_closure=lambda m: m.output_scale, + setting_closure=lambda m, v: m._set_output_scale(v), ) + @property + def n_skew_dims(self): + return len(self.skew_dims) + + @property + def n_skew_entries(self): + 
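 # (a d-by-d skew-symmetric matrix has d * (d - 1) / 2 independent entries above the diagonal)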
return int(self.n_skew_dims * (self.n_skew_dims - 1) / 2) + @property def diag_entries(self): return self.raw_diag_entries_constraint.transform(self.raw_diag_entries) @@ -84,8 +103,8 @@ def skew_entries(self): return self.raw_skew_entries_constraint.transform(self.raw_skew_entries) @property - def kernel_scale(self): - return self.raw_kernel_scale_constraint.transform(self.raw_kernel_scale) + def output_scale(self): + return self.raw_output_scale_constraint.transform(self.raw_output_scale) @diag_entries.setter def diag_entries(self, value): @@ -95,9 +114,9 @@ def diag_entries(self, value): def skew_entries(self, value): self._set_skew_entries(value) - @kernel_scale.setter - def kernel_scale(self, value): - self._set_kernel_scale(value) + @output_scale.setter + def output_scale(self, value): + self._set_output_scale(value) def _set_diag_entries(self, value): if not torch.is_tensor(value): @@ -109,14 +128,10 @@ def _set_skew_entries(self, value): value = torch.as_tensor(value).to(self.raw_skew_entries) self.initialize(raw_skew_entries=self.raw_skew_entries_constraint.inverse_transform(value)) - def _set_kernel_scale(self, value): + def _set_output_scale(self, value): if not torch.is_tensor(value): - value = torch.as_tensor(value).to(self.raw_kernel_scale) - self.initialize(raw_kernel_scale=self.raw_kernel_scale_constraint.inverse_transform(value)) - - @property - def output_scale(self): - return self.kernel_scale.sqrt() + value = torch.as_tensor(value).to(self.raw_output_scale) + self.initialize(raw_output_scale=self.raw_output_scale_constraint.inverse_transform(value)) @property def latent_dimensions(self): @@ -145,4 +160,4 @@ def forward(self, x1, x2, diag=False, **params): distance = self.covar_dist(trans_x1, trans_x2, diag=diag, **params) - return self.kernel_scale * (1 + distance) * torch.exp(-distance) + return (self.output_scale if self.scale_output else 1.0) * (1 + distance) * torch.exp(-distance) diff --git a/bloptools/bayesian/models.py b/bloptools/bayesian/models.py index 21ebc36..7ac1377 100644 --- a/bloptools/bayesian/models.py +++ b/bloptools/bayesian/models.py @@ -17,7 +17,7 @@ def __init__(self, train_inputs, train_targets, *args, **kwargs): num_outputs=train_targets.shape[-1], off_diag=True, diag_prior=True, - scale_output=True, + scale=True, **kwargs ) @@ -38,7 +38,7 @@ def __init__(self, train_inputs, train_targets, *args, **kwargs): num_outputs=train_targets.shape[-1], off_diag=True, diag_prior=True, - scale_output=True, + scale=True, **kwargs ) diff --git a/bloptools/tasks.py b/bloptools/tasks.py index 61ce47b..d520a1e 100644 --- a/bloptools/tasks.py +++ b/bloptools/tasks.py @@ -5,7 +5,7 @@ class Task: def __init__(self, key, kind="max", name=None, transform=None, **kwargs): self.key = key self.kind = kind - self.name = name if name is not None else f"{kind}_{key}_fitness" + self.name = name if name is not None else f"{key}_fitness" self.transform = transform if transform is not None else lambda x: x self.weight = 1.0 diff --git a/docs/source/tutorials/latent-toroid-dimensions.ipynb b/docs/source/tutorials/latent-toroid-dimensions.ipynb index d6ffe16..3f8d5c1 100644 --- a/docs/source/tutorials/latent-toroid-dimensions.ipynb +++ b/docs/source/tutorials/latent-toroid-dimensions.ipynb @@ -27,6 +27,17 @@ "toroid_bounds = np.array([[-1e-3, 1e-3], [-4e-1, +4e-1]])" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "7394741f", + "metadata": {}, + "outputs": [], + "source": [ + "for task in agent.tasks:\n", + " print(task.name)" + ] + }, { "cell_type": "code", 
"execution_count": null, From 310a8b017756e812e57b64577e25ecf3c505a6d6 Mon Sep 17 00:00:00 2001 From: Thomas Morris Date: Mon, 17 Jul 2023 12:27:23 -0400 Subject: [PATCH 2/8] cleaner output --- bloptools/bayesian/__init__.py | 227 ++++++++++++++---- bloptools/bayesian/kernels.py | 145 +++++------ bloptools/bayesian/models.py | 10 +- bloptools/devices.py | 24 +- bloptools/experiments/sirepo/tes.py | 19 +- bloptools/test_functions.py | 13 + .../tutorials/constrained-himmelblau.ipynb | 5 +- docs/source/tutorials/introduction.ipynb | 21 ++ examples/prepare_bluesky.py | 5 - examples/prepare_tes_shadow.py | 13 +- 10 files changed, 345 insertions(+), 137 deletions(-) diff --git a/bloptools/bayesian/__init__.py b/bloptools/bayesian/__init__.py index 5166a3d..f5df832 100644 --- a/bloptools/bayesian/__init__.py +++ b/bloptools/bayesian/__init__.py @@ -1,4 +1,6 @@ import logging +import warnings +from collections import OrderedDict import bluesky.plan_stubs as bps import bluesky.plans as bp # noqa F401 @@ -11,10 +13,13 @@ import scipy as sp import torch from matplotlib import pyplot as plt +from matplotlib.patches import Patch -from .. import devices, utils +from .. import utils from . import acquisition, models +warnings.filterwarnings("ignore", category=botorch.exceptions.warnings.InputDataWarning) + mpl.rc("image", cmap="coolwarm") DEFAULT_COLOR_LIST = ["dodgerblue", "tomato", "mediumseagreen", "goldenrod"] @@ -39,6 +44,22 @@ def default_digestion_plan(db, uid): MAX_TEST_INPUTS = 2**11 +AVAILABLE_ACQFS = { + "expected_mean": { + "identifiers": ["em", "expected_mean"], + }, + "expected_improvement": { + "identifiers": ["ei", "expected_improvement"], + }, + "probability_of_improvement": { + "identifiers": ["pi", "probability_of_improvement"], + }, + "upper_confidence_bound": { + "identifiers": ["ucb", "upper_confidence_bound"], + "default_args": {"beta": 4}, + }, +} + class Agent: def __init__( @@ -70,7 +91,9 @@ def __init__( for dof in active_dofs: dof.kind = "hinted" - self.active_dofs = np.atleast_1d(active_dofs) + self.active_dofs = list(np.atleast_1d(active_dofs)) + self.passive_dofs = list(np.atleast_1d(kwargs.get("passive_dofs", []))) + self.active_dof_bounds = np.atleast_2d(active_dof_bounds).astype(float) self.tasks = np.atleast_1d(tasks) self.db = db @@ -89,10 +112,9 @@ def __init__( self.acquisition = acquisition.Acquisition() self.dets = np.atleast_1d(kwargs.get("detectors", [])) - self.passive_dofs = np.atleast_1d(kwargs.get("passive_dofs", [])) - if self.decoherence: - self.passive_dofs = np.append(self.passive_dofs, devices.TimeReadback(name="timestamp")) + # if self.decoherence: + # self.passive_dofs = np.append(self.passive_dofs, devices.TimeReadback(name="timestamp")) for i, task in enumerate(self.tasks): task.index = i @@ -101,8 +123,6 @@ def __init__( self.training_iter = kwargs.get("training_iter", 256) - # self.test_normalized_active_inputs = self.sampler(n=self.MAX_TEST_INPUTS) - # make some test points for sampling self.normalized_test_active_inputs = utils.normalized_sobol_sampler( @@ -118,6 +138,9 @@ def __init__( self.table = pd.DataFrame() self._initialized = False + self._train_models = True + + self.hypers = None def normalize_active_inputs(self, x): return (x - self.active_dof_bounds.min(axis=1)) / self.active_dof_bounds.ptp(axis=1) @@ -137,11 +160,11 @@ def dofs(self): @property def n_active_dofs(self): - return self.active_dofs.size + return len(self.active_dofs) @property def n_passive_dofs(self): - return self.passive_dofs.size + return len(self.passive_dofs) 
@property def n_dofs(self): @@ -169,21 +192,29 @@ def test_active_inputs_grid(self): # d=self.n_dof, coefficient=coefficient, offset=offset # ) + # @property + # def input_transform(self): + # return botorch.models.transforms.input.Normalize(d=self.n_dofs) + @property def input_transform(self): - return botorch.models.transforms.input.Normalize(d=self.n_dofs) + coefficient = torch.tensor(self.dof_bounds.ptp(axis=1)).unsqueeze(0) + offset = torch.tensor(self.dof_bounds.min(axis=1)).unsqueeze(0) + return botorch.models.transforms.input.AffineInputTransform( + d=self.n_dofs, coefficient=coefficient, offset=offset + ) - def save(self, filepath="./agent_data.h5"): + def save_data(self, filepath="./agent_data.h5"): """ Save the sampled inputs and targets of the agent to a file, which can be used to initialize a future agent. """ - with h5py.File(filepath, "w") as f: - for task in self.tasks: - f.create_group(task.name) - for key, value in task.regressor.state_dict().items(): - f[task.name].create_dataset(key, data=value) + # with h5py.File(filepath, "w") as f: + # for task in self.tasks: + # f.create_group(task.name) + # for key, value in task.regressor.state_dict().items(): + # f[task.name].create_dataset(key, data=value) self.table.to_hdf(filepath, key="table") @@ -198,11 +229,34 @@ def sampler(self, n): subset = np.random.choice(min_power_of_two, size=n, replace=False) return sp.stats.qmc.Sobol(d=self.n_active_dofs, scramble=True).random(n=min_power_of_two)[subset] + def save_hypers(self, filepath): + with h5py.File(filepath, "w") as f: + for task in self.tasks: + f.create_group(task.name) + for key, value in task.regressor.state_dict().items(): + f[task.name].create_dataset(key, data=value) + f.create_group("classifier") + for key, value in self.classifier.state_dict().items(): + f["classifier"].create_dataset(key, data=value) + + @staticmethod + def read_hypers(filepath): + state_dicts = {} + with h5py.File(filepath, "r") as f: + print(f.keys()) + for model_name in f.keys(): + state_dicts[model_name] = OrderedDict() + for key, value in f[model_name].items(): + state_dicts[model_name][key] = torch.tensor(np.atleast_1d(value[()])) + return state_dicts + def initialize( self, filepath=None, acqf=None, n_init=4, + decoherence=False, + hypers=None, ): """ An initialization plan for the agent. @@ -214,8 +268,21 @@ def initialize( if self.initialization is not None: yield from self.initialization() + if hypers is not None: + self.hypers = self.read_hypers(hypers) + if filepath is not None: - self.tell(new_table=pd.read_hdf(filepath, key="table")) + table = pd.read_hdf(filepath, key="table") + + # if decoherence == "batch": + # if "training_batch" in self.table.columns: + # current_training_batch = table.training_batch.max() + 1 + # else: + # table.loc[:, "training_batch"] = 1. + # current_training_batch = 2. 
+ # self.passive_dofs.append(devices.ConstantReadback(name="training_batch", constant=current_training_batch)) + + self.tell(new_table=table) # now let's get bayesian elif acqf in ["qr"]: @@ -255,6 +322,8 @@ def tell(self, new_table=None, append=True, **kwargs): self.all_targets_valid = ~np.isnan(self.targets).any(axis=1) + skew_dims = [tuple(np.arange(self.n_active_dofs))] + for task in self.tasks: task.targets = self.targets.loc[:, task.name] # @@ -263,11 +332,14 @@ def tell(self, new_table=None, append=True, **kwargs): # task.normalized_targets = self.normalized_targets.loc[:, task.name] - task.feasibility = self.all_targets_valid.astype(int) + task.feasibility = self.feasible.loc[:, task.name] + + if not task.feasibility.sum() >= 2: + raise ValueError("There must be at least two feasible data points per task!") - train_inputs = torch.tensor(self.inputs.loc[task.feasibility == 1].values).double().unsqueeze(0) + train_inputs = torch.tensor(self.inputs.loc[task.feasibility].values).double().unsqueeze(0) train_targets = ( - torch.tensor(task.targets.loc[task.feasibility == 1].values).double().unsqueeze(0).unsqueeze(-1) + torch.tensor(task.targets.loc[task.feasibility].values).double().unsqueeze(0).unsqueeze(-1) ) if train_inputs.ndim == 1: @@ -286,6 +358,8 @@ def tell(self, new_table=None, append=True, **kwargs): train_inputs=train_inputs, train_targets=train_targets, likelihood=likelihood, + skew_dims=skew_dims, + batch_dimension=self.batch_dimension, input_transform=self.input_transform, outcome_transform=botorch.models.transforms.outcome.Standardize(m=1, batch_shape=torch.Size((1,))), ).double() @@ -307,22 +381,28 @@ def tell(self, new_table=None, append=True, **kwargs): torch.as_tensor(self.all_targets_valid).long(), learn_additional_noise=True ).double() - self.dirichlet_classifier = models.LatentDirichletClassifier( + self.classifier = models.LatentDirichletClassifier( train_inputs=torch.tensor(self.inputs.values).double(), train_targets=dirichlet_likelihood.transformed_targets.transpose(-1, -2).double(), + skew_dims=skew_dims, + batch_dimension=self.batch_dimension, likelihood=dirichlet_likelihood, input_transform=self.input_transform, ).double() - self.dirichlet_classifier_mll = gpytorch.mlls.ExactMarginalLogLikelihood( - self.dirichlet_classifier.likelihood, self.dirichlet_classifier - ) + self.classifier_mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.classifier.likelihood, self.classifier) self.feas_model = botorch.models.deterministic.GenericDeterministicModel( - f=lambda X: -self.dirichlet_classifier.log_prob(X).square() + f=lambda X: -self.classifier.log_prob(X).square() ) - self.fit_models() + if self.hypers is None: + print("TRAINING") + self.train_models() + else: + for task in self.tasks: + task.regressor.load_state_dict(self.hypers[task.name]) + self.classifier.load_state_dict(self.hypers["classifier"]) self.targets_model = botorch.models.model_list_gp_regression.ModelListGP( *[task.regressor for task in self.tasks] @@ -330,10 +410,10 @@ def tell(self, new_table=None, append=True, **kwargs): self.task_model = botorch.models.model.ModelList(*[task.regressor for task in self.tasks], self.feas_model) - def fit_models(self, **kwargs): + def train_models(self, **kwargs): for task in self.tasks: botorch.fit.fit_gpytorch_mll(task.regressor_mll, **kwargs) - botorch.fit.fit_gpytorch_mll(self.dirichlet_classifier_mll, **kwargs) + botorch.fit.fit_gpytorch_mll(self.classifier_mll, **kwargs) def get_acquisition_function(self, acqf_name="ei", return_metadata=False, acqf_args={}, 
**kwargs): if not self._initialized: @@ -341,7 +421,7 @@ def get_acquisition_function(self, acqf_name="ei", return_metadata=False, acqf_a f'Can\'t construct acquisition function "{acqf_name}" (the agent is not initialized!)' ) - if acqf_name.lower() == "ei": + if acqf_name.lower() in AVAILABLE_ACQFS["expected_improvement"]["identifiers"]: acqf = botorch.acquisition.analytic.LogExpectedImprovement( self.task_model, best_f=self.best_sum_of_tasks, @@ -350,7 +430,7 @@ def get_acquisition_function(self, acqf_name="ei", return_metadata=False, acqf_a ) acqf_meta = {"name": "expected improvement", "args": {}} - elif acqf_name.lower() == "pi": + elif acqf_name.lower() in AVAILABLE_ACQFS["probability_of_improvement"]["identifiers"]: acqf = botorch.acquisition.analytic.LogProbabilityOfImprovement( self.task_model, best_f=self.best_sum_of_tasks, @@ -359,8 +439,17 @@ def get_acquisition_function(self, acqf_name="ei", return_metadata=False, acqf_a ) acqf_meta = {"name": "probability of improvement", "args": {}} - elif acqf_name.lower() == "ucb": - beta = acqf_args.get("beta", 0.1) + elif acqf_name.lower() in AVAILABLE_ACQFS["expected_mean"]["identifiers"]: + acqf = botorch.acquisition.analytic.UpperConfidenceBound( + self.task_model, + beta=0, + posterior_transform=self.task_scalarization, + **kwargs, + ) + acqf_meta = {"name": "expected mean"} + + elif acqf_name.lower() in AVAILABLE_ACQFS["upper_confidence_bound"]["identifiers"]: + beta = AVAILABLE_ACQFS["upper_confidence_bound"]["default_args"]["beta"] acqf = botorch.acquisition.analytic.UpperConfidenceBound( self.task_model, beta=beta, @@ -490,16 +579,18 @@ def normalize_targets(self, targets): def unnormalize_targets(self, targets): return targets * self.targets_scale + self.targets_mean + @property + def batch_dimension(self): + return self.dof_names.index("training_batch") if "training_batch" in self.dof_names else None + @property def test_inputs(self): - test_passive_inputs = ( - self.passive_inputs.values[-1][None] * np.ones(len(self.test_active_inputs))[..., None] - ) + test_passive_inputs = self.read_passive_dofs[None] * np.ones(len(self.test_active_inputs))[..., None] return np.concatenate([self.test_active_inputs, test_passive_inputs], axis=-1) @property def test_inputs_grid(self): - test_passive_inputs_grid = self.passive_inputs.values[-1] * np.ones( + test_passive_inputs_grid = self.read_passive_dofs * np.ones( (*self.test_active_inputs_grid.shape[:-1], self.n_passive_dofs) ) return np.concatenate([self.test_active_inputs_grid, test_passive_inputs_grid], axis=-1) @@ -557,8 +648,8 @@ def latest_passive_dof_values(self): def passive_dof_bounds(self): # food for thought: should this be the current values, or the latest recorded values? # the former leads to weird extrapolation (especially for time), and the latter to some latency. 
- # let's go with the first way for now - return np.outer(self.latest_passive_dof_values, [1.0, 1.0]) + # let's go with the second way for now + return np.outer(self.read_passive_dofs, [1.0, 1.0]) @property def dof_is_active_mask(self): @@ -651,7 +742,7 @@ def _plot_feas_one_dof(self, size=32): self.class_ax.scatter(self.inputs.values, self.all_targets_valid.astype(int), s=size) x = torch.tensor(self.test_inputs_grid.reshape(-1, self.n_dofs)).double() - log_prob = self.dirichlet_classifier.log_prob(x).detach().numpy().reshape(self.test_inputs_grid.shape[:-1]) + log_prob = self.classifier.log_prob(x).detach().numpy().reshape(self.test_inputs_grid.shape[:-1]) self.class_ax.plot(self.test_inputs_grid.ravel(), np.exp(log_prob)) @@ -675,9 +766,7 @@ def _plot_feas_many_dofs(self, axes=[0, 1], shading="nearest", cmap=DEFAULT_COLO if gridded: x = torch.tensor(self.test_inputs_grid.reshape(-1, self.n_dofs)).double() - log_prob = ( - self.dirichlet_classifier.log_prob(x).detach().numpy().reshape(self.test_inputs_grid.shape[:-1]) - ) + log_prob = self.classifier.log_prob(x).detach().numpy().reshape(self.test_inputs_grid.shape[:-1]) self.class_axes[1].pcolormesh( *np.swapaxes(self.test_inputs_grid, 0, -1), @@ -690,7 +779,7 @@ def _plot_feas_many_dofs(self, axes=[0, 1], shading="nearest", cmap=DEFAULT_COLO else: x = torch.tensor(self.test_inputs).double() - log_prob = self.dirichlet_classifier.log_prob(x).detach().numpy() + log_prob = self.classifier.log_prob(x).detach().numpy() self.class_axes[1].scatter( *x.detach().numpy().T[axes], s=size, c=np.exp(log_prob), vmin=0, vmax=1, cmap=cmap @@ -871,7 +960,7 @@ def _plot_acq_many_dofs( self.acq_axes[iacqf].set_title(acqf_meta["name"]) obj_ax = self.acq_axes[iacqf].pcolormesh( - *np.swapaxes(self.test_inputs_grid, 0, -1), + *np.swapaxes(self.test_inputs_grid, 0, -1)[axes], obj.detach().numpy().reshape(grid_shape).T, shading=shading, cmap=cmap, @@ -917,3 +1006,53 @@ def inspect_beam(self, index, border=None): if border is not None: plt.xlim(x_min - border * width_x, x_min + border * width_x) plt.ylim(y_min - border * width_y, y_min + border * width_y) + + def plot_history(self, x_key="index", show_all_tasks=False): + x = getattr(self.table, x_key).values + + num_task_plots = 1 + if show_all_tasks: + num_task_plots = self.n_tasks + 1 + + self.n_tasks + 1 if self.n_tasks > 1 else 1 + + hist_fig, hist_axes = plt.subplots( + num_task_plots, 1, figsize=(6, 4 * num_task_plots), sharex=True, constrained_layout=True, dpi=200 + ) + hist_axes = np.atleast_1d(hist_axes) + + unique_strategies, acqf_index, acqf_inverse = np.unique( + self.table.acqf, return_index=True, return_inverse=True + ) + + sample_colors = np.array(DEFAULT_COLOR_LIST)[acqf_inverse] + + if show_all_tasks: + for itask, task in enumerate(self.tasks): + y = task.targets.values + hist_axes[itask].scatter(x, y, c=sample_colors) + hist_axes[itask].plot(x, y, lw=5e-1, c="k") + hist_axes[itask].set_ylabel(task.name) + + y = self.table.total_fitness + + cummax_y = np.array([np.nanmax(y[: i + 1]) for i in range(len(y))]) + + hist_axes[-1].scatter(x, y, c=sample_colors) + hist_axes[-1].plot(x, y, lw=5e-1, c="k") + + hist_axes[-1].plot(x, cummax_y, lw=5e-1, c="k", ls=":") + + hist_axes[-1].set_ylabel("total_fitness") + hist_axes[-1].set_xlabel(x_key) + + handles = [] + for i_acqf, acqf in enumerate(unique_strategies): + # i_acqf = np.argsort(acqf_index)[i_handle] + handles.append(Patch(color=DEFAULT_COLOR_LIST[i_acqf], label=acqf)) + legend = hist_axes[0].legend(handles=handles, fontsize=8) + 
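 # one legend patch per unique acquisition function, colored to match the scattered samples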
legend.set_title("acquisition function") + + # plot_history(agent, x_key="time") + + # plt.savefig("bo-history.pdf", dpi=256) diff --git a/bloptools/bayesian/kernels.py b/bloptools/bayesian/kernels.py index a5bc98f..e8299df 100644 --- a/bloptools/bayesian/kernels.py +++ b/bloptools/bayesian/kernels.py @@ -5,8 +5,8 @@ class LatentKernel(gpytorch.kernels.Kernel): is_stationary = True - num_outputs = 1 + batch_inverse_lengthscale = 1e6 def __init__( self, @@ -19,39 +19,50 @@ def __init__( super(LatentKernel, self).__init__() self.num_inputs = num_inputs + self.scale_output = scale_output - # self.active_dimensions = active_dimensions if active_dimensions is not None else np.arange(self.num_inputs) + self.nu = kwargs.get("nu", 1.5) + self.batch_dimension = kwargs.get("batch_dimension", None) if type(skew_dims) is bool: if skew_dims: - self.skew_dims = torch.arange(self.num_inputs) - if skew_dims: - self.skew_dims = torch.tensor([]) - elif type(skew_dims) is torch.Tensor: - self.skew_dims = skew_dims + self.skew_dims = [torch.arange(self.num_inputs)] + else: + self.skew_dims = [torch.arange(0)] + elif hasattr(skew_dims, "__iter__"): + self.skew_dims = [torch.tensor(np.atleast_1d(skew_group)) for skew_group in skew_dims] else: - raise ValueError() - - self.off_diag = True if skew_dims else False - self.scale_output = scale_output - - self.nu = kwargs.get("nu", 1.5) - - # output_scale_constraint = gpytorch.constraints.Positive() - diag_entries_constraint = gpytorch.constraints.Positive() # gpytorch.constraints.Interval(5e-1, 1e2) - skew_entries_constraint = gpytorch.constraints.Interval(-1e0, 1e0) + raise ValueError('arg "skew_dims" must be True, False, or an iterable of tuples of ints.') + + # if not all([len(skew_group) >= 2 for skew_group in self.skew_dims]): + # raise ValueError("must have at least two dims per skew group") + skewed_dims = [dim for skew_group in self.skew_dims for dim in skew_group] + if not len(set(skewed_dims)) == len(skewed_dims): + raise ValueError("values in skew_dims must be unique") + if not max(skewed_dims) < self.num_inputs: + raise ValueError("invalud dimension index in skew_dims") + + skew_group_submatrix_indices = [] + for skew_group in self.skew_dims: + i, j = skew_group[torch.triu_indices(len(skew_group), len(skew_group), 1)].unsqueeze(1) + skew_group_submatrix_indices.append(torch.cat((i, j), dim=0)) + + self.skew_matrix_indices = ( + tuple(torch.cat(skew_group_submatrix_indices, dim=1)) + if len(skew_group_submatrix_indices) > 0 + else tuple([[], []]) + ) - # diag_entries_initial = np.ones() - # np.sqrt(diag_entries_constraint.lower_bound * diag_entries_constraint.upper_bound) - raw_diag_entries_initial = diag_entries_constraint.inverse_transform(torch.tensor(2)) + self.n_skew_entries = len(self.skew_matrix_indices[0]) - self.register_parameter( - name="raw_diag_entries", - parameter=torch.nn.Parameter( - raw_diag_entries_initial * torch.ones(self.num_outputs, self.num_inputs).double() - ), + diag_entries_constraint = gpytorch.constraints.Positive() + raw_diag_entries_initial = ( + diag_entries_constraint.inverse_transform(torch.tensor(1e-1)) + * torch.ones(self.num_outputs, self.num_inputs).double() ) - self.register_constraint("raw_diag_entries", constraint=diag_entries_constraint) + + self.register_parameter(name="raw_diag_entries", parameter=torch.nn.Parameter(raw_diag_entries_initial)) + self.register_constraint(param_name="raw_diag_entries", constraint=diag_entries_constraint) if diag_prior: self.register_prior( @@ -61,39 +72,30 @@ def __init__( 
setting_closure=lambda m, v: m._set_diag_entries(v), ) - if self.off_diag: - self.register_parameter( - name="raw_skew_entries", - parameter=torch.nn.Parameter(torch.zeros(self.num_outputs, self.n_off_diag).double()), - ) - self.register_constraint("raw_skew_entries", skew_entries_constraint) + if self.n_skew_entries > 0: + skew_entries_constraint = gpytorch.constraints.Interval(-1e0, 1e0) + skew_entries_initial = torch.zeros((self.num_outputs, self.n_skew_entries), dtype=torch.float64) + self.register_parameter(name="raw_skew_entries", parameter=torch.nn.Parameter(skew_entries_initial)) + self.register_constraint(param_name="raw_skew_entries", constraint=skew_entries_constraint) if self.scale_output: - output_scale_constraint = gpytorch.constraints.Positive() - output_scale_prior = gpytorch.priors.GammaPrior(concentration=2, rate=0.15) + outputscale_constraint = gpytorch.constraints.Positive() + outputscale_prior = gpytorch.priors.GammaPrior(concentration=2, rate=0.15) self.register_parameter( - name="raw_output_scale", + name="raw_outputscale", parameter=torch.nn.Parameter(torch.ones(1)), ) - self.register_constraint("raw_output_scale", constraint=output_scale_constraint) + self.register_constraint("raw_outputscale", constraint=outputscale_constraint) self.register_prior( - name="output_scale_prior", - prior=output_scale_prior, - param_or_closure=lambda m: m.output_scale, - setting_closure=lambda m, v: m._set_output_scale(v), + name="outputscale_prior", + prior=outputscale_prior, + param_or_closure=lambda m: m.outputscale, + setting_closure=lambda m, v: m._set_outputscale(v), ) - @property - def n_skew_dims(self): - return len(self.skew_dims) - - @property - def n_skew_entries(self): - return int(self.n_skew_dims * (self.n_skew_dims - 1) / 2) - @property def diag_entries(self): return self.raw_diag_entries_constraint.transform(self.raw_diag_entries) @@ -103,8 +105,8 @@ def skew_entries(self): return self.raw_skew_entries_constraint.transform(self.raw_skew_entries) @property - def output_scale(self): - return self.raw_output_scale_constraint.transform(self.raw_output_scale) + def outputscale(self): + return self.raw_outputscale_constraint.transform(self.raw_outputscale) @diag_entries.setter def diag_entries(self, value): @@ -114,9 +116,9 @@ def diag_entries(self, value): def skew_entries(self, value): self._set_skew_entries(value) - @output_scale.setter - def output_scale(self, value): - self._set_output_scale(value) + @outputscale.setter + def outputscale(self, value): + self._set_outputscale(value) def _set_diag_entries(self, value): if not torch.is_tensor(value): @@ -128,25 +130,30 @@ def _set_skew_entries(self, value): value = torch.as_tensor(value).to(self.raw_skew_entries) self.initialize(raw_skew_entries=self.raw_skew_entries_constraint.inverse_transform(value)) - def _set_output_scale(self, value): + def _set_outputscale(self, value): if not torch.is_tensor(value): - value = torch.as_tensor(value).to(self.raw_output_scale) - self.initialize(raw_output_scale=self.raw_output_scale_constraint.inverse_transform(value)) + value = torch.as_tensor(value).to(self.raw_outputscale) + self.initialize(raw_outputscale=self.raw_outputscale_constraint.inverse_transform(value)) @property - def latent_dimensions(self): - # no rotations - if not self.off_diag: - T = torch.eye(self.num_inputs, dtype=torch.float64) + def latent_transform(self): + if self.n_skew_entries > 0: + # construct an orthogonal matrix. 
fun fact: exp(skew(N)) is the generator of SO(N) + S = torch.zeros((self.num_inputs, self.num_inputs), dtype=torch.float64) + S[self.skew_matrix_indices] = self.skew_entries + S += -S.transpose(-1, -2) + T = torch.linalg.matrix_exp(S) - # construct an orthogonal matrix. fun fact: exp(skew(N)) is the generator of SO(N) else: - A = torch.zeros((self.num_inputs, self.num_inputs)).double() - A[np.triu_indices(self.num_inputs, k=1)] = self.skew_entries - A += -A.transpose(-1, -2) - T = torch.linalg.matrix_exp(A) + # no rotations + T = torch.eye(self.num_inputs, dtype=torch.float64) + + latent_diagonals = self.diag_entries + if self.batch_dimension is not None: + latent_diagonals[:, self.batch_dimension] = self.batch_inverse_lengthscale - diagonal_transform = torch.cat([torch.diag(entries).unsqueeze(0) for entries in self.diag_entries], dim=0) + # this gives everything a nice batchful shape + diagonal_transform = torch.cat([torch.diag(entries).unsqueeze(0) for entries in latent_diagonals], dim=0) T = torch.matmul(diagonal_transform, T) return T @@ -155,9 +162,9 @@ def forward(self, x1, x2, diag=False, **params): # adapted from the Matern kernel mean = x1.reshape(-1, x1.size(-1)).mean(0)[(None,) * (x1.dim() - 1)] - trans_x1 = torch.matmul(self.latent_dimensions.unsqueeze(1), (x1 - mean).unsqueeze(-1)).squeeze(-1) - trans_x2 = torch.matmul(self.latent_dimensions.unsqueeze(1), (x2 - mean).unsqueeze(-1)).squeeze(-1) + trans_x1 = torch.matmul(self.latent_transform.unsqueeze(1), (x1 - mean).unsqueeze(-1)).squeeze(-1) + trans_x2 = torch.matmul(self.latent_transform.unsqueeze(1), (x2 - mean).unsqueeze(-1)).squeeze(-1) distance = self.covar_dist(trans_x1, trans_x2, diag=diag, **params) - return (self.output_scale if self.scale_output else 1.0) * (1 + distance) * torch.exp(-distance) + return (self.outputscale if self.scale_output else 1.0) * (1 + distance) * torch.exp(-distance) diff --git a/bloptools/bayesian/models.py b/bloptools/bayesian/models.py index 7ac1377..05869d1 100644 --- a/bloptools/bayesian/models.py +++ b/bloptools/bayesian/models.py @@ -8,16 +8,17 @@ class LatentDirichletClassifier(botorch.models.gp_regression.SingleTaskGP): - def __init__(self, train_inputs, train_targets, *args, **kwargs): + def __init__(self, train_inputs, train_targets, skew_dims=True, batch_dimension=None, *args, **kwargs): super().__init__(train_inputs, train_targets, *args, **kwargs) self.mean_module = gpytorch.means.ConstantMean() self.covar_module = kernels.LatentKernel( num_inputs=train_inputs.shape[-1], num_outputs=train_targets.shape[-1], - off_diag=True, + skew_dims=skew_dims, diag_prior=True, scale=True, + batch_dimension=batch_dimension, **kwargs ) @@ -28,7 +29,7 @@ def log_prob(self, x, n_samples=256): class LatentGP(botorch.models.gp_regression.SingleTaskGP): - def __init__(self, train_inputs, train_targets, *args, **kwargs): + def __init__(self, train_inputs, train_targets, skew_dims=True, batch_dimension=None, *args, **kwargs): super().__init__(train_inputs, train_targets, *args, **kwargs) self.mean_module = gpytorch.means.ConstantMean(constant_prior=gpytorch.priors.NormalPrior(loc=0, scale=1)) @@ -36,9 +37,10 @@ def __init__(self, train_inputs, train_targets, *args, **kwargs): self.covar_module = kernels.LatentKernel( num_inputs=train_inputs.shape[-1], num_outputs=train_targets.shape[-1], - off_diag=True, + skew_dims=skew_dims, diag_prior=True, scale=True, + batch_dimension=batch_dimension, **kwargs ) diff --git a/bloptools/devices.py b/bloptools/devices.py index 6e56d2a..5840fbd 100644 --- 
a/bloptools/devices.py +++ b/bloptools/devices.py @@ -4,8 +4,12 @@ from ophyd import Device, Signal, SignalRO +def dummy_dof(name): + return Signal(name=name, value=0.0) + + def dummy_dofs(n=2): - return [Signal(name=f"x{i+1}", value=0) for i in range(n)] + return [dummy_dof(name=f"x{i+1}") for i in range(n)] def get_dummy_device(name="dofs", n=2): @@ -22,8 +26,26 @@ def get_dummy_device(name="dofs", n=2): class TimeReadback(SignalRO): + """ + Returns the current timestamp. + """ + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def get(self): return ttime.time() + + +class ConstantReadback(SignalRO): + """ + Returns a constant every time you read it (more useful than you'd think). + """ + + def __init__(self, constant=1, *args, **kwargs): + super().__init__(*args, **kwargs) + + self.constant = constant + + def get(self): + return self.constant diff --git a/bloptools/experiments/sirepo/tes.py b/bloptools/experiments/sirepo/tes.py index a1b1267..cbf0599 100644 --- a/bloptools/experiments/sirepo/tes.py +++ b/bloptools/experiments/sirepo/tes.py @@ -20,18 +20,19 @@ def image_digestion(db, uid, image_name): flux = image.sum() n_y, n_x = image.shape - X, Y = np.meshgrid(np.linspace(*horizontal_extent, n_x), np.linspace(*vertical_extent, n_y)) + # reject if there is no flux, or we can't estimate the position and size of the beam for some reason + bad = False + bad |= not (flux > 0) + if not bad: + X, Y = np.meshgrid(np.linspace(*horizontal_extent, n_x), np.linspace(*vertical_extent, n_y)) - mean_x = np.sum(X * image) / np.sum(image) - mean_y = np.sum(Y * image) / np.sum(image) + mean_x = np.sum(X * image) / np.sum(image) + mean_y = np.sum(Y * image) / np.sum(image) - sigma_x = np.sqrt(np.sum((X - mean_x) ** 2 * image) / np.sum(image)) - sigma_y = np.sqrt(np.sum((Y - mean_y) ** 2 * image) / np.sum(image)) + sigma_x = np.sqrt(np.sum((X - mean_x) ** 2 * image) / np.sum(image)) + sigma_y = np.sqrt(np.sum((Y - mean_y) ** 2 * image) / np.sum(image)) - # reject if there is no flux, or we can't estimate the position and size of the beam - bad = False - bad |= not (flux > 0) - bad |= any(np.isnan([mean_x, mean_y, sigma_x, sigma_y])) + bad |= any(np.isnan([mean_x, mean_y, sigma_x, sigma_y])) if not bad: products.loc[index, ["flux", "x_pos", "y_pos", "x_width", "y_width"]] = ( diff --git a/bloptools/test_functions.py b/bloptools/test_functions.py index 25693a2..2facdf2 100644 --- a/bloptools/test_functions.py +++ b/bloptools/test_functions.py @@ -13,6 +13,19 @@ def himmelblau(x1, x2): return (x1**2 + x2 - 11) ** 2 + (x1 + x2**2 - 7) ** 2 +def constrained_himmelblau(x1, x2): + if x1**2 + x2**2 > 50: + return np.nan + return himmelblau(x1, x2) + + +def skewed_himmelblau(x1, x2): + _x1 = 2 * x1 + x2 + _x2 = 0.5 * (x1 - 2 * x2) + + return constrained_himmelblau(_x1, _x2) + + def rastrigin(*x): X = np.c_[x] return 10 * X.shape[-1] + (X**2 - 10 * np.cos(2 * np.pi * X)).sum(axis=1) diff --git a/docs/source/tutorials/constrained-himmelblau.ipynb b/docs/source/tutorials/constrained-himmelblau.ipynb index 8a8f20e..702c007 100644 --- a/docs/source/tutorials/constrained-himmelblau.ipynb +++ b/docs/source/tutorials/constrained-himmelblau.ipynb @@ -65,10 +65,7 @@ " products = db[uid].table()\n", "\n", " for index, entry in products.iterrows():\n", - " if entry.x1**2 + entry.x2**2 < 50:\n", - " products.loc[index, \"himmelblau\"] = test_functions.himmelblau(entry.x1, entry.x2)\n", - " else:\n", - " products.loc[index, \"himmelblau\"] = np.nan\n", + " products.loc[index, \"himmelblau\"] = 
test_functions.constrained_himmelblau(entry.x1, entry.x2)\n", "\n", " return products" ] diff --git a/docs/source/tutorials/introduction.ipynb b/docs/source/tutorials/introduction.ipynb index 4aabba2..af3b8a5 100644 --- a/docs/source/tutorials/introduction.ipynb +++ b/docs/source/tutorials/introduction.ipynb @@ -107,6 +107,19 @@ "task = Task(key=\"rastrigin\", kind=\"min\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "190156c3", + "metadata": {}, + "outputs": [], + "source": [ + "os.environ[\"BLUESKY_DEBUG_CALLBACKS\"] = \"True\"\n", + "\n", + "import bluesky\n", + "print(bluesky.__version__)" + ] + }, { "attachments": {}, "cell_type": "markdown", @@ -208,6 +221,14 @@ "RE(agent.learn(acqf=\"ei\", n_iter=8))\n", "agent.plot_tasks()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "06505db8", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/examples/prepare_bluesky.py b/examples/prepare_bluesky.py index f913ad7..da47150 100644 --- a/examples/prepare_bluesky.py +++ b/examples/prepare_bluesky.py @@ -11,11 +11,6 @@ from bluesky.callbacks import best_effort from bluesky.run_engine import RunEngine from databroker import Broker -from ophyd.utils import make_dir_tree -from sirepo_bluesky.shadow_handler import ShadowFileHandler -from sirepo_bluesky.sirepo_bluesky import SirepoBluesky -from sirepo_bluesky.sirepo_ophyd import create_classes -from sirepo_bluesky.srw_handler import SRWFileHandler RE = RunEngine({}) diff --git a/examples/prepare_tes_shadow.py b/examples/prepare_tes_shadow.py index ff5e8b8..fe4fb4f 100644 --- a/examples/prepare_tes_shadow.py +++ b/examples/prepare_tes_shadow.py @@ -1,3 +1,10 @@ +import sirepo_bluesky +from ophyd.utils import make_dir_tree +from sirepo_bluesky.shadow_handler import ShadowFileHandler +from sirepo_bluesky.sirepo_bluesky import SirepoBluesky +from sirepo_bluesky.sirepo_ophyd import create_classes +from sirepo_bluesky.srw_handler import SRWFileHandler + db.reg.register_handler("shadow", ShadowFileHandler, overwrite=True) db.reg.register_handler("SIREPO_FLYER", SRWFileHandler, overwrite=True) @@ -13,13 +20,17 @@ globals().update(**objects) data["models"]["simulation"]["npoint"] = 50000 -data["models"]["watchpointReport12"]["histogramBins"] = 16 +data["models"]["watchpointReport12"]["histogramBins"] = 32 # w9.duration.kind = "hinted" bec.disable_baseline() bec.disable_heading() # bec.disable_table() +import warnings + +warnings.filterwarnings("ignore", module="sirepo_bluesky") + # This should be done by installing the package with `pip install -e .` or something similar. # import sys # sys.path.insert(0, "../") From 8264b9021123f41f567594c4f654273233f4e29b Mon Sep 17 00:00:00 2001 From: Thomas Morris Date: Tue, 18 Jul 2023 14:52:20 -0400 Subject: [PATCH 3/8] load saved hyperparameters on initialization --- bloptools/bayesian/__init__.py | 87 ++++++--------------- bloptools/bayesian/models.py | 76 ++---------------- bloptools/tasks.py | 4 +- bloptools/test_functions.py | 19 +++++ docs/source/tutorials/hyperparameters.ipynb | 15 ++-- docs/source/tutorials/introduction.ipynb | 23 ++---- examples/prepare_tes_shadow.py | 2 +- 7 files changed, 68 insertions(+), 158 deletions(-) diff --git a/bloptools/bayesian/__init__.py b/bloptools/bayesian/__init__.py index f5df832..5d38278 100644 --- a/bloptools/bayesian/__init__.py +++ b/bloptools/bayesian/__init__.py @@ -88,12 +88,12 @@ def __init__( db : A databroker instance. 
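 passive_dofs : iterable of ophyd objects, optional
 Read-only degrees of freedom whose values are recorded with every acquisition and passed to the models as extra inputs.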
""" - for dof in active_dofs: - dof.kind = "hinted" - self.active_dofs = list(np.atleast_1d(active_dofs)) self.passive_dofs = list(np.atleast_1d(kwargs.get("passive_dofs", []))) + for dof in self.dofs: + dof.kind = "hinted" + self.active_dof_bounds = np.atleast_2d(active_dof_bounds).astype(float) self.tasks = np.atleast_1d(tasks) self.db = db @@ -113,9 +113,6 @@ def __init__( self.dets = np.atleast_1d(kwargs.get("detectors", [])) - # if self.decoherence: - # self.passive_dofs = np.append(self.passive_dofs, devices.TimeReadback(name="timestamp")) - for i, task in enumerate(self.tasks): task.index = i @@ -184,14 +181,6 @@ def test_active_inputs_grid(self): """ return self.unnormalize_active_inputs(self.normalized_test_active_inputs_grid) - # @property - # def input_transform(self): - # coefficient = torch.tensor(self.inputs.values.ptp(axis=0)) - # offset = torch.tensor(self.inputs.values.min(axis=0)) - # return botorch.models.transforms.input.AffineInputTransform( - # d=self.n_dof, coefficient=coefficient, offset=offset - # ) - # @property # def input_transform(self): # return botorch.models.transforms.input.Normalize(d=self.n_dofs) @@ -210,12 +199,6 @@ def save_data(self, filepath="./agent_data.h5"): to initialize a future agent. """ - # with h5py.File(filepath, "w") as f: - # for task in self.tasks: - # f.create_group(task.name) - # for key, value in task.regressor.state_dict().items(): - # f[task.name].create_dataset(key, data=value) - self.table.to_hdf(filepath, key="table") def forget(self, index): @@ -243,7 +226,6 @@ def save_hypers(self, filepath): def read_hypers(filepath): state_dicts = {} with h5py.File(filepath, "r") as f: - print(f.keys()) for model_name in f.keys(): state_dicts[model_name] = OrderedDict() for key, value in f[model_name].items(): @@ -252,10 +234,9 @@ def read_hypers(filepath): def initialize( self, - filepath=None, acqf=None, n_init=4, - decoherence=False, + data=None, hypers=None, ): """ @@ -271,18 +252,11 @@ def initialize( if hypers is not None: self.hypers = self.read_hypers(hypers) - if filepath is not None: - table = pd.read_hdf(filepath, key="table") - - # if decoherence == "batch": - # if "training_batch" in self.table.columns: - # current_training_batch = table.training_batch.max() + 1 - # else: - # table.loc[:, "training_batch"] = 1. - # current_training_batch = 2. - # self.passive_dofs.append(devices.ConstantReadback(name="training_batch", constant=current_training_batch)) - - self.tell(new_table=table) + if data is not None: + if type(data) == str: + self.tell(new_table=pd.read_hdf(data, key="table")) + else: + self.tell(new_table=data) # now let's get bayesian elif acqf in ["qr"]: @@ -294,16 +268,6 @@ def initialize( ['qr'].""" ) - # if init_table is None: - # raise RuntimeError("Unhandled initialization error.") - - no_good_samples_tasks = np.isnan(self.table.loc[:, self.target_names]).all(axis=0) - if no_good_samples_tasks.any(): - raise ValueError( - f"The tasks {[self.tasks[i].name for i in np.where(no_good_samples_tasks)[0]]} " - f"don't have any good samples." 
- ) - self._initialized = True def tell(self, new_table=None, append=True, **kwargs): @@ -318,21 +282,12 @@ def tell(self, new_table=None, append=True, **kwargs): self.table.loc[:, "total_fitness"] = self.table.loc[:, self.task_names].fillna(-np.inf).sum(axis=1) self.table.index = np.arange(len(self.table)) - # self.normalized_inputs = self.normalize_inputs(self.inputs) - - self.all_targets_valid = ~np.isnan(self.targets).any(axis=1) - skew_dims = [tuple(np.arange(self.n_active_dofs))] for task in self.tasks: task.targets = self.targets.loc[:, task.name] - # - # task.targets_mean = np.nanmean(task.targets, axis=0) - # task.targets_scale = np.nanstd(task.targets, axis=0) - - # task.normalized_targets = self.normalized_targets.loc[:, task.name] - task.feasibility = self.feasible.loc[:, task.name] + task.feasibility = self.feasible_for_all_tasks if not task.feasibility.sum() >= 2: raise ValueError("There must be at least two feasible data points per task!") @@ -359,7 +314,6 @@ def tell(self, new_table=None, append=True, **kwargs): train_targets=train_targets, likelihood=likelihood, skew_dims=skew_dims, - batch_dimension=self.batch_dimension, input_transform=self.input_transform, outcome_transform=botorch.models.transforms.outcome.Standardize(m=1, batch_shape=torch.Size((1,))), ).double() @@ -378,14 +332,13 @@ def tell(self, new_table=None, append=True, **kwargs): ) dirichlet_likelihood = gpytorch.likelihoods.DirichletClassificationLikelihood( - torch.as_tensor(self.all_targets_valid).long(), learn_additional_noise=True + torch.as_tensor(self.feasible_for_all_tasks.values).long(), learn_additional_noise=True ).double() self.classifier = models.LatentDirichletClassifier( train_inputs=torch.tensor(self.inputs.values).double(), train_targets=dirichlet_likelihood.transformed_targets.transpose(-1, -2).double(), skew_dims=skew_dims, - batch_dimension=self.batch_dimension, likelihood=dirichlet_likelihood, input_transform=self.input_transform, ).double() @@ -397,7 +350,6 @@ def tell(self, new_table=None, append=True, **kwargs): ) if self.hypers is None: - print("TRAINING") self.train_models() else: for task in self.tasks: @@ -611,10 +563,19 @@ def passive_inputs(self): def targets(self): return self.table.loc[:, self.task_names].astype(float) + # @property + # def feasible(self): + # with pd.option_context("mode.use_inf_as_null", True): + # feasible = ~self.targets.isna() + # return feasible + @property - def feasible(self): + def feasible_for_all_tasks(self): with pd.option_context("mode.use_inf_as_null", True): - feasible = ~self.targets.isna() + feasible = ~self.targets.isna().any(axis=1) + for task in self.tasks: + if task.min is not None: + feasible &= self.targets.loc[:, task.name].values > task.transform(task.min) return feasible # @property @@ -739,7 +700,7 @@ def plot_acquisition(self, **kwargs): def _plot_feas_one_dof(self, size=32): self.class_fig, self.class_ax = plt.subplots(1, 1, figsize=(4, 4), sharex=True, constrained_layout=True) - self.class_ax.scatter(self.inputs.values, self.all_targets_valid.astype(int), s=size) + self.class_ax.scatter(self.inputs.values, self.feasible_for_all_tasks.astype(int), s=size) x = torch.tensor(self.test_inputs_grid.reshape(-1, self.n_dofs)).double() log_prob = self.classifier.log_prob(x).detach().numpy().reshape(self.test_inputs_grid.shape[:-1]) @@ -761,7 +722,7 @@ def _plot_feas_many_dofs(self, axes=[0, 1], shading="nearest", cmap=DEFAULT_COLO ax.set_ylabel(self.dofs[axes[1]].name) data_ax = self.class_axes[0].scatter( - *self.inputs.values.T[:2], 
s=size, c=self.all_targets_valid.astype(int), vmin=0, vmax=1, cmap=cmap + *self.inputs.values.T[:2], s=size, c=self.feasible_for_all_tasks.astype(int), vmin=0, vmax=1, cmap=cmap ) if gridded: diff --git a/bloptools/bayesian/models.py b/bloptools/bayesian/models.py index 05869d1..a160703 100644 --- a/bloptools/bayesian/models.py +++ b/bloptools/bayesian/models.py @@ -1,101 +1,41 @@ import botorch import gpytorch import torch -from botorch.models.gpytorch import GPyTorchModel -from gpytorch.models import ExactGP from . import kernels -class LatentDirichletClassifier(botorch.models.gp_regression.SingleTaskGP): - def __init__(self, train_inputs, train_targets, skew_dims=True, batch_dimension=None, *args, **kwargs): +class LatentGP(botorch.models.gp_regression.SingleTaskGP): + def __init__(self, train_inputs, train_targets, skew_dims=True, *args, **kwargs): super().__init__(train_inputs, train_targets, *args, **kwargs) - self.mean_module = gpytorch.means.ConstantMean() + self.mean_module = gpytorch.means.ConstantMean(constant_prior=gpytorch.priors.NormalPrior(loc=0, scale=1)) + self.covar_module = kernels.LatentKernel( num_inputs=train_inputs.shape[-1], num_outputs=train_targets.shape[-1], skew_dims=skew_dims, diag_prior=True, scale=True, - batch_dimension=batch_dimension, **kwargs ) - def log_prob(self, x, n_samples=256): - *input_shape, n_dim = x.shape - samples = self.posterior(x.reshape(-1, n_dim)).sample(torch.Size((n_samples,))).exp() - return torch.log((samples / samples.sum(-1, keepdim=True)).mean(0)[:, 1]).reshape(*input_shape, 1) - -class LatentGP(botorch.models.gp_regression.SingleTaskGP): - def __init__(self, train_inputs, train_targets, skew_dims=True, batch_dimension=None, *args, **kwargs): +class LatentDirichletClassifier(botorch.models.gp_regression.SingleTaskGP): + def __init__(self, train_inputs, train_targets, skew_dims=True, *args, **kwargs): super().__init__(train_inputs, train_targets, *args, **kwargs) - self.mean_module = gpytorch.means.ConstantMean(constant_prior=gpytorch.priors.NormalPrior(loc=0, scale=1)) - + self.mean_module = gpytorch.means.ConstantMean() self.covar_module = kernels.LatentKernel( num_inputs=train_inputs.shape[-1], num_outputs=train_targets.shape[-1], skew_dims=skew_dims, diag_prior=True, scale=True, - batch_dimension=batch_dimension, **kwargs ) - -class OldBoTorchSingleTaskGP(ExactGP, GPyTorchModel): - def __init__(self, train_inputs, train_targets, likelihood): - super(OldBoTorchSingleTaskGP, self).__init__(train_inputs, train_targets, likelihood) - self.mean_module = gpytorch.means.ConstantMean() - self.covar_module = gpytorch.kernels.ScaleKernel( - kernels.LatentMaternKernel(n_dim=train_inputs.shape[-1], off_diag=True, diagonal_prior=True) - ) - - def forward(self, x): - mean_x = self.mean_module(x) - covar_x = self.covar_module(x) - - return gpytorch.distributions.MultivariateNormal(mean_x, covar_x) - - -class BoTorchMultiTaskGP(ExactGP, GPyTorchModel): - _num_outputs = 1 # to inform GPyTorchModel API - - def __init__(self, train_inputs, train_targets, likelihood): - self._num_outputs = train_targets.shape[-1] - - super(BoTorchMultiTaskGP, self).__init__(train_inputs, train_targets, likelihood) - self.mean_module = gpytorch.means.MultitaskMean(gpytorch.means.ConstantMean(), num_tasks=self._num_outputs) - self.covar_module = gpytorch.kernels.MultitaskKernel( - kernels.LatentMaternKernel(n_dim=train_inputs.shape[-1], off_diag=True, diagonal_prior=True), - num_tasks=self._num_outputs, - rank=1, - ) - - def forward(self, x): - mean_x = 
self.mean_module(x)
-        covar_x = self.covar_module(x)
-        return gpytorch.distributions.MultitaskMultivariateNormal(mean_x, covar_x)
-
-
-class OldBoTorchDirichletClassifier(gpytorch.models.ExactGP, botorch.models.gpytorch.GPyTorchModel):
-    _num_outputs = 1  # to inform GPyTorchModel API
-
-    def __init__(self, train_inputs, train_targets, likelihood):
-        super(OldBoTorchDirichletClassifier, self).__init__(train_inputs, train_targets, likelihood)
-        self.mean_module = gpytorch.means.ConstantMean(batch_shape=len(train_targets.unique()))
-        self.covar_module = gpytorch.kernels.ScaleKernel(
-            kernels.LatentMaternKernel(n_dim=train_inputs.shape[-1], off_diag=False, diagonal_prior=False)
-        )
-
-    def forward(self, x):
-        mean_x = self.mean_module(x)
-        covar_x = self.covar_module(x)
-        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
-
     def log_prob(self, x, n_samples=256):
         *input_shape, n_dim = x.shape
         samples = self.posterior(x.reshape(-1, n_dim)).sample(torch.Size((n_samples,))).exp()
-        return torch.log((samples / samples.sum(-3, keepdim=True)).mean(0)[1]).reshape(*input_shape)
+        return torch.log((samples / samples.sum(-1, keepdim=True)).mean(0)[:, 1]).reshape(*input_shape, 1)
diff --git a/bloptools/tasks.py b/bloptools/tasks.py
index d520a1e..aaa12ba 100644
--- a/bloptools/tasks.py
+++ b/bloptools/tasks.py
@@ -2,8 +2,10 @@ class Task:
     MIN_NOISE_LEVEL = 1e-6
     MAX_NOISE_LEVEL = 1e-2
 
-    def __init__(self, key, kind="max", name=None, transform=None, **kwargs):
+    def __init__(self, key, kind="max", min=None, max=None, name=None, transform=None, **kwargs):
         self.key = key
+        self.min = min
+        self.max = max
         self.kind = kind
         self.name = name if name is not None else f"{key}_fitness"
         self.transform = transform if transform is not None else lambda x: x
diff --git a/bloptools/test_functions.py b/bloptools/test_functions.py
index 2facdf2..9b1983e 100644
--- a/bloptools/test_functions.py
+++ b/bloptools/test_functions.py
@@ -26,11 +26,30 @@ def skewed_himmelblau(x1, x2):
     return constrained_himmelblau(_x1, _x2)
 
 
+def bukin(x1, x2):
+    return 100 * np.sqrt(np.abs(x2 - 1e-2 * x1**2)) + 0.01 * np.abs(x1)
+
+
 def rastrigin(*x):
     X = np.c_[x]
     return 10 * X.shape[-1] + (X**2 - 10 * np.cos(2 * np.pi * X)).sum(axis=1)
 
 
+def styblinski_tang(*x):
+    X = np.c_[x]
+    return 0.5 * (X**4 - 16 * X**2 + 5 * X).sum(axis=1)
+
+
+def ackley(*x):
+    X = np.c_[x]
+    return (
+        -20 * np.exp(-0.2 * np.sqrt(0.5 * (X**2).sum(axis=1)))
+        - np.exp(0.5 * np.cos(2 * np.pi * X).sum(axis=1))
+        + np.e
+        + 20
+    )
+
+
 def gaussian_beam_waist(x1, x2):
     return np.sqrt(1 + 0.25 * (x1 - x2) ** 2 + 16 * (x1 + x2 - 2) ** 2)
diff --git a/docs/source/tutorials/hyperparameters.ipynb b/docs/source/tutorials/hyperparameters.ipynb
index 0fe5e2a..124295d 100644
--- a/docs/source/tutorials/hyperparameters.ipynb
+++ b/docs/source/tutorials/hyperparameters.ipynb
@@ -8,7 +8,7 @@
    "source": [
     "# Hyperparameters\n",
     "\n",
-    "Consider the test function of two inputs $f(x_1, x_2) = e^{- (x_1/10)^2 - x_2^2}$. We can see that this function varies more quickly in the $x_1$ direction than the $x_2$ direction, especially if we look at the plot:"
+    "Consider the Booth test function (below). This function varies differently in different directions, and these directions are somewhat skewed with respect to the inputs. Our agent will automatically fit the right hyperparameters to account for this."
] }, { @@ -21,13 +21,14 @@ "import numpy as np\n", "import matplotlib as mpl\n", "from matplotlib import pyplot as plt\n", + "from bloptools import test_functions\n", "\n", "x1 = x2 = np.linspace(-10, 10, 256)\n", "X1, X2 = np.meshgrid(x1, x2)\n", "\n", - "F = -np.exp(-((X1 / 8) ** 2)) * np.exp(-(X2**2))\n", + "F = test_functions.booth(X1, X2)\n", "\n", - "plt.pcolormesh(x1, x2, F, shading=\"auto\")\n", + "plt.pcolormesh(x1, x2, F, norm=mpl.colors.LogNorm(), shading=\"auto\")\n", "plt.colorbar()\n", "plt.xlabel(\"x1\")\n", "plt.ylabel(\"x2\")" @@ -59,14 +60,14 @@ "dofs = bloptools.devices.dummy_dofs(n=2)\n", "bounds = [(-8, 8), (-8, 8)]\n", "\n", - "task = Task(key=\"loss\", kind=\"min\")\n", + "task = Task(key=\"booth\", kind=\"min\")\n", "\n", "\n", "def digestion(db, uid):\n", " products = db[uid].table()\n", "\n", " for index, entry in products.iterrows():\n", - " products.loc[index, \"loss\"] = -np.exp(-1e-2 * entry.x1**2) * np.exp(-1e-1 * entry.x2**2)\n", + " products.loc[index, \"booth\"] = test_functions.booth(entry.x1, entry.x2)\n", "\n", " return products\n", "\n", @@ -92,7 +93,7 @@ "metadata": {}, "outputs": [], "source": [ - "agent.tasks[0].regressor.covar_module.latent_dimensions" + "agent.tasks[0].regressor.covar_module.latent_transform" ] }, { @@ -123,7 +124,7 @@ "metadata": {}, "outputs": [], "source": [ - "RE(agent.learn(\"ei\", n_iter=4))\n", + "RE(agent.learn(\"ei\", n_iter=8))\n", "agent.plot_tasks()" ] } diff --git a/docs/source/tutorials/introduction.ipynb b/docs/source/tutorials/introduction.ipynb index af3b8a5..d881ef1 100644 --- a/docs/source/tutorials/introduction.ipynb +++ b/docs/source/tutorials/introduction.ipynb @@ -33,9 +33,9 @@ "\n", "from bloptools import test_functions\n", "\n", - "x = np.linspace(-4, 4, 256)\n", + "x = np.linspace(-5, 5, 256)\n", "\n", - "plt.plot(x, test_functions.rastrigin(x), c=\"b\")" + "plt.plot(x, test_functions.ackley(x), c=\"b\")" ] }, { @@ -81,7 +81,7 @@ " products = db[uid].table()\n", "\n", " for index, entry in products.iterrows():\n", - " products.loc[index, \"rastrigin\"] = test_functions.rastrigin(entry.x1)\n", + " products.loc[index, \"ackley\"] = test_functions.ackley(entry.x1)\n", "\n", " return products" ] @@ -104,20 +104,7 @@ "source": [ "from bloptools.tasks import Task\n", "\n", - "task = Task(key=\"rastrigin\", kind=\"min\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "190156c3", - "metadata": {}, - "outputs": [], - "source": [ - "os.environ[\"BLUESKY_DEBUG_CALLBACKS\"] = \"True\"\n", - "\n", - "import bluesky\n", - "print(bluesky.__version__)" + "task = Task(key=\"ackley\", kind=\"min\")" ] }, { @@ -149,7 +136,7 @@ " db=db,\n", ")\n", "\n", - "RE(agent.initialize(acqf=\"qr\", n_init=12))" + "RE(agent.initialize(acqf=\"qr\", n_init=4))" ] }, { diff --git a/examples/prepare_tes_shadow.py b/examples/prepare_tes_shadow.py index fe4fb4f..e590741 100644 --- a/examples/prepare_tes_shadow.py +++ b/examples/prepare_tes_shadow.py @@ -19,7 +19,7 @@ classes, objects = create_classes(connection.data, connection=connection) globals().update(**objects) -data["models"]["simulation"]["npoint"] = 50000 +data["models"]["simulation"]["npoint"] = 100000 data["models"]["watchpointReport12"]["histogramBins"] = 32 # w9.duration.kind = "hinted" From 1a49c559eac900e037c7480650098ade8b475c6b Mon Sep 17 00:00:00 2001 From: Thomas Morris Date: Wed, 19 Jul 2023 11:55:08 -0400 Subject: [PATCH 4/8] added test for reading hypers --- bloptools/bayesian/__init__.py | 226 ++++++++++-------- bloptools/bayesian/kernels.py | 
43 ++--
 bloptools/tests/conftest.py                   |  39 +++
 bloptools/tests/test_agent.py                 |  10 +
 bloptools/tests/test_bayesian_shadow.py       |   6 +-
 bloptools/tests/test_bayesian_test_funcs.py   |   8 +-
 bloptools/utils.py                            |  27 +--
 .../tutorials/constrained-himmelblau.ipynb    |   6 +-
 docs/source/tutorials/introduction.ipynb      |   6 +-
 .../tutorials/latent-toroid-dimensions.ipynb  |  21 +-
 docs/source/tutorials/multi-task-sirepo.ipynb |   6 +-
 hypers.h5                                     | Bin 0 -> 32040 bytes
 12 files changed, 230 insertions(+), 168 deletions(-)
 create mode 100644 bloptools/tests/test_agent.py
 create mode 100644 hypers.h5

diff --git a/bloptools/bayesian/__init__.py b/bloptools/bayesian/__init__.py
index 5d38278..abf476c 100644
--- a/bloptools/bayesian/__init__.py
+++ b/bloptools/bayesian/__init__.py
@@ -1,4 +1,5 @@
 import logging
+import time as ttime
 import warnings
 
 from collections import OrderedDict
@@ -137,7 +138,7 @@ def __init__(
 
         self._initialized = False
         self._train_models = True
-        self.hypers = None
+        self.a_priori_hypers = None
 
     def normalize_active_inputs(self, x):
         return (x - self.active_dof_bounds.min(axis=1)) / self.active_dof_bounds.ptp(axis=1)
""" self.table.to_hdf(filepath, key="table") @@ -212,25 +213,40 @@ def sampler(self, n): subset = np.random.choice(min_power_of_two, size=n, replace=False) return sp.stats.qmc.Sobol(d=self.n_active_dofs, scramble=True).random(n=min_power_of_two)[subset] + def _set_hypers(self, hypers): + for task in self.tasks: + task.regressor.load_state_dict(hypers[task.name]) + self.classifier.load_state_dict(hypers["classifier"]) + + @property + def hypers(self): + hypers = {"classifier": {}} + for key, value in self.classifier.state_dict().items(): + hypers["classifier"][key] = value + for task in self.tasks: + hypers[task.name] = {} + for key, value in task.regressor.state_dict().items(): + hypers[task.name][key] = value + + return hypers + def save_hypers(self, filepath): + hypers = self.hypers with h5py.File(filepath, "w") as f: - for task in self.tasks: - f.create_group(task.name) - for key, value in task.regressor.state_dict().items(): - f[task.name].create_dataset(key, data=value) - f.create_group("classifier") - for key, value in self.classifier.state_dict().items(): - f["classifier"].create_dataset(key, data=value) + for model_key in hypers.keys(): + f.create_group(model_key) + for param_key, param_value in hypers[model_key].items(): + f[model_key].create_dataset(param_key, data=param_value) @staticmethod - def read_hypers(filepath): - state_dicts = {} + def load_hypers(filepath): + hypers = {} with h5py.File(filepath, "r") as f: - for model_name in f.keys(): - state_dicts[model_name] = OrderedDict() - for key, value in f[model_name].items(): - state_dicts[model_name][key] = torch.tensor(np.atleast_1d(value[()])) - return state_dicts + for model_key in f.keys(): + hypers[model_key] = OrderedDict() + for param_key, param_value in f[model_key].items(): + hypers[model_key][param_key] = torch.tensor(np.atleast_1d(param_value[()])) + return hypers def initialize( self, @@ -240,8 +256,8 @@ def initialize( hypers=None, ): """ - An initialization plan for the agent. - This must be run before the agent can learn. + An initialization plan for the self. + This must be run before the self can learn. It should be passed to a Bluesky RunEngine. """ @@ -250,7 +266,7 @@ def initialize( yield from self.initialization() if hypers is not None: - self.hypers = self.read_hypers(hypers) + self.a_priori_hypers = self.load_hypers(hypers) if data is not None: if type(data) == str: @@ -260,7 +276,7 @@ def initialize( # now let's get bayesian elif acqf in ["qr"]: - yield from self.learn(acqf=acqf, n_iter=1, n_per_iter=n_init, route=True) + yield from self.learn("qr", n_iter=1, n_per_iter=n_init, route=True) else: raise Exception( @@ -270,9 +286,9 @@ def initialize( self._initialized = True - def tell(self, new_table=None, append=True, **kwargs): + def tell(self, new_table=None, append=True, train=True, **kwargs): """ - Inform the agent about new inputs and targets for the model. + Inform the self about new inputs and targets for the model. 
""" new_table = pd.DataFrame() if new_table is None else new_table @@ -284,6 +300,9 @@ def tell(self, new_table=None, append=True, **kwargs): skew_dims = [tuple(np.arange(self.n_active_dofs))] + if not train: + hypers = self.hypers + for task in self.tasks: task.targets = self.targets.loc[:, task.name] @@ -349,31 +368,30 @@ def tell(self, new_table=None, append=True, **kwargs): f=lambda X: -self.classifier.log_prob(X).square() ) - if self.hypers is None: - self.train_models() + if self.a_priori_hypers is not None: + self._set_hypers(self.a_priori_hypers) + elif not train: + self._set_hypers(hypers) else: - for task in self.tasks: - task.regressor.load_state_dict(self.hypers[task.name]) - self.classifier.load_state_dict(self.hypers["classifier"]) - - self.targets_model = botorch.models.model_list_gp_regression.ModelListGP( - *[task.regressor for task in self.tasks] - ) + self.train_models() self.task_model = botorch.models.model.ModelList(*[task.regressor for task in self.tasks], self.feas_model) def train_models(self, **kwargs): + t0 = ttime.monotonic() for task in self.tasks: botorch.fit.fit_gpytorch_mll(task.regressor_mll, **kwargs) botorch.fit.fit_gpytorch_mll(self.classifier_mll, **kwargs) + if self.verbose: + print(f"trained models in {ttime.monotonic() - t0:.02f} seconds") - def get_acquisition_function(self, acqf_name="ei", return_metadata=False, acqf_args={}, **kwargs): + def get_acquisition_function(self, acqf_identifier="ei", return_metadata=False, acqf_args={}, **kwargs): if not self._initialized: raise RuntimeError( - f'Can\'t construct acquisition function "{acqf_name}" (the agent is not initialized!)' + f'Can\'t construct acquisition function "{acqf_identifier}" (the self is not initialized!)' ) - if acqf_name.lower() in AVAILABLE_ACQFS["expected_improvement"]["identifiers"]: + if acqf_identifier.lower() in AVAILABLE_ACQFS["expected_improvement"]["identifiers"]: acqf = botorch.acquisition.analytic.LogExpectedImprovement( self.task_model, best_f=self.best_sum_of_tasks, @@ -382,7 +400,7 @@ def get_acquisition_function(self, acqf_name="ei", return_metadata=False, acqf_a ) acqf_meta = {"name": "expected improvement", "args": {}} - elif acqf_name.lower() in AVAILABLE_ACQFS["probability_of_improvement"]["identifiers"]: + elif acqf_identifier.lower() in AVAILABLE_ACQFS["probability_of_improvement"]["identifiers"]: acqf = botorch.acquisition.analytic.LogProbabilityOfImprovement( self.task_model, best_f=self.best_sum_of_tasks, @@ -391,7 +409,7 @@ def get_acquisition_function(self, acqf_name="ei", return_metadata=False, acqf_a ) acqf_meta = {"name": "probability of improvement", "args": {}} - elif acqf_name.lower() in AVAILABLE_ACQFS["expected_mean"]["identifiers"]: + elif acqf_identifier.lower() in AVAILABLE_ACQFS["expected_mean"]["identifiers"]: acqf = botorch.acquisition.analytic.UpperConfidenceBound( self.task_model, beta=0, @@ -400,7 +418,7 @@ def get_acquisition_function(self, acqf_name="ei", return_metadata=False, acqf_a ) acqf_meta = {"name": "expected mean"} - elif acqf_name.lower() in AVAILABLE_ACQFS["upper_confidence_bound"]["identifiers"]: + elif acqf_identifier.lower() in AVAILABLE_ACQFS["upper_confidence_bound"]["identifiers"]: beta = AVAILABLE_ACQFS["upper_confidence_bound"]["default_args"]["beta"] acqf = botorch.acquisition.analytic.UpperConfidenceBound( self.task_model, @@ -411,71 +429,77 @@ def get_acquisition_function(self, acqf_name="ei", return_metadata=False, acqf_a acqf_meta = {"name": "upper confidence bound", "args": {"beta": beta}} else: - raise 
-            raise ValueError(f'Unrecognized acquisition acqf_name "{acqf_name}".')
+            raise ValueError(f'Unrecognized acquisition function identifier "{acqf_identifier}".')
 
         return (acqf, acqf_meta) if return_metadata else acqf
 
-    def ask(
+    def ask(self, acqf_identifier="ei", n=1, route=True, return_metadata=False):
+        if acqf_identifier.lower() == "qr":
+            x = self.active_inputs_sampler(n=n)
+            acqf_meta = {"name": "quasi-random", "args": {}}
+
+        elif n == 1:
+            x, acqf_meta = self.ask_single(acqf_identifier, return_metadata=True)
+            return (x, acqf_meta) if return_metadata else x
+
+        elif n > 1:
+            for i in range(n):
+                x, acqf_meta = self.ask_single(acqf_identifier, return_metadata=True)
+
+                if i < (n - 1):
+                    task_samples = [
+                        task.regressor.posterior(torch.tensor(x)).sample().item() for task in self.tasks
+                    ]
+                    fantasy_table = pd.DataFrame(
+                        np.append(x, task_samples)[None], columns=[*self.dof_names, *self.task_names]
+                    )
+                    self.tell(fantasy_table, train=False)
+
+            x = self.active_inputs.iloc[-n:].values
+
+            if n > 1:
+                self.forget(self.table.index[-(n - 1) :])
+
+        if route:
+            x = x[utils.route(self.read_active_dofs, x)]
+
+        return (x, acqf_meta) if return_metadata else x
+
+    def ask_single(
         self,
-        tasks=None,
-        classifier=None,
-        acqf_name="ei",
-        greedy=True,
-        n=1,
-        disappointment=0,
-        route=False,
-        cost_model=None,
-        n_test=1024,
-        optimize=True,
+        acqf_identifier="ei",
         return_metadata=False,
     ):
         """
-        The next $n$ points to sample, recommended by the agent.
+        The next point to sample, recommended by the agent.
         """
-        # if route:
-        #     unrouted_points = self.ask(
-        #         tasks=tasks,
-        #         classifier=classifier,
-        #         strategy=strategy,
-        #         greedy=greedy,
-        #         n=n,
-        #         disappointment=disappointment,
-        #         route=False,
-        #         cost_model=cost_model,
-        #         n_test=n_test,
-        #     )
-
-        #     routing_index, _ = utils.get_routing(self.read_active_dofs, unrouted_points)
-        #     x = unrouted_points[routing_index]
-
-        if acqf_name.lower() == "qr":
-            x = self.active_inputs_sampler(n=n)
-            acqf_meta = {"name": "quasi-random", "args": {}}
+        t0 = ttime.monotonic()
 
-        else:
-            acqf, acqf_meta = self.get_acquisition_function(acqf_name=acqf_name, return_metadata=True)
+        acqf, acqf_meta = self.get_acquisition_function(acqf_identifier=acqf_identifier, return_metadata=True)
 
-            BATCH_SIZE = 1
-            NUM_RESTARTS = 7
-            RAW_SAMPLES = 512
+        BATCH_SIZE = 1
+        NUM_RESTARTS = 8
+        RAW_SAMPLES = 256
 
-            candidates, _ = botorch.optim.optimize_acqf(
-                acq_function=acqf,
-                bounds=torch.tensor(self.dof_bounds).T,
-                q=BATCH_SIZE,
-                num_restarts=NUM_RESTARTS,
-                raw_samples=RAW_SAMPLES,  # used for intialization heuristic
-                options={"batch_limit": 3, "maxiter": 1024},
-            )
+        candidates, _ = botorch.optim.optimize_acqf(
+            acq_function=acqf,
+            bounds=torch.tensor(self.dof_bounds).T,
+            q=BATCH_SIZE,
+            num_restarts=NUM_RESTARTS,
+            raw_samples=RAW_SAMPLES,  # used for initialization heuristic
+        )
 
-            x = candidates.detach().numpy()[..., self.dof_is_active_mask]
+        x = candidates.detach().numpy()[..., self.dof_is_active_mask]
+
+        if self.verbose:
+            print(f"found point {x} in {ttime.monotonic() - t0:.02f} seconds")
 
         return (x, acqf_meta) if return_metadata else x
 
     def acquire(self, active_inputs):
         """
         Acquire and digest according to the agent's acquisition and digestion plans.
         This should yield a table of sampled tasks with the same length as the sampled inputs.
""" @@ -504,14 +528,24 @@ def acquire(self, active_inputs): return products - def learn(self, acqf, n_iter=1, n_per_iter=1, reuse_hypers=True, upsample=1, verbose=True, plots=[], **kwargs): + def learn( + self, + acqf_identifier, + n_iter=1, + n_per_iter=1, + reuse_hypers=True, + upsample=1, + verbose=True, + plots=[], + **kwargs, + ): """ This iterates the learning algorithm, looping over ask -> acquire -> tell. It should be passed to a Bluesky RunEngine. """ - for i in range(n_iter): - x, acqf_meta = self.ask(n=n_per_iter, acqf_name=acqf, return_metadata=True, **kwargs) + for iteration in range(n_iter): + x, acqf_meta = self.ask(n=n_per_iter, acqf_identifier=acqf_identifier, return_metadata=True, **kwargs) new_table = yield from self.acquire(x) @@ -1014,6 +1048,6 @@ def plot_history(self, x_key="index", show_all_tasks=False): legend = hist_axes[0].legend(handles=handles, fontsize=8) legend.set_title("acquisition function") - # plot_history(agent, x_key="time") + # plot_history(self, x_key="time") # plt.savefig("bo-history.pdf", dpi=256) diff --git a/bloptools/bayesian/kernels.py b/bloptools/bayesian/kernels.py index e8299df..db2d99f 100644 --- a/bloptools/bayesian/kernels.py +++ b/bloptools/bayesian/kernels.py @@ -43,9 +43,18 @@ def __init__( raise ValueError("invalud dimension index in skew_dims") skew_group_submatrix_indices = [] - for skew_group in self.skew_dims: - i, j = skew_group[torch.triu_indices(len(skew_group), len(skew_group), 1)].unsqueeze(1) - skew_group_submatrix_indices.append(torch.cat((i, j), dim=0)) + for dim in range(self.num_outputs): + for skew_group in self.skew_dims: + j, k = skew_group[torch.triu_indices(len(skew_group), len(skew_group), 1)].unsqueeze(1) + i = dim * torch.ones(j.shape).long() + skew_group_submatrix_indices.append(torch.cat((i, j, k), dim=0)) + + self.diag_matrix_indices = tuple( + [ + torch.kron(torch.arange(self.num_outputs), torch.ones(self.num_inputs)).long(), + *2 * [torch.arange(self.num_inputs).repeat(self.num_outputs)], + ] + ) self.skew_matrix_indices = ( tuple(torch.cat(skew_group_submatrix_indices, dim=1)) @@ -136,27 +145,23 @@ def _set_outputscale(self, value): self.initialize(raw_outputscale=self.raw_outputscale_constraint.inverse_transform(value)) @property - def latent_transform(self): + def skew_matrix(self): + S = torch.zeros((self.num_outputs, self.num_inputs, self.num_inputs), dtype=torch.float64) if self.n_skew_entries > 0: - # construct an orthogonal matrix. fun fact: exp(skew(N)) is the generator of SO(N) - S = torch.zeros((self.num_inputs, self.num_inputs), dtype=torch.float64) + # to construct an orthogonal matrix. 
             S[self.skew_matrix_indices] = self.skew_entries
             S += -S.transpose(-1, -2)
-            T = torch.linalg.matrix_exp(S)
-
-        else:
-            # no rotations
-            T = torch.eye(self.num_inputs, dtype=torch.float64)
+        return torch.linalg.matrix_exp(S)
 
-        latent_diagonals = self.diag_entries
-        if self.batch_dimension is not None:
-            latent_diagonals[:, self.batch_dimension] = self.batch_inverse_lengthscale
-
-        # this gives everything a nice batchful shape
-        diagonal_transform = torch.cat([torch.diag(entries).unsqueeze(0) for entries in latent_diagonals], dim=0)
-        T = torch.matmul(diagonal_transform, T)
+    @property
+    def diag_matrix(self):
+        D = torch.zeros((self.num_outputs, self.num_inputs, self.num_inputs), dtype=torch.float64)
+        D[self.diag_matrix_indices] = self.diag_entries.ravel()
+        return D
 
-        return T
+    @property
+    def latent_transform(self):
+        return torch.matmul(self.diag_matrix, self.skew_matrix)
 
     def forward(self, x1, x2, diag=False, **params):
         # adapted from the Matern kernel
diff --git a/bloptools/tests/conftest.py b/bloptools/tests/conftest.py
index 7324618..9e23e8c 100644
--- a/bloptools/tests/conftest.py
+++ b/bloptools/tests/conftest.py
@@ -12,6 +12,11 @@
 from sirepo_bluesky.sirepo_bluesky import SirepoBluesky
 from sirepo_bluesky.srw_handler import SRWFileHandler
 
+from bloptools.bayesian import Agent
+from bloptools.tasks import Task
+
+from .. import devices, test_functions
+
 
 @pytest.fixture(scope="function")
 def db():
@@ -49,6 +54,40 @@ def RE(db):
     return RE
 
 
+@pytest.fixture(scope="function")
+def agent(db):
+    """
+    A simple agent minimizing two Styblinski-Tang functions
+    """
+
+    dofs = devices.dummy_dofs(n=2)
+    bounds = [(-5, 5), (-5, 5)]
+
+    def digestion(db, uid):
+        products = db[uid].table()
+
+        for index, entry in products.iterrows():
+            products.loc[index, "loss1"] = test_functions.styblinski_tang(entry.x1, entry.x2)
+            products.loc[index, "loss2"] = test_functions.styblinski_tang(entry.x1, -entry.x2)
+
+        return products
+
+    tasks = [Task(key="loss1", kind="min"), Task(key="loss2", kind="min")]
+
+    agent = Agent(
+        active_dofs=dofs,
+        passive_dofs=[],
+        active_dof_bounds=bounds,
+        tasks=tasks,
+        digestion=digestion,
+        db=db,
+        verbose=True,
+        tolerate_acquisition_errors=False,
+    )
+
+    return agent
+
+
 @pytest.fixture(scope="function")
 def make_dirs():
     root_dir = "/tmp/sirepo-bluesky-data"
diff --git a/bloptools/tests/test_agent.py b/bloptools/tests/test_agent.py
new file mode 100644
index 0000000..384ffe6
--- /dev/null
+++ b/bloptools/tests/test_agent.py
@@ -0,0 +1,10 @@
+import pytest
+
+
+@pytest.mark.agent
+def test_writing_hypers(RE, agent):
+    RE(agent.initialize("qr", n_init=32))
+
+    agent.save_hypers("hypers.h5")
+
+    RE(agent.initialize("qr", n_init=8, hypers="hypers.h5"))
diff --git a/bloptools/tests/test_bayesian_shadow.py b/bloptools/tests/test_bayesian_shadow.py
index aa6b908..d281582 100644
--- a/bloptools/tests/test_bayesian_shadow.py
+++ b/bloptools/tests/test_bayesian_shadow.py
@@ -33,9 +33,9 @@ def test_bayesian_agent_tes_shadow(RE, db, shadow_tes_simulation):
         db=db,
     )
 
-    RE(agent.initialize(acqf="qr", n_init=4))
+    RE(agent.initialize("qr", n_init=4))
 
-    RE(agent.learn(acqf="ei", n_iter=2))
-    RE(agent.learn(acqf="pi", n_iter=2))
+    RE(agent.learn("ei", n_iter=2))
+    RE(agent.learn("pi", n_iter=2))
 
     agent.plot_tasks()
diff --git a/bloptools/tests/test_bayesian_test_funcs.py b/bloptools/tests/test_bayesian_test_funcs.py
index 058b709..04eeb55 100644
--- a/bloptools/tests/test_bayesian_test_funcs.py
+++ b/bloptools/tests/test_bayesian_test_funcs.py
@@ -20,9 +20,9 @@ def test_bayesian_agent_himmelblau(RE, db):
         db=db,
     )
 
-    RE(agent.initialize(acqf="qr", n_init=16))
+    RE(agent.initialize("qr", n_init=16))
 
-    RE(agent.learn(acqf="ei", n_iter=2))
+    RE(agent.learn("ei", n_iter=2))
 
     agent.plot_tasks()
 
@@ -43,8 +43,8 @@ def test_bayesian_agent_mock_kbs(RE, db):
         db=db,
     )
 
-    RE(agent.initialize(acqf="qr", n_init=16))
+    RE(agent.initialize("qr", n_init=16))
 
-    RE(agent.learn(acqf="ei", n_iter=4))
+    RE(agent.learn("ei", n_iter=4))
 
     agent.plot_tasks()
diff --git a/bloptools/utils.py b/bloptools/utils.py
index 380ca69..f875cb2 100644
--- a/bloptools/utils.py
+++ b/bloptools/utils.py
@@ -36,25 +36,18 @@ def mprod(*M):
     return res
 
 
-def get_routing(origin, points):
+def route(start_point, points):
     """
-    Finds an efficient routing between $n$ points, after normalizing each dimension.
-    Returns $n-1$ indices, ignoring the zeroeth index (the origin).
+    Returns the indices of the most efficient way to visit `points`, starting from `start_point`.
     """
 
-    if not len(points) > 1:
-        return np.array([0]), 1.0
-
-    _points = np.r_[np.atleast_2d(origin), points]
-
-    rel_points = _points / np.array([std if std > 0 else 1.0 for std in points.std(axis=0)])
-
-    # delay_matrix = gpo.delay_estimate(rel_points[:,None,:] - rel_points[None,:,:])
-    delay_matrix = np.sqrt(np.square(rel_points[:, None, :] - rel_points[None, :, :]).sum(axis=-1))
-    delay_matrix = (1e3 * delay_matrix).astype(int)  # it likes integers idk
+    total_points = np.r_[np.atleast_2d(start_point), points]
+    normalized_points = (total_points - total_points.min(axis=0)) / total_points.ptp(axis=0)
+    delay_matrix = np.sqrt(np.square(normalized_points[:, None, :] - normalized_points[None, :, :]).sum(axis=-1))
+    delay_matrix = (1e4 * delay_matrix).astype(int)  # the solver requires integer costs
 
     manager = pywrapcp.RoutingIndexManager(
-        len(_points), 1, 0
+        len(total_points), 1, 0
     )  # number of nodes, number of vehicles, start index
     routing = pywrapcp.RoutingModel(manager)
 
     def delay_callback(from_index, to_index):
@@ -71,17 +64,17 @@ def delay_callback(from_index, to_index):
     search_parameters.first_solution_strategy = routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC
 
     solution = routing.SolveWithParameters(search_parameters)
-    dir(solution)
 
     index = routing.Start(0)
-    route_indices, route_delays = [], []
+    route_indices, route_delays = [0], []
     while not routing.IsEnd(index):
         previous_index = index
         index = solution.Value(routing.NextVar(index))
         route_delays.append(routing.GetArcCostForVehicle(previous_index, index, 0))
         route_indices.append(index)
 
-    return np.array(route_indices)[:-1] - 1, route_delays[:-1]
+    # omit the first index (the start point) and the last (the return to it)
+    return np.array(route_indices)[1:-1] - 1
 
 
 def get_movement_time(x, v_max, a):
diff --git a/docs/source/tutorials/constrained-himmelblau.ipynb b/docs/source/tutorials/constrained-himmelblau.ipynb
index 702c007..0a36267 100644
--- a/docs/source/tutorials/constrained-himmelblau.ipynb
+++ b/docs/source/tutorials/constrained-himmelblau.ipynb
@@ -94,7 +94,7 @@
     "from bloptools.tasks import Task\n",
     "\n",
     "dofs = bloptools.devices.dummy_dofs(n=2)\n",
-    "bounds = [(-8, 8), (-8, 8)]\n",
+    "bounds = [(-10, 10), (-10, 10)]\n",
     "\n",
     "task = Task(key=\"himmelblau\", kind=\"min\")\n",
     "\n",
@@ -107,7 +107,7 @@
     "    db=db,\n",
     ")\n",
     "\n",
-    "RE(agent.initialize(acqf=\"qr\", n_init=32))\n",
+    "RE(agent.initialize(\"qr\", n_init=32))\n",
     "\n",
     "agent.plot_tasks()"
    ]
   },
@@ -161,7 +161,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "RE(agent.learn(\"ei\", n_iter=4))"
"RE(agent.learn(\"ei\", n_per_iter=4))" ] }, { diff --git a/docs/source/tutorials/introduction.ipynb b/docs/source/tutorials/introduction.ipynb index d881ef1..c960697 100644 --- a/docs/source/tutorials/introduction.ipynb +++ b/docs/source/tutorials/introduction.ipynb @@ -136,7 +136,7 @@ " db=db,\n", ")\n", "\n", - "RE(agent.initialize(acqf=\"qr\", n_init=4))" + "RE(agent.initialize(\"qr\", n_init=4))" ] }, { @@ -205,14 +205,14 @@ "metadata": {}, "outputs": [], "source": [ - "RE(agent.learn(acqf=\"ei\", n_iter=8))\n", + "RE(agent.learn(\"ei\", n_per_iter=4))\n", "agent.plot_tasks()" ] }, { "cell_type": "code", "execution_count": null, - "id": "06505db8", + "id": "87908041", "metadata": {}, "outputs": [], "source": [] diff --git a/docs/source/tutorials/latent-toroid-dimensions.ipynb b/docs/source/tutorials/latent-toroid-dimensions.ipynb index 3f8d5c1..e4b5065 100644 --- a/docs/source/tutorials/latent-toroid-dimensions.ipynb +++ b/docs/source/tutorials/latent-toroid-dimensions.ipynb @@ -27,17 +27,6 @@ "toroid_bounds = np.array([[-1e-3, 1e-3], [-4e-1, +4e-1]])" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "7394741f", - "metadata": {}, - "outputs": [], - "source": [ - "for task in agent.tasks:\n", - " print(task.name)" - ] - }, { "cell_type": "code", "execution_count": null, @@ -62,7 +51,7 @@ " db=db,\n", ")\n", "\n", - "RE(agent.initialize(acqf=\"qr\", n_init=24))" + "RE(agent.initialize(\"qr\", n_init=24))" ] }, { @@ -97,14 +86,6 @@ "agent.plot_feasibility()\n", "agent.plot_acquisition(strategy=[\"ei\", \"pi\", \"ucb\"])" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9093dd73", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/docs/source/tutorials/multi-task-sirepo.ipynb b/docs/source/tutorials/multi-task-sirepo.ipynb index d3fefd0..1d318be 100644 --- a/docs/source/tutorials/multi-task-sirepo.ipynb +++ b/docs/source/tutorials/multi-task-sirepo.ipynb @@ -55,7 +55,7 @@ " db=db,\n", ")\n", "\n", - "RE(agent.initialize(acqf=\"qr\", n_init=16))" + "RE(agent.initialize(\"qr\", n_init=16))" ] }, { @@ -98,7 +98,7 @@ }, "outputs": [], "source": [ - "RE(agent.learn(acqf=\"ei\", n_iter=2))\n", + "RE(agent.learn(\"ei\", n_iter=2))\n", "agent.plot_tasks()" ] }, @@ -128,7 +128,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16 (main, Mar 8 2023, 14:00:05) \n[GCC 11.2.0]" + "version": "3.9.16" }, "vscode": { "interpreter": { diff --git a/hypers.h5 b/hypers.h5 new file mode 100644 index 0000000000000000000000000000000000000000..476c476fdf3fa4ec3c0da54fa503e64f61e26a9b GIT binary patch literal 32040 zcmeHQZ)_Vy79S@mxb1ODX@O9B!W59)9)!>q@CmM2xE4}`ifM}o9ALYSjZGb|YdcNu z4h{<;5hVUtaJtJu5f-9SK7eGwmr50iB@hbg2^plG@&(BP33Bw{towih_XlP@@8R|C z&e(CY_HMhbWY_H%tl7ARC?B_*^pXDBC4T(?AMfL=1oJ2l z!i5XcX-H>#*oBKI{|Fg?@^x^3E$+a+-Ma}z;{7p&YNWD?U3syxv zt$mQrgPlQ^gcG4mCLWDP(ySyQXEN6ZqVa4plF85_CBIFTN4xdl4Kyjwen)y4Fa~zX z?6;R|zdkp^4`G9sv3wQUEB%696d~^0Tx;0*>YE|Lr2HYR@tDr97z^1Fjy%5H3T= zmp0!^pMZ*Noo5)vLLxiXk{v(5c&wtOioBLd6O6k~% zKTMvwwnSU0e0Jl;Vb)spiSUqg!DBD}th8y*&TT*c*B5gWfBN*~AOG>i+y;5>xx>lp z=AI6%y_vjxvBdEBU3~(spuCFrn`7meH^fWZx4#GYtDrun8%nIkhSJgZ|NPz7)&Eg& zxxfD9@ULlF>-W*y{{O-)tG|SH7@Cmk@T+$|`|Y0BUDCr#pFj2XP@nYG6{jw}ukWj2 zfBM~ZZ+)^wLL^Jiy#If{ru5RmXD*ZW?S1o@>zh5&=64=C`L~{9&<^T@XZb#Rxc=iaX0m^IFg_2~o-gpu z-L&TmdV6c!m*YCB-H&6w{q=Pg__x-+yj4Ds7j4=;AICZN*xhpqJs<3Z%jII4ubloR zj{ei#c`$KW)$_sp1LUFo@t^1DZ*6~ronx&cF(4p#5r;S^?gw0jnGqKp!*i@FxsaNm z0Jzbs#JDPm%a@38-IAJTS8n!h@~0DQ)X>^?g}pR8W%lbPOJJAYE5t1hfxY=QH^G75 
zz4S{l&JQ^EwPGAwWeD+}FUAdlIJZuWLw8?VFUA!??B5{9nPF1c4`OS7AQ3+lNyKBaJT{n#gyrN|ASuT)kw92J5=sxILz65| zTEx+M$xJpKiYK#!33)P-4jhyxl4CT^rL}g{^(RuPdj0W8C>a=+$0ibyTCZyDgpyf0 z6tz#SziPl(JoHE)lFX*#5hcz>C(j&;Op4_4L^d^%&4fdVh|++rzWV;^UN>mHjCu{) zYU(w#l}g9u^dQwgtY{a?#^t28e+GC8G}c4S!*cZC)f4HVn3IZEL34V(IrTKnHP%zt zF{Z5<9%&=-B(;VB)%IWbz+V@B_8V|?mASm{slgrU)CX%hiP$=>N@N<=G=i@M7 zoM8l#-jnSpbM(OoR>fqY>< z%j16DqeKYN&lH>&u$=R4R|fR+g&l~l{JFZ1v%XsET%Dc6olh{%(RW*ukp=%`Dyeb)y*9s#I@REA@Ut;@FXMj@;_lw1P8l z{6=g!e&+Gvcle+NSwBb3+meq~=G1a9=Goh*E%`N%kKkFtN2m6EG2qLd`#x}o(0%$T zZ9iUOieo=k+XtKF*XKC;Thp(P!6NM6AzqD3ATHk|##KOE@riK-5C?A%?hUT9ul&4o`FJ$l89rd3=-{KB!UF{la-$^3m$N>K4vC`<=1nc$vpX z!Ipfm^RPZ5f#cMFi`Y_U;5@8!hZt7|+!XwUDi7^JhwWw_!XD-3+10Eq?aDkpiVh!i zO4Og5$8E_6&T3Wsi*bMr0Gr`>F%!$Fr;eFu7FVN|*REL{MM4g<4s+zK#m&%L=;CVB zaV17_RZNG{16}=w*cVmG0A~Qr^p2?0KF)9z#1cA(9#hmnUJj;yp!S_$Kn>k-yXTZGYbJ>>9 zBRwDNz{l0g98`2uTTR+gpYMK-{YV9&;mVBJqY%{;MdGk4C%keUgk11R7p^cZ5Al`##nHVouRep>_ z@WFVw*{A?SMR=||bWn`5y3{33or l#>)*nCdRQPI|0oIwBp literal 0 HcmV?d00001 From 02c6631d3286c733d741bec0cd59c07e94a46769 Mon Sep 17 00:00:00 2001 From: Thomas Morris Date: Wed, 19 Jul 2023 12:51:13 -0400 Subject: [PATCH 5/8] fixed notebooks --- bloptools/tests/test_agent.py | 4 ++++ .../tutorials/latent-toroid-dimensions.ipynb | 2 +- hypers.h5 | Bin 32040 -> 0 bytes 3 files changed, 5 insertions(+), 1 deletion(-) delete mode 100644 hypers.h5 diff --git a/bloptools/tests/test_agent.py b/bloptools/tests/test_agent.py index 384ffe6..2d660b3 100644 --- a/bloptools/tests/test_agent.py +++ b/bloptools/tests/test_agent.py @@ -1,3 +1,5 @@ +import os + import pytest @@ -8,3 +10,5 @@ def test_writing_hypers(RE, agent): agent.save_hypers("hypers.h5") RE(agent.initialize("qr", n_init=8, hypers="hypers.h5")) + + os.remove("hypers.h5") diff --git a/docs/source/tutorials/latent-toroid-dimensions.ipynb b/docs/source/tutorials/latent-toroid-dimensions.ipynb index e4b5065..0bd3aa4 100644 --- a/docs/source/tutorials/latent-toroid-dimensions.ipynb +++ b/docs/source/tutorials/latent-toroid-dimensions.ipynb @@ -70,7 +70,7 @@ "metadata": {}, "outputs": [], "source": [ - "agent.tasks[0].regressor.covar_module.latent_dimensions" + "agent.tasks[0].regressor.covar_module.latent_transform" ] }, { diff --git a/hypers.h5 b/hypers.h5 deleted file mode 100644 index 476c476fdf3fa4ec3c0da54fa503e64f61e26a9b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 32040 zcmeHQZ)_Vy79S@mxb1ODX@O9B!W59)9)!>q@CmM2xE4}`ifM}o9ALYSjZGb|YdcNu z4h{<;5hVUtaJtJu5f-9SK7eGwmr50iB@hbg2^plG@&(BP33Bw{towih_XlP@@8R|C z&e(CY_HMhbWY_H%tl7ARC?B_*^pXDBC4T(?AMfL=1oJ2l z!i5XcX-H>#*oBKI{|Fg?@^x^3E$+a+-Ma}z;{7p&YNWD?U3syxv zt$mQrgPlQ^gcG4mCLWDP(ySyQXEN6ZqVa4plF85_CBIFTN4xdl4Kyjwen)y4Fa~zX z?6;R|zdkp^4`G9sv3wQUEB%696d~^0Tx;0*>YE|Lr2HYR@tDr97z^1Fjy%5H3T= zmp0!^pMZ*Noo5)vLLxiXk{v(5c&wtOioBLd6O6k~% zKTMvwwnSU0e0Jl;Vb)spiSUqg!DBD}th8y*&TT*c*B5gWfBN*~AOG>i+y;5>xx>lp z=AI6%y_vjxvBdEBU3~(spuCFrn`7meH^fWZx4#GYtDrun8%nIkhSJgZ|NPz7)&Eg& zxxfD9@ULlF>-W*y{{O-)tG|SH7@Cmk@T+$|`|Y0BUDCr#pFj2XP@nYG6{jw}ukWj2 zfBM~ZZ+)^wLL^Jiy#If{ru5RmXD*ZW?S1o@>zh5&=64=C`L~{9&<^T@XZb#Rxc=iaX0m^IFg_2~o-gpu z-L&TmdV6c!m*YCB-H&6w{q=Pg__x-+yj4Ds7j4=;AICZN*xhpqJs<3Z%jII4ubloR zj{ei#c`$KW)$_sp1LUFo@t^1DZ*6~ronx&cF(4p#5r;S^?gw0jnGqKp!*i@FxsaNm z0Jzbs#JDPm%a@38-IAJTS8n!h@~0DQ)X>^?g}pR8W%lbPOJJAYE5t1hfxY=QH^G75 zz4S{l&JQ^EwPGAwWeD+}FUAdlIJZuWLw8?VFUA!??B5{9nPF1c4`OS7AQ3+lNyKBaJT{n#gyrN|ASuT)kw92J5=sxILz65| zTEx+M$xJpKiYK#!33)P-4jhyxl4CT^rL}g{^(RuPdj0W8C>a=+$0ibyTCZyDgpyf0 z6tz#SziPl(JoHE)lFX*#5hcz>C(j&;Op4_4L^d^%&4fdVh|++rzWV;^UN>mHjCu{) 

From 02c6631d3286c733d741bec0cd59c07e94a46769 Mon Sep 17 00:00:00 2001
From: Thomas Morris
Date: Wed, 19 Jul 2023 12:51:13 -0400
Subject: [PATCH 5/8] fixed notebooks

---
 bloptools/tests/test_agent.py                    |   4 ++++
 .../tutorials/latent-toroid-dimensions.ipynb     |   2 +-
 hypers.h5                                        | Bin 32040 -> 0 bytes
 3 files changed, 5 insertions(+), 1 deletion(-)
 delete mode 100644 hypers.h5

diff --git a/bloptools/tests/test_agent.py b/bloptools/tests/test_agent.py
index 384ffe6..2d660b3 100644
--- a/bloptools/tests/test_agent.py
+++ b/bloptools/tests/test_agent.py
@@ -1,3 +1,5 @@
+import os
+
 import pytest
 
 
@@ -8,3 +10,5 @@ def test_writing_hypers(RE, agent):
     agent.save_hypers("hypers.h5")
 
     RE(agent.initialize("qr", n_init=8, hypers="hypers.h5"))
+
+    os.remove("hypers.h5")
diff --git a/docs/source/tutorials/latent-toroid-dimensions.ipynb b/docs/source/tutorials/latent-toroid-dimensions.ipynb
index e4b5065..0bd3aa4 100644
--- a/docs/source/tutorials/latent-toroid-dimensions.ipynb
+++ b/docs/source/tutorials/latent-toroid-dimensions.ipynb
@@ -70,7 +70,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "agent.tasks[0].regressor.covar_module.latent_dimensions"
+    "agent.tasks[0].regressor.covar_module.latent_transform"
    ]
   },
   {
diff --git a/hypers.h5 b/hypers.h5
deleted file mode 100644
index 476c476fdf3fa4ec3c0da54fa503e64f61e26a9b..0000000000000000000000000000000000000000
Binary files a/hypers.h5 and /dev/null differ

From 10912335fefd1b2a370c4282e2fccfa87d71bc70 Mon Sep 17 00:00:00 2001
From: Thomas Morris
Date: Wed, 19 Jul 2023 14:16:36 -0400
Subject: [PATCH 6/8] oops

---
 .pre-commit-config.yaml       | 4 ++--
 bloptools/bayesian/kernels.py | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 6de8684..8e6ebed 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -11,9 +11,9 @@ repos:
     rev: 23.1.0
     hooks:
       - id: black
-        language_version: python3.10
+        language_version: python3
       - id: black-jupyter
-        language_version: python3.10
+        language_version: python3
   - repo: https://github.com/pycqa/flake8
     rev: 6.0.0
     hooks:
diff --git a/bloptools/bayesian/kernels.py b/bloptools/bayesian/kernels.py
index 0a461a4..db2d99f 100644
--- a/bloptools/bayesian/kernels.py
+++ b/bloptools/bayesian/kernels.py
@@ -68,6 +68,7 @@ def __init__(
         raw_diag_entries_initial = (
             diag_entries_constraint.inverse_transform(torch.tensor(1e-1))
             * torch.ones(self.num_outputs, self.num_inputs).double()
+        )
         self.register_parameter(name="raw_diag_entries", parameter=torch.nn.Parameter(raw_diag_entries_initial))
         self.register_constraint(param_name="raw_diag_entries", constraint=diag_entries_constraint)

From ee213aa439156ab16f21859c22b108f8af19eda5 Mon Sep 17 00:00:00 2001
From: Thomas Morris
Date: Wed, 19 Jul 2023 14:21:01 -0400
Subject: [PATCH 7/8] fixed utils.py

---
 bloptools/bayesian/__init__.py | 24 ++++++------------------
 bloptools/utils.py             |  4 +---
 2 files changed, 7 insertions(+), 21 deletions(-)

diff --git a/bloptools/bayesian/__init__.py b/bloptools/bayesian/__init__.py
index f597e40..dd22000 100644
--- a/bloptools/bayesian/__init__.py
+++ b/bloptools/bayesian/__init__.py
@@ -123,9 +123,7 @@ def __init__(
 
         # make some test points for sampling
 
-        self.normalized_test_active_inputs = utils.normalized_sobol_sampler(
-            n=MAX_TEST_INPUTS, d=self.n_active_dofs
-        )
+        self.normalized_test_active_inputs = utils.normalized_sobol_sampler(n=MAX_TEST_INPUTS, d=self.n_active_dofs)
 
         n_per_active_dim = int(np.power(MAX_TEST_INPUTS, 1 / self.n_active_dofs))
 
@@ -190,9 +188,7 @@ def test_active_inputs_grid(self):
     def input_transform(self):
         coefficient = torch.tensor(self.dof_bounds.ptp(axis=1)).unsqueeze(0)
         offset = torch.tensor(self.dof_bounds.min(axis=1)).unsqueeze(0)
-        return botorch.models.transforms.input.AffineInputTransform(
-            d=self.n_dofs, coefficient=coefficient, offset=offset
-        )
+        return botorch.models.transforms.input.AffineInputTransform(d=self.n_dofs, coefficient=coefficient, offset=offset)
 
     def save_data(self, filepath="./agent_data.h5"):
         """
@@ -312,9 +308,7 @@ def tell(self, new_table=None, append=True, train=True, **kwargs):
             raise ValueError("There must be at least two feasible data points per task!")
 
            train_inputs = torch.tensor(self.inputs.loc[task.feasibility].values).double().unsqueeze(0)
-            train_targets = (
-                torch.tensor(task.targets.loc[task.feasibility].values).double().unsqueeze(0).unsqueeze(-1)
-            )
+            train_targets = torch.tensor(task.targets.loc[task.feasibility].values).double().unsqueeze(0).unsqueeze(-1)
 
             if train_inputs.ndim == 1:
                 train_inputs = train_inputs.unsqueeze(-1)
@@ -383,9 +377,7 @@ def train_models(self, **kwargs):
 
     def get_acquisition_function(self, acqf_identifier="ei", return_metadata=False, acqf_args={}, **kwargs):
         if not self._initialized:
-            raise RuntimeError(
-                f'Can\'t construct acquisition function "{acqf_identifier}" (the agent is not initialized!)'
-            )
+            raise RuntimeError(f'Can\'t construct acquisition function "{acqf_identifier}" (the agent is not initialized!)')
 
         if acqf_identifier.lower() in AVAILABLE_ACQFS["expected_improvement"]["identifiers"]:
             acqf = botorch.acquisition.analytic.LogExpectedImprovement(
@@ -443,9 +435,7 @@ def ask(self, acqf_identifier="ei", n=1, route=True, return_metadata=False):
                 x, acqf_meta = self.ask_single(acqf_identifier, return_metadata=True)
 
                 if i < (n - 1):
-                    task_samples = [
-                        task.regressor.posterior(torch.tensor(x)).sample().item() for task in self.tasks
-                    ]
+                    task_samples = [task.regressor.posterior(torch.tensor(x)).sample().item() for task in self.tasks]
                     fantasy_table = pd.DataFrame(
                         np.append(x, task_samples)[None], columns=[*self.dof_names, *self.task_names]
                     )
                     self.tell(fantasy_table, train=False)
@@ -996,9 +986,7 @@ def plot_history(self, x_key="index", show_all_tasks=False):
         )
         hist_axes = np.atleast_1d(hist_axes)
 
-        unique_strategies, acqf_index, acqf_inverse = np.unique(
-            self.table.acqf, return_index=True, return_inverse=True
-        )
+        unique_strategies, acqf_index, acqf_inverse = np.unique(self.table.acqf, return_index=True, return_inverse=True)
 
         sample_colors = np.array(DEFAULT_COLOR_LIST)[acqf_inverse]
 
diff --git a/bloptools/utils.py b/bloptools/utils.py
index f875cb2..492ef5c 100644
--- a/bloptools/utils.py
+++ b/bloptools/utils.py
@@ -46,9 +46,7 @@ def route(start_point, points):
     delay_matrix = np.sqrt(np.square(normalized_points[:, None, :] - normalized_points[None, :, :]).sum(axis=-1))
     delay_matrix = (1e4 * delay_matrix).astype(int)  # the solver requires integer costs
 
-    manager = pywrapcp.RoutingIndexManager(
-        len(total_points), 1, 0
-    )  # number of nodes, number of vehicles, start index
+    manager = pywrapcp.RoutingIndexManager(len(total_points), 1, 0)  # number of nodes, number of vehicles, start index
     routing = pywrapcp.RoutingModel(manager)
 
     def delay_callback(from_index, to_index):

From 0f985330119b9b5509ad8005af1268e2b7080841 Mon Sep 17 00:00:00 2001
From: Thomas Morris
Date: Wed, 19 Jul 2023 14:39:23 -0400
Subject: [PATCH 8/8] pandas context

---
 bloptools/bayesian/__init__.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/bloptools/bayesian/__init__.py b/bloptools/bayesian/__init__.py
index dd22000..c15e1d0 100644
--- a/bloptools/bayesian/__init__.py
+++ b/bloptools/bayesian/__init__.py
@@ -589,11 +589,12 @@ def targets(self):
 
     @property
     def feasible_for_all_tasks(self):
-        with pd.option_context("mode.use_inf_as_null", True):
-            feasible = ~self.targets.isna().any(axis=1)
-            for task in self.tasks:
-                if task.min is not None:
-                    feasible &= self.targets.loc[:, task.name].values > task.transform(task.min)
+        # TODO: make this more robust
+        # with pd.option_context("mode.use_inf_as_null", True):
+        feasible = ~self.targets.isna().any(axis=1)
+        for task in self.tasks:
+            if task.min is not None:
+                feasible &= self.targets.loc[:, task.name].values > task.transform(task.min)
         return feasible
 
     #  @property
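
[Editorial aside: on the kernel math in this series — `LatentKernel.latent_transform` is now the batched product `diag_matrix @ skew_matrix`, where `skew_matrix` is `exp(S)` for a skew-symmetric `S` (and is therefore orthogonal) and `diag_matrix` holds the per-dimension scalings. The standalone sketch below, with illustrative names that are not part of the library API, checks the orthogonality property the kernel relies on.

import torch

# S skew-symmetric  =>  exp(S) is orthogonal (the exponential map takes
# skew(n) into SO(n)), so D @ exp(S) scales inputs along a rotated latent basis
n = 4
A = torch.randn(n, n, dtype=torch.float64)
S = A - A.transpose(-1, -2)                          # skew-symmetric: S.T == -S
R = torch.linalg.matrix_exp(S)                       # orthogonal rotation
D = torch.diag(torch.rand(n, dtype=torch.float64))   # per-dimension scaling
T = D @ R                                            # analogue of latent_transform

assert torch.allclose(R.T @ R, torch.eye(n, dtype=torch.float64), atol=1e-10)
]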
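
[Editorial aside: likewise, the reworked `bloptools.utils.route` can be exercised on its own. A small usage sketch, assuming only numpy and the ortools solver already imported by `bloptools.utils`; the variable names are illustrative.

import numpy as np

from bloptools.utils import route

# order a batch of candidate points so that the normalized travel distance,
# starting from `start_point`, is approximately minimized
start_point = np.array([0.0, 0.0])
points = np.random.uniform(low=-5, high=5, size=(8, 2))

ordered_points = points[route(start_point, points)]
]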