diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6de8684..8e6ebed 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,9 +11,9 @@ repos: rev: 23.1.0 hooks: - id: black - language_version: python3.10 + language_version: python3 - id: black-jupyter - language_version: python3.10 + language_version: python3 - repo: https://github.com/pycqa/flake8 rev: 6.0.0 hooks: diff --git a/bloptools/bayesian/__init__.py b/bloptools/bayesian/__init__.py index 13c3297..c15e1d0 100644 --- a/bloptools/bayesian/__init__.py +++ b/bloptools/bayesian/__init__.py @@ -1,4 +1,7 @@ import logging +import time as ttime +import warnings +from collections import OrderedDict import bluesky.plan_stubs as bps import bluesky.plans as bp # noqa F401 @@ -11,10 +14,13 @@ import scipy as sp import torch from matplotlib import pyplot as plt +from matplotlib.patches import Patch from .. import utils from . import acquisition, models +warnings.filterwarnings("ignore", category=botorch.exceptions.warnings.InputDataWarning) + mpl.rc("image", cmap="coolwarm") DEFAULT_COLOR_LIST = ["dodgerblue", "tomato", "mediumseagreen", "goldenrod"] @@ -35,10 +41,26 @@ def default_digestion_plan(db, uid): # dofs: degrees of freedom are things that can change # inputs: these are the values of the dofs, which may be transformed/normalized # targets: these are what our model tries to predict from the inputs -# tasks: these are quantities that our agent will try to optimize over +# tasks: these are quantities that our agent will try to optimize over MAX_TEST_INPUTS = 2**11 +AVAILABLE_ACQFS = { + "expected_mean": { + "identifiers": ["em", "expected_mean"], + }, + "expected_improvement": { + "identifiers": ["ei", "expected_improvement"], + }, + "probability_of_improvement": { + "identifiers": ["pi", "probability_of_improvement"], + }, + "upper_confidence_bound": { + "identifiers": ["ucb", "upper_confidence_bound"], + "default_args": {"beta": 4}, + }, +} + class Agent: def __init__( @@ -50,16 +72,16 @@ def __init__( **kwargs, ): """ - A Bayesian optimization agent. + A Bayesian optimization agent. Parameters ---------- dofs : iterable of ophyd objects - The degrees of freedom that the agent can control, which determine the output of the model. + The degrees of freedom that the agent can control, which determine the output of the model. bounds : iterable of lower and upper bounds - The bounds on each degree of freedom. This should be an array of shape (n_dof, 2). + The bounds on each degree of freedom. This should be an array of shape (n_dofs, 2). tasks : iterable of tasks - The tasks which the agent will try to optimize. + The tasks which the agent will try to optimize. acquisition : Bluesky plan generator that takes arguments (dofs, inputs, dets) A plan that samples the beamline for some given inputs. digestion : function that takes arguments (db, uid) db : A databroker instance. 
""" - for dof in active_dofs: + self.active_dofs = list(np.atleast_1d(active_dofs)) + self.passive_dofs = list(np.atleast_1d(kwargs.get("passive_dofs", []))) + + for dof in self.dofs: dof.kind = "hinted" - self.active_dofs = np.atleast_1d(active_dofs) self.active_dof_bounds = np.atleast_2d(active_dof_bounds).astype(float) self.tasks = np.atleast_1d(tasks) self.db = db @@ -82,42 +106,37 @@ def __init__( self.acquisition_plan = kwargs.get("acquisition_plan", default_acquisition_plan) self.digestion = kwargs.get("digestion", default_digestion_plan) + self.decoherence = kwargs.get("decoherence", False) + self.tolerate_acquisition_errors = kwargs.get("tolerate_acquisition_errors", True) self.acquisition = acquisition.Acquisition() self.dets = np.atleast_1d(kwargs.get("detectors", [])) - self.passive_dofs = np.atleast_1d(kwargs.get("passive_dofs", [])) for i, task in enumerate(self.tasks): task.index = i - self.n_active_dof = self.active_dofs.size - self.n_passive_dof = self.passive_dofs.size - - self.dofs = np.r_[self.active_dofs, self.passive_dofs] - - self.n_dof = self.n_active_dof + self.n_passive_dof - self.n_tasks = len(self.tasks) self.training_iter = kwargs.get("training_iter", 256) - # self.test_normalized_active_inputs = self.sampler(n=self.MAX_TEST_INPUTS) - # make some test points for sampling - self.normalized_test_active_inputs = utils.normalized_sobol_sampler(n=MAX_TEST_INPUTS, d=self.n_active_dof) + self.normalized_test_active_inputs = utils.normalized_sobol_sampler(n=MAX_TEST_INPUTS, d=self.n_active_dofs) - n_per_active_dim = int(np.power(MAX_TEST_INPUTS, 1 / self.n_active_dof)) + n_per_active_dim = int(np.power(MAX_TEST_INPUTS, 1 / self.n_active_dofs)) self.normalized_test_active_inputs_grid = np.swapaxes( - np.r_[np.meshgrid(*self.n_active_dof * [np.linspace(0, 1, n_per_active_dim)])], 0, -1 + np.r_[np.meshgrid(*self.n_active_dofs * [np.linspace(0, 1, n_per_active_dim)])], 0, -1 ) self.table = pd.DataFrame() self._initialized = False + self._train_models = True + + self.a_priori_hypers = None def normalize_active_inputs(self, x): return (x - self.active_dof_bounds.min(axis=1)) / self.active_dof_bounds.ptp(axis=1) @@ -129,7 +148,23 @@ def active_inputs_sampler(self, n=MAX_TEST_INPUTS): """ Returns $n$ quasi-randomly sampled inputs in the bounded parameter space """ - return self.unnormalize_active_inputs(utils.normalized_sobol_sampler(n, self.n_active_dof)) + return self.unnormalize_active_inputs(utils.normalized_sobol_sampler(n, self.n_active_dofs)) + + @property + def dofs(self): + return np.append(self.active_dofs, self.passive_dofs) + + @property + def n_active_dofs(self): + return len(self.active_dofs) + + @property + def n_passive_dofs(self): + return len(self.passive_dofs) + + @property + def n_dofs(self): + return self.n_active_dofs + self.n_passive_dofs @property def test_active_inputs(self): @@ -147,23 +182,20 @@ def test_active_inputs_grid(self): # @property # def input_transform(self): - # coefficient = torch.tensor(self.inputs.values.ptp(axis=0)) - # offset = torch.tensor(self.inputs.values.min(axis=0)) - # return botorch.models.transforms.input.AffineInputTransform( - # d=self.n_dof, coefficient=coefficient, offset=offset - # ) + # return botorch.models.transforms.input.Normalize(d=self.n_dofs) @property def input_transform(self): - return botorch.models.transforms.input.Normalize(d=self.n_dof) + coefficient = torch.tensor(self.dof_bounds.ptp(axis=1)).unsqueeze(0) + offset = torch.tensor(self.dof_bounds.min(axis=1)).unsqueeze(0) + return 
botorch.models.transforms.input.AffineInputTransform(d=self.n_dofs, coefficient=coefficient, offset=offset) - def save(self, filepath="./agent_data.h5"): + def save_data(self, filepath="./agent_data.h5"): """ - Save the sampled inputs and targets of the agent to a file, which can be used - to initialize a future agent. + Save the sampled inputs and targets of the agent to a file, which can be used + to initialize a future agent. """ - with h5py.File(filepath, "w") as f: - f.create_dataset("inputs", data=self.inputs) + self.table.to_hdf(filepath, key="table") def forget(self, index): @@ -175,17 +207,53 @@ def sampler(self, n): """ min_power_of_two = 2 ** int(np.ceil(np.log(n) / np.log(2))) subset = np.random.choice(min_power_of_two, size=n, replace=False) - return sp.stats.qmc.Sobol(d=self.n_active_dof, scramble=True).random(n=min_power_of_two)[subset] + return sp.stats.qmc.Sobol(d=self.n_active_dofs, scramble=True).random(n=min_power_of_two)[subset] + + def _set_hypers(self, hypers): + for task in self.tasks: + task.regressor.load_state_dict(hypers[task.name]) + self.classifier.load_state_dict(hypers["classifier"]) + + @property + def hypers(self): + hypers = {"classifier": {}} + for key, value in self.classifier.state_dict().items(): + hypers["classifier"][key] = value + for task in self.tasks: + hypers[task.name] = {} + for key, value in task.regressor.state_dict().items(): + hypers[task.name][key] = value + + return hypers + + def save_hypers(self, filepath): + hypers = self.hypers + with h5py.File(filepath, "w") as f: + for model_key in hypers.keys(): + f.create_group(model_key) + for param_key, param_value in hypers[model_key].items(): + f[model_key].create_dataset(param_key, data=param_value) + + @staticmethod + def load_hypers(filepath): + hypers = {} + with h5py.File(filepath, "r") as f: + for model_key in f.keys(): + hypers[model_key] = OrderedDict() + for param_key, param_value in f[model_key].items(): + hypers[model_key][param_key] = torch.tensor(np.atleast_1d(param_value[()])) + return hypers def initialize( self, - filepath=None, acqf=None, n_init=4, + data=None, + hypers=None, ): """ - An initialization plan for the agent. - This must be run before the agent can learn. + An initialization plan for the agent. + This must be run before the agent can learn. It should be passed to a Bluesky RunEngine. """ @@ -193,12 +261,18 @@ def initialize( if self.initialization is not None: yield from self.initialization() - if filepath is not None: - self.tell(new_table=pd.read_hdf(filepath, key="table")) + if hypers is not None: + self.a_priori_hypers = self.load_hypers(hypers) + + if data is not None: + if type(data) == str: + self.tell(new_table=pd.read_hdf(data, key="table")) + else: + self.tell(new_table=data) # now let's get bayesian elif acqf in ["qr"]: - yield from self.learn(acqf=acqf, n_iter=1, n_per_iter=n_init, route=True) + yield from self.learn("qr", n_iter=1, n_per_iter=n_init, route=True) else: raise Exception( @@ -206,21 +280,11 @@ def initialize( ['qr'].""" ) - # if init_table is None: - # raise RuntimeError("Unhandled initialization error.") - - no_good_samples_tasks = np.isnan(self.table.loc[:, self.target_names]).all(axis=0) - if no_good_samples_tasks.any(): - raise ValueError( - f"The tasks {[self.tasks[i].name for i in np.where(no_good_samples_tasks)[0]]} " - f"don't have any good samples." 
- ) - self._initialized = True - def tell(self, new_table=None, append=True, **kwargs): + def tell(self, new_table=None, append=True, train=True, **kwargs): """ - Inform the agent about new inputs and targets for the model. + Inform the agent about new inputs and targets for the model. """ new_table = pd.DataFrame() if new_table is None else new_table @@ -230,22 +294,21 @@ def tell(self, new_table=None, append=True, **kwargs): self.table.loc[:, "total_fitness"] = self.table.loc[:, self.task_names].fillna(-np.inf).sum(axis=1) self.table.index = np.arange(len(self.table)) - # self.normalized_inputs = self.normalize_inputs(self.inputs) + skew_dims = [tuple(np.arange(self.n_active_dofs))] - self.all_targets_valid = ~np.isnan(self.targets).any(axis=1) + if not train: + hypers = self.hypers for task in self.tasks: task.targets = self.targets.loc[:, task.name] - # - # task.targets_mean = np.nanmean(task.targets, axis=0) - # task.targets_scale = np.nanstd(task.targets, axis=0) - # task.normalized_targets = self.normalized_targets.loc[:, task.name] + task.feasibility = self.feasible_for_all_tasks - task.feasibility = self.all_targets_valid.astype(int) + if not task.feasibility.sum() >= 2: + raise ValueError("There must be at least two feasible data points per task!") - train_inputs = torch.tensor(self.inputs.loc[task.feasibility == 1].values).double().unsqueeze(0) - train_targets = torch.tensor(task.targets.loc[task.feasibility == 1].values).double().unsqueeze(0).unsqueeze(-1) + train_inputs = torch.tensor(self.inputs.loc[task.feasibility].values).double().unsqueeze(0) + train_targets = torch.tensor(task.targets.loc[task.feasibility].values).double().unsqueeze(0).unsqueeze(-1) if train_inputs.ndim == 1: train_inputs = train_inputs.unsqueeze(-1) @@ -263,6 +326,7 @@ def tell(self, new_table=None, append=True, **kwargs): train_inputs=train_inputs, train_targets=train_targets, likelihood=likelihood, + skew_dims=skew_dims, input_transform=self.input_transform, outcome_transform=botorch.models.transforms.outcome.Standardize(m=1, batch_shape=torch.Size((1,))), ).double() @@ -277,40 +341,45 @@ def tell(self, new_table=None, append=True, **kwargs): ) dirichlet_likelihood = gpytorch.likelihoods.DirichletClassificationLikelihood( - torch.as_tensor(self.all_targets_valid).long(), learn_additional_noise=True + torch.as_tensor(self.feasible_for_all_tasks.values).long(), learn_additional_noise=True ).double() - self.dirichlet_classifier = models.LatentDirichletClassifier( + self.classifier = models.LatentDirichletClassifier( train_inputs=torch.tensor(self.inputs.values).double(), train_targets=dirichlet_likelihood.transformed_targets.transpose(-1, -2).double(), + skew_dims=skew_dims, likelihood=dirichlet_likelihood, input_transform=self.input_transform, ).double() - self.dirichlet_classifier_mll = gpytorch.mlls.ExactMarginalLogLikelihood( - self.dirichlet_classifier.likelihood, self.dirichlet_classifier - ) + self.classifier_mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.classifier.likelihood, self.classifier) self.feas_model = botorch.models.deterministic.GenericDeterministicModel( - f=lambda X: -self.dirichlet_classifier.log_prob(X).square() + f=lambda X: -self.classifier.log_prob(X).square() ) - self.fit_models() - - self.targets_model = botorch.models.model_list_gp_regression.ModelListGP(*[task.regressor for task in self.tasks]) + if self.a_priori_hypers is not None: + self._set_hypers(self.a_priori_hypers) + elif not train: + self._set_hypers(hypers) + else: + self.train_models() self.task_model = 
botorch.models.model.ModelList(*[task.regressor for task in self.tasks], self.feas_model) - def fit_models(self, **kwargs): + def train_models(self, **kwargs): + t0 = ttime.monotonic() for task in self.tasks: botorch.fit.fit_gpytorch_mll(task.regressor_mll, **kwargs) - botorch.fit.fit_gpytorch_mll(self.dirichlet_classifier_mll, **kwargs) + botorch.fit.fit_gpytorch_mll(self.classifier_mll, **kwargs) + if self.verbose: + print(f"trained models in {ttime.monotonic() - t0:.02f} seconds") - def get_acquisition_function(self, acqf_name="ei", return_metadata=False, acqf_args={}, **kwargs): + def get_acquisition_function(self, acqf_identifier="ei", return_metadata=False, acqf_args={}, **kwargs): if not self._initialized: - raise RuntimeError(f'Can\'t construct acquisition function "{acqf_name}" (the agent is not initialized!)') + raise RuntimeError(f'Can\'t construct acquisition function "{acqf_identifier}" (the agent is not initialized!)') - if acqf_name.lower() == "ei": + if acqf_identifier.lower() in AVAILABLE_ACQFS["expected_improvement"]["identifiers"]: acqf = botorch.acquisition.analytic.LogExpectedImprovement( self.task_model, best_f=self.best_sum_of_tasks, ) acqf_meta = {"name": "expected improvement", "args": {}} - elif acqf_name.lower() == "pi": + elif acqf_identifier.lower() in AVAILABLE_ACQFS["probability_of_improvement"]["identifiers"]: acqf = botorch.acquisition.analytic.LogProbabilityOfImprovement( self.task_model, best_f=self.best_sum_of_tasks, ) acqf_meta = {"name": "probability of improvement", "args": {}} - elif acqf_name.lower() == "ucb": - beta = acqf_args.get("beta", 0.1) + elif acqf_identifier.lower() in AVAILABLE_ACQFS["expected_mean"]["identifiers"]: + acqf = botorch.acquisition.analytic.UpperConfidenceBound( + self.task_model, + beta=0, + posterior_transform=self.task_scalarization, + **kwargs, + ) + acqf_meta = {"name": "expected mean", "args": {}} + + elif acqf_identifier.lower() in AVAILABLE_ACQFS["upper_confidence_bound"]["identifiers"]: + beta = AVAILABLE_ACQFS["upper_confidence_bound"]["default_args"]["beta"] acqf = botorch.acquisition.analytic.UpperConfidenceBound( self.task_model, beta=beta, ) acqf_meta = {"name": "upper confidence bound", "args": {"beta": beta}} else: - raise ValueError(f'Unrecognized acquisition acqf_name "{acqf_name}".') + raise ValueError(f'Unrecognized acquisition function identifier "{acqf_identifier}".') return (acqf, acqf_meta) if return_metadata else acqf - def ask( + def ask(self, acqf_identifier="ei", n=1, route=True, return_metadata=False): + if acqf_identifier.lower() == "qr": + x = self.active_inputs_sampler(n=n) + acqf_meta = {"name": "quasi-random", "args": {}} + + elif n == 1: + x, acqf_meta = self.ask_single(acqf_identifier, return_metadata=True) + return (x, acqf_meta) if return_metadata else x + + elif n > 1: + for i in range(n): + x, acqf_meta = self.ask_single(acqf_identifier, return_metadata=True) + + if i < (n - 1): + task_samples = [task.regressor.posterior(torch.tensor(x)).sample().item() for task in self.tasks] + fantasy_table = pd.DataFrame( + np.append(x, task_samples)[None], columns=[*self.dof_names, *self.task_names] + ) + self.tell(fantasy_table, train=False) + + x = self.active_inputs.iloc[-n:].values + + if n > 1: + 
self.forget(self.table.index[-(n - 1) :]) + + if route: + x = x[utils.route(self.read_active_dofs, x)] + + return (x, acqf_meta) if return_metadata else x + + def ask_single( self, - tasks=None, - classifier=None, - acqf_name="ei", - greedy=True, - n=1, - disappointment=0, - route=False, - cost_model=None, - n_test=1024, - optimize=True, + acqf_identifier="ei", return_metadata=False, ): """ - The next $n$ points to sample, recommended by the agent. + The next point to sample, recommended by the agent. """ - # if route: - # unrouted_points = self.ask( - # tasks=tasks, - # classifier=classifier, - # strategy=strategy, - # greedy=greedy, - # n=n, - # disappointment=disappointment, - # route=False, - # cost_model=cost_model, - # n_test=n_test, - # ) - - # routing_index, _ = utils.get_routing(self.read_active_dofs, unrouted_points) - # x = unrouted_points[routing_index] - - if acqf_name.lower() == "qr": - x = self.active_inputs_sampler(n=n) - acqf_meta = {"name": "quasi-random", "args": {}} + t0 = ttime.monotonic() - else: - acqf, acqf_meta = self.get_acquisition_function(acqf_name=acqf_name, return_metadata=True) - - BATCH_SIZE = 1 - NUM_RESTARTS = 7 - RAW_SAMPLES = 512 - - candidates, _ = botorch.optim.optimize_acqf( - acq_function=acqf, - bounds=torch.tensor(self.dof_bounds).T, - q=BATCH_SIZE, - num_restarts=NUM_RESTARTS, - raw_samples=RAW_SAMPLES, # used for intialization heuristic - options={"batch_limit": 3, "maxiter": 1024}, - ) + acqf, acqf_meta = self.get_acquisition_function(acqf_identifier=acqf_identifier, return_metadata=True) + + BATCH_SIZE = 1 + NUM_RESTARTS = 8 + RAW_SAMPLES = 256 + + candidates, _ = botorch.optim.optimize_acqf( + acq_function=acqf, + bounds=torch.tensor(self.dof_bounds).T, + q=BATCH_SIZE, + num_restarts=NUM_RESTARTS, + raw_samples=RAW_SAMPLES, # used for initialization heuristic + ) - x = candidates.detach().numpy()[..., self.dof_is_active_mask] + x = candidates.detach().numpy()[..., self.dof_is_active_mask] + + if self.verbose: + print(f"found point {x} in {ttime.monotonic() - t0:.02f} seconds") return (x, acqf_meta) if return_metadata else x def acquire(self, active_inputs): """ - Acquire and digest according to the agent's acquisition and digestion plans. + Acquire and digest according to the agent's acquisition and digestion plans. This should yield a table of sampled tasks with the same length as the sampled inputs. """ @@ -430,14 +512,24 @@ def acquire(self, active_inputs): return products - def learn(self, acqf, n_iter=1, n_per_iter=1, reuse_hypers=True, upsample=1, verbose=True, plots=[], **kwargs): + def learn( + self, + acqf_identifier, + n_iter=1, + n_per_iter=1, + reuse_hypers=True, + upsample=1, + verbose=True, + plots=[], + **kwargs, + ): """ This iterates the learning algorithm, looping over ask -> acquire -> tell. It should be passed to a Bluesky RunEngine. 
""" - for i in range(n_iter): - x, acqf_meta = self.ask(n=n_per_iter, acqf_name=acqf, return_metadata=True, **kwargs) + for iteration in range(n_iter): + x, acqf_meta = self.ask(n=n_per_iter, acqf_identifier=acqf_identifier, return_metadata=True, **kwargs) new_table = yield from self.acquire(x) @@ -457,15 +549,19 @@ def normalize_targets(self, targets): def unnormalize_targets(self, targets): return targets * self.targets_scale + self.targets_mean + @property + def batch_dimension(self): + return self.dof_names.index("training_batch") if "training_batch" in self.dof_names else None + @property def test_inputs(self): - test_passive_inputs = self.passive_inputs.values[-1][None] * np.ones(len(self.test_active_inputs))[..., None] + test_passive_inputs = self.read_passive_dofs[None] * np.ones(len(self.test_active_inputs))[..., None] return np.concatenate([self.test_active_inputs, test_passive_inputs], axis=-1) @property def test_inputs_grid(self): - test_passive_inputs_grid = self.passive_inputs.values[-1] * np.ones( - (*self.test_active_inputs_grid.shape[:-1], self.n_passive_dof) + test_passive_inputs_grid = self.read_passive_dofs * np.ones( + (*self.test_active_inputs_grid.shape[:-1], self.n_passive_dofs) ) return np.concatenate([self.test_active_inputs_grid, test_passive_inputs_grid], axis=-1) @@ -485,10 +581,20 @@ def passive_inputs(self): def targets(self): return self.table.loc[:, self.task_names].astype(float) + # @property + # def feasible(self): + # with pd.option_context("mode.use_inf_as_null", True): + # feasible = ~self.targets.isna() + # return feasible + @property - def feasible(self): - with pd.option_context("mode.use_inf_as_null", True): - feasible = ~self.targets.isna() + def feasible_for_all_tasks(self): + # TODO: make this more robust + # with pd.option_context("mode.use_inf_as_null", True): + feasible = ~self.targets.isna().any(axis=1) + for task in self.tasks: + if task.min is not None: + feasible &= self.targets.loc[:, task.name].values > task.transform(task.min) return feasible # @property @@ -522,12 +628,12 @@ def latest_passive_dof_values(self): def passive_dof_bounds(self): # food for thought: should this be the current values, or the latest recorded values? # the former leads to weird extrapolation (especially for time), and the latter to some latency. 
- # let's go with the first way for now - return np.outer(self.latest_passive_dof_values, [1.0, 1.0]) + # let's go with the second way for now + return np.outer(self.read_passive_dofs, [1.0, 1.0]) @property def dof_is_active_mask(self): - return np.r_[np.ones(self.n_active_dof), np.zeros(self.n_passive_dof)].astype(bool) + return np.r_[np.ones(self.n_active_dofs), np.zeros(self.n_passive_dofs)].astype(bool) @property def dof_bounds(self): @@ -590,21 +696,21 @@ def go_to_best_sum_of_tasks(self): yield from self.go_to(self.best_sum_of_tasks_inputs) def plot_tasks(self, **kwargs): - if self.n_active_dof == 1: + if self.n_active_dofs == 1: self._plot_tasks_one_dof(**kwargs) else: self._plot_tasks_many_dofs(**kwargs) def plot_feasibility(self, **kwargs): - if self.n_active_dof == 1: + if self.n_active_dofs == 1: self._plot_feas_one_dof(**kwargs) else: self._plot_feas_many_dofs(**kwargs) def plot_acquisition(self, **kwargs): - if self.n_active_dof == 1: + if self.n_active_dofs == 1: self._plot_acq_one_dof(**kwargs) else: @@ -613,10 +719,10 @@ def plot_acquisition(self, **kwargs): def _plot_feas_one_dof(self, size=32): self.class_fig, self.class_ax = plt.subplots(1, 1, figsize=(4, 4), sharex=True, constrained_layout=True) - self.class_ax.scatter(self.inputs.values, self.all_targets_valid.astype(int), s=size) + self.class_ax.scatter(self.inputs.values, self.feasible_for_all_tasks.astype(int), s=size) - x = torch.tensor(self.test_inputs_grid.reshape(-1, self.n_dof)).double() - log_prob = self.dirichlet_classifier.log_prob(x).detach().numpy().reshape(self.test_inputs_grid.shape[:-1]) + x = torch.tensor(self.test_inputs_grid.reshape(-1, self.n_dofs)).double() + log_prob = self.classifier.log_prob(x).detach().numpy().reshape(self.test_inputs_grid.shape[:-1]) self.class_ax.plot(self.test_inputs_grid.ravel(), np.exp(log_prob)) @@ -624,7 +730,7 @@ def _plot_feas_one_dof(self, size=32): def _plot_feas_many_dofs(self, axes=[0, 1], shading="nearest", cmap=DEFAULT_COLORMAP, size=32, gridded=None): if gridded is None: - gridded = self.n_dof == 2 + gridded = self.n_dofs == 2 self.class_fig, self.class_axes = plt.subplots( 1, 2, figsize=(8, 4), sharex=True, sharey=True, constrained_layout=True @@ -635,12 +741,12 @@ def _plot_feas_many_dofs(self, axes=[0, 1], shading="nearest", cmap=DEFAULT_COLO ax.set_ylabel(self.dofs[axes[1]].name) data_ax = self.class_axes[0].scatter( - *self.inputs.values.T[:2], s=size, c=self.all_targets_valid.astype(int), vmin=0, vmax=1, cmap=cmap + *self.inputs.values.T[:2], s=size, c=self.feasible_for_all_tasks.astype(int), vmin=0, vmax=1, cmap=cmap ) if gridded: - x = torch.tensor(self.test_inputs_grid.reshape(-1, self.n_dof)).double() - log_prob = self.dirichlet_classifier.log_prob(x).detach().numpy().reshape(self.test_inputs_grid.shape[:-1]) + x = torch.tensor(self.test_inputs_grid.reshape(-1, self.n_dofs)).double() + log_prob = self.classifier.log_prob(x).detach().numpy().reshape(self.test_inputs_grid.shape[:-1]) self.class_axes[1].pcolormesh( *np.swapaxes(self.test_inputs_grid, 0, -1), @@ -653,7 +759,7 @@ def _plot_feas_many_dofs(self, axes=[0, 1], shading="nearest", cmap=DEFAULT_COLO else: x = torch.tensor(self.test_inputs).double() - log_prob = self.dirichlet_classifier.log_prob(x).detach().numpy() + log_prob = self.classifier.log_prob(x).detach().numpy() self.class_axes[1].scatter(*x.detach().numpy().T[axes], s=size, c=np.exp(log_prob), vmin=0, vmax=1, cmap=cmap) @@ -700,7 +806,7 @@ def _plot_tasks_one_dof(self, size=32, lw=1e0): def _plot_tasks_many_dofs(self, axes=[0, 1], 
shading="nearest", cmap=DEFAULT_COLORMAP, gridded=None, size=32): if gridded is None: - gridded = self.n_dof == 2 + gridded = self.n_dofs == 2 self.task_fig, self.task_axes = plt.subplots( self.n_tasks, @@ -802,7 +908,7 @@ def _plot_acq_many_dofs(self, axes=[0, 1], shading="nearest", cmap=DEFAULT_COLOR ) if gridded is None: - gridded = self.n_active_dof == 2 + gridded = self.n_active_dofs == 2 self.acq_axes = np.atleast_1d(self.acq_axes) self.acq_fig.suptitle(f"(x,y)=({self.dofs[axes[0]].name},{self.dofs[axes[1]].name})") @@ -820,7 +926,7 @@ def _plot_acq_many_dofs(self, axes=[0, 1], shading="nearest", cmap=DEFAULT_COLOR self.acq_axes[iacqf].set_title(acqf_meta["name"]) obj_ax = self.acq_axes[iacqf].pcolormesh( - *np.swapaxes(self.test_inputs_grid, 0, -1), + *np.swapaxes(self.test_inputs_grid, 0, -1)[axes], obj.detach().numpy().reshape(grid_shape).T, shading=shading, cmap=cmap, @@ -866,3 +972,51 @@ def inspect_beam(self, index, border=None): if border is not None: plt.xlim(x_min - border * width_x, x_min + border * width_x) plt.ylim(y_min - border * width_y, y_min + border * width_y) + + def plot_history(self, x_key="index", show_all_tasks=False): + x = getattr(self.table, x_key).values + + num_task_plots = 1 + if show_all_tasks: + num_task_plots = self.n_tasks + 1 + + self.n_tasks + 1 if self.n_tasks > 1 else 1 + + hist_fig, hist_axes = plt.subplots( + num_task_plots, 1, figsize=(6, 4 * num_task_plots), sharex=True, constrained_layout=True, dpi=200 + ) + hist_axes = np.atleast_1d(hist_axes) + + unique_strategies, acqf_index, acqf_inverse = np.unique(self.table.acqf, return_index=True, return_inverse=True) + + sample_colors = np.array(DEFAULT_COLOR_LIST)[acqf_inverse] + + if show_all_tasks: + for itask, task in enumerate(self.tasks): + y = task.targets.values + hist_axes[itask].scatter(x, y, c=sample_colors) + hist_axes[itask].plot(x, y, lw=5e-1, c="k") + hist_axes[itask].set_ylabel(task.name) + + y = self.table.total_fitness + + cummax_y = np.array([np.nanmax(y[: i + 1]) for i in range(len(y))]) + + hist_axes[-1].scatter(x, y, c=sample_colors) + hist_axes[-1].plot(x, y, lw=5e-1, c="k") + + hist_axes[-1].plot(x, cummax_y, lw=5e-1, c="k", ls=":") + + hist_axes[-1].set_ylabel("total_fitness") + hist_axes[-1].set_xlabel(x_key) + + handles = [] + for i_acqf, acqf in enumerate(unique_strategies): + # i_acqf = np.argsort(acqf_index)[i_handle] + handles.append(Patch(color=DEFAULT_COLOR_LIST[i_acqf], label=acqf)) + legend = hist_axes[0].legend(handles=handles, fontsize=8) + legend.set_title("acquisition function") + + # plot_history(self, x_key="time") + + # plt.savefig("bo-history.pdf", dpi=256) diff --git a/bloptools/bayesian/kernels.py b/bloptools/bayesian/kernels.py index 0bb1430..db2d99f 100644 --- a/bloptools/bayesian/kernels.py +++ b/bloptools/bayesian/kernels.py @@ -5,40 +5,73 @@ class LatentKernel(gpytorch.kernels.Kernel): is_stationary = True - num_outputs = 1 + batch_inverse_lengthscale = 1e6 def __init__( self, num_inputs=1, - off_diag=True, + skew_dims=True, diag_prior=True, - scale_kernel=True, + scale_output=True, **kwargs, ): super(LatentKernel, self).__init__() self.num_inputs = num_inputs - self.n_off_diag = int(num_inputs * (num_inputs - 1) / 2) - - self.off_diag = off_diag - self.scale_kernel = scale_kernel + self.scale_output = scale_output self.nu = kwargs.get("nu", 1.5) + self.batch_dimension = kwargs.get("batch_dimension", None) + + if type(skew_dims) is bool: + if skew_dims: + self.skew_dims = [torch.arange(self.num_inputs)] + else: + self.skew_dims = [torch.arange(0)] + 
elif hasattr(skew_dims, "__iter__"): + self.skew_dims = [torch.tensor(np.atleast_1d(skew_group)) for skew_group in skew_dims] + else: + raise ValueError('arg "skew_dims" must be True, False, or an iterable of tuples of ints.') + + # if not all([len(skew_group) >= 2 for skew_group in self.skew_dims]): + # raise ValueError("must have at least two dims per skew group") + skewed_dims = [dim for skew_group in self.skew_dims for dim in skew_group] + if not len(set(skewed_dims)) == len(skewed_dims): + raise ValueError("values in skew_dims must be unique") + if not max(skewed_dims) < self.num_inputs: + raise ValueError("invalid dimension index in skew_dims") + + skew_group_submatrix_indices = [] + for dim in range(self.num_outputs): + for skew_group in self.skew_dims: + j, k = skew_group[torch.triu_indices(len(skew_group), len(skew_group), 1)].unsqueeze(1) + i = dim * torch.ones(j.shape).long() + skew_group_submatrix_indices.append(torch.cat((i, j, k), dim=0)) + + self.diag_matrix_indices = tuple( + [ + torch.kron(torch.arange(self.num_outputs), torch.ones(self.num_inputs)).long(), + *2 * [torch.arange(self.num_inputs).repeat(self.num_outputs)], + ] + ) - # kernel_scale_constraint = gpytorch.constraints.Positive() - diag_entries_constraint = gpytorch.constraints.Positive() # gpytorch.constraints.Interval(5e-1, 1e2) - skew_entries_constraint = gpytorch.constraints.Interval(-1e0, 1e0) + self.skew_matrix_indices = ( + tuple(torch.cat(skew_group_submatrix_indices, dim=1)) + if len(skew_group_submatrix_indices) > 0 + else tuple([[], []]) + ) - # diag_entries_initial = np.ones() - # np.sqrt(diag_entries_constraint.lower_bound * diag_entries_constraint.upper_bound) - raw_diag_entries_initial = diag_entries_constraint.inverse_transform(torch.tensor(2)) + self.n_skew_entries = len(self.skew_matrix_indices[0]) - self.register_parameter( - name="raw_diag_entries", - parameter=torch.nn.Parameter(raw_diag_entries_initial * torch.ones(self.num_outputs, self.num_inputs).double()), + diag_entries_constraint = gpytorch.constraints.Positive() + raw_diag_entries_initial = ( + diag_entries_constraint.inverse_transform(torch.tensor(1e-1)) + * torch.ones(self.num_outputs, self.num_inputs).double() ) - self.register_constraint("raw_diag_entries", constraint=diag_entries_constraint) + + self.register_parameter(name="raw_diag_entries", parameter=torch.nn.Parameter(raw_diag_entries_initial)) + self.register_constraint(param_name="raw_diag_entries", constraint=diag_entries_constraint) if diag_prior: self.register_prior( @@ -48,29 +81,28 @@ def __init__( setting_closure=lambda m, v: m._set_diag_entries(v), ) - if self.off_diag: - self.register_parameter( - name="raw_skew_entries", - parameter=torch.nn.Parameter(torch.zeros(self.num_outputs, self.n_off_diag).double()), - ) - self.register_constraint("raw_skew_entries", skew_entries_constraint) + if self.n_skew_entries > 0: + skew_entries_constraint = gpytorch.constraints.Interval(-1e0, 1e0) + skew_entries_initial = torch.zeros((self.num_outputs, self.n_skew_entries), dtype=torch.float64) + self.register_parameter(name="raw_skew_entries", parameter=torch.nn.Parameter(skew_entries_initial)) + self.register_constraint(param_name="raw_skew_entries", constraint=skew_entries_constraint) - if self.scale_kernel: - kernel_scale_constraint = gpytorch.constraints.Positive() - kernel_scale_prior = gpytorch.priors.GammaPrior(concentration=2, rate=0.15) + if self.scale_output: + outputscale_constraint = gpytorch.constraints.Positive() + outputscale_prior = 
gpytorch.priors.GammaPrior(concentration=2, rate=0.15) self.register_parameter( - name="raw_kernel_scale", + name="raw_outputscale", parameter=torch.nn.Parameter(torch.ones(1)), ) - self.register_constraint("raw_kernel_scale", constraint=kernel_scale_constraint) + self.register_constraint("raw_outputscale", constraint=outputscale_constraint) self.register_prior( - name="kernel_scale_prior", - prior=kernel_scale_prior, - param_or_closure=lambda m: m.kernel_scale, - setting_closure=lambda m, v: m._set_kernel_scale(v), + name="outputscale_prior", + prior=outputscale_prior, + param_or_closure=lambda m: m.outputscale, + setting_closure=lambda m, v: m._set_outputscale(v), ) @property @@ -82,8 +114,8 @@ def skew_entries(self): return self.raw_skew_entries_constraint.transform(self.raw_skew_entries) @property - def kernel_scale(self): - return self.raw_kernel_scale_constraint.transform(self.raw_kernel_scale) + def outputscale(self): + return self.raw_outputscale_constraint.transform(self.raw_outputscale) @diag_entries.setter def diag_entries(self, value): @@ -93,9 +125,9 @@ def diag_entries(self, value): def skew_entries(self, value): self._set_skew_entries(value) - @kernel_scale.setter - def kernel_scale(self, value): - self._set_kernel_scale(value) + @outputscale.setter + def outputscale(self, value): + self._set_outputscale(value) def _set_diag_entries(self, value): if not torch.is_tensor(value): @@ -107,40 +139,37 @@ def _set_skew_entries(self, value): value = torch.as_tensor(value).to(self.raw_skew_entries) self.initialize(raw_skew_entries=self.raw_skew_entries_constraint.inverse_transform(value)) - def _set_kernel_scale(self, value): + def _set_outputscale(self, value): if not torch.is_tensor(value): - value = torch.as_tensor(value).to(self.raw_kernel_scale) - self.initialize(raw_kernel_scale=self.raw_kernel_scale_constraint.inverse_transform(value)) + value = torch.as_tensor(value).to(self.raw_outputscale) + self.initialize(raw_outputscale=self.raw_outputscale_constraint.inverse_transform(value)) @property - def output_scale(self): - return self.kernel_scale.sqrt() + def skew_matrix(self): + S = torch.zeros((self.num_outputs, self.num_inputs, self.num_inputs), dtype=torch.float64) + if self.n_skew_entries > 0: + # to construct an orthogonal matrix. fun fact: exp(skew(N)) is the generator of SO(N) + S[self.skew_matrix_indices] = self.skew_entries + S += -S.transpose(-1, -2) + return torch.linalg.matrix_exp(S) @property - def latent_dimensions(self): - # no rotations - if not self.off_diag: - T = torch.eye(self.num_inputs, dtype=torch.float64) - - # construct an orthogonal matrix. 
fun fact: exp(skew(N)) is the generator of SO(N) - else: - A = torch.zeros((self.num_inputs, self.num_inputs)).double() - A[np.triu_indices(self.num_inputs, k=1)] = self.skew_entries - A += -A.transpose(-1, -2) - T = torch.linalg.matrix_exp(A) - - diagonal_transform = torch.cat([torch.diag(entries).unsqueeze(0) for entries in self.diag_entries], dim=0) - T = torch.matmul(diagonal_transform, T) + def diag_matrix(self): + D = torch.zeros((self.num_outputs, self.num_inputs, self.num_inputs), dtype=torch.float64) + D[self.diag_matrix_indices] = self.diag_entries.ravel() + return D - return T + @property + def latent_transform(self): + return torch.matmul(self.diag_matrix, self.skew_matrix) def forward(self, x1, x2, diag=False, **params): # adapted from the Matern kernel mean = x1.reshape(-1, x1.size(-1)).mean(0)[(None,) * (x1.dim() - 1)] - trans_x1 = torch.matmul(self.latent_dimensions.unsqueeze(1), (x1 - mean).unsqueeze(-1)).squeeze(-1) - trans_x2 = torch.matmul(self.latent_dimensions.unsqueeze(1), (x2 - mean).unsqueeze(-1)).squeeze(-1) + trans_x1 = torch.matmul(self.latent_transform.unsqueeze(1), (x1 - mean).unsqueeze(-1)).squeeze(-1) + trans_x2 = torch.matmul(self.latent_transform.unsqueeze(1), (x2 - mean).unsqueeze(-1)).squeeze(-1) distance = self.covar_dist(trans_x1, trans_x2, diag=diag, **params) - return self.kernel_scale * (1 + distance) * torch.exp(-distance) + return (self.outputscale if self.scale_output else 1.0) * (1 + distance) * torch.exp(-distance) diff --git a/bloptools/bayesian/models.py b/bloptools/bayesian/models.py index 21ebc36..a160703 100644 --- a/bloptools/bayesian/models.py +++ b/bloptools/bayesian/models.py @@ -1,99 +1,41 @@ import botorch import gpytorch import torch -from botorch.models.gpytorch import GPyTorchModel -from gpytorch.models import ExactGP from . 
import kernels -class LatentDirichletClassifier(botorch.models.gp_regression.SingleTaskGP): - def __init__(self, train_inputs, train_targets, *args, **kwargs): +class LatentGP(botorch.models.gp_regression.SingleTaskGP): + def __init__(self, train_inputs, train_targets, skew_dims=True, *args, **kwargs): super().__init__(train_inputs, train_targets, *args, **kwargs) - self.mean_module = gpytorch.means.ConstantMean() + self.mean_module = gpytorch.means.ConstantMean(constant_prior=gpytorch.priors.NormalPrior(loc=0, scale=1)) + self.covar_module = kernels.LatentKernel( num_inputs=train_inputs.shape[-1], num_outputs=train_targets.shape[-1], - off_diag=True, + skew_dims=skew_dims, diag_prior=True, - scale_output=True, + scale=True, **kwargs ) - def log_prob(self, x, n_samples=256): - *input_shape, n_dim = x.shape - samples = self.posterior(x.reshape(-1, n_dim)).sample(torch.Size((n_samples,))).exp() - return torch.log((samples / samples.sum(-1, keepdim=True)).mean(0)[:, 1]).reshape(*input_shape, 1) - -class LatentGP(botorch.models.gp_regression.SingleTaskGP): - def __init__(self, train_inputs, train_targets, *args, **kwargs): +class LatentDirichletClassifier(botorch.models.gp_regression.SingleTaskGP): + def __init__(self, train_inputs, train_targets, skew_dims=True, *args, **kwargs): super().__init__(train_inputs, train_targets, *args, **kwargs) - self.mean_module = gpytorch.means.ConstantMean(constant_prior=gpytorch.priors.NormalPrior(loc=0, scale=1)) - + self.mean_module = gpytorch.means.ConstantMean() self.covar_module = kernels.LatentKernel( num_inputs=train_inputs.shape[-1], num_outputs=train_targets.shape[-1], - off_diag=True, + skew_dims=skew_dims, diag_prior=True, - scale_output=True, + scale=True, **kwargs ) - -class OldBoTorchSingleTaskGP(ExactGP, GPyTorchModel): - def __init__(self, train_inputs, train_targets, likelihood): - super(OldBoTorchSingleTaskGP, self).__init__(train_inputs, train_targets, likelihood) - self.mean_module = gpytorch.means.ConstantMean() - self.covar_module = gpytorch.kernels.ScaleKernel( - kernels.LatentMaternKernel(n_dim=train_inputs.shape[-1], off_diag=True, diagonal_prior=True) - ) - - def forward(self, x): - mean_x = self.mean_module(x) - covar_x = self.covar_module(x) - - return gpytorch.distributions.MultivariateNormal(mean_x, covar_x) - - -class BoTorchMultiTaskGP(ExactGP, GPyTorchModel): - _num_outputs = 1 # to inform GPyTorchModel API - - def __init__(self, train_inputs, train_targets, likelihood): - self._num_outputs = train_targets.shape[-1] - - super(BoTorchMultiTaskGP, self).__init__(train_inputs, train_targets, likelihood) - self.mean_module = gpytorch.means.MultitaskMean(gpytorch.means.ConstantMean(), num_tasks=self._num_outputs) - self.covar_module = gpytorch.kernels.MultitaskKernel( - kernels.LatentMaternKernel(n_dim=train_inputs.shape[-1], off_diag=True, diagonal_prior=True), - num_tasks=self._num_outputs, - rank=1, - ) - - def forward(self, x): - mean_x = self.mean_module(x) - covar_x = self.covar_module(x) - return gpytorch.distributions.MultitaskMultivariateNormal(mean_x, covar_x) - - -class OldBoTorchDirichletClassifier(gpytorch.models.ExactGP, botorch.models.gpytorch.GPyTorchModel): - _num_outputs = 1 # to inform GPyTorchModel API - - def __init__(self, train_inputs, train_targets, likelihood): - super(OldBoTorchDirichletClassifier, self).__init__(train_inputs, train_targets, likelihood) - self.mean_module = gpytorch.means.ConstantMean(batch_shape=len(train_targets.unique())) - self.covar_module = gpytorch.kernels.ScaleKernel( - 
kernels.LatentMaternKernel(n_dim=train_inputs.shape[-1], off_diag=False, diagonal_prior=False) - ) - - def forward(self, x): - mean_x = self.mean_module(x) - covar_x = self.covar_module(x) - return gpytorch.distributions.MultivariateNormal(mean_x, covar_x) - def log_prob(self, x, n_samples=256): *input_shape, n_dim = x.shape samples = self.posterior(x.reshape(-1, n_dim)).sample(torch.Size((n_samples,))).exp() - return torch.log((samples / samples.sum(-3, keepdim=True)).mean(0)[1]).reshape(*input_shape) + return torch.log((samples / samples.sum(-1, keepdim=True)).mean(0)[:, 1]).reshape(*input_shape, 1) diff --git a/bloptools/devices.py b/bloptools/devices.py index 6e56d2a..5840fbd 100644 --- a/bloptools/devices.py +++ b/bloptools/devices.py @@ -4,8 +4,12 @@ from ophyd import Device, Signal, SignalRO +def dummy_dof(name): + return Signal(name=name, value=0.0) + + def dummy_dofs(n=2): - return [Signal(name=f"x{i+1}", value=0) for i in range(n)] + return [dummy_dof(name=f"x{i+1}") for i in range(n)] def get_dummy_device(name="dofs", n=2): @@ -22,8 +26,26 @@ def get_dummy_device(name="dofs", n=2): class TimeReadback(SignalRO): + """ + Returns the current timestamp. + """ + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def get(self): return ttime.time() + + +class ConstantReadback(SignalRO): + """ + Returns a constant every time you read it (more useful than you'd think). + """ + + def __init__(self, constant=1, *args, **kwargs): + super().__init__(*args, **kwargs) + + self.constant = constant + + def get(self): + return self.constant diff --git a/bloptools/experiments/sirepo/tes.py b/bloptools/experiments/sirepo/tes.py index a1b1267..cbf0599 100644 --- a/bloptools/experiments/sirepo/tes.py +++ b/bloptools/experiments/sirepo/tes.py @@ -20,18 +20,19 @@ def image_digestion(db, uid, image_name): flux = image.sum() n_y, n_x = image.shape - X, Y = np.meshgrid(np.linspace(*horizontal_extent, n_x), np.linspace(*vertical_extent, n_y)) + # reject if there is no flux, or we can't estimate the position and size of the beam for some reason + bad = False + bad |= not (flux > 0) + if not bad: + X, Y = np.meshgrid(np.linspace(*horizontal_extent, n_x), np.linspace(*vertical_extent, n_y)) - mean_x = np.sum(X * image) / np.sum(image) - mean_y = np.sum(Y * image) / np.sum(image) + mean_x = np.sum(X * image) / np.sum(image) + mean_y = np.sum(Y * image) / np.sum(image) - sigma_x = np.sqrt(np.sum((X - mean_x) ** 2 * image) / np.sum(image)) - sigma_y = np.sqrt(np.sum((Y - mean_y) ** 2 * image) / np.sum(image)) + sigma_x = np.sqrt(np.sum((X - mean_x) ** 2 * image) / np.sum(image)) + sigma_y = np.sqrt(np.sum((Y - mean_y) ** 2 * image) / np.sum(image)) - # reject if there is no flux, or we can't estimate the position and size of the beam - bad = False - bad |= not (flux > 0) - bad |= any(np.isnan([mean_x, mean_y, sigma_x, sigma_y])) + bad |= any(np.isnan([mean_x, mean_y, sigma_x, sigma_y])) if not bad: products.loc[index, ["flux", "x_pos", "y_pos", "x_width", "y_width"]] = ( diff --git a/bloptools/tasks.py b/bloptools/tasks.py index 61ce47b..aaa12ba 100644 --- a/bloptools/tasks.py +++ b/bloptools/tasks.py @@ -2,10 +2,12 @@ class Task: MIN_NOISE_LEVEL = 1e-6 MAX_NOISE_LEVEL = 1e-2 - def __init__(self, key, kind="max", name=None, transform=None, **kwargs): + def __init__(self, key, kind="max", min=None, max=None, name=None, transform=None, **kwargs): self.key = key + self.min = min + self.max = max self.kind = kind - self.name = name if name is not None else f"{kind}_{key}_fitness" + self.name = name if name 
is not None else f"{key}_fitness" self.transform = transform if transform is not None else lambda x: x self.weight = 1.0 diff --git a/bloptools/test_functions.py b/bloptools/test_functions.py index 25693a2..9b1983e 100644 --- a/bloptools/test_functions.py +++ b/bloptools/test_functions.py @@ -13,11 +13,43 @@ def himmelblau(x1, x2): return (x1**2 + x2 - 11) ** 2 + (x1 + x2**2 - 7) ** 2 +def constrained_himmelblau(x1, x2): + if x1**2 + x2**2 > 50: + return np.nan + return himmelblau(x1, x2) + + +def skewed_himmelblau(x1, x2): + _x1 = 2 * x1 + x2 + _x2 = 0.5 * (x1 - 2 * x2) + + return constrained_himmelblau(_x1, _x2) + + +def bukin(x1, x2): + return 100 * np.sqrt(np.abs(x2 - 1e-2 * x1**2)) + 0.01 * np.abs(x1) + + def rastrigin(*x): X = np.c_[x] return 10 * X.shape[-1] + (X**2 - 10 * np.cos(2 * np.pi * X)).sum(axis=1) +def styblinski_tang(*x): + X = np.c_[x] + return 0.5 * (X**4 - 16 * X**2 + 5 * X).sum(axis=1) + + +def ackley(*x): + X = np.c_[x] + return ( + -20 * np.exp(-0.2 * np.sqrt(0.5 * (X**2).sum(axis=1))) + - np.exp(0.5 * np.cos(2 * np.pi * X).sum(axis=1)) + + np.e + + 20 + ) + + def gaussian_beam_waist(x1, x2): return np.sqrt(1 + 0.25 * (x1 - x2) ** 2 + 16 * (x1 + x2 - 2) ** 2) diff --git a/bloptools/tests/conftest.py b/bloptools/tests/conftest.py index 7324618..9e23e8c 100644 --- a/bloptools/tests/conftest.py +++ b/bloptools/tests/conftest.py @@ -12,6 +12,11 @@ from sirepo_bluesky.sirepo_bluesky import SirepoBluesky from sirepo_bluesky.srw_handler import SRWFileHandler +from bloptools.bayesian import Agent +from bloptools.tasks import Task + +from .. import devices, test_functions + @pytest.fixture(scope="function") def db(): @@ -49,6 +54,40 @@ def RE(db): return RE +@pytest.fixture(scope="function") +def agent(db): + """ + A simple agent maximizing two Styblinski-Tang functions + """ + + dofs = devices.dummy_dofs(n=2) + bounds = [(-5, 5), (-5, 5)] + + def digestion(db, uid): + products = db[uid].table() + + for index, entry in products.iterrows(): + products.loc[index, "loss1"] = test_functions.styblinski_tang(entry.x1, entry.x2) + products.loc[index, "loss2"] = test_functions.styblinski_tang(entry.x1, -entry.x2) + + return products + + tasks = [Task(key="loss1", kind="min"), Task(key="loss2", kind="min")] + + agent = Agent( + active_dofs=dofs, + passive_dofs=[], + active_dof_bounds=bounds, + tasks=tasks, + digestion=digestion, + db=db, + verbose=True, + tolerate_acquisition_errors=False, + ) + + return agent + + @pytest.fixture(scope="function") def make_dirs(): root_dir = "/tmp/sirepo-bluesky-data" diff --git a/bloptools/tests/test_agent.py b/bloptools/tests/test_agent.py new file mode 100644 index 0000000..2d660b3 --- /dev/null +++ b/bloptools/tests/test_agent.py @@ -0,0 +1,14 @@ +import os + +import pytest + + +@pytest.mark.agent +def test_writing_hypers(RE, agent): + RE(agent.initialize("qr", n_init=32)) + + agent.save_hypers("hypers.h5") + + RE(agent.initialize("qr", n_init=8, hypers="hypers.h5")) + + os.remove("hypers.h5") diff --git a/bloptools/tests/test_bayesian_shadow.py b/bloptools/tests/test_bayesian_shadow.py index aa6b908..d281582 100644 --- a/bloptools/tests/test_bayesian_shadow.py +++ b/bloptools/tests/test_bayesian_shadow.py @@ -33,9 +33,9 @@ def test_bayesian_agent_tes_shadow(RE, db, shadow_tes_simulation): db=db, ) - RE(agent.initialize(acqf="qr", n_init=4)) + RE(agent.initialize("qr", n_init=4)) - RE(agent.learn(acqf="ei", n_iter=2)) - RE(agent.learn(acqf="pi", n_iter=2)) + RE(agent.learn("ei", n_iter=2)) + RE(agent.learn("pi", n_iter=2)) agent.plot_tasks() diff 
--git a/bloptools/tests/test_bayesian_test_funcs.py b/bloptools/tests/test_bayesian_test_funcs.py index 058b709..04eeb55 100644 --- a/bloptools/tests/test_bayesian_test_funcs.py +++ b/bloptools/tests/test_bayesian_test_funcs.py @@ -20,9 +20,9 @@ def test_bayesian_agent_himmelblau(RE, db): db=db, ) - RE(agent.initialize(acqf="qr", n_init=16)) + RE(agent.initialize("qr", n_init=16)) - RE(agent.learn(acqf="ei", n_iter=2)) + RE(agent.learn("ei", n_iter=2)) agent.plot_tasks() @@ -43,8 +43,8 @@ def test_bayesian_agent_mock_kbs(RE, db): db=db, ) - RE(agent.initialize(acqf="qr", n_init=16)) + RE(agent.initialize("qr", n_init=16)) - RE(agent.learn(acqf="ei", n_iter=4)) + RE(agent.learn("ei", n_iter=4)) agent.plot_tasks() diff --git a/bloptools/utils.py b/bloptools/utils.py index 82953af..492ef5c 100644 --- a/bloptools/utils.py +++ b/bloptools/utils.py @@ -36,24 +36,17 @@ def mprod(*M): return res -def get_routing(origin, points): +def route(start_point, points): """ - Finds an efficient routing between $n$ points, after normalizing each dimension. - Returns $n-1$ indices, ignoring the zeroeth index (the origin). + Returns the indices of the most efficient way to visit `points`, starting from `start_point`. """ - if not len(points) > 1: - return np.array([0]), 1.0 + total_points = np.r_[np.atleast_2d(start_point), points] + normalized_points = (total_points - total_points.min(axis=0)) / total_points.ptp(axis=0) + delay_matrix = np.sqrt(np.square(normalized_points[:, None, :] - normalized_points[None, :, :]).sum(axis=-1)) + delay_matrix = (1e4 * delay_matrix).astype(int) # it likes integers idk - _points = np.r_[np.atleast_2d(origin), points] - - rel_points = _points / np.array([std if std > 0 else 1.0 for std in points.std(axis=0)]) - - # delay_matrix = gpo.delay_estimate(rel_points[:,None,:] - rel_points[None,:,:]) - delay_matrix = np.sqrt(np.square(rel_points[:, None, :] - rel_points[None, :, :]).sum(axis=-1)) - delay_matrix = (1e3 * delay_matrix).astype(int) # it likes integers idk - - manager = pywrapcp.RoutingIndexManager(len(_points), 1, 0) # number of depots, number of salesmen, starting index + manager = pywrapcp.RoutingIndexManager(len(total_points), 1, 0) # number of depots, number of salesmen, starting index routing = pywrapcp.RoutingModel(manager) def delay_callback(from_index, to_index): @@ -69,17 +62,17 @@ def delay_callback(from_index, to_index): search_parameters.first_solution_strategy = routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC solution = routing.SolveWithParameters(search_parameters) - dir(solution) index = routing.Start(0) - route_indices, route_delays = [], [] + route_indices, route_delays = [0], [] while not routing.IsEnd(index): previous_index = index index = solution.Value(routing.NextVar(index)) route_delays.append(routing.GetArcCostForVehicle(previous_index, index, 0)) route_indices.append(index) - return np.array(route_indices)[:-1] - 1, route_delays[:-1] + # omit the first and last indices, which correspond to the start + return np.array(route_indices)[1:-1] - 1 def get_movement_time(x, v_max, a): diff --git a/docs/source/tutorials/constrained-himmelblau.ipynb b/docs/source/tutorials/constrained-himmelblau.ipynb index 8a8f20e..0a36267 100644 --- a/docs/source/tutorials/constrained-himmelblau.ipynb +++ b/docs/source/tutorials/constrained-himmelblau.ipynb @@ -65,10 +65,7 @@ " products = db[uid].table()\n", "\n", " for index, entry in products.iterrows():\n", - " if entry.x1**2 + entry.x2**2 < 50:\n", - " products.loc[index, \"himmelblau\"] = 
test_functions.himmelblau(entry.x1, entry.x2)\n", - " else:\n", - " products.loc[index, \"himmelblau\"] = np.nan\n", + " products.loc[index, \"himmelblau\"] = test_functions.constrained_himmelblau(entry.x1, entry.x2)\n", "\n", " return products" ] @@ -97,7 +94,7 @@ "from bloptools.tasks import Task\n", "\n", "dofs = bloptools.devices.dummy_dofs(n=2)\n", - "bounds = [(-8, 8), (-8, 8)]\n", + "bounds = [(-10, 10), (-10, 10)]\n", "\n", "task = Task(key=\"himmelblau\", kind=\"min\")\n", "\n", @@ -110,7 +107,7 @@ " db=db,\n", ")\n", "\n", - "RE(agent.initialize(acqf=\"qr\", n_init=32))\n", + "RE(agent.initialize(\"qr\", n_init=32))\n", "\n", "agent.plot_tasks()" ] @@ -164,7 +161,7 @@ "metadata": {}, "outputs": [], "source": [ - "RE(agent.learn(\"ei\", n_iter=4))" + "RE(agent.learn(\"ei\", n_per_iter=4))" ] }, { diff --git a/docs/source/tutorials/hyperparameters.ipynb b/docs/source/tutorials/hyperparameters.ipynb index 0fe5e2a..124295d 100644 --- a/docs/source/tutorials/hyperparameters.ipynb +++ b/docs/source/tutorials/hyperparameters.ipynb @@ -8,7 +8,7 @@ "source": [ "# Hyperparameters\n", "\n", - "Consider the test function of two inputs $f(x_1, x_2) = e^{- (x_1/10)^2 - x_2^2}$. We can see that this function varies more quickly in the $x_1$ direction than the $x_2$ direction, especially if we look at the plot:" + "Consider the Booth test function (below). This function varies differently in different directions, and these directions are somewhat skewed with respect to the inputs. Our agent will automatically fit the right hyperparameters to account for this." ] }, { @@ -21,13 +21,14 @@ "import numpy as np\n", "import matplotlib as mpl\n", "from matplotlib import pyplot as plt\n", + "from bloptools import test_functions\n", "\n", "x1 = x2 = np.linspace(-10, 10, 256)\n", "X1, X2 = np.meshgrid(x1, x2)\n", "\n", - "F = -np.exp(-((X1 / 8) ** 2)) * np.exp(-(X2**2))\n", + "F = test_functions.booth(X1, X2)\n", "\n", - "plt.pcolormesh(x1, x2, F, shading=\"auto\")\n", + "plt.pcolormesh(x1, x2, F, norm=mpl.colors.LogNorm(), shading=\"auto\")\n", "plt.colorbar()\n", "plt.xlabel(\"x1\")\n", "plt.ylabel(\"x2\")" @@ -59,14 +60,14 @@ "dofs = bloptools.devices.dummy_dofs(n=2)\n", "bounds = [(-8, 8), (-8, 8)]\n", "\n", - "task = Task(key=\"loss\", kind=\"min\")\n", + "task = Task(key=\"booth\", kind=\"min\")\n", "\n", "\n", "def digestion(db, uid):\n", " products = db[uid].table()\n", "\n", " for index, entry in products.iterrows():\n", - " products.loc[index, \"loss\"] = -np.exp(-1e-2 * entry.x1**2) * np.exp(-1e-1 * entry.x2**2)\n", + " products.loc[index, \"booth\"] = test_functions.booth(entry.x1, entry.x2)\n", "\n", " return products\n", "\n", @@ -92,7 +93,7 @@ "metadata": {}, "outputs": [], "source": [ - "agent.tasks[0].regressor.covar_module.latent_dimensions" + "agent.tasks[0].regressor.covar_module.latent_transform" ] }, { @@ -123,7 +124,7 @@ "metadata": {}, "outputs": [], "source": [ - "RE(agent.learn(\"ei\", n_iter=4))\n", + "RE(agent.learn(\"ei\", n_iter=8))\n", "agent.plot_tasks()" ] } diff --git a/docs/source/tutorials/introduction.ipynb b/docs/source/tutorials/introduction.ipynb index 4aabba2..c960697 100644 --- a/docs/source/tutorials/introduction.ipynb +++ b/docs/source/tutorials/introduction.ipynb @@ -33,9 +33,9 @@ "\n", "from bloptools import test_functions\n", "\n", - "x = np.linspace(-4, 4, 256)\n", + "x = np.linspace(-5, 5, 256)\n", "\n", - "plt.plot(x, test_functions.rastrigin(x), c=\"b\")" + "plt.plot(x, test_functions.ackley(x), c=\"b\")" ] }, { @@ -81,7 +81,7 @@ " products = 
db[uid].table()\n", "\n", " for index, entry in products.iterrows():\n", - " products.loc[index, \"rastrigin\"] = test_functions.rastrigin(entry.x1)\n", + " products.loc[index, \"ackley\"] = test_functions.ackley(entry.x1)\n", "\n", " return products" ] @@ -104,7 +104,7 @@ "source": [ "from bloptools.tasks import Task\n", "\n", - "task = Task(key=\"rastrigin\", kind=\"min\")" + "task = Task(key=\"ackley\", kind=\"min\")" ] }, { @@ -136,7 +136,7 @@ " db=db,\n", ")\n", "\n", - "RE(agent.initialize(acqf=\"qr\", n_init=12))" + "RE(agent.initialize(\"qr\", n_init=4))" ] }, { @@ -205,9 +205,17 @@ "metadata": {}, "outputs": [], "source": [ - "RE(agent.learn(acqf=\"ei\", n_iter=8))\n", + "RE(agent.learn(\"ei\", n_per_iter=4))\n", "agent.plot_tasks()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "87908041", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/docs/source/tutorials/latent-toroid-dimensions.ipynb b/docs/source/tutorials/latent-toroid-dimensions.ipynb index d6ffe16..0bd3aa4 100644 --- a/docs/source/tutorials/latent-toroid-dimensions.ipynb +++ b/docs/source/tutorials/latent-toroid-dimensions.ipynb @@ -51,7 +51,7 @@ " db=db,\n", ")\n", "\n", - "RE(agent.initialize(acqf=\"qr\", n_init=24))" + "RE(agent.initialize(\"qr\", n_init=24))" ] }, { @@ -70,7 +70,7 @@ "metadata": {}, "outputs": [], "source": [ - "agent.tasks[0].regressor.covar_module.latent_dimensions" + "agent.tasks[0].regressor.covar_module.latent_transform" ] }, { @@ -86,14 +86,6 @@ "agent.plot_feasibility()\n", "agent.plot_acquisition(strategy=[\"ei\", \"pi\", \"ucb\"])" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9093dd73", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/docs/source/tutorials/multi-task-sirepo.ipynb b/docs/source/tutorials/multi-task-sirepo.ipynb index d3fefd0..1d318be 100644 --- a/docs/source/tutorials/multi-task-sirepo.ipynb +++ b/docs/source/tutorials/multi-task-sirepo.ipynb @@ -55,7 +55,7 @@ " db=db,\n", ")\n", "\n", - "RE(agent.initialize(acqf=\"qr\", n_init=16))" + "RE(agent.initialize(\"qr\", n_init=16))" ] }, { @@ -98,7 +98,7 @@ }, "outputs": [], "source": [ - "RE(agent.learn(acqf=\"ei\", n_iter=2))\n", + "RE(agent.learn(\"ei\", n_iter=2))\n", "agent.plot_tasks()" ] }, @@ -128,7 +128,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16 (main, Mar 8 2023, 14:00:05) \n[GCC 11.2.0]" + "version": "3.9.16" }, "vscode": { "interpreter": { diff --git a/examples/prepare_bluesky.py b/examples/prepare_bluesky.py index f913ad7..da47150 100644 --- a/examples/prepare_bluesky.py +++ b/examples/prepare_bluesky.py @@ -11,11 +11,6 @@ from bluesky.callbacks import best_effort from bluesky.run_engine import RunEngine from databroker import Broker -from ophyd.utils import make_dir_tree -from sirepo_bluesky.shadow_handler import ShadowFileHandler -from sirepo_bluesky.sirepo_bluesky import SirepoBluesky -from sirepo_bluesky.sirepo_ophyd import create_classes -from sirepo_bluesky.srw_handler import SRWFileHandler RE = RunEngine({}) diff --git a/examples/prepare_tes_shadow.py b/examples/prepare_tes_shadow.py index ff5e8b8..e590741 100644 --- a/examples/prepare_tes_shadow.py +++ b/examples/prepare_tes_shadow.py @@ -1,3 +1,10 @@ +import sirepo_bluesky +from ophyd.utils import make_dir_tree +from sirepo_bluesky.shadow_handler import ShadowFileHandler +from sirepo_bluesky.sirepo_bluesky import SirepoBluesky +from sirepo_bluesky.sirepo_ophyd import create_classes 
+from sirepo_bluesky.srw_handler import SRWFileHandler + db.reg.register_handler("shadow", ShadowFileHandler, overwrite=True) db.reg.register_handler("SIREPO_FLYER", SRWFileHandler, overwrite=True) @@ -12,14 +19,18 @@ classes, objects = create_classes(connection.data, connection=connection) globals().update(**objects) -data["models"]["simulation"]["npoint"] = 50000 -data["models"]["watchpointReport12"]["histogramBins"] = 16 +data["models"]["simulation"]["npoint"] = 100000 +data["models"]["watchpointReport12"]["histogramBins"] = 32 # w9.duration.kind = "hinted" bec.disable_baseline() bec.disable_heading() # bec.disable_table() +import warnings + +warnings.filterwarnings("ignore", module="sirepo_bluesky") + # This should be done by installing the package with `pip install -e .` or something similar. # import sys # sys.path.insert(0, "../")
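
Usage note (not part of the diff): a minimal sketch of the renamed entry points, mirroring the new fixture in bloptools/tests/conftest.py and the calls in bloptools/tests/test_agent.py. The temporary databroker/RunEngine wiring and the digestion function below are assumptions for illustration (they are not defined by this patch), and "hypers.h5" is just an example filename.

from bluesky.run_engine import RunEngine
from databroker import Broker

from bloptools import devices, test_functions
from bloptools.bayesian import Agent
from bloptools.tasks import Task

# assumed setup: a temporary databroker and a RunEngine, as in the test fixtures
db = Broker.named("temp")
RE = RunEngine({})
RE.subscribe(db.insert)

dofs = devices.dummy_dofs(n=2)  # two dummy ophyd signals, x1 and x2
bounds = [(-5, 5), (-5, 5)]


def digestion(db, uid):
    # mirrors the conftest.py fixture: one Styblinski-Tang loss per sampled point
    products = db[uid].table()
    for index, entry in products.iterrows():
        products.loc[index, "loss1"] = test_functions.styblinski_tang(entry.x1, entry.x2)
    return products


agent = Agent(
    active_dofs=dofs,
    passive_dofs=[],
    active_dof_bounds=bounds,
    tasks=[Task(key="loss1", kind="min")],
    digestion=digestion,
    db=db,
    verbose=True,
)

RE(agent.initialize("qr", n_init=16))                      # quasi-random initialization; acqfs are now passed by identifier
agent.save_hypers("hypers.h5")                             # persist the fitted GP hyperparameters to HDF5
RE(agent.initialize("qr", n_init=4, hypers="hypers.h5"))   # warm-start a new run from the saved hypers
RE(agent.learn("ei", n_iter=4))                            # ask -> acquire -> tell with expected improvement
agent.plot_tasks()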