diff --git a/.flake8 b/.flake8 index a82760d..83b1308 100644 --- a/.flake8 +++ b/.flake8 @@ -9,6 +9,6 @@ exclude = examples/*.py, bloptools/tests/test_bayesian_shadow.py, versioneer.py, -max-line-length = 115 +max-line-length = 125 # Ignore some style 'errors' produced while formatting by 'black' ignore = E203, W503 diff --git a/.isort.cfg b/.isort.cfg index e0926f4..43f177b 100644 --- a/.isort.cfg +++ b/.isort.cfg @@ -1,4 +1,4 @@ [settings] -line_length = 115 +line_length = 125 multi_line_output = 3 include_trailing_comma = True diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4336326..6de8684 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,6 +11,9 @@ repos: rev: 23.1.0 hooks: - id: black + language_version: python3.10 + - id: black-jupyter + language_version: python3.10 - repo: https://github.com/pycqa/flake8 rev: 6.0.0 hooks: diff --git a/bloptools/__init__.py b/bloptools/__init__.py index 16275c3..8ba74e4 100644 --- a/bloptools/__init__.py +++ b/bloptools/__init__.py @@ -1,4 +1,4 @@ -from . import bayesian, experiments, tasks # noqa F401 +from . import bayesian, devices, tasks # noqa F401 from ._version import get_versions __version__ = get_versions()["version"] diff --git a/bloptools/bayesian/__init__.py b/bloptools/bayesian/__init__.py index 1eeb815..13c3297 100644 --- a/bloptools/bayesian/__init__.py +++ b/bloptools/bayesian/__init__.py @@ -1,4 +1,3 @@ -import copy import logging import bluesky.plan_stubs as bps @@ -18,7 +17,8 @@ mpl.rc("image", cmap="coolwarm") -COLOR_LIST = ["dodgerblue", "tomato", "mediumseagreen"] +DEFAULT_COLOR_LIST = ["dodgerblue", "tomato", "mediumseagreen", "goldenrod"] +DEFAULT_COLORMAP = "turbo" def default_acquisition_plan(dofs, inputs, dets): @@ -26,19 +26,28 @@ def default_acquisition_plan(dofs, inputs, dets): return uid +def default_digestion_plan(db, uid): + return db[uid].table() + + +# let's be specific about our terminology. +# +# dofs: degrees of freedom are things that can change +# inputs: these are the values of the dofs, which may be transformed/normalized +# targets: these are what our model tries to predict from the inputs +# tasks: these are quantities that our agent will try to optimize over + +MAX_TEST_INPUTS = 2**11 + + class Agent: def __init__( self, - dofs, - bounds, + active_dofs, + active_dof_bounds, tasks, - digestion, db, - acquisition=None, - detectors=None, - initialization=None, - training_iter=256, - verbose=True, + **kwargs, ): """ A Bayesian optimization agent. @@ -58,119 +67,95 @@ def __init__( db : A databroker instance. 
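# A minimal construction sketch for the refactored, kwargs-based Agent. `my_task`,
# `my_digestion`, and `db` are user-supplied placeholders (not defined in this diff);
# the dummy DOFs come from the new bloptools.devices module added further down.

import bloptools
from bloptools.bayesian import Agent

dofs = bloptools.devices.dummy_dofs(n=2)  # ophyd signals the agent is allowed to move

agent = Agent(
    active_dofs=dofs,
    active_dof_bounds=[[-5.0, 5.0], [-5.0, 5.0]],  # one (lower, upper) pair per active DOF
    tasks=[my_task],         # placeholder: quantities the agent will try to maximize
    digestion=my_digestion,  # placeholder: maps a databroker UID to a table of task values
    db=db,                   # placeholder: a databroker instance
)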
""" - self.dofs = np.atleast_1d(dofs) - self.bounds = np.atleast_2d(bounds) + for dof in active_dofs: + dof.kind = "hinted" + + self.active_dofs = np.atleast_1d(active_dofs) + self.active_dof_bounds = np.atleast_2d(active_dof_bounds).astype(float) self.tasks = np.atleast_1d(tasks) - self.initialization = initialization - self.acquisition = acquisition if acquisition is not None else default_acquisition_plan - self.digestion = digestion self.db = db - for dof in self.dofs: - dof.kind = "hinted" + self.verbose = kwargs.get("verbose", False) + self.ignore_acquisition_errors = kwargs.get("ignore_acquisition_errors", False) - for i, task in enumerate(self.tasks): - task.index = i + self.initialization = kwargs.get("initialization", None) + self.acquisition_plan = kwargs.get("acquisition_plan", default_acquisition_plan) + self.digestion = kwargs.get("digestion", default_digestion_plan) - self.dets = detectors if detectors is not None else np.array([]) + self.tolerate_acquisition_errors = kwargs.get("tolerate_acquisition_errors", True) - self.n_dof = len(self.dofs) - self.target_names = [f"{task.name}_fitness" for task in self.tasks] - self.n_tasks = len(self.tasks) + self.acquisition = acquisition.Acquisition() - self.training_iter = training_iter - self.verbose = verbose + self.dets = np.atleast_1d(kwargs.get("detectors", [])) + self.passive_dofs = np.atleast_1d(kwargs.get("passive_dofs", [])) - MAX_TEST_POINTS = 2**11 + for i, task in enumerate(self.tasks): + task.index = i - self.test_X = self.sampler(n=MAX_TEST_POINTS) + self.n_active_dof = self.active_dofs.size + self.n_passive_dof = self.passive_dofs.size - self.test_inputs = self.unnormalize_inputs(self.test_X) + self.dofs = np.r_[self.active_dofs, self.passive_dofs] - n_per_dim = int(np.power(MAX_TEST_POINTS, 1 / self.n_dof)) - self.X_samples = np.linspace(0, 1, n_per_dim) - self.test_X_grid = np.swapaxes(np.r_[np.meshgrid(*self.n_dof * [self.X_samples], indexing="ij")], 0, -1) - self.test_inputs_grid = self.unnormalize_inputs(self.test_X_grid) + self.n_dof = self.n_active_dof + self.n_passive_dof - self.inputs = np.empty((0, self.n_dof)) - self.targets = np.empty((0, self.n_tasks)) - self.table = pd.DataFrame() + self.n_tasks = len(self.tasks) - self._initialized = False + self.training_iter = kwargs.get("training_iter", 256) - def normalize_inputs(self, inputs): - return (inputs - self.bounds.min(axis=1)) / self.bounds.ptp(axis=1) + # self.test_normalized_active_inputs = self.sampler(n=self.MAX_TEST_INPUTS) - def unnormalize_inputs(self, X): - return X * self.bounds.ptp(axis=1) + self.bounds.min(axis=1) - - def normalize_targets(self, targets): - return (targets - self.targets_mean) / (1e-20 + self.targets_scale) + # make some test points for sampling - def unnormalize_targets(self, targets): - return targets * self.targets_scale + self.targets_mean + self.normalized_test_active_inputs = utils.normalized_sobol_sampler(n=MAX_TEST_INPUTS, d=self.n_active_dof) - @property - def normalized_bounds(self): - return (self.bounds - self.bounds.min(axis=1)[:, None]) / self.bounds.ptp(axis=1)[:, None] + n_per_active_dim = int(np.power(MAX_TEST_INPUTS, 1 / self.n_active_dof)) - @property - def targets_mean(self): - return np.nanmean(self.targets, axis=0) + self.normalized_test_active_inputs_grid = np.swapaxes( + np.r_[np.meshgrid(*self.n_active_dof * [np.linspace(0, 1, n_per_active_dim)])], 0, -1 + ) - @property - def targets_scale(self): - return np.nanstd(self.targets, axis=0) + self.table = pd.DataFrame() - @property - def 
normalized_targets(self): - return self.normalize_targets(self.targets) + self._initialized = False - @property - def current_inputs(self): - return np.array([dof.read()[dof.name]["value"] for dof in self.dofs]) + def normalize_active_inputs(self, x): + return (x - self.active_dof_bounds.min(axis=1)) / self.active_dof_bounds.ptp(axis=1) - @property - def dof_names(self): - return [dof.name for dof in self.dofs] + def unnormalize_active_inputs(self, x): + return x * self.active_dof_bounds.ptp(axis=1) + self.active_dof_bounds.min(axis=1) - @property - def det_names(self): - return [det.name for det in self.dets] + def active_inputs_sampler(self, n=MAX_TEST_INPUTS): + """ + Returns $n$ quasi-randomly sampled inputs in the bounded parameter space + """ + return self.unnormalize_active_inputs(utils.normalized_sobol_sampler(n, self.n_active_dof)) @property - def best_sum_of_tasks(self): - return np.nanmax(self.targets.sum(axis=1)) + def test_active_inputs(self): + """ + A static, quasi-randomly sampled set of test active inputs. + """ + return self.unnormalize_active_inputs(self.normalized_test_active_inputs) @property - def best_sum_of_tasks_inputs(self): - return self.inputs[np.nanargmax(self.targets.sum(axis=1))] + def test_active_inputs_grid(self): + """ + A static, gridded set of test active inputs. + """ + return self.unnormalize_active_inputs(self.normalized_test_active_inputs_grid) - @property - def go_to(self, inputs): - yield from bps.mv(*[_ for items in zip(self.dofs, np.atleast_1d(inputs).T) for _ in items]) + # @property + # def input_transform(self): + # coefficient = torch.tensor(self.inputs.values.ptp(axis=0)) + # offset = torch.tensor(self.inputs.values.min(axis=0)) + # return botorch.models.transforms.input.AffineInputTransform( + # d=self.n_dof, coefficient=coefficient, offset=offset + # ) @property - def go_to_best_sum_of_tasks(self): - yield from self.go_to(self.best_sum_of_tasks_inputs) - - def inspect_beam(self, index, border=None): - im = self.images[index] - - x_min, x_max, y_min, y_max, width_x, width_y = self.table.loc[ - index, ["x_min", "x_max", "y_min", "y_max", "width_x", "width_y"] - ] - - bbx = np.array([x_min, x_max])[[0, 0, 1, 1, 0]] - bby = np.array([y_min, y_max])[[0, 1, 1, 0, 0]] - - plt.figure() - plt.imshow(im, cmap="gray_r") - plt.plot(bbx, bby, lw=4e0, c="r") - - if border is not None: - plt.xlim(x_min - border * width_x, x_min + border * width_x) - plt.ylim(y_min - border * width_y, y_min + border * width_y) + def input_transform(self): + return botorch.models.transforms.input.Normalize(d=self.n_dof) def save(self, filepath="./agent_data.h5"): """ @@ -181,23 +166,21 @@ def save(self, filepath="./agent_data.h5"): f.create_dataset("inputs", data=self.inputs) self.table.to_hdf(filepath, key="table") - def forget(self, n): - if n >= len(self.inputs): - raise ValueError(f"Cannot forget last {n} points (the agent only has {len(self.inputs)} points).") - self.tell(new_inputs=self.inputs[:-n], new_targets=self.targets[:-n], append=False) + def forget(self, index): + self.tell(new_table=self.table.drop(index=index), append=False) def sampler(self, n): """ - Returns $n$ quasi-randomly sampled points on the [0,1] ^ n_dof hypercube using Sobol sampling. + Returns $n$ quasi-randomly sampled points on the [0,1] ^ n_active_dof hypercube using Sobol sampling. 
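# A standalone sketch of the Sobol trick used by these samplers, assuming only numpy
# and scipy: draw the next power of two of points (Sobol sequences are balanced at
# powers of two), keep a random subset of size n, then stretch it onto the bounds.

import numpy as np
from scipy.stats import qmc

n = 100
bounds = np.array([[-5.0, 5.0], [0.0, 1.0]])               # e.g. two active DOFs
m = 2 ** int(np.ceil(np.log2(n)))                          # smallest power of two >= n
unit = qmc.Sobol(d=len(bounds), scramble=True).random(n=m)
subset = unit[np.random.choice(m, size=n, replace=False)]
samples = bounds[:, 0] + subset * (bounds[:, 1] - bounds[:, 0])  # map [0, 1]^d onto the bounds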
""" - power_of_two = 2 ** int(np.ceil(np.log(n) / np.log(2))) - subset = np.random.choice(power_of_two, size=n, replace=False) - return sp.stats.qmc.Sobol(d=self.n_dof, scramble=True).random(n=power_of_two)[subset] + min_power_of_two = 2 ** int(np.ceil(np.log(n) / np.log(2))) + subset = np.random.choice(min_power_of_two, size=n, replace=False) + return sp.stats.qmc.Sobol(d=self.n_active_dof, scramble=True).random(n=min_power_of_two)[subset] def initialize( self, filepath=None, - init_scheme=None, + acqf=None, n_init=4, ): """ @@ -206,305 +189,440 @@ def initialize( It should be passed to a Bluesky RunEngine. """ - if filepath is not None: - with h5py.File(filepath, "r") as f: - init_inputs = f["inputs"][:] - self.table = pd.read_hdf(filepath, key="table") - init_targets = self.table.loc[:, self.target_names].values - # experiment-specific stuff if self.initialization is not None: yield from self.initialization() + if filepath is not None: + self.tell(new_table=pd.read_hdf(filepath, key="table")) + # now let's get bayesian - if init_scheme == "quasi-random": - init_inputs = self.ask(strategy="quasi-random", n=n_init, route=True) - init_table = yield from self.acquire(self.dofs, init_inputs, self.dets) - self.table = pd.concat([self.table, init_table]) - self.table.index = np.arange(len(self.table)) - init_targets = init_table.loc[:, self.target_names].values + elif acqf in ["qr"]: + yield from self.learn(acqf=acqf, n_iter=1, n_per_iter=n_init, route=True) else: raise Exception( - "Could not initialize model! Either pass initial X and data, or specify one of:" - "['quasi-random']." + """Could not initialize model! Either load a table, or specify an acqf from: +['qr'].""" ) - if (init_inputs is None) or (init_targets is None): - raise RuntimeError("Unhandled initialization error.") + # if init_table is None: + # raise RuntimeError("Unhandled initialization error.") - no_good_samples_tasks = np.isnan(init_targets).all(axis=0) + no_good_samples_tasks = np.isnan(self.table.loc[:, self.target_names]).all(axis=0) if no_good_samples_tasks.any(): raise ValueError( f"The tasks {[self.tasks[i].name for i in np.where(no_good_samples_tasks)[0]]} " f"don't have any good samples." ) - self.tell(new_inputs=init_inputs, new_targets=init_targets, reuse_hypers=True, verbose=self.verbose) - self._initialized = True - def tell(self, new_inputs, new_targets, append=True, **kwargs): + def tell(self, new_table=None, append=True, **kwargs): """ Inform the agent about new inputs and targets for the model. 
""" - if append: - self.inputs = np.r_[self.inputs, np.atleast_2d(new_inputs)] - self.targets = np.r_[self.targets, np.atleast_2d(new_targets)] - else: - self.inputs = new_inputs - self.targets = new_targets - self.X = self.normalize_inputs(self.inputs) + new_table = pd.DataFrame() if new_table is None else new_table + + self.table = pd.concat([self.table, new_table]) if append else new_table + + self.table.loc[:, "total_fitness"] = self.table.loc[:, self.task_names].fillna(-np.inf).sum(axis=1) + self.table.index = np.arange(len(self.table)) - # if hasattr(self.experiment, "IMAGE_NAME"): - # self.images = np.array([im for im in self.table[self.experiment.IMAGE_NAME].values]) + # self.normalized_inputs = self.normalize_inputs(self.inputs) self.all_targets_valid = ~np.isnan(self.targets).any(axis=1) for task in self.tasks: - task.targets = self.targets[:, task.index] - task.normalized_targets = self.normalized_targets[:, task.index] + task.targets = self.targets.loc[:, task.name] + # + # task.targets_mean = np.nanmean(task.targets, axis=0) + # task.targets_scale = np.nanstd(task.targets, axis=0) - task.targets_mean = np.nanmean(task.targets) - task.targets_scale = np.nanstd(task.targets) + # task.normalized_targets = self.normalized_targets.loc[:, task.name] - task.classes = self.all_targets_valid.astype(int) + task.feasibility = self.all_targets_valid.astype(int) - regressor_likelihood = gpytorch.likelihoods.GaussianLikelihood( + train_inputs = torch.tensor(self.inputs.loc[task.feasibility == 1].values).double().unsqueeze(0) + train_targets = torch.tensor(task.targets.loc[task.feasibility == 1].values).double().unsqueeze(0).unsqueeze(-1) + + if train_inputs.ndim == 1: + train_inputs = train_inputs.unsqueeze(-1) + if train_targets.ndim == 1: + train_targets = train_targets.unsqueeze(-1) + + likelihood = gpytorch.likelihoods.GaussianLikelihood( noise_constraint=gpytorch.constraints.Interval( torch.tensor(task.MIN_NOISE_LEVEL).square(), torch.tensor(task.MAX_NOISE_LEVEL).square(), ), ).double() - task.regressor = models.BoTorchSingleTaskGP( - train_inputs=torch.tensor(self.X[task.classes == 1]).double(), - train_targets=torch.tensor(self.normalized_targets[:, task.index][task.classes == 1]).double(), - likelihood=regressor_likelihood, + task.regressor = models.LatentGP( + train_inputs=train_inputs, + train_targets=train_targets, + likelihood=likelihood, + input_transform=self.input_transform, + outcome_transform=botorch.models.transforms.outcome.Standardize(m=1, batch_shape=torch.Size((1,))), ).double() - task.regressor_mll = gpytorch.mlls.ExactMarginalLogLikelihood( - task.regressor.likelihood, task.regressor - ) - botorch.fit.fit_gpytorch_mll(task.regressor_mll, **kwargs) + task.regressor_mll = gpytorch.mlls.ExactMarginalLogLikelihood(task.regressor.likelihood, task.regressor) - self.scalarization = botorch.acquisition.objective.ScalarizedPosteriorTransform( - weights=torch.tensor(self.targets_scale).double(), - offset=self.targets_mean.sum(), + log_feas_prob_weight = np.sqrt(np.sum(np.nanvar(self.targets.values, axis=0) * np.square(self.task_weights))) + + self.task_scalarization = botorch.acquisition.objective.ScalarizedPosteriorTransform( + weights=torch.tensor([*[task.weight for task in self.tasks], log_feas_prob_weight]).double(), + offset=0, ) - dirichlet_classifier_likelihood = gpytorch.likelihoods.DirichletClassificationLikelihood( + dirichlet_likelihood = gpytorch.likelihoods.DirichletClassificationLikelihood( torch.as_tensor(self.all_targets_valid).long(), 
learn_additional_noise=True ).double() - self.dirichlet_classifier = models.BoTorchDirichletClassifier( - train_inputs=torch.tensor(self.X).double(), - train_targets=dirichlet_classifier_likelihood.transformed_targets, - likelihood=dirichlet_classifier_likelihood, + self.dirichlet_classifier = models.LatentDirichletClassifier( + train_inputs=torch.tensor(self.inputs.values).double(), + train_targets=dirichlet_likelihood.transformed_targets.transpose(-1, -2).double(), + likelihood=dirichlet_likelihood, + input_transform=self.input_transform, ).double() self.dirichlet_classifier_mll = gpytorch.mlls.ExactMarginalLogLikelihood( self.dirichlet_classifier.likelihood, self.dirichlet_classifier ) - botorch.fit.fit_gpytorch_mll(self.dirichlet_classifier_mll, **kwargs) - self.classifier = botorch.models.deterministic.GenericDeterministicModel( - f=lambda X: self.dirichlet_classifier.log_prob(X) + self.feas_model = botorch.models.deterministic.GenericDeterministicModel( + f=lambda X: -self.dirichlet_classifier.log_prob(X).square() ) - self.multimodel = botorch.models.model.ModelList(*[task.regressor for task in self.tasks]) - - def sample_acqf(self, acqf, n_test=256, optimize=False): - """ - Return the point that optimizes a given acqusition function. - """ + self.fit_models() - def acq_loss(x, *args): - return -acqf(x, *args).detach().numpy() + self.targets_model = botorch.models.model_list_gp_regression.ModelListGP(*[task.regressor for task in self.tasks]) - acq_args = (self,) + self.task_model = botorch.models.model.ModelList(*[task.regressor for task in self.tasks], self.feas_model) - test_X = self.sampler(n=n_test) - init_X = test_X[acq_loss(test_X, *acq_args).argmin()] + def fit_models(self, **kwargs): + for task in self.tasks: + botorch.fit.fit_gpytorch_mll(task.regressor_mll, **kwargs) + botorch.fit.fit_gpytorch_mll(self.dirichlet_classifier_mll, **kwargs) - if optimize: - res = sp.optimize.minimize( - fun=acq_loss, - args=acq_args, - x0=init_X, - bounds=self.normalized_bounds, - method="SLSQP", - options={"maxiter": 256}, + def get_acquisition_function(self, acqf_name="ei", return_metadata=False, acqf_args={}, **kwargs): + if not self._initialized: + raise RuntimeError(f'Can\'t construct acquisition function "{acqf_name}" (the agent is not initialized!)') + + if acqf_name.lower() == "ei": + acqf = botorch.acquisition.analytic.LogExpectedImprovement( + self.task_model, + best_f=self.best_sum_of_tasks, + posterior_transform=self.task_scalarization, + **kwargs, ) - X = res.x - else: - X = init_X + acqf_meta = {"name": "expected improvement", "args": {}} + + elif acqf_name.lower() == "pi": + acqf = botorch.acquisition.analytic.LogProbabilityOfImprovement( + self.task_model, + best_f=self.best_sum_of_tasks, + posterior_transform=self.task_scalarization, + **kwargs, + ) + acqf_meta = {"name": "probability of improvement", "args": {}} + + elif acqf_name.lower() == "ucb": + beta = acqf_args.get("beta", 0.1) + acqf = botorch.acquisition.analytic.UpperConfidenceBound( + self.task_model, + beta=beta, + posterior_transform=self.task_scalarization, + **kwargs, + ) + acqf_meta = {"name": "upper confidence bound", "args": {"beta": beta}} - # print(res) + else: + raise ValueError(f'Unrecognized acquisition acqf_name "{acqf_name}".') - return X, acq_loss(X, *acq_args) + return (acqf, acqf_meta) if return_metadata else acqf def ask( self, tasks=None, classifier=None, - strategy=None, + acqf_name="ei", greedy=True, n=1, disappointment=0, - route=True, + route=False, cost_model=None, n_test=1024, optimize=True, 
+ return_metadata=False, ): """ The next $n$ points to sample, recommended by the agent. """ - if route: - unrouted_points = self.ask( - tasks=tasks, - classifier=classifier, - strategy=strategy, - greedy=greedy, - n=n, - disappointment=disappointment, - route=False, - cost_model=cost_model, - n_test=n_test, + # if route: + # unrouted_points = self.ask( + # tasks=tasks, + # classifier=classifier, + # strategy=strategy, + # greedy=greedy, + # n=n, + # disappointment=disappointment, + # route=False, + # cost_model=cost_model, + # n_test=n_test, + # ) + + # routing_index, _ = utils.get_routing(self.read_active_dofs, unrouted_points) + # x = unrouted_points[routing_index] + + if acqf_name.lower() == "qr": + x = self.active_inputs_sampler(n=n) + acqf_meta = {"name": "quasi-random", "args": {}} + + else: + acqf, acqf_meta = self.get_acquisition_function(acqf_name=acqf_name, return_metadata=True) + + BATCH_SIZE = 1 + NUM_RESTARTS = 7 + RAW_SAMPLES = 512 + + candidates, _ = botorch.optim.optimize_acqf( + acq_function=acqf, + bounds=torch.tensor(self.dof_bounds).T, + q=BATCH_SIZE, + num_restarts=NUM_RESTARTS, + raw_samples=RAW_SAMPLES, # used for intialization heuristic + options={"batch_limit": 3, "maxiter": 1024}, ) - routing_index, _ = utils.get_routing(self.current_inputs, unrouted_points) - return unrouted_points[routing_index] + x = candidates.detach().numpy()[..., self.dof_is_active_mask] - if strategy.lower() == "quasi-random": - return self.unnormalize_inputs(self.sampler(n=n)) + return (x, acqf_meta) if return_metadata else x - if not self._initialized: - raise RuntimeError("The agent is not initialized!") + def acquire(self, active_inputs): + """ + Acquire and digest according to the agent's acquisition and digestion plans. - if tasks is None: - tasks = self.tasks + This should yield a table of sampled tasks with the same length as the sampled inputs. + """ + try: + uid = yield from self.acquisition_plan(self.dofs, active_inputs, [*self.dets, *self.dofs, *self.passive_dofs]) - if classifier is None: - classifier = self.classifier + products = self.digestion(self.db, uid) - if (not greedy) or (n == 1): - acqf = None + # compute the fitness for each task + for index, entry in products.iterrows(): + for task in self.tasks: + products.loc[index, task.name] = task.get_fitness(entry) - if strategy.lower() == "esti": # maximize the expected improvement - acqf = acquisition.log_expected_sum_of_tasks_improvement + except Exception as error: + if not self.tolerate_acquisition_errors: + raise error + logging.warning(f"Error in acquisition/digestion: {repr(error)}") + products = pd.DataFrame(active_inputs, columns=self.active_dof_names) + for task in self.tasks: + products.loc[:, task.name] = np.nan - if acqf is None: - raise ValueError(f'Unrecognized strategy "{strategy}".') + if not len(active_inputs) == len(products): + raise ValueError("The table returned by the digestion must be the same length as the sampled inputs!") - X, loss = self.sample_acqf(acqf, optimize=optimize) + return products - return self.unnormalize_inputs(np.atleast_2d(X)) + def learn(self, acqf, n_iter=1, n_per_iter=1, reuse_hypers=True, upsample=1, verbose=True, plots=[], **kwargs): + """ + This iterates the learning algorithm, looping over ask -> acquire -> tell. + It should be passed to a Bluesky RunEngine. 
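# A hedged usage sketch of this ask -> acquire -> tell loop. `RE` (a Bluesky RunEngine)
# and `agent` are placeholders assumed to be configured elsewhere; the acqf strings
# mirror the ones handled by initialize() and ask() in this diff.

RE(agent.initialize(acqf="qr", n_init=8))  # seed the models with quasi-randomly sampled points
RE(agent.learn(acqf="ei", n_iter=4))       # then iterate on (log) expected improvement
agent.plot_tasks()                         # inspect the posterior over each task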
+ """ - if greedy and (n > 1): - dummy_tasks = [copy.deepcopy(task) for task in self.tasks] + for i in range(n_iter): + x, acqf_meta = self.ask(n=n_per_iter, acqf_name=acqf, return_metadata=True, **kwargs) - inputs_to_sample = np.zeros((0, self.n_dof)) + new_table = yield from self.acquire(x) - for i in range(n): - new_input = self.ask(strategy=strategy, tasks=dummy_tasks, classifier=classifier, greedy=True, n=1) + new_table.loc[:, "acqf"] = acqf_meta["name"] - inputs_to_sample = np.r_[inputs_to_sample, new_input] + self.tell(new_table=new_table, reuse_hypers=reuse_hypers) - if not (i + 1 == n): # no point if we're on the last iteration - fantasy_multimodel = botorch.models.model.ModelList(*[task.regressor for task in self.tasks]) - fantasy_posterior = fantasy_multimodel.posterior( - torch.tensor(self.normalize_inputs(new_input)).double() - ) + def normalize_inputs(self, inputs): + return (inputs - self.input_bounds.min(axis=1)) / self.input_bounds.ptp(axis=1) - fantasy_targets = fantasy_posterior.mean - disappointment * fantasy_posterior.variance.sqrt() - new_targets = self.unnormalize_targets(fantasy_targets.detach().numpy()) + def unnormalize_inputs(self, X): + return X * self.input_bounds.ptp(axis=1) + self.input_bounds.min(axis=1) - self.tell(new_inputs=new_input, new_targets=new_targets) + def normalize_targets(self, targets): + return (targets - self.targets_mean) / (1e-20 + self.targets_scale) - self.forget(n=n) # forget what you saw here + def unnormalize_targets(self, targets): + return targets * self.targets_scale + self.targets_mean - return inputs_to_sample + @property + def test_inputs(self): + test_passive_inputs = self.passive_inputs.values[-1][None] * np.ones(len(self.test_active_inputs))[..., None] + return np.concatenate([self.test_active_inputs, test_passive_inputs], axis=-1) - def acquire(self, dofs, inputs, dets): - """ - Acquire and digest according to the agent's acquisition and digestion plans. + @property + def test_inputs_grid(self): + test_passive_inputs_grid = self.passive_inputs.values[-1] * np.ones( + (*self.test_active_inputs_grid.shape[:-1], self.n_passive_dof) + ) + return np.concatenate([self.test_active_inputs_grid, test_passive_inputs_grid], axis=-1) - This should yield a table of sampled tasks with the same length as the sampled inputs. 
- """ - try: - uid = yield from self.acquisition(dofs, inputs, dets) - products = self.digestion(self.db, uid) - acq_table = pd.DataFrame(inputs, columns=[dof.name for dof in dofs]) - acq_table.insert(0, "timestamp", pd.Timestamp.now()) + @property + def inputs(self): + return self.table.loc[:, self.dof_names].astype(float) - for key, values in products.items(): - acq_table.loc[:, key] = values + @property + def active_inputs(self): + return self.inputs.loc[:, self.active_dof_names] - # compute the fitness for each task - for index, entry in acq_table.iterrows(): - for task in self.tasks: - acq_table.loc[index, f"{task.name}_fitness"] = task.get_fitness(entry) + @property + def passive_inputs(self): + return self.inputs.loc[:, self.passive_dof_names] - except Exception as err: - acq_table = pd.DataFrame() - logging.warning(repr(err)) + @property + def targets(self): + return self.table.loc[:, self.task_names].astype(float) - if not len(inputs) == len(acq_table): - raise ValueError("The resulting table must be the same length as the sampled inputs!") + @property + def feasible(self): + with pd.option_context("mode.use_inf_as_null", True): + feasible = ~self.targets.isna() + return feasible + + # @property + # def input_bounds(self): + # lower_bound = np.r_[ + # self.active_dof_bounds[:, 0], np.nanmin(self.passive_inputs.astype(float).values, axis=0) + # ] + # upper_bound = np.r_[ + # self.active_dof_bounds[:, 1], np.nanmax(self.passive_inputs.astype(float).values, axis=0) + # ] + # return np.c_[lower_bound, upper_bound] - return acq_table + @property + def targets_mean(self): + return np.nanmean(self.targets, axis=0) - def learn( - self, strategy, n_iter=1, n_per_iter=1, reuse_hypers=True, upsample=1, verbose=True, plots=[], **kwargs - ): - """ - This iterates the learning algorithm, looping over ask -> acquire -> tell. - It should be passed to a Bluesky RunEngine. - """ + @property + def targets_scale(self): + return np.nanstd(self.targets, axis=0) + + @property + def normalized_targets(self): + return self.normalize_targets(self.targets) - print(f'learning with strategy "{strategy}" ...') + @property + def latest_passive_dof_values(self): + passive_inputs = self.passive_inputs + return [passive_inputs.loc[passive_inputs.last_valid_index(), col] for col in passive_inputs.columns] - for i in range(n_iter): - inputs_to_sample = np.atleast_2d(self.ask(n=n_per_iter, strategy=strategy, **kwargs)) + @property + def passive_dof_bounds(self): + # food for thought: should this be the current values, or the latest recorded values? + # the former leads to weird extrapolation (especially for time), and the latter to some latency. 
+ # let's go with the first way for now + return np.outer(self.latest_passive_dof_values, [1.0, 1.0]) - new_table = yield from self.acquire(self.dofs, inputs_to_sample, self.dets) + @property + def dof_is_active_mask(self): + return np.r_[np.ones(self.n_active_dof), np.zeros(self.n_passive_dof)].astype(bool) + + @property + def dof_bounds(self): + return np.r_[self.active_dof_bounds, self.passive_dof_bounds] + + @property + def read_active_dofs(self): + return np.array([dof.read()[dof.name]["value"] for dof in self.active_dofs]) + + @property + def read_passive_dofs(self): + return np.array([dof.read()[dof.name]["value"] for dof in self.passive_dofs]) - self.table = pd.concat([self.table, new_table]) - self.table.index = np.arange(len(self.table)) - new_targets = new_table.loc[:, self.target_names].values + @property + def read_dofs(self): + return np.r_[self.read_active_dofs, self.read_passive_dofs] - self.tell(new_inputs=inputs_to_sample, new_targets=new_targets, reuse_hypers=reuse_hypers) + @property + def active_dof_names(self): + return [dof.name for dof in self.active_dofs] + + @property + def passive_dof_names(self): + return [dof.name for dof in self.passive_dofs] + + @property + def dof_names(self): + return [dof.name for dof in self.dofs] + + @property + def det_names(self): + return [det.name for det in self.dets] + + @property + def target_names(self): + return [task.name for task in self.tasks] + + @property + def task_names(self): + return [task.name for task in self.tasks] + + @property + def task_weights(self): + return np.array([task.weight for task in self.tasks]) + + @property + def best_sum_of_tasks(self): + return self.targets.fillna(-np.inf).sum(axis=1).max() + + @property + def best_sum_of_tasks_inputs(self): + return self.inputs[np.nanargmax(self.targets.sum(axis=1))] + + @property + def go_to(self, inputs): + yield from bps.mv(*[_ for items in zip(self.dofs, np.atleast_1d(inputs).T) for _ in items]) + + @property + def go_to_best_sum_of_tasks(self): + yield from self.go_to(self.best_sum_of_tasks_inputs) def plot_tasks(self, **kwargs): - if self.n_dof == 1: + if self.n_active_dof == 1: self._plot_tasks_one_dof(**kwargs) else: self._plot_tasks_many_dofs(**kwargs) - def plot_constraints(self, **kwargs): - if self.n_dof == 1: - self._plot_constraints_one_dof(**kwargs) + def plot_feasibility(self, **kwargs): + if self.n_active_dof == 1: + self._plot_feas_one_dof(**kwargs) else: - self._plot_constraints_many_dofs(**kwargs) + self._plot_feas_many_dofs(**kwargs) - def _plot_constraints_one_dof(self, size=32): + def plot_acquisition(self, **kwargs): + if self.n_active_dof == 1: + self._plot_acq_one_dof(**kwargs) + + else: + self._plot_acq_many_dofs(**kwargs) + + def _plot_feas_one_dof(self, size=32): self.class_fig, self.class_ax = plt.subplots(1, 1, figsize=(4, 4), sharex=True, constrained_layout=True) - self.class_ax.scatter(self.inputs, self.all_targets_valid.astype(int), s=size) + self.class_ax.scatter(self.inputs.values, self.all_targets_valid.astype(int), s=size) - x = torch.tensor(self.test_X_grid.reshape(-1, self.n_dof)).double() - log_prob = self.dirichlet_classifier.log_prob(x).detach().numpy().reshape(self.test_X_grid.shape[:-1]) + x = torch.tensor(self.test_inputs_grid.reshape(-1, self.n_dof)).double() + log_prob = self.dirichlet_classifier.log_prob(x).detach().numpy().reshape(self.test_inputs_grid.shape[:-1]) self.class_ax.plot(self.test_inputs_grid.ravel(), np.exp(log_prob)) - self.class_ax.set_xlim(*self.bounds[0]) + 
self.class_ax.set_xlim(*self.active_dof_bounds[0]) - def _plot_constraints_many_dofs(self, axes=[0, 1], shading="nearest", cmap="inferno", size=32, gridded=None): + def _plot_feas_many_dofs(self, axes=[0, 1], shading="nearest", cmap=DEFAULT_COLORMAP, size=32, gridded=None): if gridded is None: gridded = self.n_dof == 2 @@ -517,16 +635,16 @@ def _plot_constraints_many_dofs(self, axes=[0, 1], shading="nearest", cmap="infe ax.set_ylabel(self.dofs[axes[1]].name) data_ax = self.class_axes[0].scatter( - *self.inputs.T[:2], s=size, c=self.all_targets_valid.astype(int), vmin=0, vmax=1, cmap=cmap + *self.inputs.values.T[:2], s=size, c=self.all_targets_valid.astype(int), vmin=0, vmax=1, cmap=cmap ) if gridded: - x = torch.tensor(self.test_X_grid.reshape(-1, self.n_dof)).double() - log_prob = self.dirichlet_classifier.log_prob(x).detach().numpy().reshape(self.test_X_grid.shape[:-1]) + x = torch.tensor(self.test_inputs_grid.reshape(-1, self.n_dof)).double() + log_prob = self.dirichlet_classifier.log_prob(x).detach().numpy().reshape(self.test_inputs_grid.shape[:-1]) self.class_axes[1].pcolormesh( - *(self.bounds[axes].ptp(axis=1) * self.X_samples[:, None] + self.bounds[axes].min(axis=1)).T, - np.exp(log_prob), + *np.swapaxes(self.test_inputs_grid, 0, -1), + np.exp(log_prob).T, shading=shading, cmap=cmap, vmin=0, @@ -534,16 +652,16 @@ def _plot_constraints_many_dofs(self, axes=[0, 1], shading="nearest", cmap="infe ) else: - x = torch.tensor(self.test_X).double() + x = torch.tensor(self.test_inputs).double() log_prob = self.dirichlet_classifier.log_prob(x).detach().numpy() - self.class_axes[1].scatter(*self.test_X.T[axes], s=size, c=np.exp(log_prob), vmin=0, vmax=1, cmap=cmap) + self.class_axes[1].scatter(*x.detach().numpy().T[axes], s=size, c=np.exp(log_prob), vmin=0, vmax=1, cmap=cmap) self.class_fig.colorbar(data_ax, ax=self.class_axes[:2], location="bottom", aspect=32, shrink=0.8) - for ax in self.task_axes.ravel(): - ax.set_xlim(*self.bounds[axes[0]]) - ax.set_ylim(*self.bounds[axes[1]]) + for ax in self.class_axes.ravel(): + ax.set_xlim(*self.active_dof_bounds[axes[0]]) + ax.set_ylim(*self.active_dof_bounds[axes[1]]) def _plot_tasks_one_dof(self, size=32, lw=1e0): self.task_fig, self.task_axes = plt.subplots( @@ -557,17 +675,16 @@ def _plot_tasks_one_dof(self, size=32, lw=1e0): self.task_axes = np.atleast_1d(self.task_axes) for itask, task in enumerate(self.tasks): - color = COLOR_LIST[itask] + color = DEFAULT_COLOR_LIST[itask] self.task_axes[itask].set_ylabel(task.name) - x = torch.tensor(self.test_X_grid).double() - task_posterior = task.regressor.posterior(x) - task_mean = task_posterior.mean.detach().numpy() * task.targets_scale + task.targets_mean - task_sigma = task_posterior.variance.sqrt().detach().numpy() * task.targets_scale + task_posterior = task.regressor.posterior(torch.tensor(self.test_inputs_grid).double()) + task_mean = task_posterior.mean.detach().numpy().ravel() + task_sigma = task_posterior.variance.sqrt().detach().numpy().ravel() - self.task_axes[itask].scatter(self.inputs, task.targets, s=size, color=color) - self.task_axes[itask].plot(self.test_inputs_grid.ravel(), task_mean, lw=lw, color=color) + self.task_axes[itask].scatter(self.inputs.values, task.targets, s=size, color=color) + self.task_axes[itask].plot(self.test_active_inputs_grid.ravel(), task_mean, lw=lw, color=color) for z in [1, 2]: self.task_axes[itask].fill_between( @@ -579,9 +696,9 @@ def _plot_tasks_one_dof(self, size=32, lw=1e0): alpha=0.5**z, ) - self.task_axes[itask].set_xlim(*self.bounds[0]) + 
self.task_axes[itask].set_xlim(*self.active_dof_bounds[0]) - def _plot_tasks_many_dofs(self, axes=[0, 1], shading="nearest", cmap="inferno", gridded=None, size=32): + def _plot_tasks_many_dofs(self, axes=[0, 1], shading="nearest", cmap=DEFAULT_COLORMAP, gridded=None, size=32): if gridded is None: gridded = self.n_dof == 2 @@ -607,44 +724,145 @@ def _plot_tasks_many_dofs(self, axes=[0, 1], shading="nearest", cmap="inferno", self.task_axes[itask, 2].set_title("posterior std. dev.") data_ax = self.task_axes[itask, 0].scatter( - *self.inputs.T[axes], s=size, c=task.targets, norm=task_norm, cmap=cmap + *self.inputs.values.T[axes], s=size, c=task.targets, norm=task_norm, cmap=cmap ) - if gridded: - x = torch.tensor(self.test_X_grid).double() - else: - x = torch.tensor(self.test_X).double() + x = torch.tensor(self.test_inputs_grid).double() if gridded else torch.tensor(self.test_inputs).double() task_posterior = task.regressor.posterior(x) - task_mean = task_posterior.mean.detach().numpy() * task.targets_scale + task.targets_mean - task_sigma = task_posterior.variance.sqrt().detach().numpy() * task.targets_scale + task_mean = task_posterior.mean.detach().numpy() # * task.targets_scale + task.targets_mean + task_sigma = task_posterior.variance.sqrt().detach().numpy() # * task.targets_scale if gridded: self.task_axes[itask, 1].pcolormesh( - *(self.bounds[axes].ptp(axis=1) * self.X_samples[:, None] + self.bounds[axes].min(axis=1)).T, - task_mean.reshape(self.test_X_grid.shape[:-1]), + *np.swapaxes(self.test_inputs_grid, 0, -1), + task_mean.reshape(self.test_active_inputs_grid.shape[:-1]).T, shading=shading, cmap=cmap, norm=task_norm, ) sigma_ax = self.task_axes[itask, 2].pcolormesh( - *(self.bounds[axes].ptp(axis=1) * self.X_samples[:, None] + self.bounds[axes].min(axis=1)).T, - task_sigma.reshape(self.test_X_grid.shape[:-1]), + *np.swapaxes(self.test_inputs_grid, 0, -1), + task_sigma.reshape(self.test_inputs_grid.shape[:-1]).T, shading=shading, cmap=cmap, ) else: - self.task_axes[itask, 1].scatter( - *self.test_inputs.T[axes], s=size, c=task_mean, norm=task_norm, cmap=cmap - ) - sigma_ax = self.task_axes[itask, 2].scatter( - *self.test_inputs.T[axes], s=size, c=task_sigma, cmap=cmap - ) + self.task_axes[itask, 1].scatter(*x.detach().numpy().T[axes], s=size, c=task_mean, norm=task_norm, cmap=cmap) + sigma_ax = self.task_axes[itask, 2].scatter(*x.detach().numpy().T[axes], s=size, c=task_sigma, cmap=cmap) self.task_fig.colorbar(data_ax, ax=self.task_axes[itask, :2], location="bottom", aspect=32, shrink=0.8) self.task_fig.colorbar(sigma_ax, ax=self.task_axes[itask, 2], location="bottom", aspect=32, shrink=0.8) for ax in self.task_axes.ravel(): - ax.set_xlim(*self.bounds[axes[0]]) - ax.set_ylim(*self.bounds[axes[1]]) + ax.set_xlim(*self.active_dof_bounds[axes[0]]) + ax.set_ylim(*self.active_dof_bounds[axes[1]]) + + def _plot_acq_one_dof(self, size=32, lw=1e0, **kwargs): + acqf_names = np.atleast_1d(kwargs.get("acqf", "ei")) + + self.acq_fig, self.acq_axes = plt.subplots( + 1, + len(acqf_names), + figsize=(6 * len(acqf_names), 6), + sharex=True, + constrained_layout=True, + ) + + self.acq_axes = np.atleast_1d(self.acq_axes) + + for iacqf, acqf_name in enumerate(acqf_names): + color = DEFAULT_COLOR_LIST[0] + + acqf, acqf_meta = self.get_acquisition_function(acqf_name, return_metadata=True) + + *grid_shape, dim = self.test_inputs_grid.shape + x = torch.tensor(self.test_inputs_grid.reshape(-1, 1, dim)).double() + obj = acqf.forward(x) + + if acqf_name in ["ei", "pi"]: + obj = obj.exp() + + 
self.acq_axes[iacqf].set_title(acqf_meta["name"]) + self.acq_axes[iacqf].plot(self.test_active_inputs_grid.ravel(), obj.detach().numpy().ravel(), lw=lw, color=color) + + self.acq_axes[iacqf].set_xlim(*self.active_dof_bounds[0]) + + def _plot_acq_many_dofs(self, axes=[0, 1], shading="nearest", cmap=DEFAULT_COLORMAP, gridded=None, size=32, **kwargs): + acqf_names = np.atleast_1d(kwargs.get("acqf", "ei")) + + self.acq_fig, self.acq_axes = plt.subplots( + 1, + len(acqf_names), + figsize=(4 * len(acqf_names), 5), + sharex=True, + sharey=True, + constrained_layout=True, + ) + + if gridded is None: + gridded = self.n_active_dof == 2 + + self.acq_axes = np.atleast_1d(self.acq_axes) + self.acq_fig.suptitle(f"(x,y)=({self.dofs[axes[0]].name},{self.dofs[axes[1]].name})") + + for iacqf, acqf_name in enumerate(acqf_names): + acqf, acqf_meta = self.get_acquisition_function(acqf_name, return_metadata=True) + + if gridded: + *grid_shape, dim = self.test_inputs_grid.shape + x = torch.tensor(self.test_inputs_grid.reshape(-1, 1, dim)).double() + obj = acqf.forward(x) + + if acqf_name in ["ei", "pi"]: + obj = obj.exp() + + self.acq_axes[iacqf].set_title(acqf_meta["name"]) + obj_ax = self.acq_axes[iacqf].pcolormesh( + *np.swapaxes(self.test_inputs_grid, 0, -1), + obj.detach().numpy().reshape(grid_shape).T, + shading=shading, + cmap=cmap, + ) + + self.acq_fig.colorbar(obj_ax, ax=self.acq_axes[iacqf], location="bottom", aspect=32, shrink=0.8) + + else: + *inputs_shape, dim = self.test_inputs.shape + x = torch.tensor(self.test_inputs.reshape(-1, 1, dim)).double() + obj = acqf.forward(x) + + if acqf_name in ["ei", "pi"]: + obj = obj.exp() + + self.acq_axes[iacqf].set_title(acqf_meta["name"]) + obj_ax = self.acq_axes[iacqf].scatter( + x.detach().numpy()[..., axes[0]], + x.detach().numpy()[..., axes[1]], + c=obj.detach().numpy().reshape(inputs_shape), + ) + + self.acq_fig.colorbar(obj_ax, ax=self.acq_axes[iacqf], location="bottom", aspect=32, shrink=0.8) + + for ax in self.acq_axes.ravel(): + ax.set_xlim(*self.active_dof_bounds[axes[0]]) + ax.set_ylim(*self.active_dof_bounds[axes[1]]) + + def inspect_beam(self, index, border=None): + im = self.images[index] + + x_min, x_max, y_min, y_max, width_x, width_y = self.table.loc[ + index, ["x_min", "x_max", "y_min", "y_max", "width_x", "width_y"] + ] + + bbx = np.array([x_min, x_max])[[0, 0, 1, 1, 0]] + bby = np.array([y_min, y_max])[[0, 1, 1, 0, 0]] + + plt.figure() + plt.imshow(im, cmap="gray_r") + plt.plot(bbx, bby, lw=4e0, c="r") + + if border is not None: + plt.xlim(x_min - border * width_x, x_min + border * width_x) + plt.ylim(y_min - border * width_y, y_min + border * width_y) diff --git a/bloptools/bayesian/acquisition.py b/bloptools/bayesian/acquisition.py index 5586302..0a041dc 100644 --- a/bloptools/bayesian/acquisition.py +++ b/bloptools/bayesian/acquisition.py @@ -1,19 +1,37 @@ import torch -from botorch.acquisition.analytic import LogExpectedImprovement +from botorch.acquisition.analytic import LogExpectedImprovement, LogProbabilityOfImprovement -def log_expected_sum_of_tasks_improvement(candidates, agent): - """ - Return the expected improvement in the sum of tasks. - """ +class Acquisition: + def __init__(self, *args, **kwargs): + ... 
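# A short sketch of evaluating one of the agent's acquisition functions on a batch of
# candidates, mirroring get_acquisition_function() above. BoTorch's analytic acquisition
# functions expect inputs shaped (num_candidates, q=1, n_dof); `agent` is a placeholder
# for an already-initialized Agent.

import torch

acqf, acqf_meta = agent.get_acquisition_function(acqf_name="ei", return_metadata=True)

x = torch.tensor(agent.test_inputs).double().unsqueeze(1)  # (n, 1, n_dof)
log_ei = acqf(x)                                           # log expected improvement per candidate
ei = log_ei.exp()                                          # as done in the plotting helpers above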
- *input_shape, n_dim = candidates.shape - x = torch.as_tensor(candidates.reshape(-1, 1, n_dim)).double() + @staticmethod + def log_expected_improvement(candidates, agent): + *input_shape, n_dim = candidates.shape - LEI = LogExpectedImprovement( - model=agent.multimodel, best_f=agent.best_sum_of_tasks, posterior_transform=agent.scalarization - ) - lei = LEI.forward(x).reshape(input_shape) - log_prob = agent.dirichlet_classifier.log_prob(x).reshape(input_shape) + x = torch.as_tensor(candidates.reshape(-1, 1, n_dim)).double() - return torch.clamp(lei + log_prob, min=-16) + LEI = LogExpectedImprovement( + model=agent.task_model, best_f=agent.best_sum_of_tasks, posterior_transform=agent.scalarization + ) + + lei = LEI.forward(x) + feas_log_prob = agent.feas_model(x) + + return (lei.reshape(input_shape) + feas_log_prob.reshape(input_shape)).detach().numpy() + + @staticmethod + def log_probability_of_improvement(candidates, agent): + *input_shape, n_dim = candidates.shape + + x = torch.as_tensor(candidates.reshape(-1, 1, n_dim)).double() + + LPI = LogProbabilityOfImprovement( + model=agent.task_model, best_f=agent.best_sum_of_tasks, posterior_transform=agent.scalarization + ) + + lpi = LPI.forward(x) + feas_log_prob = agent.feas_model(x) + + return (lpi.reshape(input_shape) + feas_log_prob.reshape(input_shape)).detach().numpy() diff --git a/bloptools/bayesian/kernels.py b/bloptools/bayesian/kernels.py index e678667..0bb1430 100644 --- a/bloptools/bayesian/kernels.py +++ b/bloptools/bayesian/kernels.py @@ -3,149 +3,144 @@ import torch -class LatentMaternKernel(gpytorch.kernels.Kernel): +class LatentKernel(gpytorch.kernels.Kernel): + is_stationary = True + + num_outputs = 1 + def __init__( self, - n_dim, - off_diag=False, - diagonal_prior=False, + num_inputs=1, + off_diag=True, + diag_prior=True, + scale_kernel=True, **kwargs, ): - super(LatentMaternKernel, self).__init__() + super(LatentKernel, self).__init__() + + self.num_inputs = num_inputs + self.n_off_diag = int(num_inputs * (num_inputs - 1) / 2) - self.n_dim = n_dim - self.n_off_diag = int(n_dim * (n_dim - 1) / 2) self.off_diag = off_diag + self.scale_kernel = scale_kernel - # output_scale_constraint = gpytorch.constraints.Positive() - trans_diagonal_constraint = gpytorch.constraints.Interval(1e0, 1e2) - trans_off_diag_constraint = gpytorch.constraints.Interval(-1e0, 1e0) + self.nu = kwargs.get("nu", 1.5) - trans_diagonal_initial = np.sqrt( - trans_diagonal_constraint.lower_bound * trans_diagonal_constraint.upper_bound - ) - raw_trans_diagonal_initial = trans_diagonal_constraint.inverse_transform(trans_diagonal_initial) + # kernel_scale_constraint = gpytorch.constraints.Positive() + diag_entries_constraint = gpytorch.constraints.Positive() # gpytorch.constraints.Interval(5e-1, 1e2) + skew_entries_constraint = gpytorch.constraints.Interval(-1e0, 1e0) + + # diag_entries_initial = np.ones() + # np.sqrt(diag_entries_constraint.lower_bound * diag_entries_constraint.upper_bound) + raw_diag_entries_initial = diag_entries_constraint.inverse_transform(torch.tensor(2)) - # self.register_parameter( - # name="raw_output_scale", parameter=torch.nn.Parameter(torch.ones(*self.batch_shape, 1).double()) - # ) self.register_parameter( - name="raw_trans_diagonal", - parameter=torch.nn.Parameter( - raw_trans_diagonal_initial * torch.ones(*self.batch_shape, self.n_dim).double() - ), + name="raw_diag_entries", + parameter=torch.nn.Parameter(raw_diag_entries_initial * torch.ones(self.num_outputs, self.num_inputs).double()), ) + 
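# These raw_* registrations follow GPyTorch's usual constrained-hyperparameter pattern:
# store an unconstrained raw tensor, attach a constraint, and expose the transformed value
# through a property. A minimal standalone sketch of that pattern (the ToyKernel name and
# its "scale" parameter are illustrative only, not part of this library):

import torch
import gpytorch

class ToyKernel(gpytorch.kernels.Kernel):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.register_parameter("raw_scale", torch.nn.Parameter(torch.zeros(1)))
        self.register_constraint("raw_scale", gpytorch.constraints.Positive())

    @property
    def scale(self):
        # map the unconstrained raw value through the constraint (a softplus for Positive)
        return self.raw_scale_constraint.transform(self.raw_scale)

    def forward(self, x1, x2, diag=False, **params):
        # a simple RBF-like covariance, scaled by the constrained hyperparameter
        return self.scale * torch.exp(-0.5 * self.covar_dist(x1, x2, square_dist=True, diag=diag, **params))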
self.register_constraint("raw_diag_entries", constraint=diag_entries_constraint) - # self.register_constraint("raw_output_scale", output_scale_constraint) - self.register_constraint("raw_trans_diagonal", trans_diagonal_constraint) - - if diagonal_prior: + if diag_prior: self.register_prior( - name="trans_diagonal_prior", - prior=gpytorch.priors.GammaPrior(concentration=1, rate=0.2), - param_or_closure=lambda m: m.trans_diagonal, - setting_closure=lambda m, v: m._set_trans_diagonal(v), + name="diag_entries_prior", + prior=gpytorch.priors.GammaPrior(concentration=2, rate=1), + param_or_closure=lambda m: m.diag_entries, + setting_closure=lambda m, v: m._set_diag_entries(v), ) if self.off_diag: self.register_parameter( - name="raw_trans_off_diag", - parameter=torch.nn.Parameter(torch.zeros(*self.batch_shape, self.n_off_diag).double()), + name="raw_skew_entries", + parameter=torch.nn.Parameter(torch.zeros(self.num_outputs, self.n_off_diag).double()), + ) + self.register_constraint("raw_skew_entries", skew_entries_constraint) + + if self.scale_kernel: + kernel_scale_constraint = gpytorch.constraints.Positive() + kernel_scale_prior = gpytorch.priors.GammaPrior(concentration=2, rate=0.15) + + self.register_parameter( + name="raw_kernel_scale", + parameter=torch.nn.Parameter(torch.ones(1)), + ) + + self.register_constraint("raw_kernel_scale", constraint=kernel_scale_constraint) + + self.register_prior( + name="kernel_scale_prior", + prior=kernel_scale_prior, + param_or_closure=lambda m: m.kernel_scale, + setting_closure=lambda m, v: m._set_kernel_scale(v), ) - self.register_constraint("raw_trans_off_diag", trans_off_diag_constraint) - # @property - # def output_scale(self): - # return self.raw_output_scale_constraint.transform(self.raw_output_scale) + @property + def diag_entries(self): + return self.raw_diag_entries_constraint.transform(self.raw_diag_entries) @property - def trans_diagonal(self): - return self.raw_trans_diagonal_constraint.transform(self.raw_trans_diagonal) + def skew_entries(self): + return self.raw_skew_entries_constraint.transform(self.raw_skew_entries) @property - def trans_off_diag(self): - return self.raw_trans_off_diag_constraint.transform(self.raw_trans_off_diag) + def kernel_scale(self): + return self.raw_kernel_scale_constraint.transform(self.raw_kernel_scale) - # @output_scale.setter - # def output_scale(self, value): - # self._set_output_scale(value) + @diag_entries.setter + def diag_entries(self, value): + self._set_diag_entries(value) - @trans_diagonal.setter - def trans_diagonal(self, value): - self._set_trans_diagonal(value) + @skew_entries.setter + def skew_entries(self, value): + self._set_skew_entries(value) - @trans_off_diag.setter - def trans_off_diag(self, value): - self._set_trans_off_diag(value) + @kernel_scale.setter + def kernel_scale(self, value): + self._set_kernel_scale(value) - def _set_trans_off_diag(self, value): + def _set_diag_entries(self, value): if not torch.is_tensor(value): - value = torch.as_tensor(value).to(self.raw_trans_off_diag) - self.initialize(raw_trans_off_diag=self.raw_trans_off_diag_constraint.inverse_transform(value)) + value = torch.as_tensor(value).to(self.raw_diag_entries) + self.initialize(raw_diag_entries=self.raw_diag_entries_constraint.inverse_transform(value)) - # def _set_output_scale(self, value): - # if not torch.is_tensor(value): - # value = torch.as_tensor(value).to(self.raw_output_scale) - # self.initialize(raw_output_scale=self.raw_output_scale_constraint.inverse_transform(value)) + def _set_skew_entries(self, 
value): + if not torch.is_tensor(value): + value = torch.as_tensor(value).to(self.raw_skew_entries) + self.initialize(raw_skew_entries=self.raw_skew_entries_constraint.inverse_transform(value)) - def _set_trans_diagonal(self, value): + def _set_kernel_scale(self, value): if not torch.is_tensor(value): - value = torch.as_tensor(value).to(self.raw_trans_diagonal) - self.initialize(raw_trans_diagonal=self.raw_trans_diagonal_constraint.inverse_transform(value)) + value = torch.as_tensor(value).to(self.raw_kernel_scale) + self.initialize(raw_kernel_scale=self.raw_kernel_scale_constraint.inverse_transform(value)) @property - def trans_matrix(self): + def output_scale(self): + return self.kernel_scale.sqrt() + + @property + def latent_dimensions(self): # no rotations if not self.off_diag: - T = torch.eye(self.n_dim).double() + T = torch.eye(self.num_inputs, dtype=torch.float64) # construct an orthogonal matrix. fun fact: exp(skew(N)) is the generator of SO(N) else: - A = torch.zeros((self.n_dim, self.n_dim)).double() - A[np.triu_indices(self.n_dim, k=1)] = self.trans_off_diag - A += -A.T + A = torch.zeros((self.num_inputs, self.num_inputs)).double() + A[np.triu_indices(self.num_inputs, k=1)] = self.skew_entries + A += -A.transpose(-1, -2) T = torch.linalg.matrix_exp(A) - T = torch.matmul(torch.diag(self.trans_diagonal), T) + diagonal_transform = torch.cat([torch.diag(entries).unsqueeze(0) for entries in self.diag_entries], dim=0) + T = torch.matmul(diagonal_transform, T) return T - def forward(self, x1, x2=None, diag=False, auto=False, last_dim_is_batch=False, **params): - # returns the homoskedastic diagonal - if diag: - # return torch.square(self.output_scale[0]) * torch.ones((*self.batch_shape, *x1.shape[:-1])) - return torch.ones((*self.batch_shape, *x1.shape[:-1])) - - # computes the autocovariance of the process at the parameters - if auto: - x2 = x1 - - # print(x1, x2) - - # x1 and x2 are arrays of shape (..., n_1, n_dim) and (..., n_2, n_dim) - _x1, _x2 = torch.as_tensor(x1).double(), torch.as_tensor(x2).double() - - # dx has shape (..., n_1, n_2, n_dim) - dx = _x1.unsqueeze(-2) - _x2.unsqueeze(-3) - - # transform coordinates with hyperparameters (this applies lengthscale and rotations) - trans_dx = torch.matmul(self.trans_matrix, dx.unsqueeze(-1)) - - # total transformed distance. D has shape (..., n_1, n_2) - d_eff = torch.sqrt(torch.matmul(trans_dx.transpose(-1, -2), trans_dx).sum((-1, -2)) + 1e-12) - - # Matern covariance of effective order nu=3/2. 
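# A standalone numerical sketch of the Matern-3/2 form that the new forward() below keeps:
# k(d) = scale * (1 + d) * exp(-d), i.e. Matern-3/2 with the sqrt(3) factor absorbed into
# the lengthscale. The toy points and the `scale` value here are illustrative only.

import torch

x = torch.linspace(0, 1, 5, dtype=torch.float64).unsqueeze(-1)  # five 1D inputs
d = torch.cdist(x, x)                                           # pairwise distances
scale = 1.0
K = scale * (1 + d) * torch.exp(-d)                             # Matern-3/2 covariance matrix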
- # nu=3/2 is a special case and has a concise closed-form expression - # In general, this is something between an exponential (n=1/2) and a Gaussian (n=infinity) - # https://en.wikipedia.org/wiki/Matern_covariance_function - - # C = torch.exp(-d_eff) # Matern_0.5 (exponential) - C = (1 + d_eff) * torch.exp(-d_eff) # Matern_1.5 - # C = (1 + d_eff + 1 / 3 * torch.square(d_eff)) * torch.exp(-d_eff) # Matern_2.5 - # C = torch.exp(-0.5 * np.square(d_eff)) # Matern_infinity (RBF) + def forward(self, x1, x2, diag=False, **params): + # adapted from the Matern kernel + mean = x1.reshape(-1, x1.size(-1)).mean(0)[(None,) * (x1.dim() - 1)] - # C = torch.square(self.output_scale[0]) * torch.exp(-torch.square(d_eff)) + trans_x1 = torch.matmul(self.latent_dimensions.unsqueeze(1), (x1 - mean).unsqueeze(-1)).squeeze(-1) + trans_x2 = torch.matmul(self.latent_dimensions.unsqueeze(1), (x2 - mean).unsqueeze(-1)).squeeze(-1) - # print(f'{diag = } {C.shape = }') + distance = self.covar_dist(trans_x1, trans_x2, diag=diag, **params) - return C + return self.kernel_scale * (1 + distance) * torch.exp(-distance) diff --git a/bloptools/bayesian/models.py b/bloptools/bayesian/models.py index d6ea2ea..21ebc36 100644 --- a/bloptools/bayesian/models.py +++ b/bloptools/bayesian/models.py @@ -7,9 +7,45 @@ from . import kernels -class BoTorchSingleTaskGP(ExactGP, GPyTorchModel): +class LatentDirichletClassifier(botorch.models.gp_regression.SingleTaskGP): + def __init__(self, train_inputs, train_targets, *args, **kwargs): + super().__init__(train_inputs, train_targets, *args, **kwargs) + + self.mean_module = gpytorch.means.ConstantMean() + self.covar_module = kernels.LatentKernel( + num_inputs=train_inputs.shape[-1], + num_outputs=train_targets.shape[-1], + off_diag=True, + diag_prior=True, + scale_output=True, + **kwargs + ) + + def log_prob(self, x, n_samples=256): + *input_shape, n_dim = x.shape + samples = self.posterior(x.reshape(-1, n_dim)).sample(torch.Size((n_samples,))).exp() + return torch.log((samples / samples.sum(-1, keepdim=True)).mean(0)[:, 1]).reshape(*input_shape, 1) + + +class LatentGP(botorch.models.gp_regression.SingleTaskGP): + def __init__(self, train_inputs, train_targets, *args, **kwargs): + super().__init__(train_inputs, train_targets, *args, **kwargs) + + self.mean_module = gpytorch.means.ConstantMean(constant_prior=gpytorch.priors.NormalPrior(loc=0, scale=1)) + + self.covar_module = kernels.LatentKernel( + num_inputs=train_inputs.shape[-1], + num_outputs=train_targets.shape[-1], + off_diag=True, + diag_prior=True, + scale_output=True, + **kwargs + ) + + +class OldBoTorchSingleTaskGP(ExactGP, GPyTorchModel): def __init__(self, train_inputs, train_targets, likelihood): - super(BoTorchSingleTaskGP, self).__init__(train_inputs, train_targets, likelihood) + super(OldBoTorchSingleTaskGP, self).__init__(train_inputs, train_targets, likelihood) self.mean_module = gpytorch.means.ConstantMean() self.covar_module = gpytorch.kernels.ScaleKernel( kernels.LatentMaternKernel(n_dim=train_inputs.shape[-1], off_diag=True, diagonal_prior=True) @@ -42,11 +78,11 @@ def forward(self, x): return gpytorch.distributions.MultitaskMultivariateNormal(mean_x, covar_x) -class BoTorchDirichletClassifier(gpytorch.models.ExactGP, botorch.models.gpytorch.GPyTorchModel): +class OldBoTorchDirichletClassifier(gpytorch.models.ExactGP, botorch.models.gpytorch.GPyTorchModel): _num_outputs = 1 # to inform GPyTorchModel API def __init__(self, train_inputs, train_targets, likelihood): - super(BoTorchDirichletClassifier, 
self).__init__(train_inputs, train_targets, likelihood) + super(OldBoTorchDirichletClassifier, self).__init__(train_inputs, train_targets, likelihood) self.mean_module = gpytorch.means.ConstantMean(batch_shape=len(train_targets.unique())) self.covar_module = gpytorch.kernels.ScaleKernel( kernels.LatentMaternKernel(n_dim=train_inputs.shape[-1], off_diag=False, diagonal_prior=False) diff --git a/bloptools/de/de_opt_utils.py b/bloptools/de/de_opt_utils.py index 033d68c..095326e 100644 --- a/bloptools/de/de_opt_utils.py +++ b/bloptools/de/de_opt_utils.py @@ -126,9 +126,7 @@ def _run_flyers(flyers): return uid_list -def run_hardware_fly( - motors, detector, population, max_velocity, min_velocity, start_det, read_det, stop_det, watch_func -): +def run_hardware_fly(motors, detector, population, max_velocity, min_velocity, start_det, read_det, stop_det, watch_func): flyers = generate_hardware_flyers( motors=motors, detector=detector, @@ -143,9 +141,7 @@ def run_hardware_fly( return _run_flyers(flyers) -def run_fly_sim( - population, num_interm_vals, num_scans_at_once, sim_id, server_name, root_dir, watch_name, run_parallel -): +def run_fly_sim(population, num_interm_vals, num_scans_at_once, sim_id, server_name, root_dir, watch_name, run_parallel): flyers = generate_sim_flyers( population=population, num_between_vals=num_interm_vals, diff --git a/bloptools/de/de_optimization.py b/bloptools/de/de_optimization.py index eb35748..54ed426 100644 --- a/bloptools/de/de_optimization.py +++ b/bloptools/de/de_optimization.py @@ -7,9 +7,7 @@ from .de_opt_utils import check_opt_bounds, move_to_optimized_positions -def omea_evaluation( - motors, bounds, popsize, num_interm_vals, num_scans_at_once, uids, flyer_name, intensity_name, db -): +def omea_evaluation(motors, bounds, popsize, num_interm_vals, num_scans_at_once, uids, flyer_name, intensity_name, db): if motors is not None: # hardware # get the data from databroker @@ -149,9 +147,7 @@ def rand_1(pop, popsize, target_indx, mut, bounds): for elem, param in x_1.items(): v_donor[elem] = {} for param_name in param.keys(): - v_donor[elem][param_name] = x_1[elem][param_name] + mut * ( - x_2[elem][param_name] - x_3[elem][param_name] - ) + v_donor[elem][param_name] = x_1[elem][param_name] + mut * (x_2[elem][param_name] - x_3[elem][param_name]) v_donor = ensure_bounds(vec=v_donor, bounds=bounds) return v_donor @@ -169,9 +165,7 @@ def best_1(pop, popsize, target_indx, mut, bounds, ind_sol): for elem, param in x_best.items(): v_donor[elem] = {} for param_name in param.items(): - v_donor[elem][param_name] = x_best[elem][param_name] + mut * ( - x_1[elem][param_name] - x_2[elem][param_name] - ) + v_donor[elem][param_name] = x_best[elem][param_name] + mut * (x_1[elem][param_name] - x_2[elem][param_name]) v_donor = ensure_bounds(vec=v_donor, bounds=bounds) return v_donor @@ -182,9 +176,7 @@ def mutate(population, strategy, mut, bounds, ind_sol): if strategy == "rand/1": v_donor = rand_1(pop=population, popsize=len(population), target_indx=i, mut=mut, bounds=bounds) elif strategy == "best/1": - v_donor = best_1( - pop=population, popsize=len(population), target_indx=i, mut=mut, bounds=bounds, ind_sol=ind_sol - ) + v_donor = best_1(pop=population, popsize=len(population), target_indx=i, mut=mut, bounds=bounds, ind_sol=ind_sol) # elif strategy == 'current-to-best/1': # v_donor = current_to_best_1(population, len(population), i, mut, bounds, ind_sol) # elif strategy == 'best/2': @@ -544,9 +536,7 @@ def optimization_plan( cross_trial_pop = crossover(population=pop_positions, 
mutated_indv=mutated_trial_pop, crosspb=crosspb) # select if opt_type == "hardware": - select_positions = yield from create_selection_params( - motors=motors, population=None, cross_indv=cross_trial_pop - ) + select_positions = yield from create_selection_params(motors=motors, population=None, cross_indv=cross_trial_pop) uid_list = yield from fly_plan( motors=motors, detector=detector, @@ -690,6 +680,4 @@ def optimization_plan( print(f"Convergence list: {best_fitness}") - yield from bp.count( - [], md={"best_fitness": best_fitness, "optimized_positions": optimized_positions, "uids": all_uids} - ) + yield from bp.count([], md={"best_fitness": best_fitness, "optimized_positions": optimized_positions, "uids": all_uids}) diff --git a/bloptools/de/hardware_flyer.py b/bloptools/de/hardware_flyer.py index 98ee9fa..5406b4f 100644 --- a/bloptools/de/hardware_flyer.py +++ b/bloptools/de/hardware_flyer.py @@ -121,9 +121,7 @@ def collect(self): motor_dict.update( { f"{self.name}_{motor_name}_velocity": self.velocities[motor_name], - f"{self.name}_{motor_name}_position": self.watch_positions[motor_name][field_name][ - ind - ], + f"{self.name}_{motor_name}_position": self.watch_positions[motor_name][field_name][ind], } ) data = {f"{self.name}_intensity": self.watch_intensities[ind]} diff --git a/bloptools/devices.py b/bloptools/devices.py new file mode 100644 index 0000000..6e56d2a --- /dev/null +++ b/bloptools/devices.py @@ -0,0 +1,29 @@ +import time as ttime + +from ophyd import Component as Cpt +from ophyd import Device, Signal, SignalRO + + +def dummy_dofs(n=2): + return [Signal(name=f"x{i+1}", value=0) for i in range(n)] + + +def get_dummy_device(name="dofs", n=2): + components = {} + + for i in range(n): + components[f"x{i+1}"] = Cpt(Signal, value=i + 1) + + cls = type("DOF", (Device,), components) + + device = cls(name=name) + + return [getattr(device, attr) for attr in device.read_attrs] + + +class TimeReadback(SignalRO): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def get(self): + return ttime.time() diff --git a/bloptools/experiments/__init__.py b/bloptools/experiments/__init__.py index 35e5d2b..e69de29 100644 --- a/bloptools/experiments/__init__.py +++ b/bloptools/experiments/__init__.py @@ -1 +0,0 @@ -from . import tests # noqa F401 diff --git a/bloptools/experiments/nsls2/tes.py b/bloptools/experiments/nsls2/tes.py index 33d3d89..c0a495a 100644 --- a/bloptools/experiments/nsls2/tes.py +++ b/bloptools/experiments/nsls2/tes.py @@ -51,9 +51,7 @@ def fitness(entry, args): background = args["background"] - x_min, x_max, y_min, y_max, separability = utils.get_beam_stats( - image - background, beam_prop=args["beam_prop"] - ) + x_min, x_max, y_min, y_max, separability = utils.get_beam_stats(image - background, beam_prop=args["beam_prop"]) n_y, n_x = image.shape diff --git a/bloptools/experiments/sirepo/tes.py b/bloptools/experiments/sirepo/tes.py index 240f1ef..a1b1267 100644 --- a/bloptools/experiments/sirepo/tes.py +++ b/bloptools/experiments/sirepo/tes.py @@ -1,35 +1,22 @@ import numpy as np -def digestion(db, uid, image_name="w9"): - """ - Simulating a misaligned Gaussian beam. 
The optimum is at (1, 1, 1, 1) - """ - - table = db[uid].table(fill=True) - - products_keys = [ - "image", - "vertical_extent", - "horizontal_extent", - "flux", - "x_pos", - "y_pos", - "x_width", - "y_width", - ] - - products = {key: [] for key in products_keys} - - for index, entry in table.iterrows(): +def w8_digestion(db, uid): + return image_digestion(db, uid, image_name="w8") + + +def w9_digestion(db, uid): + return image_digestion(db, uid, image_name="w9") + + +def image_digestion(db, uid, image_name): + products = db[uid].table(fill=True) + + for index, entry in products.iterrows(): image = getattr(entry, f"{image_name}_image") horizontal_extent = getattr(entry, f"{image_name}_horizontal_extent") vertical_extent = getattr(entry, f"{image_name}_vertical_extent") - products["image"].append(image) - products["vertical_extent"].append(vertical_extent) - products["horizontal_extent"].append(horizontal_extent) - flux = image.sum() n_y, n_x = image.shape @@ -47,13 +34,15 @@ def digestion(db, uid, image_name="w9"): bad |= any(np.isnan([mean_x, mean_y, sigma_x, sigma_y])) if not bad: - products["flux"].append(flux) - products["x_pos"].append(mean_x) - products["y_pos"].append(mean_y) - products["x_width"].append(2 * sigma_x) - products["y_width"].append(2 * sigma_y) + products.loc[index, ["flux", "x_pos", "y_pos", "x_width", "y_width"]] = ( + flux, + mean_x, + mean_y, + 2 * sigma_x, + 2 * sigma_y, + ) else: - for key in ["flux", "x_pos", "y_pos", "x_width", "y_width"]: - products[key].append(np.nan) + for col in ["flux", "x_pos", "y_pos", "x_width", "y_width"]: + products.loc[index, col] = np.nan return products diff --git a/bloptools/experiments/tests/__init__.py b/bloptools/experiments/tests/__init__.py deleted file mode 100644 index 89ce8a8..0000000 --- a/bloptools/experiments/tests/__init__.py +++ /dev/null @@ -1,60 +0,0 @@ -import numpy as np -from ophyd import Component as Cpt -from ophyd import Device, Signal - - -def get_dummy_dofs(n=2): - return [Signal(name=f"x{i+1}", value=0) for i in range(n)] - - -def get_dummy_device(name="dofs", n=2): - components = {} - - for i in range(n): - components[f"x{i+1}"] = Cpt(Signal, value=i + 1) - - cls = type("DOF", (Device,), components) - - device = cls(name=name) - - return [getattr(device, attr) for attr in device.read_attrs] - - -def booth(x1, x2): - return (x1 + 2 * x2 - 7) ** 2 + (2 * x1 + x2 - 5) ** 2 - - -def himmelblau(x1, x2): - return (x1**2 + x2 - 11) ** 2 + (x1 + x2**2 - 7) ** 2 - - -def gaussian_beam_waist(x1, x2): - return np.sqrt(1 + 0.25 * (x1 - x2) ** 2 + 16 * (x1 + x2 - 2) ** 2) - - -def himmelblau_digestion(db, uid): - table = db[uid].table() - products = {"himmelblau": []} - - for index, entry in table.iterrows(): - products["himmelblau"].append(himmelblau(entry.x1, entry.x2)) - - return products - - -def mock_kbs_digestion(db, uid): - """ - Simulating a misaligned Gaussian beam. 
The optimum is at (1, 1, 1, 1) - """ - - table = db[uid].table() - products = {"x_width": [], "y_width": []} - - for index, entry in table.iterrows(): - sigma_x = gaussian_beam_waist(entry.x1, entry.x2) - sigma_y = gaussian_beam_waist(entry.x3, entry.x4) - - products["x_width"].append(2 * sigma_x) - products["y_width"].append(2 * sigma_y) - - return products diff --git a/bloptools/tasks/__init__.py b/bloptools/tasks.py similarity index 92% rename from bloptools/tasks/__init__.py rename to bloptools/tasks.py index fa787e6..61ce47b 100644 --- a/bloptools/tasks/__init__.py +++ b/bloptools/tasks.py @@ -1,12 +1,13 @@ class Task: - MIN_NOISE_LEVEL = 1e-4 + MIN_NOISE_LEVEL = 1e-6 MAX_NOISE_LEVEL = 1e-2 def __init__(self, key, kind="max", name=None, transform=None, **kwargs): self.key = key self.kind = kind - self.name = name if name is not None else f"{kind}_{key}" + self.name = name if name is not None else f"{kind}_{key}_fitness" self.transform = transform if transform is not None else lambda x: x + self.weight = 1.0 if kind.lower() in ["min", "minimum", "minimize"]: self.sign = -1 diff --git a/bloptools/test_functions.py b/bloptools/test_functions.py new file mode 100644 index 0000000..25693a2 --- /dev/null +++ b/bloptools/test_functions.py @@ -0,0 +1,48 @@ +import numpy as np + + +def booth(x1, x2): + return (x1 + 2 * x2 - 7) ** 2 + (2 * x1 + x2 - 5) ** 2 + + +def matyas(x1, x2): + return 0.26 * (x1**2 + x2**2) - 0.48 * x1 * x2 + + +def himmelblau(x1, x2): + return (x1**2 + x2 - 11) ** 2 + (x1 + x2**2 - 7) ** 2 + + +def rastrigin(*x): + X = np.c_[x] + return 10 * X.shape[-1] + (X**2 - 10 * np.cos(2 * np.pi * X)).sum(axis=1) + + +def gaussian_beam_waist(x1, x2): + return np.sqrt(1 + 0.25 * (x1 - x2) ** 2 + 16 * (x1 + x2 - 2) ** 2) + + +def himmelblau_digestion(db, uid): + products = db[uid].table() + + for index, entry in products.iterrows(): + products.loc[index, "himmelblau"] = himmelblau(entry.x1, entry.x2) + + return products + + +def mock_kbs_digestion(db, uid): + """ + Simulating a misaligned Gaussian beam. 
The optimum is at (1, 1, 1, 1) + """ + + products = db[uid].table() + + for index, entry in products.iterrows(): + sigma_x = gaussian_beam_waist(entry.x1, entry.x2) + sigma_y = gaussian_beam_waist(entry.x3, entry.x4) + + products.loc[index, "x_width"] = 2 * sigma_x + products.loc[index, "y_width"] = 2 * sigma_y + + return products diff --git a/bloptools/tests/test_bayesian_shadow.py b/bloptools/tests/test_bayesian_shadow.py index da32024..aa6b908 100644 --- a/bloptools/tests/test_bayesian_shadow.py +++ b/bloptools/tests/test_bayesian_shadow.py @@ -2,8 +2,8 @@ import pytest from sirepo_bluesky.sirepo_ophyd import create_classes -from bloptools.bayesian import Agent -from bloptools.experiments.sirepo import tes +import bloptools +from bloptools.experiments.sirepo.tes import w9_digestion from bloptools.tasks import Task @@ -19,24 +19,23 @@ def test_bayesian_agent_tes_shadow(RE, db, shadow_tes_simulation): kb_dofs = [kbv.x_rot, kbv.offz] kb_bounds = np.array([[-0.10, +0.10], [-0.50, +0.50]]) - for dof in kb_dofs: - dof.kind = "hinted" - beam_flux_task = Task(key="flux", kind="max", transform=lambda x: np.log(x)) beam_width_task = Task(key="x_width", kind="min", transform=lambda x: np.log(x)) beam_height_task = Task(key="y_width", kind="min", transform=lambda x: np.log(x)) - boa = Agent( - dofs=kb_dofs, - bounds=kb_bounds, + agent = bloptools.bayesian.Agent( + active_dofs=kb_dofs, + passive_dofs=[], detectors=[w9], + active_dof_bounds=kb_bounds, tasks=[beam_flux_task, beam_width_task, beam_height_task], - digestion=tes.digestion, + digestion=w9_digestion, db=db, ) - RE(boa.initialize(init_scheme="quasi-random", n_init=4)) + RE(agent.initialize(acqf="qr", n_init=4)) - RE(boa.learn(strategy="esti", n_iter=2, n_per_iter=2)) + RE(agent.learn(acqf="ei", n_iter=2)) + RE(agent.learn(acqf="pi", n_iter=2)) - boa.plot_tasks() + agent.plot_tasks() diff --git a/bloptools/tests/test_bayesian_test_funcs.py b/bloptools/tests/test_bayesian_test_funcs.py index 31cff3d..058b709 100644 --- a/bloptools/tests/test_bayesian_test_funcs.py +++ b/bloptools/tests/test_bayesian_test_funcs.py @@ -1,48 +1,50 @@ import pytest import bloptools -from bloptools.experiments.tests import himmelblau_digestion, mock_kbs_digestion from bloptools.tasks import Task +from bloptools.test_functions import himmelblau_digestion, mock_kbs_digestion @pytest.mark.test_func def test_bayesian_agent_himmelblau(RE, db): - dofs = bloptools.experiments.tests.get_dummy_dofs(n=2) # get a list of two DOFs + dofs = bloptools.devices.dummy_dofs(n=2) # get a list of two DOFs bounds = [(-5.0, +5.0), (-5.0, +5.0)] task = Task(key="himmelblau", kind="min") - boa = bloptools.bayesian.Agent( - dofs=dofs, - bounds=bounds, - tasks=task, + agent = bloptools.bayesian.Agent( + active_dofs=dofs, + passive_dofs=[], + active_dof_bounds=bounds, + tasks=[task], digestion=himmelblau_digestion, db=db, ) - RE(boa.initialize(init_scheme="quasi-random", n_init=16)) + RE(agent.initialize(acqf="qr", n_init=16)) - RE(boa.learn(strategy="esti", n_iter=2, n_per_iter=3)) + RE(agent.learn(acqf="ei", n_iter=2)) - boa.plot_tasks() + agent.plot_tasks() @pytest.mark.test_func def test_bayesian_agent_mock_kbs(RE, db): - dofs = bloptools.experiments.tests.get_dummy_dofs(n=4) # get a list of two DOFs + dofs = bloptools.devices.dummy_dofs(n=4) # get a list of two DOFs bounds = [(-4.0, +4.0), (-4.0, +4.0), (-4.0, +4.0), (-4.0, +4.0)] tasks = [Task(key="x_width", kind="min"), Task(key="y_width", kind="min")] - boa = bloptools.bayesian.Agent( - dofs=dofs, - bounds=bounds, + agent = 
bloptools.bayesian.Agent( + active_dofs=dofs, + passive_dofs=[], + active_dof_bounds=bounds, tasks=tasks, digestion=mock_kbs_digestion, db=db, ) - RE(boa.initialize(init_scheme="quasi-random", n_init=16)) + RE(agent.initialize(acqf="qr", n_init=16)) - RE(boa.learn(strategy="esti", n_iter=2, n_per_iter=3)) + RE(agent.learn(acqf="ei", n_iter=4)) - boa.plot_tasks() + agent.plot_tasks() diff --git a/bloptools/utils.py b/bloptools/utils.py index 74e6d98..82953af 100644 --- a/bloptools/utils.py +++ b/bloptools/utils.py @@ -1,8 +1,18 @@ +import botorch import numpy as np import scipy as sp +import torch from ortools.constraint_solver import pywrapcp, routing_enums_pb2 +def normalized_sobol_sampler(n, d): + """ + Returns $n$ quasi-randomly sampled points in the [0,1]^d hypercube + """ + x = botorch.utils.sampling.draw_sobol_samples(torch.outer(torch.tensor([0, 1]), torch.ones(d)), n=n, q=1) + return x.squeeze(1).detach().numpy() + + def estimate_root_indices(x): # or, indices_before_sign_changes i_whole = np.where(np.sign(x[1:]) != np.sign(x[:-1]))[0] @@ -43,9 +53,7 @@ def get_routing(origin, points): delay_matrix = np.sqrt(np.square(rel_points[:, None, :] - rel_points[None, :, :]).sum(axis=-1)) delay_matrix = (1e3 * delay_matrix).astype(int) # it likes integers idk - manager = pywrapcp.RoutingIndexManager( - len(_points), 1, 0 - ) # number of depots, number of salesmen, starting index + manager = pywrapcp.RoutingIndexManager(len(_points), 1, 0) # number of depots, number of salesmen, starting index routing = pywrapcp.RoutingModel(manager) def delay_callback(from_index, to_index): diff --git a/docs/source/tutorials.rst b/docs/source/tutorials.rst index 53df520..48218c9 100644 --- a/docs/source/tutorials.rst +++ b/docs/source/tutorials.rst @@ -5,4 +5,7 @@ Tutorials :maxdepth: 2 tutorials/introduction.ipynb + tutorials/hyperparameters.ipynb + tutorials/constrained-himmelblau.ipynb + tutorials/latent-toroid-dimensions.ipynb tutorials/multi-task-sirepo.ipynb diff --git a/docs/source/tutorials/constrained-himmelblau.ipynb b/docs/source/tutorials/constrained-himmelblau.ipynb new file mode 100644 index 0000000..8a8f20e --- /dev/null +++ b/docs/source/tutorials/constrained-himmelblau.ipynb @@ -0,0 +1,239 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "e7b5e13a-c059-441d-8d4f-fff080d52054", + "metadata": {}, + "source": [ + "# Feasibility modeling\n", + "\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "c18ef717", + "metadata": {}, + "source": [ + "A more complicated example involves minimizing in two dimensions, where some parts of the parameter space are off-limits. 
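To make "off-limits" concrete before we set anything up, here is a small illustration (a sketch only, using the himmelblau function from bloptools.test_functions and the circular constraint adopted just below) of an objective that is simply undefined outside its allowed region:

import numpy as np
from bloptools.test_functions import himmelblau

# illustration only: evaluate Himmelblau's function, returning NaN outside the
# circular region x1**2 + x2**2 < 50 used as the constraint in this tutorial
def constrained_himmelblau(x1, x2):
    return np.where(x1**2 + x2**2 < 50, himmelblau(x1, x2), np.nan)

print(constrained_himmelblau(3.0, 2.0))  # feasible (a global minimum of Himmelblau): 0.0
print(constrained_himmelblau(7.0, 7.0))  # outside the constraint -> nan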
Let's minimize Himmelblau's function, subject to the constraint that $x_1^2 + x_2^2 < 50$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22438de8", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib as mpl\n", + "from matplotlib import pyplot as plt\n", + "from bloptools import test_functions\n", + "\n", + "x1 = x2 = np.linspace(-8, 8, 256)\n", + "X1, X2 = np.meshgrid(x1, x2)\n", + "from bloptools.tasks import Task\n", + "\n", + "task = Task(key=\"himmelblau\", kind=\"min\")\n", + "F = test_functions.himmelblau(X1, X2)\n", + "F[X1**2 + X2**2 > 50] = np.nan\n", + "\n", + "plt.pcolormesh(x1, x2, F, norm=mpl.colors.LogNorm(), shading=\"auto\")\n", + "plt.colorbar()\n", + "plt.xlabel(\"x1\")\n", + "plt.ylabel(\"x2\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "7a88c7bd", + "metadata": {}, + "source": [ + "where everything outside our constraint is undefined. In our digestion function, we return a `NaN` when we violate the constraint:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e6bfcf73", + "metadata": {}, + "outputs": [], + "source": [ + "def digestion(db, uid):\n", + " products = db[uid].table()\n", + "\n", + " for index, entry in products.iterrows():\n", + " if entry.x1**2 + entry.x2**2 < 50:\n", + " products.loc[index, \"himmelblau\"] = test_functions.himmelblau(entry.x1, entry.x2)\n", + " else:\n", + " products.loc[index, \"himmelblau\"] = np.nan\n", + "\n", + " return products" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "0d3d91c3", + "metadata": {}, + "source": [ + "and create the agent in the usual way:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "071a829f-a390-40dc-9d5b-ae75702e119e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%run -i ../../../examples/prepare_bluesky.py # prepare the bluesky environment\n", + "\n", + "import bloptools\n", + "from bloptools.tasks import Task\n", + "\n", + "dofs = bloptools.devices.dummy_dofs(n=2)\n", + "bounds = [(-8, 8), (-8, 8)]\n", + "\n", + "task = Task(key=\"himmelblau\", kind=\"min\")\n", + "\n", + "agent = bloptools.bayesian.Agent(\n", + " active_dofs=dofs,\n", + " passive_dofs=[],\n", + " active_dof_bounds=bounds,\n", + " tasks=[task],\n", + " digestion=digestion,\n", + " db=db,\n", + ")\n", + "\n", + "RE(agent.initialize(acqf=\"qr\", n_init=32))\n", + "\n", + "agent.plot_tasks()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9ab3be01", + "metadata": {}, + "source": [ + "In addition to modeling the fitness of the task, the agent models the probability that an input will be feasible:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "996c3c01-f91d-4a25-9b8d-eba5fa964504", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "agent.plot_feasibility()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "ab608930", + "metadata": {}, + "source": [ + "It combines the estimate of the objective and the estimate of the feasibility in deciding where to go:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28c5c0df", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "agent.plot_acquisition(acqf=[\"ei\", \"pi\", \"ucb\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ff1c5f1c", + "metadata": {}, + "outputs": [], + "source": [ + "RE(agent.learn(\"ei\", n_iter=4))" + ] + }, + { + "attachments": {}, + "cell_type": 
"markdown", + "id": "a79b56ac", + "metadata": {}, + "source": [ + "The agent automatically tries to avoid infeasible points, but will end up naturally exploring the boundary of the constraint. Let's see where the agent is thinking of going:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bc53bf67", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "agent.plot_tasks()\n", + "agent.plot_acquisition(strategy=[\"ei\", \"pi\", \"ucb\"])" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "a43130e4", + "metadata": {}, + "source": [ + "The agent will naturally explore the whole parameter space" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "50b6582d", + "metadata": {}, + "outputs": [], + "source": [ + "RE(agent.learn(\"ei\", n_iter=16))\n", + "agent.plot_tasks()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + }, + "vscode": { + "interpreter": { + "hash": "9aced674e98d511b4f654e147532c84d38dc986fe042b1e92785fb9d8df41f75" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/source/tutorials/hyperparameters.ipynb b/docs/source/tutorials/hyperparameters.ipynb new file mode 100644 index 0000000..0fe5e2a --- /dev/null +++ b/docs/source/tutorials/hyperparameters.ipynb @@ -0,0 +1,157 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "e7b5e13a-c059-441d-8d4f-fff080d52054", + "metadata": {}, + "source": [ + "# Hyperparameters\n", + "\n", + "Consider the test function of two inputs $f(x_1, x_2) = e^{- (x_1/10)^2 - x_2^2}$. We can see that this function varies more quickly in the $x_1$ direction than the $x_2$ direction, especially if we look at the plot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22438de8", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib as mpl\n", + "from matplotlib import pyplot as plt\n", + "\n", + "x1 = x2 = np.linspace(-10, 10, 256)\n", + "X1, X2 = np.meshgrid(x1, x2)\n", + "\n", + "F = -np.exp(-((X1 / 8) ** 2)) * np.exp(-(X2**2))\n", + "\n", + "plt.pcolormesh(x1, x2, F, shading=\"auto\")\n", + "plt.colorbar()\n", + "plt.xlabel(\"x1\")\n", + "plt.ylabel(\"x2\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "7a88c7bd", + "metadata": {}, + "source": [ + "The optimization goes faster if our model understands how the function changes as we change the inputs in different ways. The way it picks up on this is by starting from a general model that could describe a lot of functions, and making it specific to this one by choosing the right hyperparameters. 
Our Bayesian agent is very good at this, and only needs a few samples to figure out what the function looks like:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "071a829f-a390-40dc-9d5b-ae75702e119e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%run -i ../../../examples/prepare_bluesky.py # prepare the bluesky environment\n", + "\n", + "import bloptools\n", + "from bloptools.tasks import Task\n", + "\n", + "dofs = bloptools.devices.dummy_dofs(n=2)\n", + "bounds = [(-8, 8), (-8, 8)]\n", + "\n", + "task = Task(key=\"loss\", kind=\"min\")\n", + "\n", + "\n", + "def digestion(db, uid):\n", + " products = db[uid].table()\n", + "\n", + " for index, entry in products.iterrows():\n", + " products.loc[index, \"loss\"] = -np.exp(-1e-2 * entry.x1**2) * np.exp(-1e-1 * entry.x2**2)\n", + "\n", + " return products\n", + "\n", + "\n", + "agent = bloptools.bayesian.Agent(\n", + " active_dofs=dofs,\n", + " passive_dofs=[],\n", + " active_dof_bounds=bounds,\n", + " tasks=[task],\n", + " digestion=digestion,\n", + " db=db,\n", + ")\n", + "\n", + "RE(agent.initialize(acqf=\"qr\", n_init=16))\n", + "\n", + "agent.plot_tasks()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8fba0f0c", + "metadata": {}, + "outputs": [], + "source": [ + "agent.tasks[0].regressor.covar_module.latent_dimensions" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9ab3be01", + "metadata": {}, + "source": [ + "In addition to modeling the fitness of the task, the agent models the probability that an input will be feasible:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bc53bf67", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "agent.plot_acquisition(strategy=[\"ei\", \"pi\", \"ucb\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ebc65169", + "metadata": {}, + "outputs": [], + "source": [ + "RE(agent.learn(\"ei\", n_iter=4))\n", + "agent.plot_tasks()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + }, + "vscode": { + "interpreter": { + "hash": "9aced674e98d511b4f654e147532c84d38dc986fe042b1e92785fb9d8df41f75" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/source/tutorials/introduction.ipynb b/docs/source/tutorials/introduction.ipynb index 90061dc..4aabba2 100644 --- a/docs/source/tutorials/introduction.ipynb +++ b/docs/source/tutorials/introduction.ipynb @@ -6,8 +6,7 @@ "id": "e7b5e13a-c059-441d-8d4f-fff080d52054", "metadata": {}, "source": [ - "# Kernels and hyperparameters\n", - "\n" + "# Bayesian optimization" ] }, { @@ -18,7 +17,7 @@ "source": [ "This tutorial is an introduction to the syntax used by the optimizer, as well as the principles of Bayesian optimization in general.\n", "\n", - "We'll start by minimizing Booth's function, which looks like this:" + "We'll start by minimizing the Rastrigin function in one dimension, which looks like this:" ] }, { @@ -32,15 +31,11 @@ "import matplotlib as mpl\n", "from matplotlib import pyplot as plt\n", "\n", - "import bloptools\n", + "from bloptools import test_functions\n", "\n", - "x1 = x2 = np.linspace(-10, 10, 256)\n", - "X1, X2 = np.meshgrid(x1, x2)\n", + "x 
= np.linspace(-4, 4, 256)\n", "\n", - "plt.pcolormesh(x1, x2, bloptools.experiments.tests.himmelblau(X1, X2), norm=mpl.colors.LogNorm(), shading=\"auto\")\n", - "plt.colorbar()\n", - "plt.xlabel(\"x1\")\n", - "plt.ylabel(\"x2\")\n" + "plt.plot(x, test_functions.rastrigin(x), c=\"b\")" ] }, { @@ -61,8 +56,9 @@ "source": [ "import bloptools\n", "\n", - "dofs = bloptools.experiments.tests.get_dummy_dofs(2)\n", - "bounds = [(-10, 10), (-10, 10)]" + "dofs = bloptools.devices.dummy_dofs(n=1) # an ophyd device that we can read/set\n", + "\n", + "bounds = [(-4.0, 4.0)] # one set of bounds per dof" ] }, { @@ -71,7 +67,7 @@ "id": "7a88c7bd", "metadata": {}, "source": [ - "The agent automatically samples at different inputs, but we often need some post-processing after data collection. In this case, we need to give the agent a way to compute Himmelblau's function. We accomplish this with a digestion function:" + "This degree of freedom will move around a variable called `x1`. The agent automatically samples at different inputs, but we often need some post-processing after data collection. In this case, we need to give the agent a way to compute the Rastrigin function. We accomplish this with a digestion function, which always takes `(db, uid)` as an input. For each entry, we compute the function:\n" ] }, { @@ -82,13 +78,10 @@ "outputs": [], "source": [ "def digestion(db, uid):\n", + " products = db[uid].table()\n", "\n", - " table = db[uid].table()\n", - " products = {\"himmelblau\": []}\n", - "\n", - " for index, entry in table.iterrows():\n", - "\n", - " products[\"himmelblau\"].append(bloptools.experiments.tests.himmelblau(entry.x1, entry.x2))\n", + " for index, entry in products.iterrows():\n", + " products.loc[index, \"rastrigin\"] = test_functions.rastrigin(entry.x1)\n", "\n", " return products" ] @@ -99,7 +92,7 @@ "id": "dad64303", "metadata": {}, "source": [ - "The next ingredient is a task, which gives the agent something to do. We want it to minimize Himmelblau's function, so we make a task that will try to minimize the output of the digestion function called \"himmelblau\". We also include a transform function, which will make it easier to regress over the outputs of the function." + "The next ingredient is a task, which gives the agent something to do. We want it to minimize the Rastrigin function, so we make a task that will try to minimize the output of the digestion function called \"rastrigin\"." 
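Under the hood, per the Task class in bloptools/tasks.py above, kind="min" simply gives the task a sign of -1, so that the agent always maximizes a fitness; a quick sketch of that convention:

from bloptools.tasks import Task
from bloptools.test_functions import rastrigin

task = Task(key="rastrigin", kind="min")
print(task.name, task.sign)        # "min_rastrigin_fitness" -1
print(task.sign * rastrigin(0.0))  # the fitness at the global minimum x = 0 is zero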
] }, { @@ -111,7 +104,7 @@ "source": [ "from bloptools.tasks import Task\n", "\n", - "task = Task(key=\"himmelblau\", kind=\"min\", transform=lambda x: np.log(1 + 1e-2 * x))" + "task = Task(key=\"rastrigin\", kind=\"min\")" ] }, { @@ -134,15 +127,16 @@ "source": [ "%run -i ../../../examples/prepare_bluesky.py # prepare the bluesky environment\n", "\n", - "boa = bloptools.bayesian.Agent(\n", - " dofs=dofs, # things which we move around\n", - " bounds=bounds, # where we're allowed to move them\n", - " tasks=task, # tasks for the optimizer\n", - " digestion=digestion, # how to process the acquisition into task data\n", - " db=db, # a databroker instance\n", - " )\n", + "agent = bloptools.bayesian.Agent(\n", + " active_dofs=dofs,\n", + " passive_dofs=[],\n", + " active_dof_bounds=bounds,\n", + " tasks=[task],\n", + " digestion=digestion,\n", + " db=db,\n", + ")\n", "\n", - "RE(boa.initialize(init_scheme='quasi-random', n_init=16))" + "RE(agent.initialize(acqf=\"qr\", n_init=12))" ] }, { @@ -163,114 +157,57 @@ }, "outputs": [], "source": [ - "boa.plot_tasks()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "4274930c", - "metadata": {}, - "source": [ - "We can also the agent's posterior about the probability of goodness over the parameters:" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "d2eb855c", - "metadata": {}, - "source": [ - "We want to learn a bit more, so we can ask the agent where to sample based off of some strategy. Here we use the \"esti\" strategy, which maximizes the expected sum of tasks improvement.\n", + "# what are the points?\n", "\n", - "We can ask the agent to \"route\" them using ``ortools``, so that we can sample them more quickly if it requires us to e.g. move motors." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "334f3c66", - "metadata": {}, - "outputs": [], - "source": [ - "X_to_sample = boa.ask(strategy='esti', n=16, optimize=True, route=True)\n", - "plt.scatter(*X_to_sample.T)\n", - "plt.plot(*X_to_sample.T)" + "agent.plot_tasks()" ] }, { "attachments": {}, "cell_type": "markdown", - "id": "296d9fd2", + "id": "fa141636", "metadata": {}, "source": [ - "Let's tell the agent to learn a bit more (it will direct itself):" + "Note that the value of the fitness is the negative value of the function: we always want to maximize the fitness of the tasks.\n", + "\n", + "An important concept in Bayesian optimization is the acquisition function, which is how the agent decides where to sample next. Under the hood, the agent will see what inputs maximize the acquisition function to make its decision.\n", + "\n", + "We can see what the agent is thinking by asking it to plot a few different acquisition functions in its current state." ] }, { "cell_type": "code", "execution_count": null, - "id": "f0e74651", + "id": "8554d7c2", "metadata": { "tags": [] }, "outputs": [], "source": [ - "RE(boa.learn(strategy='esti', n_iter=1, n_per_iter=4))\n", - "boa.plot_tasks()" + "# helper function to list acquisition functions\n", + "\n", + "agent.plot_acquisition(strategy=[\"ei\", \"pi\", \"ucb\"])" ] }, { "attachments": {}, "cell_type": "markdown", - "id": "aeab7a9b", + "id": "2529763b", "metadata": {}, "source": [ - "The agent has updated its model of the tasks, including refitting the hyperparameters. Continuing:" + "Let's tell the agent to learn a little bit more. We just have to tell it what acquisition function to use (by passing a `strategy`) and how many iterations we'd like it to perform (by passing `n_iter`)." 
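For intuition about what "ei", "pi", and "ucb" stand for, here are the generic closed forms of those acquisition functions for a Gaussian posterior with mean mu and standard deviation sigma (a textbook sketch, not bloptools' implementation):

import numpy as np
from scipy.stats import norm

# generic closed forms for a Gaussian posterior (mean mu, std sigma) when maximizing
# a fitness whose best observed value so far is f_best -- illustration only
def expected_improvement(mu, sigma, f_best):
    z = (mu - f_best) / sigma
    return sigma * (z * norm.cdf(z) + norm.pdf(z))

def probability_of_improvement(mu, sigma, f_best):
    return norm.cdf((mu - f_best) / sigma)

def upper_confidence_bound(mu, sigma, beta=4.0):
    return mu + np.sqrt(beta) * sigma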
] }, { "cell_type": "code", "execution_count": null, - "id": "d6b39b54", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "RE(boa.learn(strategy='esti', n_iter=4, n_per_iter=4))\n", - "boa.plot_tasks()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "e955233f", + "id": "ebc65169", "metadata": {}, - "source": [ - "Eventually, we reach a point of saturation where no more improvement takes place:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d73e4fd5", - "metadata": { - "tags": [] - }, "outputs": [], "source": [ - "RE(boa.learn(strategy='esti', n_iter=4, n_per_iter=4))\n", - "boa.plot_tasks()" + "RE(agent.learn(acqf=\"ei\", n_iter=8))\n", + "agent.plot_tasks()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5ad4b1e7", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/docs/source/tutorials/latent-toroid-dimensions.ipynb b/docs/source/tutorials/latent-toroid-dimensions.ipynb new file mode 100644 index 0000000..d6ffe16 --- /dev/null +++ b/docs/source/tutorials/latent-toroid-dimensions.ipynb @@ -0,0 +1,125 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "e7b5e13a-c059-441d-8d4f-fff080d52054", + "metadata": {}, + "source": [ + "# Finding latent dimensions for the toroidal mirror \n", + "\n", + "It is common that beamline inputs are highly coupled, and so the effect of an input on the beam cannot be understood except in concert with the others. In this example, we show how our agent figures out latent dimensions, as well as the benefit of doing so. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa8a6989", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%run -i ../../../examples/prepare_bluesky.py\n", + "%run -i ../../../examples/prepare_tes_shadow.py\n", + "\n", + "toroid_dofs = np.array([toroid.x_rot, toroid.offz])\n", + "toroid_bounds = np.array([[-1e-3, 1e-3], [-4e-1, +4e-1]])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "071a829f-a390-40dc-9d5b-ae75702e119e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from bloptools.bayesian import Agent\n", + "from bloptools.experiments.sirepo.tes import w8_digestion\n", + "from bloptools.tasks import Task\n", + "\n", + "beam_flux_task = Task(key=\"flux\", kind=\"max\", transform=lambda x: np.log(x))\n", + "\n", + "agent = Agent(\n", + " active_dofs=toroid_dofs,\n", + " active_dof_bounds=toroid_bounds,\n", + " detectors=[w8],\n", + " tasks=[beam_flux_task],\n", + " digestion=w8_digestion,\n", + " db=db,\n", + ")\n", + "\n", + "RE(agent.initialize(acqf=\"qr\", n_init=24))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "a6259a4f", + "metadata": {}, + "source": [ + "We can see that the beam is only not cut off (i.e. it has a non-zero flux) in a diagonal strip, and that in fact this is really just a one-dimensional optimization problem in some diagonal dimension. 
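To make that concrete, here is a tiny numpy sketch of the idea behind the LatentKernel used by the models above: project the inputs through a linear transform, then apply a Matern-3/2-style covariance (1 + d) * exp(-d) to distances measured in the transformed space. The transform below is a toy example, not the one the agent learned:

import numpy as np

# illustration only: apply a linear transform T to the inputs, then compute a
# Matern-3/2-style covariance on the transformed distance
def latent_covariance(x1, x2, T):
    d = np.linalg.norm(T @ np.asarray(x1) - T @ np.asarray(x2))
    return (1 + d) * np.exp(-d)

# a toy transform: long coherence length along the (1, 1) diagonal, short across it
T = np.array([[0.1, 0.1], [10.0, -10.0]])
print(latent_covariance([0.0, 0.0], [0.5, 0.5], T))   # ~1.0: moving along the diagonal barely matters
print(latent_covariance([0.0, 0.0], [0.5, -0.5], T))  # ~0.0: moving across it decorrelates quickly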
Our agent has figured this out, with a transformation matrix that has a long coherence length in one dimension and a short coherence length orthogonal to it:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6e17e666", + "metadata": {}, + "outputs": [], + "source": [ + "agent.tasks[0].regressor.covar_module.latent_dimensions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "996c3c01-f91d-4a25-9b8d-eba5fa964504", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "agent.plot_tasks()\n", + "agent.plot_feasibility()\n", + "agent.plot_acquisition(strategy=[\"ei\", \"pi\", \"ucb\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9093dd73", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + }, + "vscode": { + "interpreter": { + "hash": "9aced674e98d511b4f654e147532c84d38dc986fe042b1e92785fb9d8df41f75" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/source/tutorials/multi-task-sirepo.ipynb b/docs/source/tutorials/multi-task-sirepo.ipynb index 8a3478a..d3fefd0 100644 --- a/docs/source/tutorials/multi-task-sirepo.ipynb +++ b/docs/source/tutorials/multi-task-sirepo.ipynb @@ -26,10 +26,7 @@ "%run -i ../../../examples/prepare_tes_shadow.py\n", "\n", "kb_dofs = [kbv.x_rot, kbh.x_rot]\n", - "kb_bounds = np.array([[-0.10, +0.10], [-0.10, +0.10]]) \n", - "\n", - "for dof in kb_dofs:\n", - " dof.kind = \"hinted\"" + "kb_bounds = np.array([[-0.10, +0.10], [-0.10, +0.10]])" ] }, { @@ -41,25 +38,24 @@ }, "outputs": [], "source": [ - "\n", "from bloptools.bayesian import Agent\n", - "from bloptools.experiments.sirepo.tes import digestion\n", + "from bloptools.experiments.sirepo.tes import w9_digestion\n", "from bloptools.tasks import Task\n", "\n", "beam_flux_task = Task(key=\"flux\", kind=\"max\", transform=lambda x: np.log(x))\n", "beam_width_task = Task(key=\"x_width\", kind=\"min\", transform=lambda x: np.log(x))\n", "beam_height_task = Task(key=\"y_width\", kind=\"min\", transform=lambda x: np.log(x))\n", "\n", - "boa = Agent(\n", - " dofs=kb_dofs, \n", - " bounds=kb_bounds,\n", - " detectors=[w9],\n", - " tasks=[beam_flux_task, beam_width_task, beam_height_task],\n", - " digestion=digestion,\n", - " db=db,\n", + "agent = Agent(\n", + " active_dofs=kb_dofs,\n", + " active_dof_bounds=kb_bounds,\n", + " detectors=[w9],\n", + " tasks=[beam_flux_task, beam_width_task, beam_height_task],\n", + " digestion=w9_digestion,\n", + " db=db,\n", ")\n", "\n", - "RE(boa.initialize(init_scheme='quasi-random', n_init=16))" + "RE(agent.initialize(acqf=\"qr\", n_init=16))" ] }, { @@ -80,7 +76,8 @@ }, "outputs": [], "source": [ - "boa.plot_tasks()" + "agent.plot_tasks()\n", + "agent.plot_acquisition(strategy=[\"ei\", \"pi\", \"ucb\"])" ] }, { @@ -89,7 +86,7 @@ "id": "296d9fd2", "metadata": {}, "source": [ - "Let's optimize:" + "We should find our optimum (or something close to it) on the very next iteration:" ] }, { @@ -101,8 +98,8 @@ }, "outputs": [], "source": [ - "RE(boa.learn(strategy='esti', n_iter=4))\n", - "boa.plot_tasks()" + "RE(agent.learn(acqf=\"ei\", n_iter=2))\n", + "agent.plot_tasks()" ] }, { @@ -131,7 +128,7 @@ "name": 
"python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.9.16 (main, Mar 8 2023, 14:00:05) \n[GCC 11.2.0]" }, "vscode": { "interpreter": { diff --git a/docs/wip/custom-acquisition.ipynb b/docs/wip/custom-acquisition.ipynb new file mode 100644 index 0000000..dbdae7f --- /dev/null +++ b/docs/wip/custom-acquisition.ipynb @@ -0,0 +1,106 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "e7b5e13a-c059-441d-8d4f-fff080d52054", + "metadata": {}, + "source": [ + "# Custom acquisition plans\n", + "\n", + "The simplest acqusition plan for a beamline is to move some motor inputs to some positions, and then trigger some detectors. Often, though, we want to investigate behaviors more complex than this. Consider the example of aligning the spectrometer at the Inner-Shell Spectroscopy (ISS) beamline at NSLS-II, which operates by modulating the energy of a beam on a sample and watching the resulting flux rise, peak, and then fall. \n", + "\n", + "We can build a toy model of this spectrometer using custom acquisition and digestion functions. Let's pretend that we can vary two inputs $\\mathbf{x} = (x_1, x_2)$, and that the resolution of the spectrometer is equal to \n", + "\n", + "$\\nu_\\sigma = \\big (1 + x_1^2 + (x_2 - 1)^2 \\big)^{1/2}$\n", + "\n", + "which has a single minimum at $(0,1)$. We can't sample this value directly, rather we can sample how the resulting flux peaks as we vary the energy $\\nu$. Let's pretend that it looks like a Gaussian:\n", + "\n", + "$I(\\nu) = \\exp \\Big [ - 0.5 (\\nu - \\nu_0)^2 / \\nu_\\sigma^2 \\Big ]$\n", + "\n", + "To find the inputs that lead to the tightest spectrum, we need to vary $\\mathbf{x}$, scan over $\\nu$, and then estimate the resolution for the agent to optimize over. Let's write acquisition and digestion functions to do this: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "92c202e2", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "\n", + "def acquisition(dofs, inputs, dets):\n", + " uid = yield from bp.list_scan\n", + "\n", + " for x in inputs:\n", + " res = np.sqrt(1 + np.square(x).sum()) # our resolution is\n", + "\n", + " nu_sigma = np.sqrt(1 + x1**2 + (x2 - 1) ** 2)\n", + "\n", + " flux = np.exp(-0.5 * np.square((nu - nu_0) / nu_sigma))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0ba0bb16", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from matplotlib import pyplot as plt\n", + "\n", + "\n", + "nu_0 = 100\n", + "\n", + "nu = np.linspace(90, 110, 256)\n", + "\n", + "for x1, x2 in [(0, 0), (-2, 2), (-1, 0), (0, 1)]:\n", + " nu_sigma = np.sqrt(1 + x1**2 + (x2 - 1) ** 2)\n", + "\n", + " flux = np.exp(-0.5 * np.square((nu - nu_0) / nu_sigma))\n", + "\n", + " plt.plot(nu, flux, label=f\"(x1, x2) = ({x1}, {x2})\")\n", + "\n", + "plt.legend()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "1075cb6a", + "metadata": {}, + "source": [ + "To find the inputs that lead to the tightest spectrum, we need to vary $\\mathbf{x}$, scan over $\\nu$, and then estimate the resolution for the agent to optimize over. 
Let's write acquisition and digestion functions to do this: " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16 (main, Mar 8 2023, 14:00:05) \n[GCC 11.2.0]" + }, + "vscode": { + "interpreter": { + "hash": "9aced674e98d511b4f654e147532c84d38dc986fe042b1e92785fb9d8df41f75" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/wip/passive-dofs.ipynb b/docs/wip/passive-dofs.ipynb new file mode 100644 index 0000000..89d601e --- /dev/null +++ b/docs/wip/passive-dofs.ipynb @@ -0,0 +1,329 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "e7b5e13a-c059-441d-8d4f-fff080d52054", + "metadata": {}, + "source": [ + "## Introduction\n", + "\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "c18ef717", + "metadata": {}, + "source": [ + "This tutorial is an introduction to the syntax used by the optimizer, as well as the principles of Bayesian optimization in general.\n", + "\n", + "We'll start by minimizing Himmelblau's function, which looks like this:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22438de8", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib as mpl\n", + "from matplotlib import pyplot as plt\n", + "\n", + "import bloptools\n", + "\n", + "x1 = x2 = np.linspace(-5.0, 5.0, 256)\n", + "X1, X2 = np.meshgrid(x1, x2)\n", + "\n", + "plt.pcolormesh(x1, x2, bloptools.experiments.tests.himmelblau(X1, X2), norm=mpl.colors.LogNorm(), shading=\"auto\")\n", + "\n", + "plt.xlabel(\"x1\")\n", + "plt.ylabel(\"x2\")\n", + "plt.colorbar()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f46924af", + "metadata": {}, + "outputs": [], + "source": [ + "from bloptools.objects import TimeReadback\n", + "\n", + "tr = TimeReadback(name=\"timestamp\")\n", + "\n", + "tr.read()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "ecef8da5", + "metadata": {}, + "source": [ + "There are several things that our agent will need. The first ingredient is some degrees of freedom (these are always `ophyd` devices) which the agent will move around to different inputs within each DOF's bounds (the second ingredient). We define these here:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c870567", + "metadata": {}, + "outputs": [], + "source": [ + "import bloptools\n", + "\n", + "dofs = bloptools.objects.get_dummy_dofs(n=2) # get a list of two DOFs\n", + "bounds = [(-5.0, +5.0), (-5.0, +5.0)]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "7a88c7bd", + "metadata": {}, + "source": [ + "The agent automatically samples at different inputs, but we often need some post-processing after data collection. In this case, we need to give the agent a way to compute Himmelblau's function. 
We accomplish this with a digestion function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e6bfcf73", + "metadata": {}, + "outputs": [], + "source": [ + "import bloptools\n", + "\n", + "dofs = bloptools.objects.get_dummy_dofs(n=2) # get a list of two DOFs\n", + "bounds = [(-5.0, +5.0), (-5.0, +5.0)]\n", + "\n", + "from bloptools.objects import TimeReadback\n", + "\n", + "tr = TimeReadback(name=\"timestamp\")\n", + "\n", + "tr.read()\n", + "\n", + "\n", + "def digestion(db, uid):\n", + " table = db[uid].table()\n", + " products = pd.DataFrame()\n", + "\n", + " for index, entry in table.iterrows():\n", + " products.loc[index, \"himmelblau\"] = bloptools.test_functions.himmelblau(entry.x1, entry.x2)\n", + "\n", + " return products\n", + "\n", + "\n", + "from bloptools.tasks import Task\n", + "\n", + "task = Task(key=\"himmelblau\", kind=\"min\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4532b087", + "metadata": {}, + "source": [ + "The next ingredient is a task, which gives the agent something to do. We want it to minimize Himmelblau's function, so we make a task that will try to minimize the output of the digestion function called \"himmelblau\". We also include a transform function, which will make it easier to regress over the outputs of the function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c14d162", + "metadata": {}, + "outputs": [], + "source": [ + "from bloptools.tasks import Task\n", + "\n", + "task = Task(key=\"himmelblau\", kind=\"min\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "0d3d91c3", + "metadata": {}, + "source": [ + "Combining all of these with a databroker instance, we can make an agent:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "071a829f-a390-40dc-9d5b-ae75702e119e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%run -i ../../../examples/prepare_bluesky.py # prepare the bluesky environment\n", + "\n", + "boa = bloptools.bayesian.Agent(\n", + " dofs=dofs,\n", + " bounds=bounds,\n", + " passive_dims=[tr],\n", + " tasks=task,\n", + " digestion=digestion,\n", + " db=db,\n", + ")\n", + "\n", + "RE(boa.initialize(init_scheme=\"quasi-random\", n_init=32))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "d8f2da43", + "metadata": {}, + "source": [ + "We initialized the GP with the \"quasi-random\" strategy, as it doesn't require any prior data. We can view the state of the optimizer's posterior of the tasks over the input parameters:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5818143a", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e93c75a1", + "metadata": {}, + "outputs": [], + "source": [ + "np.atleast_1d([]).size" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "996c3c01-f91d-4a25-9b8d-eba5fa964504", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "boa.plot_tasks()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "d2eb855c", + "metadata": {}, + "source": [ + "We want to learn a bit more, so we can ask the agent where to sample based off of some strategy. Here we use the \"esti\" strategy, which maximizes the expected sum of tasks improvement." 
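Read literally, that means summing an expected-improvement term for each task, each evaluated against that task's own posterior and best value so far. A generic sketch under that reading, reusing each task's weight attribute from bloptools.tasks.Task (an interpretation for illustration, not the package's implementation):

from scipy.stats import norm

from bloptools.tasks import Task

# a sketch of "expected sum of tasks improvement": a weighted sum of per-task
# expected improvements, each from a Gaussian posterior (mu, sigma) relative to
# that task's best fitness so far (f_best)
def expected_sum_of_task_improvements(tasks, mus, sigmas, f_bests):
    total = 0.0
    for task, mu, sigma, f_best in zip(tasks, mus, sigmas, f_bests):
        z = (mu - f_best) / sigma
        total += task.weight * sigma * (z * norm.cdf(z) + norm.pdf(z))
    return total

# e.g. two width tasks whose posteriors at some candidate input are (0.5, 0.2) and (1.0, 0.3)
tasks = [Task(key="x_width", kind="min"), Task(key="y_width", kind="min")]
print(expected_sum_of_task_improvements(tasks, mus=[0.5, 1.0], sigmas=[0.2, 0.3], f_bests=[0.4, 1.1]))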
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0e74651", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "RE(boa.learn(strategy=\"esti\", n_iter=4))\n", + "boa.plot_tasks()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "aeab7a9b", + "metadata": {}, + "source": [ + "The agent has updated its model of the tasks, including refitting the hyperparameters. Continuing:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6b39b54", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "RE(boa.learn(strategy=\"esti\", n_iter=16))\n", + "boa.plot_tasks()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "e955233f", + "metadata": {}, + "source": [ + "Eventually, we reach a point of saturation where no more improvement takes place:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d73e4fd5", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "RE(boa.learn(strategy=\"esti\", n_iter=32))\n", + "boa.plot_tasks()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5ad4b1e7", + "metadata": {}, + "outputs": [], + "source": [ + "boa.tasks[0].regressor.covar_module.base_kernel.trans_matrix" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "96e9abac", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + }, + "vscode": { + "interpreter": { + "hash": "9aced674e98d511b4f654e147532c84d38dc986fe042b1e92785fb9d8df41f75" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/benchmark.py b/examples/benchmark.py index f0c07ae..1c7db37 100644 --- a/examples/benchmark.py +++ b/examples/benchmark.py @@ -38,9 +38,7 @@ plt.plot( timestamps - timestamps[0], [ - np.nanmax(bo.data.fitness.values[: i + 1]) - if not all(np.isnan(bo.data.fitness.values[: i + 1])) - else np.nan + np.nanmax(bo.data.fitness.values[: i + 1]) if not all(np.isnan(bo.data.fitness.values[: i + 1])) else np.nan for i in range(len(bo.data.fitness.values)) ], ) diff --git a/examples/prepare_tes_shadow.py b/examples/prepare_tes_shadow.py index 116ee76..ff5e8b8 100644 --- a/examples/prepare_tes_shadow.py +++ b/examples/prepare_tes_shadow.py @@ -12,13 +12,13 @@ classes, objects = create_classes(connection.data, connection=connection) globals().update(**objects) -data["models"]["simulation"]["npoint"] = 200000 -data["models"]["watchpointReport12"]["histogramBins"] = 32 +data["models"]["simulation"]["npoint"] = 50000 +data["models"]["watchpointReport12"]["histogramBins"] = 16 # w9.duration.kind = "hinted" bec.disable_baseline() bec.disable_heading() -bec.disable_table() +# bec.disable_table() # This should be done by installing the package with `pip install -e .` or something similar. # import sys diff --git a/pyproject.toml b/pyproject.toml index 3239179..d852d68 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [tool.black] -line-length = 115 +line-length = 125 include = '\.pyi?$' exclude = ''' /(