From 2b05f34fbbe260738a3062aef9dc3f1c37361612 Mon Sep 17 00:00:00 2001 From: Thomas Morris Date: Sat, 6 Jul 2024 21:55:09 -0400 Subject: [PATCH 1/9] add simulated KB mirrors --- pyproject.toml | 1 + src/blop/agent.py | 2 +- src/blop/sim/__init__.py | 1 + src/blop/sim/beamline.py | 199 ++++++++++++++++++++++++++++++++++++ src/blop/tests/test_sims.py | 35 +++++++ src/blop/utils/__init__.py | 163 +++++++---------------------- 6 files changed, 273 insertions(+), 128 deletions(-) create mode 100644 src/blop/sim/__init__.py create mode 100644 src/blop/sim/beamline.py create mode 100644 src/blop/tests/test_sims.py diff --git a/pyproject.toml b/pyproject.toml index 8b532d9..6b4bcb1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ maintainers = [ requires-python = ">=3.9" dependencies = [ + "area-detector-handlers", "bluesky", "botorch", "databroker", diff --git a/src/blop/agent.py b/src/blop/agent.py index 80ef55f..a7de1aa 100644 --- a/src/blop/agent.py +++ b/src/blop/agent.py @@ -842,7 +842,7 @@ def _set_hypers(self, hypers): self.validity_constraint.load_state_dict(hypers["validity_constraint"]) def constraint(self, x): - x = self.dofs(active=True).transform(x) + # x = self.dofs(active=True).transform(x) p = torch.ones(x.shape[:-1]) for obj in self.objectives(active=True): diff --git a/src/blop/sim/__init__.py b/src/blop/sim/__init__.py new file mode 100644 index 0000000..70c7e45 --- /dev/null +++ b/src/blop/sim/__init__.py @@ -0,0 +1 @@ +from .beamline import Beamline, Detector # noqa diff --git a/src/blop/sim/beamline.py b/src/blop/sim/beamline.py new file mode 100644 index 0000000..a70b966 --- /dev/null +++ b/src/blop/sim/beamline.py @@ -0,0 +1,199 @@ +import itertools +from collections import deque +from datetime import datetime +from pathlib import Path + +import h5py +import numpy as np +import scipy as sp +from area_detector_handlers.handlers import HandlerBase +from event_model import compose_resource +from ophyd import Component as Cpt +from ophyd import Device, Signal +from ophyd.sim import NullStatus, new_uid +from ophyd.utils import make_dir_tree + +from ..utils import get_beam_stats + + +class HDF5Handler(HandlerBase): + specs = {"HDF5"} + + def __init__(self, filename): + self._name = filename + + def __call__(self, frame): + with h5py.File(self._name, "r") as f: + entry = f["/entry/image"] + return entry[frame] + + +class ExternalFileReference(Signal): + """ + A pure software Signal that describe()s an image in an external file. + """ + + def describe(self): + resource_document_data = super().describe() + resource_document_data[self.name].update( + dict( + external="FILESTORE:", + dtype="array", + ) + ) + return resource_document_data + + +class Detector(Device): + sum = Cpt(Signal, kind="hinted") + max = Cpt(Signal, kind="normal") + area = Cpt(Signal, kind="normal") + cen_x = Cpt(Signal, kind="hinted") + cen_y = Cpt(Signal, kind="hinted") + wid_x = Cpt(Signal, kind="hinted") + wid_y = Cpt(Signal, kind="hinted") + image = Cpt(ExternalFileReference, kind="normal") + image_shape = Cpt(Signal, value=(300, 400), kind="normal") + noise = Cpt(Signal, kind="normal") + + def __init__(self, root_dir: str = "/tmp/blop/sim", verbose: bool = True, noise: bool = True, *args, **kwargs): + super().__init__(*args, **kwargs) + + _ = make_dir_tree(datetime.now().year, base_path=root_dir) + + self._root_dir = root_dir + self._verbose = verbose + + # Used for the emulated cameras only. + self._img_dir = None + + # Resource/datum docs related variables. 
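+        # The resource document is composed in stage(), each trigger() appends a
+        # datum document, and collect_asset_docs() drains this cache for the RunEngine.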
+ self._asset_docs_cache = deque() + self._resource_document = None + self._datum_factory = None + + self.noise.put(noise) + + def trigger(self): + super().trigger() + raw_image = self.generate_beam(noise=self.noise.get()) + + current_frame = next(self._counter) + + self._dataset.resize((current_frame + 1, *self.image_shape.get())) + + self._dataset[current_frame, :, :] = raw_image + + datum_document = self._datum_factory(datum_kwargs={"frame": current_frame}) + self._asset_docs_cache.append(("datum", datum_document)) + + stats = get_beam_stats(raw_image) + self.image.put(datum_document["datum_id"]) + + for attr in ["max", "sum", "cen_x", "cen_y", "wid_x", "wid_y"]: + getattr(self, attr).put(stats[attr]) + + super().trigger() + + return NullStatus() + + def stage(self): + super().stage() + date = datetime.now() + self._assets_dir = date.strftime("%Y/%m/%d") + data_file = f"{new_uid()}.h5" + + self._resource_document, self._datum_factory, _ = compose_resource( + start={"uid": "needed for compose_resource() but will be discarded"}, + spec="HDF5", + root=self._root_dir, + resource_path=str(Path(self._assets_dir) / Path(data_file)), + resource_kwargs={}, + ) + + self._data_file = str(Path(self._resource_document["root"]) / Path(self._resource_document["resource_path"])) + + # now discard the start uid, a real one will be added later + self._resource_document.pop("run_start") + self._asset_docs_cache.append(("resource", self._resource_document)) + + self._h5file_desc = h5py.File(self._data_file, "x") + group = self._h5file_desc.create_group("/entry") + self._dataset = group.create_dataset( + "image", + data=np.full(fill_value=np.nan, shape=(1, *self.image_shape.get())), + maxshape=(None, *self.image_shape.get()), + chunks=(1, *self.image_shape.get()), + dtype="float64", + compression="lzf", + ) + self._counter = itertools.count() + + def unstage(self): + super().unstage() + del self._dataset + self._h5file_desc.close() + self._resource_document = None + self._datum_factory = None + + def collect_asset_docs(self): + items = list(self._asset_docs_cache) + self._asset_docs_cache.clear() + for item in items: + yield item + + def generate_beam(self, noise: bool = True): + nx, ny = self.image_shape.get() + + x = np.linspace(-10, 10, ny) + y = np.linspace(-10, 10, nx) + X, Y = np.meshgrid(x, y) + + x0 = self.parent.kbh_ush.get() - self.parent.kbh_dsh.get() + y0 = self.parent.kbv_usv.get() - self.parent.kbv_dsv.get() + x_width = np.sqrt(0.5 + 5e-1 * (self.parent.kbh_ush.get() + self.parent.kbh_dsh.get() - 1) ** 2) + y_width = np.sqrt(0.25 + 5e-1 * (self.parent.kbv_usv.get() + self.parent.kbv_dsv.get() - 2) ** 2) + + beam = np.exp(-0.5 * (((X - x0) / x_width) ** 4 + ((Y - y0) / y_width) ** 4)) / ( + np.sqrt(2 * np.pi) * x_width * y_width + ) + + mask = X > self.parent.ssa_inboard.get() + mask &= X < self.parent.ssa_outboard.get() + mask &= Y > self.parent.ssa_lower.get() + mask &= Y < self.parent.ssa_upper.get() + mask = sp.ndimage.gaussian_filter(mask.astype(float), sigma=1) + + image = beam * mask + + if noise: + kx = np.fft.fftfreq(n=len(x), d=0.1) + ky = np.fft.fftfreq(n=len(y), d=0.1) + KX, KY = np.meshgrid(kx, ky) + + power_spectrum = 1 / (1e-2 + KX**2 + KY**2) + + white_noise = 5e-3 * np.random.standard_normal(size=X.shape) + pink_noise = 5e-3 * np.real(np.fft.ifft2(power_spectrum * np.fft.fft2(np.random.standard_normal(size=X.shape)))) + # background = 5e-3 * (X - Y) / X.max() + + image += white_noise + pink_noise + + return image + + +class Beamline(Device): + det = Cpt(Detector) + + kbh_ush = 
Cpt(Signal, kind="hinted") + kbh_dsh = Cpt(Signal, kind="hinted") + kbv_usv = Cpt(Signal, kind="hinted") + kbv_dsv = Cpt(Signal, kind="hinted") + + ssa_inboard = Cpt(Signal, value=-5.0, kind="hinted") + ssa_outboard = Cpt(Signal, value=5.0, kind="hinted") + ssa_lower = Cpt(Signal, value=-5.0, kind="hinted") + ssa_upper = Cpt(Signal, value=5.0, kind="hinted") + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) diff --git a/src/blop/tests/test_sims.py b/src/blop/tests/test_sims.py new file mode 100644 index 0000000..25a7e3b --- /dev/null +++ b/src/blop/tests/test_sims.py @@ -0,0 +1,35 @@ +import numpy as np + +from blop import DOF, Agent, Objective +from blop.sim import Beamline + + +def test_kb_simulation(RE, db): + beamline = Beamline(name="bl") + beamline.det.noise.put(False) + + dofs = [ + DOF(description="KBV downstream", device=beamline.kbv_dsv, search_domain=(-5.0, 5.0)), + DOF(description="KBV upstream", device=beamline.kbv_usv, search_domain=(-5.0, 5.0)), + DOF(description="KBH downstream", device=beamline.kbh_dsh, search_domain=(-5.0, 5.0)), + DOF(description="KBH upstream", device=beamline.kbh_ush, search_domain=(-5.0, 5.0)), + ] + + objectives = [ + Objective(name="bl_det_sum", target="max", transform="log", trust_domain=(1, np.inf)), + Objective(name="bl_det_wid_x", target="min", transform="log"), # , latent_groups=[("x1", "x2")]), + Objective(name="bl_det_wid_y", target="min", transform="log"), # , latent_groups=[("x1", "x2")])] + ] + + agent = Agent( + dofs=dofs, + objectives=objectives, + dets=[beamline.det], + verbose=True, + db=db, + tolerate_acquisition_errors=False, + train_every=3, + ) + + RE(agent.learn("qr", n=32)) + RE(agent.learn("qei", n=4, iterations=4)) diff --git a/src/blop/utils/__init__.py b/src/blop/utils/__init__.py index a25bf4f..7ff1c8b 100644 --- a/src/blop/utils/__init__.py +++ b/src/blop/utils/__init__.py @@ -5,6 +5,42 @@ from ortools.constraint_solver import pywrapcp, routing_enums_pb2 +def get_beam_stats(image, threshold=0.5): + ny, nx = image.shape + + fim = image.copy() + fim -= np.median(fim, axis=0) + fim -= np.median(fim, axis=1)[:, None] + + fim = sp.ndimage.median_filter(fim, size=3) + fim = sp.ndimage.gaussian_filter(fim, sigma=1) + + m = fim > threshold * fim.max() + area = m.sum() + + cs_x = np.cumsum(m.sum(axis=0)) / area + cs_y = np.cumsum(m.sum(axis=1)) / area + + q_min, q_max = [0.15865, 0.84135] # one sigma + q_min, q_max = [0.05, 0.95] # 90% + + x_min, x_max = np.interp([q_min, q_max], cs_x, np.arange(nx)) + y_min, y_max = np.interp([q_min, q_max], cs_y, np.arange(ny)) + + stats = { + "max": fim.max(), + "sum": fim.sum(), + "area": area, + "cen_x": (x_min + x_max) / 2, + "cen_y": (y_min + y_max) / 2, + "wid_x": x_max - x_min, + "wid_y": y_max - y_min, + "bbox": [[x_min, x_max, x_max, x_min, x_min], [y_min, y_min, y_max, y_max, y_min]], + } + + return stats + + def cummax(x): return [np.nanmax(x[: i + 1]) for i in range(len(np.atleast_1d(x)))] @@ -102,130 +138,3 @@ def get_movement_time(x, v_max, a): return 2 * np.sqrt(np.abs(x) / a) * (np.abs(x) < v_max**2 / a) + (np.abs(x) / v_max + v_max / a) * ( np.abs(x) > v_max**2 / a ) - - -def get_principal_component_bounds(image, beam_prop=0.5): - """ - Returns the bounding box in pixel units of an image, along with a goodness of fit parameter. - This should go off without a hitch as long as beam_prop is less than 1. 
- """ - - if image.sum() == 0: - return np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan - - # print(f'{extent = }') - - u, s, v = np.linalg.svd(image - image.mean()) - separability = np.square(s[0]) / np.square(s).sum() - - # q refers to "quantile" - q_min, q_max = 0.5 * (1 - beam_prop), 0.5 * (1 + beam_prop) - - # these represent the cumulative proportion of the beam, as captured by the SVD. - cs_beam_x = np.cumsum(v[0]) / np.sum(v[0]) - cs_beam_y = np.cumsum(u[:, 0]) / np.sum(u[:, 0]) - cs_beam_x[0], cs_beam_y[0] = 0, 0 # change this lol - - # the first coordinate where the cumulative beam is greater than the minimum - i_q_min_x = np.where((cs_beam_x[1:] > q_min) & (cs_beam_x[:-1] < q_min))[0][0] - i_q_min_y = np.where((cs_beam_y[1:] > q_min) & (cs_beam_y[:-1] < q_min))[0][0] - - # the last coordinate where the cumulative beam is less than the maximum - i_q_max_x = np.where((cs_beam_x[1:] > q_max) & (cs_beam_x[:-1] < q_max))[0][-1] - i_q_max_y = np.where((cs_beam_y[1:] > q_max) & (cs_beam_y[:-1] < q_max))[0][-1] - - # interpolate, so that we can go finer than one pixel. this quartet is the "bounding box", from 0 to 1. - # (let's make this more efficient later) - x_min = np.interp(q_min, cs_beam_x[[i_q_min_x, i_q_min_x + 1]], [i_q_min_x, i_q_min_x + 1]) - x_max = np.interp(q_max, cs_beam_x[[i_q_max_x, i_q_max_x + 1]], [i_q_max_x, i_q_max_x + 1]) - y_min = np.interp(q_min, cs_beam_y[[i_q_min_y, i_q_min_y + 1]], [i_q_min_y, i_q_min_y + 1]) - y_max = np.interp(q_max, cs_beam_y[[i_q_max_y, i_q_max_y + 1]], [i_q_max_y, i_q_max_y + 1]) - - return ( - x_min, - x_max, - y_min, - y_max, - separability, - ) - - -def get_beam_bounding_box(image, thresh=0.5): - """ - Returns the bounding box in pixel units of an image, along with a goodness of fit parameter. - This should go off without a hitch as long as beam_prop is less than 1. 
- """ - - n_y, n_x = image.shape - - if image.sum() == 0: - return np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan - - # filter the image - zim = sp.ndimage.median_filter(image.astype(float), size=3) - zim -= np.median(zim, axis=0) - zim -= np.median(zim, axis=1)[:, None] - - x_sum = zim.sum(axis=0) - y_sum = zim.sum(axis=1) - - x_sum_min_val = thresh * x_sum.max() - y_sum_min_val = thresh * y_sum.max() - - gtt_x = x_sum > x_sum_min_val - gtt_y = y_sum > y_sum_min_val - - i_x_min_start = np.where(~gtt_x[:-1] & gtt_x[1:])[0][0] - i_x_max_start = np.where(gtt_x[:-1] & ~gtt_x[1:])[0][-1] - i_y_min_start = np.where(~gtt_y[:-1] & gtt_y[1:])[0][0] - i_y_max_start = np.where(gtt_y[:-1] & ~gtt_y[1:])[0][-1] - - x_min = ( - 0 - if gtt_x[0] - else np.interp(x_sum_min_val, x_sum[[i_x_min_start, i_x_min_start + 1]], [i_x_min_start, i_x_min_start + 1]) - ) - y_min = ( - 0 - if gtt_y[0] - else np.interp(y_sum_min_val, y_sum[[i_y_min_start, i_y_min_start + 1]], [i_y_min_start, i_y_min_start + 1]) - ) - x_max = ( - n_x - 2 - if gtt_x[-1] - else np.interp(x_sum_min_val, x_sum[[i_x_max_start + 1, i_x_max_start]], [i_x_max_start + 1, i_x_max_start]) - ) - y_max = ( - n_y - 2 - if gtt_y[-1] - else np.interp(y_sum_min_val, y_sum[[i_y_max_start + 1, i_y_max_start]], [i_y_max_start + 1, i_y_max_start]) - ) - - return ( - x_min, - x_max, - y_min, - y_max, - ) - - -def best_image_feedback(image): - n_y, n_x = image.shape - - fim = sp.ndimage.median_filter(image, size=3) - - masked_image = fim * (fim - fim.mean() > 0.5 * fim.ptp()) - - x_weight = masked_image.sum(axis=0) - y_weight = masked_image.sum(axis=1) - - x = np.arange(n_x) - y = np.arange(n_y) - - x0 = np.sum(x_weight * x) / np.sum(x_weight) - y0 = np.sum(y_weight * y) / np.sum(y_weight) - - xw = 2 * np.sqrt((np.sum(x_weight * x**2) / np.sum(x_weight) - x0**2)) - yw = 2 * np.sqrt((np.sum(y_weight * y**2) / np.sum(y_weight) - y0**2)) - - return x0, xw, y0, yw From 4e1db58896c7bdd370453cecacb0a0c240d57f5c Mon Sep 17 00:00:00 2001 From: Thomas Morris Date: Sun, 7 Jul 2024 13:14:12 -0400 Subject: [PATCH 2/9] revert numpy to older version --- .github/workflows/testing.yml | 4 +- pyproject.toml | 2 +- src/blop/agent.py | 72 +++++++---- src/blop/plotting.py | 8 +- src/blop/utils/__init__.py | 4 +- src/blop/utils/misc.py | 227 ---------------------------------- 6 files changed, 58 insertions(+), 259 deletions(-) delete mode 100644 src/blop/utils/misc.py diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 55516cd..4c964f4 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -4,8 +4,8 @@ on: push: pull_request: workflow_dispatch: - # schedule: - # - cron: '00 4 * * *' # daily at 4AM + schedule: + - cron: '00 6 * * *' # daily at 6AM UTC jobs: run_tests: diff --git a/pyproject.toml b/pyproject.toml index 6b4bcb1..8e9e33c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ dependencies = [ "gpytorch", "h5py", "matplotlib", - "numpy", + "numpy==1.26.4", "ophyd", "ortools", "scipy", diff --git a/src/blop/agent.py b/src/blop/agent.py index a7de1aa..2113e07 100644 --- a/src/blop/agent.py +++ b/src/blop/agent.py @@ -66,13 +66,14 @@ def __init__( dofs: Sequence[DOF], objectives: Sequence[Objective], db: Broker = None, - dets: Sequence[Signal] = [], + detectors: Sequence[Signal] = [], acquistion_plan=default_acquisition_plan, digestion: Callable = default_digestion_function, digestion_kwargs: dict = {}, verbose: bool = False, - tolerate_acquisition_errors=False, - sample_center_on_init=False, + 
enforce_all_objectives_valid: bool = True,
+        tolerate_acquisition_errors: bool = False,
+        sample_center_on_init: bool = False,
         trigger_delay: float = 0,
         train_every: int = 1,
     ):
@@ -85,7 +86,7 @@ def __init__(
             The degrees of freedom that the agent can control, which determine the output of the model.
         objectives : iterable of Objective objects
             The objectives which the agent will try to optimize.
-        dets : iterable of ophyd objects
+        detectors : iterable of ophyd objects
             Detectors to trigger during acquisition.
         acquisition_plan : optional
             A plan that samples the beamline for some given inputs.
@@ -130,7 +131,7 @@ def __init__(
 
         self.db = db
 
-        self.dets = dets
+        self.detectors = detectors
         self.acquisition_plan = acquistion_plan
         self.digestion = digestion
         self.digestion_kwargs = digestion_kwargs
@@ -139,6 +140,8 @@ def __init__(
 
         self.tolerate_acquisition_errors = tolerate_acquisition_errors
 
+        self.enforce_all_objectives_valid = enforce_all_objectives_valid
+
         self.train_every = train_every
         self.trigger_delay = trigger_delay
         self.sample_center_on_init = sample_center_on_init
@@ -498,7 +501,7 @@ def acquire(self, points):
             uid = yield from self.acquisition_plan(
                 acquisition_dofs,
                 points,
-                [*self.dets, *self.dofs.devices],
+                [*self.detectors, *self.dofs.devices],
                 delay=self.trigger_delay,
             )
             products = self.digestion(self.db[uid].table(), **self.digestion_kwargs)
@@ -584,8 +587,9 @@ def fitness_model(self):
 
     @property
     def evaluated_constraints(self):
         constraint_objectives = self.objectives(kind="constraint")
+        raw_targets_dict = self.raw_targets()
         if len(constraint_objectives):
-            return torch.cat([obj.constrain(self.raw_targets(obj.name)) for obj in constraint_objectives], dim=-1)
+            return torch.cat([obj.constrain(raw_targets_dict[obj.name]) for obj in constraint_objectives], dim=-1)
         else:
             return torch.ones(size=(len(self.table), 0), dtype=torch.bool)
@@ -610,7 +614,9 @@ def scalarized_fitnesses(self, weights="default", constrained=True):
         """
         fitness_objs = self.objectives(kind="fitness")
         if len(fitness_objs) >= 1:
-            f = self.fitness_scalarization(weights=weights).evaluate(self.train_targets(active=True, kind="fitness"))
+            f = self.fitness_scalarization(weights=weights).evaluate(
+                self.train_targets(active=True, kind="fitness", concatenate=True)
+            )
             f = torch.where(f.isnan(), -np.inf, f)  # remove all nans
         else:
             f = torch.zeros(len(self.table), dtype=torch.double)  # if there are no fitnesses, use a constant dummy fitness
@@ -632,7 +638,7 @@ def pareto_mask(self):
         Returns a mask of all points that satisfy all constraints and are Pareto efficient.
         A point is Pareto efficient if there is no other point that is better at every objective.
""" - Y = self.train_targets(active=True, kind="fitness") + Y = self.train_targets(active=True, kind="fitness", concatenate=True) # nuke the bad points Y[~self.evaluated_constraints.all(axis=-1)] = -np.inf @@ -650,12 +656,12 @@ def pareto_front(self): @property def min_ref_point(self): - y = self.train_targets()[:, self.objectives.type == "fitness"] + y = self.train_targets(concatenate=True)[:, self.objectives.type == "fitness"] return y[y.argmax(axis=0)].min(axis=0).values @property def random_ref_point(self): - return self.train_targets(active=True, kind="fitness")[self.argmax_best_f(weights="random")] + return self.train_targets(active=True, kind="fitness", concatenate=True)[self.argmax_best_f(weights="random")] @property def all_objectives_valid(self): @@ -687,7 +693,7 @@ def _construct_model(self, obj, skew_dims=None): outcome_transform = botorch.models.transforms.outcome.Standardize(m=1) # , batch_shape=torch.Size((1,))) train_inputs = self.train_inputs(active=True) - train_targets = self.train_targets(obj.name) + train_targets = self.train_targets()[obj.name].unsqueeze(-1) inputs_are_trusted = ~torch.isnan(train_inputs).any(axis=1) targets_are_trusted = ~torch.isnan(train_targets).any(axis=1) @@ -935,24 +941,42 @@ def raw_targets(self, index=None, **subset_kwargs): """ Get the raw, untransformed inputs for an objective (or for a subset). """ - if index is None: - return torch.cat([self.raw_targets(index=obj.name) for obj in self.objectives(**subset_kwargs)], dim=-1) - return torch.tensor(self.table.loc[:, self.objectives[index].name].values, dtype=torch.double).unsqueeze(-1) + values = {} - def train_targets(self, index=None, **subset_kwargs): + for obj in self.objectives(**subset_kwargs): + # return torch.cat([self.raw_targets(index=obj.name) for obj in self.objectives(**subset_kwargs)], dim=-1) + values[obj.name] = torch.tensor(self.table.loc[:, obj.name].values, dtype=torch.double) + + return values + + def train_targets(self, concatenate=False, **subset_kwargs): """Returns the values associated with an objective name.""" - if index is None: - return torch.cat([self.train_targets(obj.name) for obj in self.objectives(**subset_kwargs)], dim=-1) + targets_dict = {} + raw_targets_dict = self.raw_targets(**subset_kwargs) + + for obj in self.objectives(**subset_kwargs): + y = raw_targets_dict[obj.name] + + # check that targets values are inside acceptable values + valid = (y >= obj._trust_domain[0]) & (y <= obj._trust_domain[1]) + y = torch.where(valid, y, np.nan) + + targets_dict[obj.name] = obj._transform(y) + + if self.enforce_all_objectives_valid: + all_valid_mask = True + + for name, values in targets_dict.items(): + all_valid_mask &= ~values.isnan() - obj = self.objectives[index] - raw_targets = self.raw_targets(index=index, **subset_kwargs) + for name in targets_dict.keys(): + targets_dict[name] = targets_dict[name].where(all_valid_mask, np.nan) - # check that targets values are inside acceptable values - valid = (raw_targets >= obj._trust_domain[0]) & (raw_targets <= obj._trust_domain[1]) - raw_targets = torch.where(valid, raw_targets, np.nan) + if concatenate: + return torch.cat([values.unsqueeze(-1) for values in targets_dict.values()], axis=-1) - return obj._transform(raw_targets) + return targets_dict @property def best(self): diff --git a/src/blop/plotting.py b/src/blop/plotting.py index 5492a65..91b7ca7 100644 --- a/src/blop/plotting.py +++ b/src/blop/plotting.py @@ -33,7 +33,7 @@ def _plot_fitness_objs_one_dof(agent, size=16, lw=1e0): test_model_inputs = 
agent.dofs(active=True).transform(test_inputs)
 
     for obj_index, obj in enumerate(fitness_objs):
-        obj_values = agent.train_targets(obj.name).squeeze(-1).numpy()
+        obj_values = agent.train_targets()[obj.name].numpy()
 
         color = DEFAULT_COLOR_LIST[obj_index]
@@ -84,7 +84,7 @@ def _plot_constraint_objs_one_dof(agent, size=16, lw=1e0):
         val_ax = agent.obj_axes[obj_index, 0]
         con_ax = agent.obj_axes[obj_index, 1]
 
-        obj_values = agent.train_targets(obj.name).squeeze(-1).numpy()
+        obj_values = agent.train_targets()[obj.name].numpy()
 
         color = DEFAULT_COLOR_LIST[obj_index]
@@ -158,7 +158,7 @@ def _plot_objs_many_dofs(agent, axes=(0, 1), shading="nearest", cmap=DEFAULT_COL
     model_inputs = agent.dofs(active=True).transform(test_inputs)
 
     for obj_index, obj in enumerate(agent.objectives):
-        targets = agent.train_targets(obj.name).squeeze(-1).numpy()
+        targets = agent.train_targets()[obj.name].numpy()
 
         values = obj._untransform(targets)
@@ -547,7 +547,7 @@ def _plot_pareto_front(agent, obj_indices=(0, 1)):
 
     fig, ax = plt.subplots(1, 1, figsize=(6, 6))
 
-    y = agent.train_targets(kind="fitness")
+    y = agent.train_targets(kind="fitness", concatenate=True)
 
     pareto_mask = agent.pareto_mask
     constraint = agent.evaluated_constraints.all(axis=-1)
diff --git a/src/blop/utils/__init__.py b/src/blop/utils/__init__.py
index 7ff1c8b..19d6226 100644
--- a/src/blop/utils/__init__.py
+++ b/src/blop/utils/__init__.py
@@ -4,6 +4,8 @@
 import torch
 from ortools.constraint_solver import pywrapcp, routing_enums_pb2
 
+from . import functions  # noqa
+
 
 def get_beam_stats(image, threshold=0.5):
     ny, nx = image.shape
@@ -90,7 +92,7 @@ def route(start_point, points, dim_weights=1):
     """
 
     total_points = np.r_[np.atleast_2d(start_point), points]
-    points_scale = total_points.ptp(axis=0)
+    points_scale = np.ptp(total_points, axis=0)
     dim_mask = points_scale > 0
 
     if dim_mask.sum() == 0:
diff --git a/src/blop/utils/misc.py b/src/blop/utils/misc.py
deleted file mode 100644
index 6f3bd85..0000000
--- a/src/blop/utils/misc.py
+++ /dev/null
@@ -1,227 +0,0 @@
-import botorch
-import numpy as np
-import scipy as sp
-import torch
-from ortools.constraint_solver import pywrapcp, routing_enums_pb2
-
-
-def sobol_sampler(bounds, n, q=1):
-    """
-    Returns $n$ quasi-randomly sampled points within the bounds (a 2 by d tensor)
-    and batch size $q$
-    """
-    return botorch.utils.sampling.draw_sobol_samples(bounds, n=n, q=q)
-
-
-def normalized_sobol_sampler(n, d):
-    """
-    Returns $n$ quasi-randomly sampled points in the [0,1]^d hypercube
-    """
-    normalized_bounds = torch.outer(torch.tensor([0, 1]), torch.ones(d))
-    return sobol_sampler(normalized_bounds, n=n, q=1)
-
-
-def estimate_root_indices(x):
-    # or, indices_before_sign_changes
-    i_whole = np.where(np.sign(x[1:]) != np.sign(x[:-1]))[0]
-    i_part = 1 - x[i_whole + 1] / (x[i_whole + 1] - x[i_whole])
-    return i_whole + i_part
-
-
-def _fast_psd_inverse(M):
-    """
-    About twice as fast as np.linalg.inv for large, PSD matrices.
-    """
-    cholesky, dpotrf_info = sp.linalg.lapack.dpotrf(M)
-    invM, dpotri_info = sp.linalg.lapack.dpotri(cholesky)
-    return np.where(invM, invM, invM.T)
-
-
-def mprod(*M):
-    res = M[0]
-    for m in M[1:]:
-        res = np.matmul(res, m)
-    return res
-
-
-def route(start_point, points):
-    """
-    Returns the indices of the most efficient way to visit `points`, starting from `start_point`.
- """ - - total_points = np.r_[np.atleast_2d(start_point), points] - points_scale = total_points.ptp(axis=0) - dim_mask = points_scale > 0 - - if dim_mask.sum() == 0: - return np.arange(len(points)) - - normalized_points = (total_points - total_points.min(axis=0))[:, dim_mask] / points_scale[dim_mask] - - delay_matrix = np.sqrt(np.square(normalized_points[:, None, :] - normalized_points[None, :, :]).sum(axis=-1)) - delay_matrix = (1e4 * delay_matrix).astype(int) # it likes integers idk - - manager = pywrapcp.RoutingIndexManager(len(total_points), 1, 0) # number of depots, number of salesmen, starting index - routing = pywrapcp.RoutingModel(manager) - - def delay_callback(from_index, to_index): - to_node = manager.IndexToNode(to_index) - if to_node == 0: - return 0 # it is free to return to the depot from anywhere; we just won't do it - from_node = manager.IndexToNode(from_index) - return delay_matrix[from_node][to_node] - - transit_callback_index = routing.RegisterTransitCallback(delay_callback) - routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index) - search_parameters = pywrapcp.DefaultRoutingSearchParameters() - search_parameters.first_solution_strategy = routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC - - solution = routing.SolveWithParameters(search_parameters) - - index = routing.Start(0) - route_indices, route_delays = [0], [] - while not routing.IsEnd(index): - previous_index = index - index = solution.Value(routing.NextVar(index)) - route_delays.append(routing.GetArcCostForVehicle(previous_index, index, 0)) - route_indices.append(index) - - # omit the first and last indices, which correspond to the start - return np.array(route_indices)[1:-1] - 1 - - -def get_movement_time(x, v_max, a): - """ - How long does it take an object to go distance $x$ with acceleration $a$ and maximum velocity $v_max$? - That's what this function answers. - """ - return 2 * np.sqrt(np.abs(x) / a) * (np.abs(x) < v_max**2 / a) + (np.abs(x) / v_max + v_max / a) * ( - np.abs(x) > v_max**2 / a - ) - - -def get_principal_component_bounds(image, beam_prop=0.5): - """ - Returns the bounding box in pixel units of an image, along with a goodness of fit parameter. - This should go off without a hitch as long as beam_prop is less than 1. - """ - - if image.sum() == 0: - return np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan - - # print(f'{extent = }') - - u, s, v = np.linalg.svd(image - image.mean()) - separability = np.square(s[0]) / np.square(s).sum() - - # q refers to "quantile" - q_min, q_max = 0.5 * (1 - beam_prop), 0.5 * (1 + beam_prop) - - # these represent the cumulative proportion of the beam, as captured by the SVD. - cs_beam_x = np.cumsum(v[0]) / np.sum(v[0]) - cs_beam_y = np.cumsum(u[:, 0]) / np.sum(u[:, 0]) - cs_beam_x[0], cs_beam_y[0] = 0, 0 # change this lol - - # the first coordinate where the cumulative beam is greater than the minimum - i_q_min_x = np.where((cs_beam_x[1:] > q_min) & (cs_beam_x[:-1] < q_min))[0][0] - i_q_min_y = np.where((cs_beam_y[1:] > q_min) & (cs_beam_y[:-1] < q_min))[0][0] - - # the last coordinate where the cumulative beam is less than the maximum - i_q_max_x = np.where((cs_beam_x[1:] > q_max) & (cs_beam_x[:-1] < q_max))[0][-1] - i_q_max_y = np.where((cs_beam_y[1:] > q_max) & (cs_beam_y[:-1] < q_max))[0][-1] - - # interpolate, so that we can go finer than one pixel. this quartet is the "bounding box", from 0 to 1. 
- # (let's make this more efficient later) - x_min = np.interp(q_min, cs_beam_x[[i_q_min_x, i_q_min_x + 1]], [i_q_min_x, i_q_min_x + 1]) - x_max = np.interp(q_max, cs_beam_x[[i_q_max_x, i_q_max_x + 1]], [i_q_max_x, i_q_max_x + 1]) - y_min = np.interp(q_min, cs_beam_y[[i_q_min_y, i_q_min_y + 1]], [i_q_min_y, i_q_min_y + 1]) - y_max = np.interp(q_max, cs_beam_y[[i_q_max_y, i_q_max_y + 1]], [i_q_max_y, i_q_max_y + 1]) - - return ( - x_min, - x_max, - y_min, - y_max, - separability, - ) - - -def get_beam_bounding_box(image, thresh=0.5): - """ - Returns the bounding box in pixel units of an image, along with a goodness of fit parameter. - This should go off without a hitch as long as beam_prop is less than 1. - """ - - n_y, n_x = image.shape - - if image.sum() == 0: - return np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan - - # filter the image - zim = sp.ndimage.median_filter(image.astype(float), size=3) - zim -= np.median(zim, axis=0) - zim -= np.median(zim, axis=1)[:, None] - - x_sum = zim.sum(axis=0) - y_sum = zim.sum(axis=1) - - x_sum_min_val = thresh * x_sum.max() - y_sum_min_val = thresh * y_sum.max() - - gtt_x = x_sum > x_sum_min_val - gtt_y = y_sum > y_sum_min_val - - i_x_min_start = np.where(~gtt_x[:-1] & gtt_x[1:])[0][0] - i_x_max_start = np.where(gtt_x[:-1] & ~gtt_x[1:])[0][-1] - i_y_min_start = np.where(~gtt_y[:-1] & gtt_y[1:])[0][0] - i_y_max_start = np.where(gtt_y[:-1] & ~gtt_y[1:])[0][-1] - - x_min = ( - 0 - if gtt_x[0] - else np.interp(x_sum_min_val, x_sum[[i_x_min_start, i_x_min_start + 1]], [i_x_min_start, i_x_min_start + 1]) - ) - y_min = ( - 0 - if gtt_y[0] - else np.interp(y_sum_min_val, y_sum[[i_y_min_start, i_y_min_start + 1]], [i_y_min_start, i_y_min_start + 1]) - ) - x_max = ( - n_x - 2 - if gtt_x[-1] - else np.interp(x_sum_min_val, x_sum[[i_x_max_start + 1, i_x_max_start]], [i_x_max_start + 1, i_x_max_start]) - ) - y_max = ( - n_y - 2 - if gtt_y[-1] - else np.interp(y_sum_min_val, y_sum[[i_y_max_start + 1, i_y_max_start]], [i_y_max_start + 1, i_y_max_start]) - ) - - return ( - x_min, - x_max, - y_min, - y_max, - ) - - -def best_image_feedback(image): - n_y, n_x = image.shape - - fim = sp.ndimage.median_filter(image, size=3) - - masked_image = fim * (fim - fim.mean() > 0.5 * fim.ptp()) - - x_weight = masked_image.sum(axis=0) - y_weight = masked_image.sum(axis=1) - - x = np.arange(n_x) - y = np.arange(n_y) - - x0 = np.sum(x_weight * x) / np.sum(x_weight) - y0 = np.sum(y_weight * y) / np.sum(y_weight) - - xw = 2 * np.sqrt((np.sum(x_weight * x**2) / np.sum(x_weight) - x0**2)) - yw = 2 * np.sqrt((np.sum(y_weight * y**2) / np.sum(y_weight) - y0**2)) - - return x0, xw, y0, yw From 388cc3bf76ce9a43bb62f692c6527fe271454f7c Mon Sep 17 00:00:00 2001 From: Thomas Morris Date: Sun, 7 Jul 2024 15:03:39 -0400 Subject: [PATCH 3/9] fix syntax in tests --- src/blop/agent.py | 3 +-- src/blop/tests/test_acqfs.py | 2 +- src/blop/tests/test_sims.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/blop/agent.py b/src/blop/agent.py index 2113e07..5df24a4 100644 --- a/src/blop/agent.py +++ b/src/blop/agent.py @@ -126,12 +126,11 @@ def __init__( self.dofs = DOFList(list(np.atleast_1d(dofs))) self.objectives = ObjectiveList(list(np.atleast_1d(objectives))) + self.detectors = list(np.atleast_1d(detectors)) _validate_dofs_and_objs(self.dofs, self.objectives) self.db = db - - self.detectors = detectors self.acquisition_plan = acquistion_plan self.digestion = digestion self.digestion_kwargs = digestion_kwargs diff --git a/src/blop/tests/test_acqfs.py 
b/src/blop/tests/test_acqfs.py
index 3548d18..bc03155 100644
--- a/src/blop/tests/test_acqfs.py
+++ b/src/blop/tests/test_acqfs.py
@@ -7,7 +7,7 @@
 @pytest.mark.parametrize("agent", all_agents, indirect=True)
 def test_analytic_acqfs(agent, RE, db, acqf):
     agent.db = db
-    RE(agent.learn("qr", n=16))
+    RE(agent.learn("qr", n=4))
     RE(agent.learn(acqf, n=1))
     getattr(agent, acqf)
diff --git a/src/blop/tests/test_sims.py b/src/blop/tests/test_sims.py
index 25a7e3b..3f66e77 100644
--- a/src/blop/tests/test_sims.py
+++ b/src/blop/tests/test_sims.py
@@ -24,7 +24,7 @@ def test_kb_simulation(RE, db):
     agent = Agent(
         dofs=dofs,
         objectives=objectives,
-        dets=[beamline.det],
+        detectors=beamline.det,
         verbose=True,
         db=db,
         tolerate_acquisition_errors=False,

From b1845df8e339cb294d79e61c6a927d5e720d45f2 Mon Sep 17 00:00:00 2001
From: Thomas Morris
Date: Sun, 7 Jul 2024 21:04:27 -0400
Subject: [PATCH 4/9] add simulation example to tutorials

---
 docs/source/tutorials/kb-mirrors.ipynb | 135 +++++++++++++++++++++++++
 pyproject.toml                         |   2 +-
 src/blop/agent.py                      |  44 ++++----
 src/blop/digestion/__init__.py         |  53 ++++++++++
 src/blop/sim/__init__.py               |   2 +-
 src/blop/sim/beamline.py               |  38 +------
 src/blop/sim/handlers.py               |  31 ++++++
 src/blop/tests/conftest.py             |   3 +
 src/blop/tests/test_agents.py          |   2 +-
 src/blop/tests/test_sims.py            |  14 ++-
 src/blop/utils/prepare_re_env.py       |   3 +
 11 files changed, 266 insertions(+), 61 deletions(-)
 create mode 100644 docs/source/tutorials/kb-mirrors.ipynb
 create mode 100644 src/blop/sim/handlers.py

diff --git a/docs/source/tutorials/kb-mirrors.ipynb b/docs/source/tutorials/kb-mirrors.ipynb
new file mode 100644
index 0000000..4fd488c
--- /dev/null
+++ b/docs/source/tutorials/kb-mirrors.ipynb
@@ -0,0 +1,135 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "c1ac0e4b-d065-41af-97ec-b73bbc7dad7d",
+   "metadata": {},
+   "source": [
+    "# KB Mirrors\n",
+    "\n",
+    "This example simulates the alignment of a KB mirror endstation (with four degrees of freedom).\n",
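+    "\n",
+    "The `Beamline` and `Detector` devices come from `blop.sim`: the simulated detector image responds to the four KB mirror signals, and `beam_stats_digestion` reduces each image to the scalar objectives optimized below."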
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2a1798fa-b251-409e-9d2d-097240372b03", + "metadata": {}, + "outputs": [], + "source": [ + "%run -i /users/tom/blop/src/src/blop/utils/prepare_re_env.py --db-type=temp\n", + "bec.disable_plots()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0ca926ab-5859-49f3-a96a-73c401cc18e6", + "metadata": {}, + "outputs": [], + "source": [ + "from blop.sim import Beamline\n", + "\n", + "beamline = Beamline(name=\"bl\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28ee6cfc-428a-4472-b503-bb072f939866", + "metadata": {}, + "outputs": [], + "source": [ + "from blop import DOF, Objective, Agent\n", + "from blop.digestion import beam_stats_digestion\n", + "\n", + "dofs = [\n", + " DOF(description=\"KBV downstream\", device=beamline.kbv_dsv, search_domain=(-5.0, 5.0)),\n", + " DOF(description=\"KBV upstream\", device=beamline.kbv_usv, search_domain=(-5.0, 5.0)),\n", + " DOF(description=\"KBH downstream\", device=beamline.kbh_dsh, search_domain=(-5.0, 5.0)),\n", + " DOF(description=\"KBH upstream\", device=beamline.kbh_ush, search_domain=(-5.0, 5.0)),\n", + "]\n", + "\n", + "objectives = [\n", + " Objective(name=\"bl_det_sum\", target=\"max\", transform=\"log\", trust_domain=(200, np.inf)),\n", + " Objective(name=\"bl_det_wid_x\", target=\"min\", transform=\"log\", latent_groups=[(\"bl_kbh_dsh\", \"bl_kbh_ush\")]),\n", + " Objective(name=\"bl_det_wid_y\", target=\"min\", transform=\"log\", latent_groups=[(\"bl_kbv_dsv\", \"bl_kbv_usv\")]),\n", + "]\n", + "\n", + "agent = Agent(\n", + " dofs=dofs,\n", + " objectives=objectives,\n", + " detectors=[beamline.det],\n", + " digestion=beam_stats_digestion,\n", + " digestion_kwargs={\"image_key\": \"bl_det_image\"},\n", + " verbose=True,\n", + " db=db,\n", + " tolerate_acquisition_errors=False,\n", + " enforce_all_objectives_valid=True,\n", + " train_every=3,\n", + ")\n", + "\n", + "(uid,) = RE(agent.learn(\"qr\", n=32))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "491636be-1e04-45a2-a622-dc21e192d208", + "metadata": {}, + "outputs": [], + "source": [ + "RE(agent.learn(\"qei\", n=4, iterations=4))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6434fc9-682a-45fa-b100-465dfda3aff1", + "metadata": {}, + "outputs": [], + "source": [ + "plt.imshow(agent.best.bl_det_image)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ef89c0cd-65be-4d1f-ab6d-c8a4ce251d7b", + "metadata": {}, + "outputs": [], + "source": [ + "agent.plot_objectives(axes=(2, 3))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb56db05-0fe5-46a5-8435-383f1e34f55d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pyproject.toml b/pyproject.toml index 8e9e33c..0106599 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ dependencies = [ "gpytorch", "h5py", "matplotlib", - "numpy==1.26.4", + "numpy<2", "ophyd", "ortools", "scipy", diff --git a/src/blop/agent.py b/src/blop/agent.py index 5df24a4..c6413e6 100644 --- 
a/src/blop/agent.py
+++ b/src/blop/agent.py
@@ -145,13 +145,17 @@ def __init__(
         self.trigger_delay = trigger_delay
         self.sample_center_on_init = sample_center_on_init
 
-        self.table = pd.DataFrame()
+        self._table = pd.DataFrame()
 
         self.initialized = False
         self.a_priori_hypers = None
 
         self.n_last_trained = 0
 
+    @property
+    def table(self):
+        return self._table
+
     def unpack_run(self):
         return
@@ -174,7 +178,7 @@ def refresh(self):
         self._train_all_models()
 
     def redigest(self):
-        self.table = self.digestion(self.table, **self.digestion_kwargs)
+        self._table = self.digestion(self._table, **self.digestion_kwargs)
 
     def sample(self, n: int = DEFAULT_MAX_SAMPLES, method: str = "quasi-random") -> torch.Tensor:
         """
@@ -353,8 +357,8 @@ def tell(
             raise ValueError("All supplied values must be the same length!")
 
         new_table = pd.DataFrame(data)
-        self.table = pd.concat([self.table, new_table]) if append else new_table
-        self.table.index = np.arange(len(self.table))
+        self._table = pd.concat([self._table, new_table]) if append else new_table
+        self._table.index = np.arange(len(self._table))
 
         if update_models:
             for obj in self.objectives(active=True):
@@ -503,7 +507,7 @@ def acquire(self, points):
                 [*self.detectors, *self.dofs.devices],
                 delay=self.trigger_delay,
             )
-            products = self.digestion(self.db[uid].table(), **self.digestion_kwargs)
+            products = self.digestion(self.db[uid].table(fill=True), **self.digestion_kwargs)
 
         except KeyboardInterrupt as interrupt:
             raise interrupt
@@ -523,12 +527,12 @@ def acquire(self, points):
 
     def load_data(self, data_file, append=True):
         new_table = pd.read_hdf(data_file, key="table")
-        self.table = pd.concat([self.table, new_table]) if append else new_table
+        self._table = pd.concat([self._table, new_table]) if append else new_table
         self.refresh()
 
     def reset(self):
         """Reset the agent."""
-        self.table = pd.DataFrame()
+        self._table = pd.DataFrame()
 
         for obj in self.objectives(active=True):
             if hasattr(obj, "model"):
@@ -590,7 +594,7 @@ def evaluated_constraints(self):
         if len(constraint_objectives):
             return torch.cat([obj.constrain(raw_targets_dict[obj.name]) for obj in constraint_objectives], dim=-1)
         else:
-            return torch.ones(size=(len(self.table), 0), dtype=torch.bool)
+            return torch.ones(size=(len(self._table), 0), dtype=torch.bool)
 
     def fitness_scalarization(self, weights="default"):
         fitness_objectives = self.objectives(active=True, kind="fitness")
@@ -618,7 +622,7 @@ def scalarized_fitnesses(self, weights="default", constrained=True):
             )
             f = torch.where(f.isnan(), -np.inf, f)  # remove all nans
         else:
-            f = torch.zeros(len(self.table), dtype=torch.double)  # if there are no fitnesses, use a constant dummy fitness
+            f = torch.zeros(len(self._table), dtype=torch.double)  # if there are no fitnesses, use a constant dummy fitness
 
         if constrained:
             # how many constraints are satisfied?
             c = self.evaluated_constraints.sum(axis=-1)
@@ -651,7 +655,7 @@ def pareto_front(self):
         """
         A subset of the data table containing only points on the Pareto front.
         """
-        return self.table.loc[self.pareto_mask.numpy()]
+        return self._table.loc[self.pareto_mask.numpy()]
 
     @property
     def min_ref_point(self):
@@ -747,7 +751,7 @@ def _train_all_models(self, **kwargs):
         if self.verbose:
             print(f"trained models in {ttime.monotonic() - t0:.01f} seconds")
 
-        self.n_last_trained = len(self.table)
+        self.n_last_trained = len(self._table)
 
     def _get_acquisition_function(self, identifier, return_metadata=False):
         """Returns a BoTorch acquisition function for a given identifier.
Acquisition functions can be @@ -813,7 +817,7 @@ def save_data(self, path="./data.h5"): save_dir, _ = os.path.split(path) pathlib.Path(save_dir).mkdir(parents=True, exist_ok=True) - self.table.to_hdf(path, key="table") + self._table.to_hdf(path, key="table") def forget(self, last=None, index=None, train=True): """ @@ -828,12 +832,12 @@ def forget(self, last=None, index=None, train=True): """ if last is not None: - if last > len(self.table): - raise ValueError(f"Cannot forget last {last} data points (only {len(self.table)} samples have been taken).") - self.forget(index=self.table.index.values[-last:], train=train) + if last > len(self._table): + raise ValueError(f"Cannot forget last {last} data points (only {len(self._table)} samples have been taken).") + self.forget(index=self._table.index.values[-last:], train=train) elif index is not None: - self.table.drop(index=index, inplace=True) + self._table.drop(index=index, inplace=True) self._construct_all_models() if train: self._train_all_models() @@ -919,7 +923,7 @@ def raw_inputs(self, index=None, **subset_kwargs): """ if index is None: return torch.cat([self.raw_inputs(dof.name) for dof in self.dofs(**subset_kwargs)], dim=-1) - return torch.tensor(self.table.loc[:, self.dofs[index].name].values, dtype=torch.double).unsqueeze(-1) + return torch.tensor(self._table.loc[:, self.dofs[index].name].values, dtype=torch.double).unsqueeze(-1) def train_inputs(self, index=None, **subset_kwargs): """A two-dimensional tensor of all DOF values.""" @@ -944,7 +948,7 @@ def raw_targets(self, index=None, **subset_kwargs): for obj in self.objectives(**subset_kwargs): # return torch.cat([self.raw_targets(index=obj.name) for obj in self.objectives(**subset_kwargs)], dim=-1) - values[obj.name] = torch.tensor(self.table.loc[:, obj.name].values, dtype=torch.double) + values[obj.name] = torch.tensor(self._table.loc[:, obj.name].values, dtype=torch.double) return values @@ -980,12 +984,12 @@ def train_targets(self, concatenate=False, **subset_kwargs): @property def best(self): """Returns all data for the best point.""" - return self.table.loc[self.argmax_best_f()] + return self._table.loc[self.argmax_best_f()] @property def best_inputs(self): """Returns the value of each DOF at the best point.""" - return self.table.loc[self.argmax_best_f(), self.dofs.names].to_dict() + return self._table.loc[self.argmax_best_f(), self.dofs.names].to_dict() def go_to(self, **positions): """Set all settable DOFs to a given position. DOF/value pairs should be supplied as kwargs, e.g. 
as diff --git a/src/blop/digestion/__init__.py b/src/blop/digestion/__init__.py index bdfd1b3..45f83b2 100644 --- a/src/blop/digestion/__init__.py +++ b/src/blop/digestion/__init__.py @@ -1,5 +1,58 @@ +import numpy as np import pandas as pd +import scipy as sp + + +def get_beam_stats(image, threshold=0.5): + ny, nx = image.shape + + fim = image.copy() + fim -= np.median(fim, axis=0) + fim -= np.median(fim, axis=1)[:, None] + + fim = sp.ndimage.median_filter(fim, size=3) + fim = sp.ndimage.gaussian_filter(fim, sigma=1) + + m = fim > threshold * fim.max() + area = m.sum() + + cs_x = np.cumsum(m.sum(axis=0)) / area + cs_y = np.cumsum(m.sum(axis=1)) / area + + q_min, q_max = [0.15865, 0.84135] # one sigma + q_min, q_max = [0.05, 0.95] # 90% + + x_min, x_max = np.interp([q_min, q_max], cs_x, np.arange(nx)) + y_min, y_max = np.interp([q_min, q_max], cs_y, np.arange(ny)) + + stats = { + "max": fim.max(), + "sum": fim.sum(), + "area": area, + "cen_x": (x_min + x_max) / 2, + "cen_y": (y_min + y_max) / 2, + "wid_x": x_max - x_min, + "wid_y": y_max - y_min, + "x_min": x_min, + "x_max": x_max, + "y_min": y_min, + "y_max": y_max, + "bbox": [[x_min, x_max, x_max, x_min, x_min], [y_min, y_min, y_max, y_max, y_min]], + } + + return stats def default_digestion_function(df: pd.DataFrame) -> pd.DataFrame: return df + + +def beam_stats_digestion(df: pd.DataFrame, image_key, **kwargs) -> pd.DataFrame: + for index, entry in df.iterrows(): + stats = get_beam_stats(entry.loc[image_key], **kwargs) + + for attr, value in stats.items(): + if attr not in ["bbox"]: + df.loc[index, attr] = value + + return df diff --git a/src/blop/sim/__init__.py b/src/blop/sim/__init__.py index 70c7e45..5053a7d 100644 --- a/src/blop/sim/__init__.py +++ b/src/blop/sim/__init__.py @@ -1 +1 @@ -from .beamline import Beamline, Detector # noqa +from .beamline import Beamline, Detector, HDF5Handler # noqa diff --git a/src/blop/sim/beamline.py b/src/blop/sim/beamline.py index a70b966..0994e8f 100644 --- a/src/blop/sim/beamline.py +++ b/src/blop/sim/beamline.py @@ -6,7 +6,6 @@ import h5py import numpy as np import scipy as sp -from area_detector_handlers.handlers import HandlerBase from event_model import compose_resource from ophyd import Component as Cpt from ophyd import Device, Signal @@ -14,34 +13,7 @@ from ophyd.utils import make_dir_tree from ..utils import get_beam_stats - - -class HDF5Handler(HandlerBase): - specs = {"HDF5"} - - def __init__(self, filename): - self._name = filename - - def __call__(self, frame): - with h5py.File(self._name, "r") as f: - entry = f["/entry/image"] - return entry[frame] - - -class ExternalFileReference(Signal): - """ - A pure software Signal that describe()s an image in an external file. 
- """ - - def describe(self): - resource_document_data = super().describe() - resource_document_data[self.name].update( - dict( - external="FILESTORE:", - dtype="array", - ) - ) - return resource_document_data +from .handlers import ExternalFileReference class Detector(Device): @@ -151,8 +123,8 @@ def generate_beam(self, noise: bool = True): x0 = self.parent.kbh_ush.get() - self.parent.kbh_dsh.get() y0 = self.parent.kbv_usv.get() - self.parent.kbv_dsv.get() - x_width = np.sqrt(0.5 + 5e-1 * (self.parent.kbh_ush.get() + self.parent.kbh_dsh.get() - 1) ** 2) - y_width = np.sqrt(0.25 + 5e-1 * (self.parent.kbv_usv.get() + self.parent.kbv_dsv.get() - 2) ** 2) + x_width = np.sqrt(0.2 + 5e-1 * (self.parent.kbh_ush.get() + self.parent.kbh_dsh.get() - 1) ** 2) + y_width = np.sqrt(0.1 + 5e-1 * (self.parent.kbv_usv.get() + self.parent.kbv_dsv.get() - 2) ** 2) beam = np.exp(-0.5 * (((X - x0) / x_width) ** 4 + ((Y - y0) / y_width) ** 4)) / ( np.sqrt(2 * np.pi) * x_width * y_width @@ -173,8 +145,8 @@ def generate_beam(self, noise: bool = True): power_spectrum = 1 / (1e-2 + KX**2 + KY**2) - white_noise = 5e-3 * np.random.standard_normal(size=X.shape) - pink_noise = 5e-3 * np.real(np.fft.ifft2(power_spectrum * np.fft.fft2(np.random.standard_normal(size=X.shape)))) + white_noise = 1e-3 * np.random.standard_normal(size=X.shape) + pink_noise = 1e-3 * np.real(np.fft.ifft2(power_spectrum * np.fft.fft2(np.random.standard_normal(size=X.shape)))) # background = 5e-3 * (X - Y) / X.max() image += white_noise + pink_noise diff --git a/src/blop/sim/handlers.py b/src/blop/sim/handlers.py new file mode 100644 index 0000000..b95f310 --- /dev/null +++ b/src/blop/sim/handlers.py @@ -0,0 +1,31 @@ +import h5py +from area_detector_handlers.handlers import HandlerBase +from ophyd import Signal + + +class HDF5Handler(HandlerBase): + specs = {"HDF5"} + + def __init__(self, filename): + self._name = filename + + def __call__(self, frame): + with h5py.File(self._name, "r") as f: + entry = f["/entry/image"] + return entry[frame] + + +class ExternalFileReference(Signal): + """ + A pure software Signal that describe()s an image in an external file. 
+ """ + + def describe(self): + resource_document_data = super().describe() + resource_document_data[self.name].update( + dict( + external="FILESTORE:", + dtype="array", + ) + ) + return resource_document_data diff --git a/src/blop/tests/conftest.py b/src/blop/tests/conftest.py index 9de6513..84acb41 100644 --- a/src/blop/tests/conftest.py +++ b/src/blop/tests/conftest.py @@ -11,6 +11,7 @@ from blop import DOF, Agent, Objective from blop.digestion.tests import chankong_and_haimes_digestion, sketchy_himmelblau_digestion from blop.dofs import BrownianMotion +from blop.sim import HDF5Handler @pytest.fixture(scope="function") @@ -23,6 +24,8 @@ def db(): except Exception: pass + db.reg.register_handler("HDF5", HDF5Handler, overwrite=True) + return db diff --git a/src/blop/tests/test_agents.py b/src/blop/tests/test_agents.py index 84f434c..598117d 100644 --- a/src/blop/tests/test_agents.py +++ b/src/blop/tests/test_agents.py @@ -10,7 +10,7 @@ def test_agent(agent, RE, db): """ agent.db = db - RE(agent.learn("qr", n=16)) + RE(agent.learn("qr", n=32)) best = agent.best assert [dof.name in best for dof in agent.dofs] diff --git a/src/blop/tests/test_sims.py b/src/blop/tests/test_sims.py index 3f66e77..5906ac1 100644 --- a/src/blop/tests/test_sims.py +++ b/src/blop/tests/test_sims.py @@ -1,6 +1,7 @@ import numpy as np from blop import DOF, Agent, Objective +from blop.digestion import beam_stats_digestion from blop.sim import Beamline @@ -16,20 +17,23 @@ def test_kb_simulation(RE, db): ] objectives = [ - Objective(name="bl_det_sum", target="max", transform="log", trust_domain=(1, np.inf)), - Objective(name="bl_det_wid_x", target="min", transform="log"), # , latent_groups=[("x1", "x2")]), - Objective(name="bl_det_wid_y", target="min", transform="log"), # , latent_groups=[("x1", "x2")])] + Objective(name="bl_det_sum", target="max", transform="log", trust_domain=(200, np.inf)), + Objective(name="bl_det_wid_x", target="min", transform="log", latent_groups=[("bl_kbh_dsh", "bl_kbh_ush")]), + Objective(name="bl_det_wid_y", target="min", transform="log", latent_groups=[("bl_kbv_dsv", "bl_kbv_usv")]), ] agent = Agent( dofs=dofs, objectives=objectives, - detectors=beamline.det, + detectors=[beamline.det], + digestion=beam_stats_digestion, + digestion_kwargs={"image_key": "bl_det_image"}, verbose=True, db=db, tolerate_acquisition_errors=False, + enforce_all_objectives_valid=True, train_every=3, ) - RE(agent.learn("qr", n=32)) + RE(agent.learn("qr", n=16)) RE(agent.learn("qei", n=4, iterations=4)) diff --git a/src/blop/utils/prepare_re_env.py b/src/blop/utils/prepare_re_env.py index fdf715e..01c7484 100644 --- a/src/blop/utils/prepare_re_env.py +++ b/src/blop/utils/prepare_re_env.py @@ -12,6 +12,8 @@ from databroker import Broker from ophyd.utils import make_dir_tree +from blop.sim import HDF5Handler + DEFAULT_DB_TYPE = "local" DEFAULT_ROOT_DIR = "/tmp/sirepo-bluesky-data" DEFAULT_ENV_TYPE = "stepper" @@ -24,6 +26,7 @@ def re_env(db_type=DEFAULT_DB_TYPE, root_dir=DEFAULT_ROOT_DIR): RE.subscribe(bec) db = Broker.named(db_type) + db.reg.register_handler("HDF5", HDF5Handler, overwrite=True) try: databroker.assets.utils.install_sentinels(db.reg.config, version=1) except Exception: From dbed267af05f43bcde81c190c87524566cdf0dfb Mon Sep 17 00:00:00 2001 From: Thomas Morris Date: Sun, 7 Jul 2024 21:30:37 -0400 Subject: [PATCH 5/9] add new notebook to tutorials index --- docs/source/tutorials.rst | 1 + docs/source/tutorials/hyperparameters.ipynb | 2 +- src/blop/sim/__init__.py | 3 ++- 3 files changed, 4 insertions(+), 2 
deletions(-) diff --git a/docs/source/tutorials.rst b/docs/source/tutorials.rst index d146376..64bd580 100644 --- a/docs/source/tutorials.rst +++ b/docs/source/tutorials.rst @@ -8,3 +8,4 @@ Tutorials tutorials/hyperparameters.ipynb tutorials/pareto-fronts.ipynb tutorials/passive-dofs.ipynb + tutorials/kb-mirrors.ipynb diff --git a/docs/source/tutorials/hyperparameters.ipynb b/docs/source/tutorials/hyperparameters.ipynb index e2c03b5..f58f10c 100644 --- a/docs/source/tutorials/hyperparameters.ipynb +++ b/docs/source/tutorials/hyperparameters.ipynb @@ -153,7 +153,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.0" + "version": "3.11.9" }, "vscode": { "interpreter": { diff --git a/src/blop/sim/__init__.py b/src/blop/sim/__init__.py index 5053a7d..2a71bc9 100644 --- a/src/blop/sim/__init__.py +++ b/src/blop/sim/__init__.py @@ -1 +1,2 @@ -from .beamline import Beamline, Detector, HDF5Handler # noqa +from .beamline import Beamline, Detector # noqa +from .handlers import HDF5Handler # noqa From 4e24db4a507d1516413325f87bb254caa743790b Mon Sep 17 00:00:00 2001 From: Thomas Morris Date: Sun, 7 Jul 2024 21:40:38 -0400 Subject: [PATCH 6/9] do not hardcode paths --- docs/source/tutorials/kb-mirrors.ipynb | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/docs/source/tutorials/kb-mirrors.ipynb b/docs/source/tutorials/kb-mirrors.ipynb index 4fd488c..397ea78 100644 --- a/docs/source/tutorials/kb-mirrors.ipynb +++ b/docs/source/tutorials/kb-mirrors.ipynb @@ -17,7 +17,9 @@ "metadata": {}, "outputs": [], "source": [ - "%run -i /users/tom/blop/src/src/blop/utils/prepare_re_env.py --db-type=temp\n", + "from blop.utils import prepare_re_env\n", + "\n", + "%run -i $prepare_re_env.__file__ --db-type=temp\n", "bec.disable_plots()" ] }, @@ -113,7 +115,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3.12.2 64-bit", "language": "python", "name": "python3" }, @@ -127,7 +129,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.12.2" + }, + "vscode": { + "interpreter": { + "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e" + } } }, "nbformat": 4, From e2f2b92b1d37c2762b5033b1e8338ac2b7fc6a55 Mon Sep 17 00:00:00 2001 From: Thomas Morris Date: Mon, 8 Jul 2024 13:45:07 -0400 Subject: [PATCH 7/9] add option to train models for inactive objectives --- src/blop/agent.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/blop/agent.py b/src/blop/agent.py index c6413e6..8f32fb9 100644 --- a/src/blop/agent.py +++ b/src/blop/agent.py @@ -72,6 +72,7 @@ def __init__( digestion_kwargs: dict = {}, verbose: bool = False, enforce_all_objectives_valid: bool = True, + model_inactive_objectives: bool = False, tolerate_acquisition_errors: bool = False, sample_center_on_init: bool = False, trigger_delay: float = 0, @@ -137,8 +138,8 @@ def __init__( self.verbose = verbose + self.model_inactive_objectives = model_inactive_objectives self.tolerate_acquisition_errors = tolerate_acquisition_errors - self.enforce_all_objectives_valid = enforce_all_objectives_valid self.train_every = train_every @@ -361,7 +362,8 @@ def tell( self._table.index = np.arange(len(self._table)) if update_models: - for obj in self.objectives(active=True): + objectives_to_model = self.objectives if self.model_inactive_objectives else self.objectives(active=True) + for obj in objectives_to_model: t0 = 
ttime.monotonic() cached_hypers = obj.model.state_dict() if hasattr(obj, "model") else None @@ -737,13 +739,15 @@ def _construct_model(self, obj, skew_dims=None): def _construct_all_models(self): """Construct a model for each objective.""" - for obj in self.objectives(active=True): + objectives_to_construct = self.objectives if self.model_inactive_objectives else self.objectives(active=True) + for obj in objectives_to_construct: self._construct_model(obj) def _train_all_models(self, **kwargs): """Fit all of the agent's models. All kwargs are passed to `botorch.fit.fit_gpytorch_mll`.""" t0 = ttime.monotonic() - for obj in self.objectives(active=True): + objectives_to_train = self.objectives if self.model_inactive_objectives else self.objectives(active=True) + for obj in objectives_to_train: self._train_model(obj.model) if obj.validity_conjugate_model is not None: self._train_model(obj.validity_conjugate_model) From 6adc90e783c4d41137a815b3fdebf3d720dda875 Mon Sep 17 00:00:00 2001 From: Thomas Morris Date: Tue, 16 Jul 2024 11:01:10 -0400 Subject: [PATCH 8/9] construct models independently of the agent --- src/blop/agent.py | 41 +++++++++++-------------------------- src/blop/bayesian/models.py | 40 ++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 29 deletions(-) diff --git a/src/blop/agent.py b/src/blop/agent.py index 8f32fb9..48b3fea 100644 --- a/src/blop/agent.py +++ b/src/blop/agent.py @@ -29,6 +29,7 @@ from . import plotting, utils from .bayesian import acquisition, models from .bayesian.acquisition import _construct_acqf, parse_acqf_identifier +from .bayesian.models import construct_single_task_model, train_model # from .bayesian.transforms import TargetingPosteriorTransform from .digestion import default_digestion_function @@ -256,7 +257,7 @@ def ask(self, acqf="qei", n=1, route=True, sequential=True, upsample=1, **acqf_k for obj in active_objs: if obj.model_dofs != set(active_dofs.names): self._construct_model(obj) - self._train_model(obj.model) + train_model(obj.model) if acqf_config["type"] == "analytic" and n > 1: raise ValueError("Can't generate multiple design points for analytic acquisition functions.") @@ -377,12 +378,12 @@ def tell( if len(obj.model.train_targets) >= 4: if train: t0 = ttime.monotonic() - self._train_model(obj.model) + train_model(obj.model) if self.verbose: print(f"trained model '{obj.name}' in {1e3*(ttime.monotonic() - t0):.00f} ms") else: - self._train_model(obj.model, hypers=cached_hypers) + train_model(obj.model, hypers=cached_hypers) def learn( self, @@ -673,14 +674,6 @@ def all_objectives_valid(self): """A mask of whether all objectives are valid for each data point.""" return ~torch.isnan(self.scalarized_fitnesses()) - def _train_model(self, model, hypers=None, **kwargs): - """Fit all of the agent's models. All kwargs are passed to `botorch.fit.fit_gpytorch_mll`.""" - if hypers is not None: - model.load_state_dict(hypers) - else: - botorch.fit.fit_gpytorch_mll(gpytorch.mlls.ExactMarginalLogLikelihood(model.likelihood, model), **kwargs) - model.trained = True - def _construct_model(self, obj, skew_dims=None): """ Construct an untrained model for an objective. 
@@ -688,15 +681,6 @@
 
         skew_dims = skew_dims if skew_dims is not None else self._latent_dim_tuples(obj.name)
 
-        likelihood = gpytorch.likelihoods.GaussianLikelihood(
-            noise_constraint=gpytorch.constraints.Interval(
-                torch.tensor(obj.min_noise),
-                torch.tensor(obj.max_noise),
-            ),
-        )
-
-        outcome_transform = botorch.models.transforms.outcome.Standardize(m=1)  # , batch_shape=torch.Size((1,)))
-
         train_inputs = self.train_inputs(active=True)
         train_targets = self.train_targets()[obj.name].unsqueeze(-1)
@@ -705,13 +689,12 @@
 
         trusted = inputs_are_trusted & targets_are_trusted
 
-        obj.model = models.LatentGP(
-            train_inputs=train_inputs[trusted],
-            train_targets=train_targets[trusted],
-            likelihood=likelihood,
-            skew_dims=skew_dims,
-            input_transform=self.input_normalization,
-            outcome_transform=outcome_transform,
+        obj.model = construct_single_task_model(
+            X=train_inputs[trusted],
+            y=train_targets[trusted],
+            min_noise=obj.min_noise,
+            max_noise=obj.max_noise,
+            skew_dims=self._latent_dim_tuples()[obj.name],
         )
 
         obj.model_dofs = set(self.dofs(active=True).names)  # if these change, retrain the model on self.ask()
@@ -748,9 +731,9 @@ def _train_all_models(self, **kwargs):
         t0 = ttime.monotonic()
         objectives_to_train = self.objectives if self.model_inactive_objectives else self.objectives(active=True)
         for obj in objectives_to_train:
-            self._train_model(obj.model)
+            train_model(obj.model)
             if obj.validity_conjugate_model is not None:
-                self._train_model(obj.validity_conjugate_model)
+                train_model(obj.validity_conjugate_model)
 
         if self.verbose:
             print(f"trained models in {ttime.monotonic() - t0:.01f} seconds")
diff --git a/src/blop/bayesian/models.py b/src/blop/bayesian/models.py
index 59ca6e9..7ee3faf 100644
--- a/src/blop/bayesian/models.py
+++ b/src/blop/bayesian/models.py
@@ -1,3 +1,4 @@
+import botorch
 import gpytorch
 import torch
 from botorch.models.gp_regression import SingleTaskGP
@@ -5,6 +6,45 @@
 from . import kernels
 
 
+def train_model(model, hypers=None, **kwargs):
+    """Fit a model. All kwargs are passed to `botorch.fit.fit_gpytorch_mll`."""
+    if hypers is not None:
+        model.load_state_dict(hypers)
+    else:
+        botorch.fit.fit_gpytorch_mll(gpytorch.mlls.ExactMarginalLogLikelihood(model.likelihood, model), **kwargs)
+    model.trained = True
+
+
+def construct_single_task_model(X, y, skew_dims=None, min_noise=1e-6, max_noise=1e0):
+    """
+    Construct an untrained model for an objective.
+    """
+
+    likelihood = gpytorch.likelihoods.GaussianLikelihood(
+        noise_constraint=gpytorch.constraints.Interval(
+            torch.tensor(min_noise),
+            torch.tensor(max_noise),
+        ),
+    )
+
+    input_transform = botorch.models.transforms.input.Normalize(d=X.shape[-1])
+    outcome_transform = botorch.models.transforms.outcome.Standardize(m=1)  # , batch_shape=torch.Size((1,)))
+
+    if not X.isfinite().all():
+        raise ValueError("'X' must not contain points that are inf or NaN.")
+    if not y.isfinite().all():
+        raise ValueError("'y' must not contain points that are inf or NaN.")
+
+    return LatentGP(
+        train_inputs=X,
+        train_targets=y,
+        likelihood=likelihood,
+        skew_dims=skew_dims,
+        input_transform=input_transform,
+        outcome_transform=outcome_transform,
+    )
+
+
 class LatentGP(SingleTaskGP):
     def __init__(self, train_inputs, train_targets, skew_dims=True, *args, **kwargs):
         super().__init__(train_inputs, train_targets, *args, **kwargs)

From 98c008da0eec55a1f2ee5967daaf13ce3f5080fa Mon Sep 17 00:00:00 2001
From: Thomas Morris
Date: Tue, 30 Jul 2024 15:08:31 -0400
Subject: [PATCH 9/9] clean up before merge

---
 src/blop/agent.py              |  6 ++---
 src/blop/digestion/__init__.py | 42 +---------------------------------
 src/blop/sim/beamline.py       |  3 +--
 src/blop/utils/__init__.py     |  4 ++++
 4 files changed, 8 insertions(+), 47 deletions(-)

diff --git a/src/blop/agent.py b/src/blop/agent.py
index 48b3fea..93f8bfd 100644
--- a/src/blop/agent.py
+++ b/src/blop/agent.py
@@ -67,7 +67,7 @@ def __init__(
         dofs: Sequence[DOF],
         objectives: Sequence[Objective],
         db: Broker = None,
-        detectors: Sequence[Signal] = [],
+        detectors: Sequence[Signal] = None,
         acquistion_plan=default_acquisition_plan,
         digestion: Callable = default_digestion_function,
         digestion_kwargs: dict = {},
@@ -128,7 +128,7 @@ def __init__(
         self.dofs = DOFList(list(np.atleast_1d(dofs)))
         self.objectives = ObjectiveList(list(np.atleast_1d(objectives)))
-        self.detectors = list(np.atleast_1d(detectors))
+        self.detectors = list(np.atleast_1d(detectors or []))
 
         _validate_dofs_and_objs(self.dofs, self.objectives)
@@ -838,8 +838,6 @@ def _set_hypers(self, hypers):
         self.validity_constraint.load_state_dict(hypers["validity_constraint"])
 
     def constraint(self, x):
-        # x = self.dofs(active=True).transform(x)
-
         p = torch.ones(x.shape[:-1])
         for obj in self.objectives(active=True):
             # if the targeting constraint is non-trivial
diff --git a/src/blop/digestion/__init__.py b/src/blop/digestion/__init__.py
index 45f83b2..0728b11 100644
--- a/src/blop/digestion/__init__.py
+++ b/src/blop/digestion/__init__.py
@@ -1,46 +1,6 @@
-import numpy as np
 import pandas as pd
-import scipy as sp
-
-
-def get_beam_stats(image, threshold=0.5):
-    ny, nx = image.shape
-
-    fim = image.copy()
-    fim -= np.median(fim, axis=0)
-    fim -= np.median(fim, axis=1)[:, None]
-
-    fim = sp.ndimage.median_filter(fim, size=3)
-    fim = sp.ndimage.gaussian_filter(fim, sigma=1)
-
-    m = fim > threshold * fim.max()
-    area = m.sum()
-
-    cs_x = np.cumsum(m.sum(axis=0)) / area
-    cs_y = np.cumsum(m.sum(axis=1)) / area
-
-    q_min, q_max = [0.15865, 0.84135]  # one sigma
-    q_min, q_max = [0.05, 0.95]  # 90%
-
-    x_min, x_max = np.interp([q_min, q_max], cs_x, np.arange(nx))
-    y_min, y_max = np.interp([q_min, q_max], cs_y, np.arange(ny))
-
-    stats = {
-        "max": fim.max(),
-        "sum": fim.sum(),
-        "area": area,
-        "cen_x": (x_min + x_max) / 2,
-        "cen_y": (y_min + y_max) / 2,
-        "wid_x": x_max - x_min,
-        "wid_y": y_max - y_min,
-        "x_min": x_min,
-        "x_max": x_max,
-        "y_min": y_min,
-        "y_max": y_max,
-        "bbox": [[x_min, x_max, x_max, x_min, x_min], [y_min, y_min, y_max, y_max, y_min]],
-    }
-
-    return stats
+from ..utils import get_beam_stats
 
 
 def default_digestion_function(df: pd.DataFrame) -> pd.DataFrame:
diff --git a/src/blop/sim/beamline.py b/src/blop/sim/beamline.py
index 0994e8f..b770473 100644
--- a/src/blop/sim/beamline.py
+++ b/src/blop/sim/beamline.py
@@ -111,8 +111,7 @@ def unstage(self):
     def collect_asset_docs(self):
         items = list(self._asset_docs_cache)
         self._asset_docs_cache.clear()
-        for item in items:
-            yield item
+        yield from items
 
     def generate_beam(self, noise: bool = True):
         nx, ny = self.image_shape.get()
diff --git a/src/blop/utils/__init__.py b/src/blop/utils/__init__.py
index 19d6226..92fabfb 100644
--- a/src/blop/utils/__init__.py
+++ b/src/blop/utils/__init__.py
@@ -37,6 +37,10 @@ def get_beam_stats(image, threshold=0.5):
         "cen_y": (y_min + y_max) / 2,
         "wid_x": x_max - x_min,
         "wid_y": y_max - y_min,
+        "x_min": x_min,
+        "x_max": x_max,
+        "y_min": y_min,
+        "y_max": y_max,
         "bbox": [[x_min, x_max, x_max, x_min, x_min], [y_min, y_min, y_max, y_max, y_min]],
     }
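
A usage note on [PATCH 7/9]: with model_inactive_objectives=True, tell(), _construct_all_models(), and _train_all_models() iterate over all objectives rather than only the active ones, so inactive objectives keep an up-to-date posterior. A minimal sketch of how the flag might be used; the DOF and Objective arguments below are hypothetical and assume blop's top-level exports:

    from blop import DOF, Agent, Objective

    # Hypothetical degrees of freedom and objectives; a real run would also
    # pass a databroker instance and a digestion function to the Agent.
    dofs = [DOF(name="x1", search_domain=(-5.0, 5.0))]
    objectives = [
        Objective(name="loss", target="min"),
        # Inactive: excluded from the acquisition, but still modeled on every
        # tell() when the flag below is set.
        Objective(name="drift", target="min", active=False),
    ]

    agent = Agent(dofs=dofs, objectives=objectives, model_inactive_objectives=True)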
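A usage note on [PATCH 8/9]: construct_single_task_model() and train_model() now live in blop.bayesian.models and take plain tensors, so a LatentGP can be built and fit without an Agent. A minimal sketch against the new functions; the toy tensors are invented, and the skew_dims value assumes the tuple format that the agent's _latent_dim_tuples() call site passes:

    import torch

    from blop.bayesian.models import construct_single_task_model, train_model

    # Toy training data: 16 points in a 2D input space with a smooth response.
    X = torch.rand(16, 2, dtype=torch.float64)
    y = torch.sin(2 * X[:, 0] - X[:, 1]).unsqueeze(-1)

    # One latent group covering both inputs (assumed format, mirroring the
    # agent's call in the diff above).
    model = construct_single_task_model(
        X=X, y=y, skew_dims=[(0, 1)], min_noise=1e-6, max_noise=1e0
    )
    train_model(model)  # fits hyperparameters via botorch.fit.fit_gpytorch_mll

    # The result behaves like any botorch model.
    print(model.posterior(torch.rand(4, 2, dtype=torch.float64)).mean.shape)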
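A usage note on [PATCH 9/9]: get_beam_stats() now has a single home in blop.utils, with the x_min/x_max/y_min/y_max keys that previously existed only in the digestion copy, and blop.digestion imports it from there. A quick sketch on a synthetic image; the Gaussian spot below is invented for illustration:

    import numpy as np

    from blop.utils import get_beam_stats

    # A 300x400 image with an elliptical Gaussian spot centered at (220, 140).
    Y, X = np.mgrid[0:300, 0:400]
    image = np.exp(-0.5 * (((X - 220) / 30) ** 2 + ((Y - 140) / 15) ** 2))

    stats = get_beam_stats(image, threshold=0.5)
    print(stats["cen_x"], stats["cen_y"])  # should land near (220, 140)
    print(stats["x_min"], stats["x_max"])  # keys consolidated into blop.utils here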