# interactive.py

import os
import numpy as np
import pickle
from matplotlib import pyplot as plt
from matplotlib.widgets import Slider, CheckButtons

import matplotlib as mpl
mpl.use("TkAgg")

# Selectable optimizer/loss labels, each mapped to the tag that get_name()
# embeds in the corresponding result file names.
OPTIM_LOSS_MAP = {
    "Adam/MSE": "Adam",
    "SGD/MSE": "SGD",
    "Adam/MatNorm": "MNorm1e3 Adam",
}

# The optimizer/loss labels alone, in declaration order (used for UI ordering).
OPTIM_LOSS = list(OPTIM_LOSS_MAP)

# Batch sizes for which result files are looked up.
BATCH_SIZES = [
    0,
    32,
    128,
]

# Keys of the metric matrices read from the result archives, in plot order.
METRICS = [
    "TestLoss",
    "Loss",
    "DesignMatNorm",
    "RSquared",
]

# Legend text used for each metric when a figure is exported.
METRIC_LABELS = {
    "Loss": "Loss (Train)",
    "TestLoss": "Loss (Test)",
    "DesignMatNorm": r"$|| M^\dagger ||$",
    "RSquared": "$R^2$",
}

def get_name(name_base, optim_loss, batch_size):
    """Build the extension-less path of one result set.

    Args:
        name_base: Leading part of the result file name.
        optim_loss: A key of ``OPTIM_LOSS_MAP``; its mapped tag is what ends
            up in the path.
        batch_size: Batch size, inserted into the path as-is.

    Returns:
        A path under ``results/`` without any file extension.

    Raises:
        KeyError: If ``optim_loss`` is not a key of ``OPTIM_LOSS_MAP``.
    """
    optim_tag = OPTIM_LOSS_MAP[optim_loss]
    return f"results/{name_base} {optim_tag} {batch_size}"

def find_files(name_base):
    """Collect the (optimizer/loss, batch size) pairs that have a result archive.

    Every combination of ``OPTIM_LOSS`` and ``BATCH_SIZES`` is probed; a pair
    is kept when the path from ``get_name`` plus ``.npz`` exists on disk.

    Args:
        name_base: Leading part of the result file names.

    Returns:
        A set of ``(optim_loss, batch_size)`` tuples.
    """
    return {
        (optim_loss, batch_size)
        for optim_loss in OPTIM_LOSS
        for batch_size in BATCH_SIZES
        if os.path.exists(f"{get_name(name_base, optim_loss, batch_size)}.npz")
    }

class AliasingVisualizer:
    """Interactive matplotlib viewer for saved training-metric curves.

    One result set is displayed at a time: a ``.npz`` archive holding one
    matrix per metric plus a pickled list of model specs.  The metrics are
    drawn either against the epoch index for a single model complexity, or
    against model complexity (``sum(spec)``) at a single epoch.  Check boxes
    choose the visible metrics, the axis scaling, normalization, the
    optimizer/loss pair and the batch size; one of two sliders picks the
    complexity or the epoch, depending on the mode.

    Constructing an instance builds the figure, loads the initial result set
    and then calls ``plt.show()``.
    """

    # Spec tuples in the order they were read from the pickle file.
    specs: list[tuple[int, ...]]
    # Maps each spec to the row indices of its models in the result matrices.
    spec_indices: dict[tuple[int, ...], list[int]]

    # Number of consecutive result-matrix rows assigned to each spec.
    # NOTE(review): what the rows of one spec represent (e.g. repeated runs)
    # is not visible here -- confirm against the code that writes the results.
    ROWS_PER_SPEC = 8

    def __init__(self, title="Convergence of Housing Models", name_base="Giant House Group", optim_loss="Adam/MSE", batch_size=0):
        """Build the figure and widgets, load the first result set and show it.

        Args:
            title: Prefix of the plot title and of exported figure file names.
            name_base: Leading part of the result file names (see ``get_name``).
            optim_loss: Initially selected key of ``OPTIM_LOSS_MAP``.
            batch_size: Initially selected entry of ``BATCH_SIZES``.

        Raises:
            Whatever ``load_data`` raises when the initial result files cannot
            be read (e.g. ``FileNotFoundError``).
        """
        self.title = title
        self.name_base = name_base
        self.optim_loss = None
        self.batch_size = None
        self.results = None
        self.normalize = True
        self.show_epoch = 0  # 0-based column index into the result matrices
        self.show_complexity = 0
        self.over_time = True
        self.lines = {}

        # Determine which (optimizer/loss, batch size) combinations exist on disk
        self._valid_files = find_files(name_base)

        # Instantiate the plot; every later call goes through self.ax so that
        # other pyplot figures (e.g. the export figure) can never be targeted.
        self.fig = plt.figure(figsize=(10, 6))
        self.fig.subplots_adjust(bottom=0.27)
        self.ax = self.fig.add_subplot()
        ax = self.ax
        for metric in METRICS:
            self.lines[metric], = ax.plot([], [], label=metric)

        # Override the save function.  This reaches into the toolbar's private
        # ``_buttons`` mapping, so it depends on the Tk toolbar implementation.
        self.fig.canvas.manager.toolbar._buttons["Save"].configure(command=self._save_figure)

        # Handle autoscaling
        ax.set_autoscale_on(True)

        # Construct UI elements
        # Metric selector (check boxes coloured like their lines)
        metric_ax = ax.inset_axes([0., -.34, .235, .2])
        line_colors = [line.get_color() for line in self.lines.values()]
        self._ui_metric_check = CheckButtons(
            ax=metric_ax,
            labels=METRICS,
            actives=[True] * len(METRICS),
            label_props={'color': line_colors},
            frame_props={'edgecolor': line_colors},
            check_props={'facecolor': line_colors},
        )
        self._ui_metric_check.on_clicked(self._update_metrics)

        # Graph settings; the initial states mirror the attribute defaults above
        graph_ax = ax.inset_axes([.255, -.34, .235, .2])
        self._ui_graph_check = CheckButtons(
            ax=graph_ax,
            labels=["X Log", "Y Log", "Normalize", "By Epoch"],
            actives=[False, True, True, True]
        )
        self._ui_graph_check.on_clicked(self._update_settings)
        self.over_time = True
        ax.set_yscale("log")

        # Optimizer/Loss selector; tick the entry that is actually loaded below
        optim_ax = ax.inset_axes([.51, -.34, .235, .2])
        self._ui_optim_check = CheckButtons(
            ax=optim_ax,
            labels=OPTIM_LOSS,
            actives=[name == optim_loss for name in OPTIM_LOSS]
        )
        self._ui_optim_check.on_clicked(self._update_optim)

        # Batch size selector; tick the entry that is actually loaded below
        batch_ax = ax.inset_axes([.765, -.34, .235, .2])
        self._ui_batch_check = CheckButtons(
            ax=batch_ax,
            labels=[f"{v} Batches" for v in BATCH_SIZES],
            actives=[size == batch_size for size in BATCH_SIZES]
        )
        self._ui_batch_check.on_clicked(self._update_batch)

        # Epoch slider (placeholder range; rebuilt by load_data)
        epoch_ax = ax.inset_axes([.05, -.43, .35, .07])
        self._ui_epoch_slider = Slider(ax=epoch_ax, label="Epoch", valmin=1, valmax=2, valinit=1, valstep=1)
        self._ui_epoch_slider.on_changed(self._update_epoch)

        # Complexity slider (placeholder range; rebuilt by load_data)
        comp_ax = ax.inset_axes([.6, -.43, .4, .07])
        self._ui_comp_slider = Slider(ax=comp_ax, label="Complexity", valmin=0, valmax=1, valinit=0, valstep=1)
        self._ui_comp_slider.on_changed(self._update_complexity)

        # Load in the data
        self.load_data(optim_loss, batch_size)

        ax.set_ylabel("Value")
        plt.show()

    def _figure_path(self, index):
        """Return the export path for this title with the given running number."""
        return f"figures/{self.title}-{index}.pdf"

    def _save_figure(self):
        """Export the currently visible lines to a new PDF under ``figures/``.

        A separate, widget-free figure is drawn with the same data, colours,
        axis scales, labels and title, written to the first unused
        ``figures/<title>-<n>.pdf`` and then closed again.
        """
        export_fig, export_ax = plt.subplots()
        try:
            for metric in METRICS:
                line = self.lines[metric]
                if not line.get_visible():
                    continue
                export_ax.plot(*line.get_data(), label=METRIC_LABELS[metric], c=line.get_color())

            # Only "log" is carried over; any other scale is exported as linear
            export_ax.set_yscale("log" if self.ax.get_yscale() == "log" else "linear")
            export_ax.set_xscale("log" if self.ax.get_xscale() == "log" else "linear")
            export_ax.set_xlabel("Epoch" if self.over_time else "Number of Parameters")
            export_ax.set_ylabel("Value")
            if self.over_time:
                export_ax.set_title(f"{self.title}, {self.show_complexity} Parameters")
            else:
                export_ax.set_title(f"{self.title}, Epoch {self.show_epoch}")
            export_ax.legend()

            # Save it under the first file name that is not taken yet
            os.makedirs("figures", exist_ok=True)
            index = 0
            while os.path.exists(self._figure_path(index)):
                index += 1
            export_fig.tight_layout()
            export_fig.savefig(self._figure_path(index))
        finally:
            # Always release the temporary figure so repeated exports do not
            # accumulate figures or leave it as pyplot's current figure.
            plt.close(export_fig)

    def _update_metrics(self, label: str):
        """Toggle the visibility of the line named ``label`` and rescale."""
        line = self.lines[label]
        line.set_visible(not line.get_visible())

        # Recompute the data limits from the visible lines, then rescale
        self.ax.relim(visible_only=True)
        self.ax.autoscale()

        self.fig.canvas.draw_idle()

    def _update_settings(self, label: str):
        """React to a click in the graph-settings check boxes.

        "X Log" / "Y Log" flip the respective axis between log and linear;
        "Normalize" and "By Epoch" flip the matching flag and redraw the data.
        """
        if label == "X Log":
            self.ax.set_xscale("linear" if self.ax.get_xscale() == "log" else "log")
        elif label == "Y Log":
            self.ax.set_yscale("linear" if self.ax.get_yscale() == "log" else "log")
        else:
            if label == "Normalize":
                self.normalize = not self.normalize
            elif label == "By Epoch":
                self.over_time = not self.over_time
            self.update_data()
        self.fig.canvas.draw_idle()

    def _update_optim(self, label: str):
        """Make the optimizer/loss boxes behave as a single-choice selector.

        The clicked entry is selected and loaded if a result file exists for
        it with the current batch size; otherwise the previous selection is
        restored.
        """
        check = self._ui_optim_check
        # Silence the widget while it is reprogrammed to avoid re-entering here
        check.eventson = False
        try:
            check.clear()
            if (label, self.batch_size) in self._valid_files:
                check.set_active(OPTIM_LOSS.index(label), True)
                self.load_data(label, self.batch_size)
            else:
                check.set_active(OPTIM_LOSS.index(self.optim_loss), True)
        finally:
            # Re-enable events even if loading failed, so the UI stays usable
            check.eventson = True

    def _update_batch(self, label: str):
        """Make the batch-size boxes behave as a single-choice selector.

        ``label`` has the form ``"<size> Batches"``.  The clicked size is
        selected and loaded if a result file exists for it with the current
        optimizer/loss; otherwise the previous selection is restored.
        """
        check = self._ui_batch_check
        # Silence the widget while it is reprogrammed to avoid re-entering here
        check.eventson = False
        try:
            check.clear()
            size = int(label.split(" ")[0])
            if (self.optim_loss, size) in self._valid_files:
                check.set_active(BATCH_SIZES.index(size), True)
                self.load_data(self.optim_loss, size)
            else:
                check.set_active(BATCH_SIZES.index(self.batch_size), True)
        finally:
            # Re-enable events even if loading failed, so the UI stays usable
            check.eventson = True

    def _update_epoch(self, val: float):
        """Slider callback: the slider is 1-based, ``show_epoch`` is 0-based."""
        self.show_epoch = int(val) - 1
        self.update_data()

    def _update_complexity(self, val: float):
        """Slider callback: store the requested parameter count and redraw."""
        self.show_complexity = int(val)
        self.update_data()

    @staticmethod
    def _normalize(values: np.ndarray) -> np.ndarray:
        """Scale ``values`` by the largest finite entry.

        Non-finite entries are ignored when looking for the maximum, so a
        single NaN/inf no longer turns the whole curve into NaN.  The input is
        returned unchanged when it has no finite entry or the maximum is 0.
        """
        finite = values[np.isfinite(values)]
        if not finite.size:
            return values
        peak = np.max(finite)
        if peak == 0:
            return values
        return values / peak

    def _process_model_set(self, indices: list[int], matrix: np.ndarray):
        """Average the rows of ``matrix`` that stay finite the longest.

        Args:
            indices: Row indices to consider; indices beyond the matrix are
                skipped.
            matrix: Metric matrix with one row per model.

        Returns:
            The element-wise mean of the rows sharing the largest position of
            their first non-finite value (rows without any non-finite value
            count as full length), or an empty array if no index was usable.
        """
        # Group the rows by the position of their first inf/nan value
        good_counts = {}
        for i in indices:
            if i >= matrix.shape[0]:
                continue
            data = matrix[i]
            good = np.min(np.argwhere(~np.isfinite(data)), initial=len(data))
            good_counts.setdefault(good, []).append(data)

        # Get the biggest group & average the values
        if not good_counts:
            return np.array([])
        return np.mean(np.array(good_counts[max(good_counts)]), axis=0)

    def _update_over_epoch(self):
        """Plot every metric against the epoch index for one complexity.

        The spec whose ``sum`` is closest to ``show_complexity`` is used.
        """
        # Handle title and x-axis label appropriately
        self.ax.set_title(f"{self.title}, {self.show_complexity} Parameters")
        self.ax.set_xlabel("Epoch")

        # Determine the x-axis
        x = np.arange(self.n_epochs)

        # Find the models of the chosen complexity
        closest = min(self.spec_indices, key=lambda spec: abs(sum(spec) - self.show_complexity))
        indices = self.spec_indices[closest]

        # Update the line for each metric
        for metric in METRICS:
            res = self._process_model_set(indices, self.results[metric])
            if self.normalize:
                res = self._normalize(res)
            # A metric may cover fewer epochs than the longest one
            self.lines[metric].set_data(x[:len(res)], res)

    def _update_over_complexity(self):
        """Plot every metric against ``sum(spec)`` at epoch ``show_epoch``.

        Specs with the same sum are averaged; a spec without a value at the
        chosen epoch contributes ``inf``.  The "Normalize" setting is not
        applied in this mode.
        """
        # Handle title and x-axis label appropriately
        self.ax.set_title(f"{self.title}, Epoch {self.show_epoch}")
        self.ax.set_xlabel("Number of Parameters")

        # Update the line for each metric
        for metric in METRICS:
            matrix = self.results[metric]
            complex_map = {}
            for spec, rows in self.spec_indices.items():
                res = self._process_model_set(rows, matrix)
                v = res[self.show_epoch] if len(res) > self.show_epoch else np.inf
                complex_map.setdefault(sum(spec), []).append(v)

            x = sorted(complex_map)
            self.lines[metric].set_data(x, [np.mean(complex_map[k]) for k in x])

    def update_data(self):
        """Refresh all lines for the current mode, sliders and settings."""
        if self.over_time:
            self._update_over_epoch()
        else:
            self._update_over_complexity()

        # Ensure the correct slider is shown: complexity when plotting over
        # epochs, epoch when plotting over complexity
        self._ui_epoch_slider.eventson = not self.over_time
        self._ui_epoch_slider.ax.set_visible(not self.over_time)
        self._ui_comp_slider.eventson = self.over_time
        self._ui_comp_slider.ax.set_visible(self.over_time)

        # Recompute the data limits from the visible lines, then rescale
        self.ax.relim(visible_only=True)
        self.ax.autoscale()
        self.fig.canvas.draw_idle()

    def load_data(self, optim_loss, batch_size):
        """Load one result set and rebuild the sliders for it.

        Does nothing if the requested combination is already loaded.

        Args:
            optim_loss: Key of ``OPTIM_LOSS_MAP`` selecting the result files.
            batch_size: Batch size selecting the result files.

        Raises:
            FileNotFoundError: If the spec or archive file does not exist.
        """
        # Ensure we need to
        if self.optim_loss == optim_loss and self.batch_size == batch_size:
            return

        # Read in the files
        base_name = get_name(self.name_base, optim_loss, batch_size)
        # NOTE: pickle.load can execute arbitrary code from the file; only
        # point this tool at result files you produced yourself.
        with open(base_name + " specs.pkl", "rb") as f:
            specs = pickle.load(f)
        # Copy the arrays out and close the archive: an open NpzFile re-reads
        # an array from disk on every lookup and keeps the file handle alive.
        with np.load(base_name + ".npz") as archive:
            results = {key: archive[key] for key in archive.files}

        # Only record the new selection once both files were read successfully
        self.optim_loss, self.batch_size = optim_loss, batch_size
        self.results = results
        self.specs = specs
        self.spec_indices = {}
        for i, s in enumerate(self.specs):
            first_row = self.ROWS_PER_SPEC * i
            self.spec_indices.setdefault(s, []).extend(range(first_row, first_row + self.ROWS_PER_SPEC))

        # Determine number of epochs & update display (start 3/4 of the way in)
        self.n_epochs = max(matrix.shape[1] for matrix in results.values())
        self.show_epoch = int(self.n_epochs * .75)
        self.show_complexity = sum(specs[int(len(specs) * .75)])

        # Update the epoch slider.  It is 1-based while show_epoch is a
        # 0-based index, hence the +1 (which also keeps valinit >= valmin).
        self._ui_epoch_slider.ax.clear()
        self._ui_epoch_slider.disconnect_events()
        self._ui_epoch_slider = Slider(
            ax=self._ui_epoch_slider.ax,
            label="Epoch",
            valmin=1, valmax=self.n_epochs, valinit=self.show_epoch + 1, valstep=1
        )
        self._ui_epoch_slider.on_changed(self._update_epoch)

        # Update the complexity slider; it snaps to the available spec sums.
        # Assumes specs are ordered by increasing sum -- TODO confirm.
        self._ui_comp_slider.ax.clear()
        self._ui_comp_slider.disconnect_events()
        self._ui_comp_slider = Slider(
            ax=self._ui_comp_slider.ax,
            label="Complexity", valmin=sum(specs[0]), valmax=sum(specs[-1]), valinit=self.show_complexity, valstep=[sum(s) for s in specs]
        )
        self._ui_comp_slider.on_changed(self._update_complexity)

        # Update allowed selections: grey out combinations without a file
        self._ui_optim_check.set_label_props({'color': ["black" if (v, batch_size) in self._valid_files else "lightgray" for v in OPTIM_LOSS_MAP]})
        self._ui_batch_check.set_label_props({'color': ["black" if (optim_loss, v) in self._valid_files else "lightgray" for v in BATCH_SIZES]})

        # Update the display
        self.update_data()

if __name__ == "__main__":
    # Running the file as a script builds the visualizer with its default
    # arguments; the constructor itself ends by calling plt.show().
    AliasingVisualizer()