diff --git a/README.md b/README.md index 12a0916..2266bb4 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,6 @@ Each experiment directory must contain: - `.hydra/config.yaml` - Hydra configuration file - `input_output.json` - Experiment input/output data -- `scores.json` - Scoring results - `timing.json` - Timing information **Example Structure:** @@ -49,14 +48,16 @@ experiments/ │ ├── affiliation-0.0/ │ │ ├── .hydra/config.yaml │ │ ├── input_output.json -│ │ ├── scores.json +│ │ ├── scores.json # optional │ │ └── timing.json -│ ├── affiliation-0.1/ -│ │ └── ... -│ └── ... -├── pipeline_random/ -│ └── ... -└── pipeline_other/ +│ └── affiliation-0.1/ +│ └── ... +├── deeply/nested/structure/ +│ └── experiment_dir/ +│ ├── .hydra/config.yaml +│ ├── input_output.json +│ └── timing.json +└── any_organization_works/ └── ... ``` @@ -64,7 +65,7 @@ experiments/ The build system will automatically: -- **Recursively search** through all subdirectories +- **Recursively search** through all subdirectories at any depth - **Skip directories** containing `OUTDATED` in their path (case-insensitive) - **Only process directories** that contain all required files diff --git a/align_browser/build.py b/align_browser/build.py index 15f1cd3..48e5734 100644 --- a/align_browser/build.py +++ b/align_browser/build.py @@ -88,18 +88,10 @@ def build_frontend( data_output_dir.mkdir(exist_ok=True) # Parse experiments and build manifest - experiments = parse_experiments_directory(experiments_root, data_output_dir) + experiments = parse_experiments_directory(experiments_root) manifest = build_manifest_from_experiments(experiments, experiments_root) - # Add generation timestamp (deterministic for tests) - import os - - if os.getenv("PYTEST_CURRENT_TEST"): - # Use deterministic timestamp during tests - manifest.metadata["generated_at"] = "2024-01-01T00:00:00" - else: - # Use actual timestamp in production - manifest.metadata["generated_at"] = datetime.now().isoformat() + manifest.generated_at = 
datetime.now().isoformat() # Copy experiment data files copy_experiment_files(experiments, experiments_root, data_output_dir) diff --git a/align_browser/conftest.py b/align_browser/conftest.py index 5819ced..a16c7fe 100644 --- a/align_browser/conftest.py +++ b/align_browser/conftest.py @@ -116,6 +116,34 @@ def ensure_kdma_slider_value(page, selector, value): return False +def ensure_dropdown_selection(page, selector, required_value, description="dropdown"): + """ + Ensures a dropdown has the required value selected. + Fails if the required value cannot be ensured (either by selection or auto-selection). + + Args: + page: Playwright page object + selector: CSS selector for the select element + required_value: The required value that must be selected + description: Human-readable description for error messages + + Raises: + AssertionError: If the required value cannot be ensured + """ + dropdown = page.locator(selector).first + + if dropdown.is_enabled(): + # Dropdown is enabled - try to select the required value + dropdown.select_option(required_value) + page.wait_for_load_state("networkidle") + + # Verify the required value is now selected (whether we selected it or it was auto-selected) + current_value = dropdown.input_value() + assert current_value == required_value, ( + f"{description} dropdown must have '{required_value}' selected, but has '{current_value}'" + ) + + class FrontendTestServer: """HTTP server for serving the built frontend during tests.""" diff --git a/align_browser/experiment_models.py b/align_browser/experiment_models.py index 0a54c44..01d55a0 100644 --- a/align_browser/experiment_models.py +++ b/align_browser/experiment_models.py @@ -2,12 +2,35 @@ import json import yaml -import re +import hashlib +import os from pathlib import Path from typing import List, Dict, Any, Optional from pydantic import BaseModel, Field, ConfigDict +def calculate_file_checksum(file_path: Path) -> str: + """Calculate SHA256 checksum of a file.""" + if not 
file_path.exists(): + return "" + + sha256_hash = hashlib.sha256() + with open(file_path, "rb") as f: + # Read in chunks to handle large files efficiently + for chunk in iter(lambda: f.read(4096), b""): + sha256_hash.update(chunk) + + return f"sha256:{sha256_hash.hexdigest()}" + + +def calculate_file_checksums(file_paths: List[Path]) -> Dict[str, str]: + """Calculate checksums for multiple files.""" + checksums = {} + for file_path in file_paths: + checksums[str(file_path)] = calculate_file_checksum(file_path) + return checksums + + class KDMAValue(BaseModel): """Represents a KDMA (Key Decision Making Attributes) value.""" @@ -20,30 +43,51 @@ def parse_alignment_target_id(alignment_target_id: str) -> List[KDMAValue]: """ Parse alignment_target_id string to extract KDMA values. - Examples: - "ADEPT-June2025-merit-0.0" -> [KDMAValue(kdma="merit", value=0.0)] - "ADEPT-June2025-affiliation-0.5" -> [KDMAValue(kdma="affiliation", value=0.5)] + Supports both single and multi-KDMA formats: + - Single: "ADEPT-June2025-merit-0.0" -> [KDMAValue(kdma="merit", value=0.0)] + - Multi: "ADEPT-June2025-affiliation_merit-0.0_0.0" -> + [KDMAValue(kdma="affiliation", value=0.0), KDMAValue(kdma="merit", value=0.0)] + - Unaligned: "unaligned" -> [] (no KDMAs) Args: - alignment_target_id: String like "ADEPT-June2025-merit-0.0" + alignment_target_id: String like "ADEPT-June2025-merit-0.0", + "ADEPT-June2025-affiliation_merit-0.0_0.0", or "unaligned" Returns: List of KDMAValue objects """ - if not alignment_target_id: + if not alignment_target_id or alignment_target_id == "unaligned": + return [] + + # Split by hyphens: [prefix, scenario, kdma_part, value_part] + parts = alignment_target_id.split("-") + if len(parts) < 4: return [] - # Pattern: {prefix}-{scenario}-{kdma}-{value} - pattern = r"^[^-]+-[^-]+-(.+)-(\d+(?:\.\d+)?)$" - match = re.match(pattern, alignment_target_id) + # Extract KDMA names and values from the last two parts + kdma_part = parts[-2] # e.g., "affiliation_merit" or 
"merit" + value_part = parts[-1] # e.g., "0.0_0.0" or "0.0" + + # Split KDMA names by underscore + kdma_names = kdma_part.split("_") + + # Split values by underscore and convert to float + try: + value_strings = value_part.split("_") + values = [float(v) for v in value_strings] + except ValueError: + return [] - if not match: + # Ensure we have the same number of KDMAs and values + if len(kdma_names) != len(values): return [] - kdma_name = match.group(1) - value = float(match.group(2)) + # Create KDMAValue objects + kdma_values = [] + for kdma_name, value in zip(kdma_names, values): + kdma_values.append(KDMAValue(kdma=kdma_name, value=value)) - return [KDMAValue(kdma=kdma_name, value=value)] + return kdma_values class AlignmentTarget(BaseModel): @@ -74,7 +118,7 @@ class ExperimentConfig(BaseModel): name: str = "unknown" adm: ADMConfig = Field(default_factory=ADMConfig) alignment_target: AlignmentTarget = Field(default_factory=AlignmentTarget) - run_variant: Optional[str] = None + run_variant: str = "default" def generate_key(self) -> str: """Generate a unique key for this experiment configuration.""" @@ -82,11 +126,39 @@ def generate_key(self) -> str: f"{kv.kdma}-{kv.value}" for kv in self.alignment_target.kdma_values ] kdma_string = "_".join(sorted(kdma_parts)) - base_key = f"{self.adm.name}_{self.adm.llm_backbone}_{kdma_string}" + return ( + f"{self.adm.name}:{self.adm.llm_backbone}:{kdma_string}:{self.run_variant}" + ) + + def generate_experiment_key(self, experiment_path: Path = None) -> str: + """Generate hash-based experiment key for new manifest structure.""" + key_data = { + "adm": self.adm.name, + "llm": self.adm.llm_backbone if self.adm.llm_backbone != "no_llm" else None, + "kdma": self._get_kdma_key(), + "run_variant": self.run_variant, + } + + # Add experiment path to ensure uniqueness across different directories + if experiment_path: + key_data["path"] = str(experiment_path) + + # Create deterministic hash from sorted key data + key_string = 
json.dumps(key_data, sort_keys=True) + hash_obj = hashlib.sha256(key_string.encode("utf-8")) + hash_hex = hash_obj.hexdigest() - if self.run_variant: - return f"{base_key}_{self.run_variant}" - return base_key + return f"exp_{hash_hex[:8]}" + + def _get_kdma_key(self) -> str: + """Generate KDMA key component for experiment identification.""" + if not self.alignment_target.kdma_values: + return "unaligned" + + kdma_parts = [ + f"{kv.kdma}-{kv.value}" for kv in self.alignment_target.kdma_values + ] + return "_".join(sorted(kdma_parts)) class InputData(BaseModel): @@ -104,6 +176,7 @@ class InputOutputItem(BaseModel): input: InputData output: Optional[Dict[str, Any]] = None + original_index: int # Index in the original file class ScenarioTiming(BaseModel): @@ -120,6 +193,7 @@ class TimingData(BaseModel): """Represents timing data from timing.json.""" scenarios: List[ScenarioTiming] + raw_times_s: List[float] # Indices map to list in input_output.json class InputOutputFile(BaseModel): @@ -133,17 +207,13 @@ def from_file(cls, path: Path) -> "InputOutputFile": with open(path) as f: raw_data = json.load(f) - # Process data to append index to duplicate scenario_ids - processed_data = [] - for i, item in enumerate(raw_data): - # Create a copy of the item - item_copy = item.copy() - # Append index to scenario_id to make it unique - original_scenario_id = item_copy["input"]["scenario_id"] - item_copy["input"]["scenario_id"] = f"{original_scenario_id}-{i}" - processed_data.append(item_copy) + # Convert to InputOutputItem objects with original indices + items = [] + for i, item_data in enumerate(raw_data): + item = InputOutputItem(**item_data, original_index=i) + items.append(item) - return cls(data=processed_data) + return cls(data=items) @property def first_scenario_id(self) -> str: @@ -208,15 +278,19 @@ def from_directory(cls, experiment_dir: Path) -> "ExperimentData": ) @classmethod - def from_directory_new_format( + def from_directory_mixed_kdma( cls, experiment_dir: Path, 
alignment_target_id: str, - filtered_data: List[Dict[str, Any]], - input_output_file_path: Path = None, - timing_file_path: Path = None, + filtered_data: List[InputOutputItem], ) -> "ExperimentData": - """Load experiment data from new format directory for a specific alignment target.""" + """Load experiment data from mixed KDMA directory for a specific alignment target. + + Mixed KDMA format: Handles experiments where different scenes have different KDMA + configurations, with KDMAs defined per scene in alignment_target_id rather than config.yaml. + + This method works with logical filtering - the original files remain intact. + """ # Load config config_path = experiment_dir / ".hydra" / "config.yaml" with open(config_path) as f: @@ -232,7 +306,7 @@ def from_directory_new_format( config_data["alignment_target"] = alignment_target.model_dump() config = ExperimentConfig(**config_data) - # Create input_output from filtered data + # Create input_output from the logically filtered data (already InputOutputItems) input_output = InputOutputFile(data=filtered_data) # Load scores if available @@ -241,15 +315,13 @@ def from_directory_new_format( if scores_path.exists(): scores = ScoresFile.from_file(scores_path) - # Use specific timing file if provided, otherwise fall back to default - timing_path = ( - timing_file_path if timing_file_path else experiment_dir / "timing.json" - ) + # Load timing data from default location + timing_path = experiment_dir / "timing.json" with open(timing_path) as f: timing_data = json.load(f) timing = TimingData(**timing_data) - # Store the specific file paths for the manifest + # Create experiment instance experiment = cls( config=config, input_output=input_output, @@ -258,10 +330,6 @@ def from_directory_new_format( experiment_path=experiment_dir, ) - # Store the specific file paths as attributes for manifest generation - experiment._input_output_file_path = input_output_file_path - experiment._timing_file_path = timing_file_path - return 
experiment @property @@ -284,130 +352,222 @@ def has_required_files(cls, experiment_dir: Path) -> bool: ] return all((experiment_dir / f).exists() for f in required_files) - @classmethod - def is_new_format(cls, experiment_dir: Path) -> bool: - """Check if directory uses new format (no alignment_target in config).""" - if not cls.has_required_files(experiment_dir): - return False - config_path = experiment_dir / ".hydra" / "config.yaml" - try: - with open(config_path) as f: - config_data = yaml.safe_load(f) - return "alignment_target" not in config_data - except Exception: - return False +# Enhanced Manifest Models for New Structure +class SceneInfo(BaseModel): + """Information about a scene within a scenario.""" + + source_index: int # Index in the source input_output.json file + scene_id: str # Scene ID from meta_info.scene_id + timing_s: float # Timing from timing.json raw_times_s[source_index] + + +class InputOutputFileInfo(BaseModel): + """File information for input_output data.""" + + file: str # Path to the file + checksum: str # SHA256 checksum for integrity + alignment_target_filter: Optional[str] = None # Filter for multi-experiment files -# Output Models for Frontend Consumption -class ExperimentSummary(BaseModel): - """Summary of experiment data for the manifest.""" +class Scenario(BaseModel): + """Enhanced scenario structure with scene mapping.""" - input_output: str # Path to input_output.json + input_output: InputOutputFileInfo scores: Optional[str] = None # Path to scores.json timing: str # Path to timing.json - config: Dict[str, Any] # Full experiment configuration + scenes: Dict[str, SceneInfo] = Field(default_factory=dict) # scene_id -> SceneInfo -class ScenarioManifest(BaseModel): - """Manifest entry for scenarios within an experiment key.""" +class Experiment(BaseModel): + """Enhanced experiment structure with flexible parameters.""" - scenarios: Dict[str, ExperimentSummary] = Field(default_factory=dict) + parameters: Dict[str, Any] # Flexible 
parameter structure + scenarios: Dict[str, Scenario] = Field( + default_factory=dict + ) # scenario_id -> scenario -class GlobalManifest(BaseModel): - """Top-level manifest for all experiments.""" +class FileInfo(BaseModel): + """Metadata about a source file.""" + + checksum: str # SHA256 checksum + size: int # File size in bytes + experiments: List[str] = Field( + default_factory=list + ) # Experiment keys using this file + + +class ManifestIndices(BaseModel): + """Indices for fast experiment lookups.""" + + by_adm: Dict[str, List[str]] = Field(default_factory=dict) + by_llm: Dict[str, List[str]] = Field(default_factory=dict) + by_kdma: Dict[str, List[str]] = Field(default_factory=dict) + by_scenario: Dict[str, List[str]] = Field(default_factory=dict) - experiment_keys: Dict[str, ScenarioManifest] = Field(default_factory=dict) - metadata: Dict[str, Any] = Field(default_factory=dict) - def add_experiment(self, experiment: "ExperimentData", experiments_root: Path): - """Add an experiment to the manifest.""" - key = experiment.key +class Manifest(BaseModel): + """Global manifest with hierarchical structure and integrity validation.""" - # Calculate relative path + manifest_version: str = "1.0" + generated_at: str + metadata: Dict[str, Any] = Field(default_factory=dict) + experiments: Dict[str, Experiment] = Field(default_factory=dict) + indices: ManifestIndices = Field(default_factory=ManifestIndices) + files: Dict[str, FileInfo] = Field(default_factory=dict) + + def add_experiment( + self, + experiment: "ExperimentData", + experiments_root: Path, + source_file_checksums: Dict[str, str], + ): + """Add an experiment to the enhanced manifest.""" + # Generate experiment key with path for uniqueness + exp_key = experiment.config.generate_experiment_key(experiment.experiment_path) + + # Create parameter structure + parameters = { + "adm": { + "name": experiment.config.adm.name, + "instance": experiment.config.adm.instance, + }, + "llm": None + if 
experiment.config.adm.llm_backbone == "no_llm" + else { + "model_name": experiment.config.adm.llm_backbone, + # Add other LLM config from structured_inference_engine if available + **(experiment.config.adm.structured_inference_engine or {}), + }, + "kdma_values": [ + kv.model_dump() for kv in experiment.config.alignment_target.kdma_values + ], + "alignment_target_id": experiment.config.alignment_target.id, + "run_variant": experiment.config.run_variant, + } + + # Calculate relative paths relative_experiment_path = experiment.experiment_path.relative_to( experiments_root ) - # Ensure key exists - if key not in self.experiment_keys: - self.experiment_keys[key] = ScenarioManifest() - - # Use specific file paths if available (for new format), otherwise default paths - input_output_filename = "input_output.json" - timing_filename = "timing.json" + # Use standard file paths + input_output_path = str( + Path("data") / relative_experiment_path / "input_output.json" + ) + timing_path = str(Path("data") / relative_experiment_path / "timing.json") - if ( - hasattr(experiment, "_input_output_file_path") - and experiment._input_output_file_path - ): - input_output_filename = experiment._input_output_file_path.name - if hasattr(experiment, "_timing_file_path") and experiment._timing_file_path: - timing_filename = experiment._timing_file_path.name + # Get checksum for input_output file + full_input_output_path = str(experiment.experiment_path / "input_output.json") + input_output_checksum = source_file_checksums.get(full_input_output_path, "") - # Add all scenarios from the input_output data - for item in experiment.input_output.data: + # Create scenario mapping - group by actual scenario_id + scenarios_dict = {} + for i, item in enumerate(experiment.input_output.data): + # Use the scenario_id as-is since we no longer add numeric suffixes scenario_id = item.input.scenario_id - scores_path = None - if experiment.scores is not None: - scores_path = str( - Path("data") / 
relative_experiment_path / "scores.json" + scene_id = "unknown" + + # Use the original index from the InputOutputItem + source_index = item.original_index + + # Extract scene_id from full_state.meta_info.scene_id if available + if item.input.full_state and isinstance(item.input.full_state, dict): + meta_info = item.input.full_state.get("meta_info", {}) + if isinstance(meta_info, dict): + scene_id = meta_info.get("scene_id", f"scene_{source_index}") + + if scenario_id not in scenarios_dict: + scores_path = None + if experiment.scores is not None: + scores_path = str( + Path("data") / relative_experiment_path / "scores.json" + ) + + scenarios_dict[scenario_id] = Scenario( + input_output=InputOutputFileInfo( + file=input_output_path, + checksum=input_output_checksum, + alignment_target_filter=experiment.config.alignment_target.id, + ), + scores=scores_path, + timing=timing_path, + scenes={}, ) - self.experiment_keys[key].scenarios[scenario_id] = ExperimentSummary( - input_output=str( - Path("data") / relative_experiment_path / input_output_filename - ), - scores=scores_path, - timing=str(Path("data") / relative_experiment_path / timing_filename), - config=experiment.config.model_dump(), + scenarios_dict[scenario_id].scenes[scene_id] = SceneInfo( + source_index=source_index, + scene_id=scene_id, + timing_s=experiment.timing.raw_times_s[source_index], ) - def get_experiment_count(self) -> int: - """Get total number of experiments in the manifest.""" - return sum( - len(scenario_manifest.scenarios) - for scenario_manifest in self.experiment_keys.values() - ) + # Create enhanced experiment + enhanced_exp = Experiment(parameters=parameters, scenarios=scenarios_dict) + + self.experiments[exp_key] = enhanced_exp + + # Update indices + self._update_indices(exp_key, parameters, scenarios_dict.keys()) + + # Update file tracking + self._update_file_info(input_output_path, input_output_checksum, exp_key) + + def _update_indices( + self, exp_key: str, parameters: Dict[str, Any], 
scenario_ids: List[str] + ): + """Update lookup indices for the experiment.""" + adm_name = parameters["adm"]["name"] + llm_name = parameters["llm"]["model_name"] if parameters["llm"] else "no-llm" + kdma_key = parameters.get("kdma_key", "unaligned") # Will be computed properly + + # Compute KDMA key from kdma_values + if not parameters["kdma_values"]: + kdma_key = "unaligned" + else: + kdma_parts = [ + f"{kv['kdma']}-{kv['value']}" for kv in parameters["kdma_values"] + ] + kdma_key = "_".join(sorted(kdma_parts)) + + # Update indices + if adm_name not in self.indices.by_adm: + self.indices.by_adm[adm_name] = [] + self.indices.by_adm[adm_name].append(exp_key) + + if llm_name not in self.indices.by_llm: + self.indices.by_llm[llm_name] = [] + self.indices.by_llm[llm_name].append(exp_key) + + if kdma_key not in self.indices.by_kdma: + self.indices.by_kdma[kdma_key] = [] + self.indices.by_kdma[kdma_key].append(exp_key) + + for scenario_id in scenario_ids: + if scenario_id not in self.indices.by_scenario: + self.indices.by_scenario[scenario_id] = [] + self.indices.by_scenario[scenario_id].append(exp_key) + + def _update_file_info(self, file_path: str, checksum: str, exp_key: str): + """Update file tracking information.""" + if file_path not in self.files: + # Calculate file size if checksum is available (file exists) + file_size = 0 + if checksum: + try: + # Convert relative path to absolute for size calculation + # Remove "data/" prefix if present to get actual path + actual_path = file_path.replace("data/", "", 1) + file_size = os.path.getsize(actual_path) + except (OSError, FileNotFoundError): + file_size = 0 + + self.files[file_path] = FileInfo( + checksum=checksum, size=file_size, experiments=[] + ) - def get_adm_types(self) -> List[str]: - """Get unique ADM types from all experiments.""" - adm_types = set() - for experiment_key in self.experiment_keys.values(): - for scenario_summary in experiment_key.scenarios.values(): - adm_name = 
scenario_summary.config.get("adm", {}).get("name", "unknown") - adm_types.add(adm_name) - return sorted(list(adm_types)) - - def get_llm_backbones(self) -> List[str]: - """Get unique LLM backbones from all experiments.""" - llm_backbones = set() - for experiment_key in self.experiment_keys.values(): - for scenario_summary in experiment_key.scenarios.values(): - adm_config = scenario_summary.config.get("adm", {}) - structured_engine = adm_config.get("structured_inference_engine", {}) - if structured_engine is not None: - model_name = structured_engine.get("model_name", "no_llm") - else: - model_name = "no_llm" - llm_backbones.add(model_name) - return sorted(list(llm_backbones)) - - def get_kdma_combinations(self) -> List[str]: - """Get unique KDMA combinations from all experiments.""" - kdma_combinations = set() - for experiment_key in self.experiment_keys.values(): - for scenario_summary in experiment_key.scenarios.values(): - alignment_target = scenario_summary.config.get("alignment_target", {}) - kdma_values = alignment_target.get("kdma_values", []) - kdma_parts = [] - for kv in kdma_values: - kdma_parts.append(f"{kv['kdma']}-{kv['value']}") - if kdma_parts: - kdma_string = "_".join(sorted(kdma_parts)) - kdma_combinations.add(kdma_string) - return sorted(list(kdma_combinations)) + if exp_key not in self.files[file_path].experiments: + self.files[file_path].experiments.append(exp_key) class ChunkedExperimentData(BaseModel): diff --git a/align_browser/experiment_parser.py b/align_browser/experiment_parser.py index 9f2ce10..d6f2fa4 100644 --- a/align_browser/experiment_parser.py +++ b/align_browser/experiment_parser.py @@ -1,14 +1,15 @@ """Parser for experiment directory structures using Pydantic models.""" import re -import json +import yaml from pathlib import Path from typing import List, Dict from collections import defaultdict from align_browser.experiment_models import ( ExperimentData, - GlobalManifest, - InputOutputItem, + Manifest, + InputOutputFile, + 
calculate_file_checksums, ) @@ -24,160 +25,128 @@ def _extract_run_variant( all_conflicting_dirs: List of all directories that have conflicts (same ADM+LLM+KDMA) Returns: - String representing the run variant, or empty string for default + String representing the run variant, or "default" for default variant """ - try: - # Get the relative path from experiments_root - relative_path = experiment_dir.relative_to(experiments_root) - path_parts = relative_path.parts - - # Skip KDMA configuration directories (contain dashes with numbers) - # Examples: merit-0.4, affiliation-0.0, personal_safety-0.5 - def is_kdma_dir(dirname): - return bool(re.match(r"^[a-z_]+-(0\.\d+|1\.0|0)$", dirname)) - - # Find the ADM-level directory (first non-KDMA directory) - adm_dir = None - for part in path_parts: - if not is_kdma_dir(part): - adm_dir = part + # Get the relative path from experiments_root + relative_path = experiment_dir.relative_to(experiments_root) + path_parts = relative_path.parts + + # Skip KDMA configuration directories (contain dashes with numbers) + # Examples: merit-0.4, affiliation-0.0, personal_safety-0.5 + def is_kdma_dir(dirname): + return bool(re.match(r"^[a-z_]+-(0\.\d+|1\.0|0)$", dirname)) + + # Find the ADM-level directory (first non-KDMA directory) + adm_dir = None + for part in path_parts: + if not is_kdma_dir(part): + adm_dir = part + break + + if not adm_dir: + return "default" + + # Extract ADM directories from all conflicting paths + conflicting_adm_dirs = set() + for conflict_dir in all_conflicting_dirs: + try: + conflict_relative = conflict_dir.relative_to(experiments_root) + conflict_parts = conflict_relative.parts + for part in conflict_parts: + if not is_kdma_dir(part): + conflicting_adm_dirs.add(part) + break + except (ValueError, AttributeError): + continue + + # If there's only one unique ADM directory, no variant needed + if len(conflicting_adm_dirs) <= 1: + return "default" + + # Find the common prefix among all conflicting ADM directories + 
adm_dir_list = sorted(conflicting_adm_dirs) + common_prefix = "" + + if len(adm_dir_list) >= 2: + # Find longest common prefix + first_dir = adm_dir_list[0] + for i, char in enumerate(first_dir): + if all(i < len(d) and d[i] == char for d in adm_dir_list): + common_prefix += char + else: break - if not adm_dir: - return "" - - # Extract ADM directories from all conflicting paths - conflicting_adm_dirs = set() - for conflict_dir in all_conflicting_dirs: - try: - conflict_relative = conflict_dir.relative_to(experiments_root) - conflict_parts = conflict_relative.parts - for part in conflict_parts: - if not is_kdma_dir(part): - conflicting_adm_dirs.add(part) - break - except (ValueError, AttributeError): - continue - - # If there's only one unique ADM directory, no variant needed - if len(conflicting_adm_dirs) <= 1: - return "" - - # Find the common prefix among all conflicting ADM directories - adm_dir_list = sorted(conflicting_adm_dirs) - common_prefix = "" - - if len(adm_dir_list) >= 2: - # Find longest common prefix - first_dir = adm_dir_list[0] - for i, char in enumerate(first_dir): - if all(i < len(d) and d[i] == char for d in adm_dir_list): - common_prefix += char - else: - break + # Remove trailing underscores + common_prefix = common_prefix.rstrip("_") - # Remove trailing underscores - common_prefix = common_prefix.rstrip("_") + # Extract variant as the unique suffix after common prefix + if common_prefix and adm_dir.startswith(common_prefix): + variant = adm_dir[len(common_prefix) :].lstrip("_") + # Use lexicographically first directory as "default" + if adm_dir == min(adm_dir_list): + return "default" + return variant if variant else "default" - # Extract variant as the unique suffix after common prefix - if common_prefix and adm_dir.startswith(common_prefix): - variant = adm_dir[len(common_prefix) :].lstrip("_") - # Use lexicographically first directory as "default" (empty string) - if adm_dir == min(adm_dir_list): - return "" - return variant if variant 
else "" + # Fallback: use the full ADM directory name if no common prefix found + # Choose the lexicographically first one as default + if adm_dir == min(conflicting_adm_dirs): + return "default" + return adm_dir - # Fallback: use the full ADM directory name if no common prefix found - # Choose the lexicographically first one as default - if adm_dir == min(conflicting_adm_dirs): - return "" - return adm_dir - except (ValueError, AttributeError): - return "" +def _create_experiments_from_directory(experiment_dir: Path) -> List[ExperimentData]: + """Create experiments from a directory, handling both uniform and mixed KDMA alignment. + This unified function handles both cases: + - Uniform KDMA: All scenes use same alignment target (defined in config.yaml) + - Mixed KDMA: Different scenes have different alignment targets (defined per input item) -def _parse_new_format_directory( - experiment_dir: Path, output_data_dir: Path = None -) -> List[ExperimentData]: - """Parse a directory with new format (mixed alignment_target_ids).""" + Returns a list of experiments (one per unique alignment target). 
+ """ experiments = [] - # Load input_output.json - input_output_path = experiment_dir / "input_output.json" - with open(input_output_path) as f: - input_output_data = json.load(f) + # Load input_output using the standard method (which now sets original_index) + input_output = InputOutputFile.from_file(experiment_dir / "input_output.json") + + # Load config to check for uniform alignment target + config_path = experiment_dir / ".hydra" / "config.yaml" + with open(config_path) as f: + config_data = yaml.safe_load(f) - # Load timing.json once - timing_path = experiment_dir / "timing.json" - with open(timing_path) as f: - full_timing_data = json.load(f) + has_config_alignment = "alignment_target" in config_data - # Group by alignment_target_id and track indices for timing filtering + # Group by alignment_target_id grouped_data = defaultdict(list) - grouped_indices = defaultdict(list) # Track original indices for timing data - for i, item in enumerate(input_output_data): - alignment_target_id = item["input"].get("alignment_target_id", "unknown") + + for item in input_output.data: + # Determine alignment target for this item + if has_config_alignment: + # Uniform KDMA: Use alignment target from config for all items + alignment_target_id = config_data["alignment_target"]["id"] + else: + # Mixed KDMA: Use alignment target from input item + alignment_target_id = item.input.alignment_target_id + if alignment_target_id is None: + alignment_target_id = "unaligned" # Handle null alignment targets + grouped_data[alignment_target_id].append(item) - grouped_indices[alignment_target_id].append(i) - # Create separate experiments for each alignment_target_id + # Create experiments for each alignment target group for alignment_target_id, items in grouped_data.items(): try: - # Create a safe filename from alignment_target_id - safe_filename = alignment_target_id.replace("/", "_").replace(":", "_") - - # Determine where to write filtered files - if output_data_dir: - # Write to output 
directory (production build) - experiment_output_dir = output_data_dir / experiment_dir.name - experiment_output_dir.mkdir(exist_ok=True) - filtered_input_output_path = ( - experiment_output_dir / f"input_output_{safe_filename}.json" - ) + if has_config_alignment: + # Uniform KDMA: Use standard from_directory method + experiment = ExperimentData.from_directory(experiment_dir) + experiments.append(experiment) + break # Only one experiment for uniform KDMA else: - # Write to source directory (dev mode - should be avoided) - filtered_input_output_path = ( - experiment_dir / f"input_output_{safe_filename}.json" - ) - - # Convert to InputOutputItem format and prepare data for writing - input_output_items = [] - filtered_data_for_json = [] - for i, item in enumerate(items): - item_copy = item.copy() - # Append index to scenario_id to make it unique - original_scenario_id = item_copy["input"]["scenario_id"] - item_copy["input"]["scenario_id"] = f"{original_scenario_id}-{i}" - input_output_items.append(InputOutputItem(**item_copy)) - filtered_data_for_json.append(item_copy) - - # Write the filtered JSON file - with open(filtered_input_output_path, "w") as f: - json.dump(filtered_data_for_json, f, indent=2) - - # For now, just use the original timing data structure - # TODO: Implement proper timing data filtering if needed - filtered_timing = full_timing_data - - # Write filtered timing file - if output_data_dir: - filtered_timing_path = ( - experiment_output_dir / f"timing_{safe_filename}.json" + # Mixed KDMA: Create experiment for this specific alignment target + experiment = ExperimentData.from_directory_mixed_kdma( + experiment_dir, + alignment_target_id, + items, # items already have original_index ) - else: - filtered_timing_path = experiment_dir / f"timing_{safe_filename}.json" - with open(filtered_timing_path, "w") as f: - json.dump(filtered_timing, f, indent=2) - - experiment = ExperimentData.from_directory_new_format( - experiment_dir, - alignment_target_id, - 
input_output_items, - filtered_input_output_path, - filtered_timing_path, - ) - experiments.append(experiment) + experiments.append(experiment) except Exception as e: print( @@ -188,9 +157,7 @@ def _parse_new_format_directory( return experiments -def parse_experiments_directory( - experiments_root: Path, output_data_dir: Path = None -) -> List[ExperimentData]: +def parse_experiments_directory(experiments_root: Path) -> List[ExperimentData]: """ Parse the experiments directory structure and return a list of ExperimentData. @@ -206,11 +173,17 @@ def parse_experiments_directory( """ experiments = [] + directories_found = 0 + directories_with_files = 0 + directories_processed = 0 + # Recursively find all directories that have required experiment files for experiment_dir in experiments_root.rglob("*"): if not experiment_dir.is_dir(): continue + directories_found += 1 + # Skip directories containing "OUTDATED" in their path if "OUTDATED" in str(experiment_dir).upper(): continue @@ -219,18 +192,12 @@ def parse_experiments_directory( if not ExperimentData.has_required_files(experiment_dir): continue + directories_with_files += 1 + try: - # Check if this is the new format - if ExperimentData.is_new_format(experiment_dir): - # Parse new format - may return multiple experiments - new_experiments = _parse_new_format_directory( - experiment_dir, output_data_dir - ) - experiments.extend(new_experiments) - else: - # Load experiment data using existing method - experiment = ExperimentData.from_directory(experiment_dir) - experiments.append(experiment) + directory_experiments = _create_experiments_from_directory(experiment_dir) + experiments.extend(directory_experiments) + directories_processed += 1 except Exception as e: print(f"Error processing {experiment_dir}: {e}") @@ -241,36 +208,50 @@ def parse_experiments_directory( def build_manifest_from_experiments( experiments: List[ExperimentData], experiments_root: Path -) -> GlobalManifest: +) -> Manifest: """ - Build the global 
manifest from a list of parsed experiments. + Build the enhanced global manifest from a list of parsed experiments. - Detects conflicts (same ADM+LLM+KDMA but different directories) and - adds run_variant parameter to resolve conflicts. + Uses the new flexible parameter-based structure with integrity validation + and fast lookup indices. Args: experiments: List of ExperimentData objects experiments_root: Path to experiments root (for calculating relative paths) Returns: - GlobalManifest object with experiment data + Manifest object with new structure """ - # First pass: detect TRUE conflicts by grouping experiments by their complete key - # True conflicts are experiments with identical ADM+LLM+KDMA in different directories + from datetime import datetime, timezone + + # Initialize manifest + manifest = Manifest( + manifest_version="2.0", generated_at=datetime.now(timezone.utc).isoformat() + ) + + # Collect all input_output files for checksum calculation + input_output_files = set() + for experiment in experiments: + # Add default input_output.json path + input_output_files.add(experiment.experiment_path / "input_output.json") + + # Calculate checksums for all files + print(f"Calculating checksums for {len(input_output_files)} files...") + source_file_checksums = calculate_file_checksums(list(input_output_files)) + + # Process experiments with conflict detection similar to original + # First pass: detect conflicts by grouping experiments by their base parameters base_key_groups: Dict[str, List[ExperimentData]] = {} for experiment in experiments: - # Generate base key without run_variant for conflict detection - original_run_variant = experiment.config.run_variant - experiment.config.run_variant = None + # Group experiments by their full key (including default run_variant) base_key = experiment.config.generate_key() - experiment.config.run_variant = original_run_variant # Restore original if base_key not in base_key_groups: base_key_groups[base_key] = [] 
base_key_groups[base_key].append(experiment) - # Second pass: add run_variant only for TRUE conflicts + # Second pass: add run_variant for conflicts and process all experiments enhanced_experiments = [] for base_key, group_experiments in base_key_groups.items(): @@ -278,64 +259,42 @@ def build_manifest_from_experiments( # No conflict, use original experiment enhanced_experiments.append(group_experiments[0]) else: - # TRUE conflict detected - same ADM+LLM+KDMA in different directories - # Check if these are actually different KDMA configurations that got the same key - # This shouldn't happen if KDMA parsing is working correctly - all_have_same_kdmas = True - if len(group_experiments) > 1: - first_kdmas = set( - (kv.kdma, kv.value) - for kv in group_experiments[0].config.alignment_target.kdma_values + # Conflict detected - add run_variant from directory structure + conflicting_dirs = [exp.experiment_path for exp in group_experiments] + for experiment in group_experiments: + run_variant = _extract_run_variant( + experiment.experiment_path, experiments_root, conflicting_dirs + ) + # Always create experiment with run_variant (will be "default" if not extracted) + enhanced_config = experiment.config.model_copy(deep=True) + enhanced_config.run_variant = run_variant + + enhanced_experiment = ExperimentData( + config=enhanced_config, + input_output=experiment.input_output, + scores=experiment.scores, + timing=experiment.timing, + experiment_path=experiment.experiment_path, ) - for exp in group_experiments[1:]: - exp_kdmas = set( - (kv.kdma, kv.value) - for kv in exp.config.alignment_target.kdma_values - ) - if exp_kdmas != first_kdmas: - all_have_same_kdmas = False - break - - if not all_have_same_kdmas: - # Different KDMAs but same key - shouldn't happen, just use originals - enhanced_experiments.extend(group_experiments) - else: - # True conflicts - add run_variant from directory structure - conflicting_dirs = [exp.experiment_path for exp in group_experiments] - for 
experiment in group_experiments: - run_variant = _extract_run_variant( - experiment.experiment_path, experiments_root, conflicting_dirs - ) - if run_variant: - # Create a copy of the experiment with run_variant - enhanced_config = experiment.config.model_copy(deep=True) - enhanced_config.run_variant = run_variant - - enhanced_experiment = ExperimentData( - config=enhanced_config, - input_output=experiment.input_output, - scores=experiment.scores, - timing=experiment.timing, - experiment_path=experiment.experiment_path, - ) - enhanced_experiments.append(enhanced_experiment) - else: - # Fallback: use original if no run variant available - enhanced_experiments.append(experiment) - - # Build manifest with enhanced experiments - manifest = GlobalManifest() + enhanced_experiments.append(enhanced_experiment) + + # Add experiments to enhanced manifest for experiment in enhanced_experiments: - manifest.add_experiment(experiment, experiments_root) + try: + manifest.add_experiment(experiment, experiments_root, source_file_checksums) + except Exception as e: + print(f"Error adding experiment {experiment.experiment_path}: {e}") + continue # Add metadata manifest.metadata = { - "total_experiments": manifest.get_experiment_count(), - "adm_types": manifest.get_adm_types(), - "llm_backbones": manifest.get_llm_backbones(), - "kdma_combinations": manifest.get_kdma_combinations(), - "generated_at": None, # Will be set in build.py + "total_experiments": len(manifest.experiments), + "total_scenarios": len(manifest.indices.by_scenario), + "total_files": len(manifest.files), + "adm_types": list(manifest.indices.by_adm.keys()), + "llm_backbones": list(manifest.indices.by_llm.keys()), + "kdma_combinations": list(manifest.indices.by_kdma.keys()), } return manifest diff --git a/align_browser/static/app.js b/align_browser/static/app.js index 2a49b5d..30cdcbb 100644 --- a/align_browser/static/app.js +++ b/align_browser/static/app.js @@ -1,18 +1,33 @@ // Client-side application logic for ADM 
Results import { createInitialState, - updateUserSelections, - updateCurrentData, - getSelectedKey, createRunConfig, createParameterStructure, encodeStateToURL, - decodeStateFromURL + decodeStateFromURL, + loadManifest, + fetchRunData, + KDMAUtils, } from './state.js'; -document.addEventListener("DOMContentLoaded", () => { +// Constants +const TEXT_PREVIEW_LENGTH = 800; +const FLOATING_POINT_TOLERANCE = 0.001; +const KDMA_SLIDER_DEBOUNCE_MS = 500; + +// CSS Classes +const CSS_TABLE_LLM_SELECT = 'table-llm-select'; +const CSS_TABLE_ADM_SELECT = 'table-adm-select'; +const CSS_TABLE_SCENARIO_SELECT = 'table-scenario-select'; +const CSS_TABLE_RUN_VARIANT_SELECT = 'table-run-variant-select'; - let manifest = {}; +// HTML Templates +const HTML_NA_SPAN = 'N/A'; +const HTML_NO_OPTIONS_SPAN = 'No options available'; +const HTML_NO_SCENE_SPAN = 'No scene'; +const HTML_NO_KDMAS_SPAN = 'No KDMAs'; + +document.addEventListener("DOMContentLoaded", () => { // UI state persistence for expandable content const expandableStates = { @@ -22,121 +37,75 @@ document.addEventListener("DOMContentLoaded", () => { // Central application state initialized with functional state let appState = { - ...createInitialState(), - // Convert arrays to Sets to maintain existing behavior - availableScenarios: new Set(), - availableBaseScenarios: new Set(), - availableAdmTypes: new Set(), - availableKDMAs: new Set(), - availableLLMs: new Set(), - - // Run configuration factory - createRunConfig: function() { - return createRunConfig(appState); - } + ...createInitialState() }; - // Constants for run identification - const CURRENT_RUN_ID = 'current'; + // Standalone function to create run config from parameters + function createRunConfigFromParams(params) { + // Get context-specific available options using updateAppParameters with the run's parameters + let availableKDMAs = []; + let enhancedParams = { ...params }; + + if (window.updateAppParameters) { + const result = window.updateAppParameters({ + 
scenario: params.scenario, + scene: params.scene, + kdma_values: params.kdmaValues || {}, + adm: params.admType, + llm: params.llmBackbone, + run_variant: params.runVariant + }, {}); + + availableKDMAs = result.options.kdma_values || []; + + // Add all available options to params if they weren't provided + enhancedParams = { + ...params, + availableScenarios: params.availableScenarios || result.options.scenario || [], + availableScenes: params.availableScenes || result.options.scene || [], + availableAdmTypes: params.availableAdmTypes || result.options.adm || [], + availableLLMs: params.availableLLMs || result.options.llm || [] + }; + } + + return createRunConfig(enhancedParams, availableKDMAs); + } + - // Parameter storage by run ID - enables multi-run parameter management + // Parameter storage by run ID const columnParameters = new Map(); - // Use imported parameter structure factory - // Get parameters for any run ID function getParametersForRun(runId) { if (!columnParameters.has(runId)) { // Initialize with default parameters using auto-correction let defaultParams; - if (runId === CURRENT_RUN_ID) { - // For current run, use existing appState as starting point + // For pinned runs, initialize with the run's actual parameters + const run = appState.pinnedRuns.get(runId); + if (run) { defaultParams = createParameterStructure({ - scenario: appState.selectedScenario, - baseScenario: appState.selectedBaseScenario, - admType: appState.selectedAdmType, - llmBackbone: appState.selectedLLM, - kdmas: appState.activeKDMAs + scenario: run.scenario, + scene: run.scene, + admType: run.admType, + llmBackbone: run.llmBackbone, + kdmas: run.kdmaValues }); - } else { - // For pinned runs, initialize with the run's actual parameters - const run = appState.pinnedRuns.get(runId); - if (run) { - defaultParams = createParameterStructure({ - scenario: run.scenario, - baseScenario: run.baseScenario, - admType: run.admType, - llmBackbone: run.llmBackbone, - kdmas: run.kdmaValues - }); 
- } else { - // For truly new runs, start with auto-corrected valid combination - defaultParams = correctParametersToValid({}); - } } - columnParameters.set(runId, defaultParams); } return columnParameters.get(runId); } - // Set parameters for any run ID with validation - function setParametersForRun(runId, params) { - // Always validate parameters before storing - const validParams = correctParametersToValid(params, true); - columnParameters.set(runId, createParameterStructure(validParams)); - - return validParams; - } - - // Sync appState FROM current run parameters - function syncAppStateFromRun(runId = CURRENT_RUN_ID) { - if (runId === CURRENT_RUN_ID) { - const params = getParametersForRun(CURRENT_RUN_ID); - appState = updateUserSelections(appState, { - scenario: params.scenario, - baseScenario: params.baseScenario, - admType: params.admType, - llm: params.llmBackbone, - kdmas: { ...params.kdmas } - }); - } - } - - // Sync current run parameters FROM appState - function syncRunFromAppState() { - const params = { - scenario: appState.selectedScenario, - baseScenario: appState.selectedBaseScenario, - admType: appState.selectedAdmType, - llmBackbone: appState.selectedLLM, - kdmas: { ...appState.activeKDMAs } - }; - - const validParams = setParametersForRun(CURRENT_RUN_ID, params); - - // If auto-correction changed parameters, sync back to appState - if (validParams.scenario !== params.scenario || - validParams.admType !== params.admType || - validParams.llmBackbone !== params.llmBackbone || - JSON.stringify(validParams.kdmas) !== JSON.stringify(params.kdmas)) { - syncAppStateFromRun(CURRENT_RUN_ID); - return true; // Parameters were corrected - } - - return false; // No correction needed - } - // Update a parameter for any run with validation and UI sync - function updateParameterForRun(runId, paramType, newValue, updateUI = true) { + function updateParameterForRun(runId, paramType, newValue) { const params = getParametersForRun(runId); // Map parameter types to 
parameter structure fields const paramMap = { 'scenario': 'scenario', - 'baseScenario': 'baseScenario', + 'scene': 'scene', 'admType': 'admType', 'llmBackbone': 'llmBackbone', 'llm': 'llmBackbone', // alias @@ -147,24 +116,57 @@ document.addEventListener("DOMContentLoaded", () => { const paramField = paramMap[paramType] || paramType; params[paramField] = newValue; - // Apply auto-correction - const correctedParams = setParametersForRun(runId, params); + // Use updateAppParameters for validation instead of setParametersForRun + const stateParams = { + scenario: params.scenario || null, + scene: params.scene || null, + kdma_values: params.kdmas || {}, + adm: params.admType || null, + llm: params.llmBackbone || null, + run_variant: params.runVariant || null + }; + + const result = window.updateAppParameters(stateParams, {}); + const validParams = result.params; + const validOptions = result.options; - // Update UI if it's the current run - if (runId === CURRENT_RUN_ID && updateUI) { - syncAppStateFromRun(CURRENT_RUN_ID); - } + // Convert back to app.js format + const kdmas = validParams.kdma_values || {}; - return correctedParams; - } - - // Initialize the run context system after manifest is loaded - function initializeRunContextSystem() { - // Initialize current run parameters from appState - // This establishes the baseline for the current run state - syncRunFromAppState(); + const correctedParams = { + scenario: validParams.scenario, + scene: validParams.scene, + admType: validParams.adm, + llmBackbone: validParams.llm, + kdmas: kdmas, + runVariant: validParams.run_variant + }; - console.log('Run context system initialized with current run:', getParametersForRun(CURRENT_RUN_ID)); + // Store corrected parameters + columnParameters.set(runId, createParameterStructure(correctedParams)); + + // Update the actual run state + const run = appState.pinnedRuns.get(runId); + run.scenario = correctedParams.scenario; + run.scene = correctedParams.scene; + run.admType = 
correctedParams.admType; + run.llmBackbone = correctedParams.llmBackbone; + run.runVariant = correctedParams.runVariant; + run.kdmaValues = correctedParams.kdmas; + + // Store the available options for UI dropdowns + run.availableOptions = { + scenarios: validOptions.scenario || [], + scenes: validOptions.scene || [], + admTypes: validOptions.adm || [], + llms: validOptions.llm || [], + runVariants: validOptions.run_variant || [], + kdmas: { + validCombinations: validOptions.kdma_values || [] + } + }; + + return correctedParams; } // URL State Management System @@ -180,28 +182,24 @@ document.addEventListener("DOMContentLoaded", () => { const state = decodeStateFromURL(); if (state) { - // Restore selections - appState = updateUserSelections(appState, { - baseScenario: state.baseScenario || appState.selectedBaseScenario, - scenario: state.scenario || appState.selectedScenario, - admType: state.admType || appState.selectedAdmType, - llm: state.llm || appState.selectedLLM, - kdmas: state.kdmas || appState.activeKDMAs - }); - - // Sync restored state to current run parameters - syncRunFromAppState(); - // Restore pinned runs if (state.pinnedRuns && state.pinnedRuns.length > 0) { for (const runConfig of state.pinnedRuns) { - await pinRunFromConfig(runConfig); + // Convert runConfig to params format expected by addColumn + // Don't pass availableOptions - let addColumn calculate them fresh + const params = { + scenario: runConfig.scenario, + scene: runConfig.scene, + admType: runConfig.admType, + llmBackbone: runConfig.llmBackbone, + runVariant: runConfig.runVariant, + kdmaValues: runConfig.kdmaValues + }; + // Skip URL updates during batch restoration + await addColumn(params, { updateURL: false }); } - } - - // Load current run if configured - if (appState.selectedScenario) { - await loadResults(); + // Update URL once after all runs are restored + urlState.updateURL(); } return true; // Successfully restored @@ -212,551 +210,72 @@ 
document.addEventListener("DOMContentLoaded", () => { // Function to fetch and parse manifest.json async function fetchManifest() { - try { - const response = await fetch("./data/manifest.json"); - manifest = await response.json(); - console.log("Manifest loaded:", manifest); - extractParametersFromManifest(); - populateUIControls(); + const result = await loadManifest(); + window.updateAppParameters = result.updateAppParameters; - // Initialize run context system - initializeRunContextSystem(); - - // Try to restore state from URL, otherwise load results normally - const restoredFromURL = await urlState.restoreFromURL(); - if (!restoredFromURL) { - await loadResults(); // Load results initially only if not restored from URL - // Auto-pin the initial configuration if no pinned runs exist - if (appState.pinnedRuns.size === 0 && appState.currentInputOutput) { - // Ensure we have a valid display name before pinning - setTimeout(() => { - pinCurrentRun(); - }, 100); // Small delay to ensure appState is fully updated - } - } - } catch (error) { - console.error("Error fetching manifest:", error); - // Error will be displayed in the table - updateComparisonDisplay(); - } - } - - // Extract unique parameters and build validCombinations structure - function extractParametersFromManifest() { - appState.availableScenarios.clear(); - appState.availableBaseScenarios.clear(); - appState.availableAdmTypes.clear(); - appState.availableKDMAs.clear(); - appState.availableLLMs.clear(); - appState.validCombinations = {}; - - // Handle new manifest structure with experiment_keys - const experiments = manifest.experiment_keys || manifest; - - // First pass: collect all scenarios and base scenario IDs - for (const experimentKey in experiments) { - const experiment = experiments[experimentKey]; - for (const scenarioId in experiment.scenarios) { - appState.availableScenarios.add(scenarioId); - // Extract base scenario ID by removing index suffix - const baseScenarioId = 
scenarioId.replace(/-\d+$/, ""); - appState.availableBaseScenarios.add(baseScenarioId); - } - } - - // Second pass: build global parameter sets - for (const experimentKey in experiments) { - const experiment = experiments[experimentKey]; - for (const scenarioId in experiment.scenarios) { - const scenario = experiment.scenarios[scenarioId]; - const config = scenario.config; - if (!config) continue; - - const admType = config.adm ? config.adm.name : "unknown_adm"; - const llmBackbone = - config.adm && - config.adm.structured_inference_engine && - config.adm.structured_inference_engine.model_name - ? config.adm.structured_inference_engine.model_name - : "no_llm"; - - appState.availableAdmTypes.add(admType); - appState.availableLLMs.add(llmBackbone); - - if (!appState.validCombinations[admType]) { - appState.validCombinations[admType] = {}; - } - if (!appState.validCombinations[admType][llmBackbone]) { - appState.validCombinations[admType][llmBackbone] = {}; - } - - if (config.alignment_target && config.alignment_target.kdma_values) { - config.alignment_target.kdma_values.forEach((kdma_entry) => { - const kdma = kdma_entry.kdma; - const value = kdma_entry.value; - appState.availableKDMAs.add(kdma); - - if (!appState.validCombinations[admType][llmBackbone][kdma]) { - appState.validCombinations[admType][llmBackbone][kdma] = new Set(); - } - appState.validCombinations[admType][llmBackbone][kdma].add(value); - }); - } - } - } - - // Convert Sets to Arrays for easier use in UI - appState.availableScenarios = Array.from(appState.availableScenarios); - appState.availableBaseScenarios = Array.from(appState.availableBaseScenarios).sort(); - appState.availableAdmTypes = Array.from(appState.availableAdmTypes).sort(); - appState.availableKDMAs = Array.from(appState.availableKDMAs).sort(); - appState.availableLLMs = Array.from(appState.availableLLMs).sort(); - - // Convert inner Sets to sorted Arrays - for (const adm in appState.validCombinations) { - for (const llm in 
appState.validCombinations[adm]) { - for (const kdma in appState.validCombinations[adm][llm]) { - appState.validCombinations[adm][llm][kdma] = Array.from( - appState.validCombinations[adm][llm][kdma], - ).sort((a, b) => a - b); - } - } - } - - console.log("Valid Combinations (structured):", appState.validCombinations); - } - - // Core function that extracts parameters from experiment config - function extractParametersFromConfig(config) { - if (!config) return null; - - const admType = config.adm ? config.adm.name : "unknown_adm"; - const llmBackbone = config.adm && - config.adm.structured_inference_engine && - config.adm.structured_inference_engine.model_name - ? config.adm.structured_inference_engine.model_name - : "no_llm"; - - const kdmas = {}; - if (config.alignment_target && config.alignment_target.kdma_values) { - config.alignment_target.kdma_values.forEach((kdma_entry) => { - const kdma = kdma_entry.kdma; - const value = kdma_entry.value; - - if (!kdmas[kdma]) { - kdmas[kdma] = new Set(); - } - kdmas[kdma].add(value); - }); - } - - return { admType, llmBackbone, kdmas }; - } - - // Check if extracted parameters match given constraints - function matchesConstraints(constraints, scenarioId, params) { - if (constraints.scenario && constraints.scenario !== scenarioId) { - return false; - } - if (constraints.admType && constraints.admType !== params.admType) { - return false; - } - if (constraints.llmBackbone && constraints.llmBackbone !== params.llmBackbone) { - return false; - } - if (constraints.kdmas) { - // Check if all constraint KDMAs have matching values - for (const [kdmaName, requiredValue] of Object.entries(constraints.kdmas)) { - if (!params.kdmas[kdmaName] || !params.kdmas[kdmaName].has(requiredValue)) { - return false; - } - } - } - return true; - } - - // Core function that finds all valid options given constraints - function getValidOptionsForConstraints(constraints = {}) { - const experiments = manifest.experiment_keys || manifest; - const 
validOptions = { - scenarios: new Set(), - admTypes: new Set(), - llmBackbones: new Set(), - kdmas: {} // kdmaName -> Set of valid values - }; - - for (const expKey in experiments) { - const experiment = experiments[expKey]; + const initialResult = window.updateAppParameters({ + scenario: null, + scene: null, + kdma_values: [], + adm: null, + llm: null, + run_variant: null + }, {}); - for (const scenarioId in experiment.scenarios) { - const scenario = experiment.scenarios[scenarioId]; - const params = extractParametersFromConfig(scenario.config); - - if (params && matchesConstraints(constraints, scenarioId, params)) { - validOptions.scenarios.add(scenarioId); - validOptions.admTypes.add(params.admType); - validOptions.llmBackbones.add(params.llmBackbone); - - // Merge KDMAs - for (const [kdmaName, kdmaValues] of Object.entries(params.kdmas)) { - if (!validOptions.kdmas[kdmaName]) { - validOptions.kdmas[kdmaName] = new Set(); - } - kdmaValues.forEach(value => validOptions.kdmas[kdmaName].add(value)); - } - } - } - } - - return validOptions; - } - - // Convenience function to check if a specific parameter combination is valid - function isValidParameterCombination(scenario, admType, llmBackbone, kdmas, baseScenario = null, runVariant = null) { - // Check baseScenario/scenario consistency if both are provided - if (baseScenario && scenario) { - const scenarioBase = scenario.replace(/-\d+$/, ""); - if (scenarioBase !== baseScenario) { - return false; - } - } - - const constraints = { scenario, admType, llmBackbone, kdmas }; - const validOptions = getValidOptionsForConstraints(constraints); - - // Check if the basic combination is valid - if (!validOptions.scenarios.has(scenario)) { - return false; - } - - // If no run variant specified, combination is valid - if (!runVariant) { - return true; - } - - // Check if run variant exists for this ADM+LLM+KDMA combination - const baseKey = buildExperimentKey(admType, llmBackbone, kdmas); - const runVariantKey = 
`${baseKey}_${runVariant}`; - - return Object.keys(manifest.experiment_keys || {}).includes(runVariantKey); - } - - // Find a valid parameter combination given partial constraints and preferences - // Priority order: 1) Scenario (highest), 2) KDMA values, 3) ADM type, 4) LLM backbone (lowest) - function findValidParameterCombination(constraints = {}, preferences = {}, depth = 0) { - // Prevent infinite recursion - if (depth > 2) { - console.warn('Auto-correction recursion limit reached, using fallback'); - const allValidOptions = getValidOptionsForConstraints({}); - if (allValidOptions.scenarios.size > 0) { - const firstScenario = Array.from(allValidOptions.scenarios)[0]; - return { - scenario: firstScenario, - baseScenario: firstScenario.replace(/-\d+$/, ""), - admType: Array.from(allValidOptions.admTypes)[0], - llmBackbone: Array.from(allValidOptions.llmBackbones)[0], - kdmas: {}, - runVariant: constraints.runVariant || null - }; - } - } - // Start with current selections as baseline - const currentParams = { - scenario: constraints.scenario || appState.selectedScenario, - baseScenario: constraints.baseScenario || appState.selectedBaseScenario, - admType: constraints.admType || appState.selectedAdmType, - llmBackbone: constraints.llmBackbone || appState.selectedLLM, - kdmas: constraints.kdmas || { ...appState.activeKDMAs }, - runVariant: constraints.runVariant || appState.selectedRunVariant || null - }; - - // If current combination is already valid, return it - if (isValidParameterCombination(currentParams.scenario, currentParams.admType, currentParams.llmBackbone, currentParams.kdmas, currentParams.baseScenario, currentParams.runVariant)) { - return currentParams; - } - - // Check if just the run variant is invalid while base parameters are valid - if (currentParams.runVariant && isValidParameterCombination(currentParams.scenario, currentParams.admType, currentParams.llmBackbone, currentParams.kdmas, currentParams.baseScenario, null)) { - // Base parameters are 
valid, but run variant is not - reset run variant to null - return { - ...currentParams, - runVariant: null + // Store first valid parameters for auto-pinning but don't populate appState selections + const firstValidParams = { + scenario: initialResult.params.scenario, + scene: initialResult.params.scene, + admType: initialResult.params.adm, + llmBackbone: initialResult.params.llm, + runVariant: initialResult.params.run_variant, + kdmaValues: initialResult.params.kdma_values || {}, + availableScenarios: initialResult.options.scenario || [], + availableScenes: initialResult.options.scene || [], + availableAdmTypes: initialResult.options.adm || [], + availableLLMs: initialResult.options.llm || [] }; - } - - // Priority 1: Preserve scenario, adjust other parameters to make it work - // But only if scenario matches baseScenario (if baseScenario is specified) - const scenarioMatchesBase = !currentParams.baseScenario || - currentParams.scenario.replace(/-\d+$/, "") === currentParams.baseScenario; - - if (currentParams.scenario && scenarioMatchesBase) { - const validOptions = getValidOptionsForConstraints({ scenario: currentParams.scenario }); - - if (validOptions.admTypes.size > 0) { - // Try to preserve current ADM type if valid for this scenario - let selectedADM = currentParams.admType; - if (!validOptions.admTypes.has(selectedADM)) { - selectedADM = Array.from(validOptions.admTypes)[0]; - } - - const admOptions = getValidOptionsForConstraints({ - scenario: currentParams.scenario, - admType: selectedADM - }); - - if (admOptions.llmBackbones.size > 0) { - // Try to preserve LLM preference for this ADM, or current LLM - let selectedLLM = currentParams.llmBackbone; - const preferredLLM = preferences.llmPreferences && preferences.llmPreferences[selectedADM]; - - if (preferredLLM && admOptions.llmBackbones.has(preferredLLM)) { - selectedLLM = preferredLLM; - } else if (!admOptions.llmBackbones.has(selectedLLM)) { - selectedLLM = Array.from(admOptions.llmBackbones)[0]; - } 
- - const kdmaOptions = getValidOptionsForConstraints({ - scenario: currentParams.scenario, - admType: selectedADM, - llmBackbone: selectedLLM - }); - - if (Object.keys(kdmaOptions.kdmas).length > 0) { - // Try to preserve current KDMA values, adjust if needed - const correctedKDMAs = {}; - - // For each current KDMA, check if it's still valid - for (const [kdma, value] of Object.entries(currentParams.kdmas)) { - if (kdmaOptions.kdmas[kdma] && kdmaOptions.kdmas[kdma].has(value)) { - correctedKDMAs[kdma] = value; // Keep current value - } else if (kdmaOptions.kdmas[kdma] && kdmaOptions.kdmas[kdma].size > 0) { - const newValue = Array.from(kdmaOptions.kdmas[kdma])[0]; - correctedKDMAs[kdma] = newValue; // Use first valid value - } - } - - // If no KDMAs preserved, use first available - if (Object.keys(correctedKDMAs).length === 0) { - const firstKDMA = Object.keys(kdmaOptions.kdmas)[0]; - const firstValue = Array.from(kdmaOptions.kdmas[firstKDMA])[0]; - correctedKDMAs[firstKDMA] = firstValue; - } - - return { - scenario: currentParams.scenario, - baseScenario: currentParams.scenario.replace(/-\d+$/, ""), - admType: selectedADM, - llmBackbone: selectedLLM, - kdmas: correctedKDMAs, - runVariant: currentParams.runVariant - }; - } - } - } - } - - // Priority 0: Fix baseScenario/scenario inconsistency first, then restart auto-correction - if (currentParams.baseScenario && !scenarioMatchesBase) { - const matchingScenarios = Array.from(appState.availableScenarios).filter((scenarioId) => { - const extractedBase = scenarioId.replace(/-\d+$/, ""); - return extractedBase === currentParams.baseScenario; - }); - - if (matchingScenarios.length > 0) { - // Recursively call with corrected scenario - this reuses all existing logic - return findValidParameterCombination({ - ...constraints, - scenario: matchingScenarios[0] - }, preferences, depth + 1); - } - } - - // Priority 2: Preserve KDMA values, find scenario+ADM+LLM that supports them - if (Object.keys(currentParams.kdmas).length 
> 0) { - const allValidOptions = getValidOptionsForConstraints({}); - - // Try scenarios that match the current base scenario first - let scenariosToTry = Array.from(allValidOptions.scenarios); - if (currentParams.scenario) { - const currentBaseScenario = currentParams.scenario.replace(/-\d+$/, ""); - scenariosToTry.sort((a, b) => { - const aBase = a.replace(/-\d+$/, ""); - const bBase = b.replace(/-\d+$/, ""); - if (aBase === currentBaseScenario && bBase !== currentBaseScenario) return -1; - if (bBase === currentBaseScenario && aBase !== currentBaseScenario) return 1; - return 0; - }); - } - for (const scenario of scenariosToTry) { - const scenarioOptions = getValidOptionsForConstraints({ scenario }); - - for (const admType of scenarioOptions.admTypes) { - const admOptions = getValidOptionsForConstraints({ scenario, admType }); - - for (const llmBackbone of admOptions.llmBackbones) { - const kdmaOptions = getValidOptionsForConstraints({ scenario, admType, llmBackbone }); - - // Check if all current KDMAs are valid for this combination - let allKDMAsValid = true; - for (const [kdma, value] of Object.entries(currentParams.kdmas)) { - if (!kdmaOptions.kdmas[kdma] || !kdmaOptions.kdmas[kdma].has(value)) { - allKDMAsValid = false; - break; - } - } - - if (allKDMAsValid) { - return { - scenario, - baseScenario: scenario.replace(/-\d+$/, ""), - admType, - llmBackbone, - kdmas: currentParams.kdmas, - runVariant: currentParams.runVariant - }; - } - } - } - } - } - - // Priority 3: Preserve ADM type, adjust LLM and scenario - if (currentParams.admType) { - const validOptions = getValidOptionsForConstraints({ admType: currentParams.admType }); - - if (validOptions.llmBackbones.size > 0 && validOptions.scenarios.size > 0) { - // Try to preserve LLM preference - const preferredLLM = preferences.llmPreferences && preferences.llmPreferences[currentParams.admType]; - let selectedLLM = currentParams.llmBackbone; - - if (preferredLLM && validOptions.llmBackbones.has(preferredLLM)) 
{ - selectedLLM = preferredLLM; - } else if (!validOptions.llmBackbones.has(selectedLLM)) { - selectedLLM = Array.from(validOptions.llmBackbones)[0]; - } - - // Find scenario that works with this ADM+LLM - const scenarioOptions = getValidOptionsForConstraints({ - admType: currentParams.admType, - llmBackbone: selectedLLM - }); - - let selectedScenario; - // Try to preserve base scenario - if (currentParams.scenario) { - const currentBaseScenario = currentParams.scenario.replace(/-\d+$/, ""); - const matchingScenarios = Array.from(scenarioOptions.scenarios).filter(s => - s.replace(/-\d+$/, "") === currentBaseScenario - ); - - if (matchingScenarios.length > 0) { - selectedScenario = matchingScenarios[0]; - } - } - - if (!selectedScenario) { - selectedScenario = Array.from(scenarioOptions.scenarios)[0]; - } - - const kdmaOptions = getValidOptionsForConstraints({ - scenario: selectedScenario, - admType: currentParams.admType, - llmBackbone: selectedLLM - }); - - if (Object.keys(kdmaOptions.kdmas).length > 0) { - const firstKDMA = Object.keys(kdmaOptions.kdmas)[0]; - const firstValue = Array.from(kdmaOptions.kdmas[firstKDMA])[0]; - - return { - scenario: selectedScenario, - baseScenario: selectedScenario.replace(/-\d+$/, ""), - admType: currentParams.admType, - llmBackbone: selectedLLM, - kdmas: { [firstKDMA]: firstValue }, - runVariant: currentParams.runVariant - }; - } - } - } - - // Priority 4 (Fallback): Find any valid combination - const allValidOptions = getValidOptionsForConstraints({}); - - if (allValidOptions.admTypes.size > 0) { - const firstValidADM = Array.from(allValidOptions.admTypes)[0]; - const admOptions = getValidOptionsForConstraints({ admType: firstValidADM }); - - if (admOptions.llmBackbones.size > 0 && admOptions.scenarios.size > 0) { - const firstValidLLM = Array.from(admOptions.llmBackbones)[0]; - const firstValidScenario = Array.from(admOptions.scenarios)[0]; - - const kdmaOptions = getValidOptionsForConstraints({ - scenario: firstValidScenario, 
- admType: firstValidADM, - llmBackbone: firstValidLLM - }); - - const correctedParams = { - scenario: firstValidScenario, - baseScenario: firstValidScenario.replace(/-\d+$/, ""), - admType: firstValidADM, - llmBackbone: firstValidLLM, - kdmas: {}, - runVariant: currentParams.runVariant - }; - - if (Object.keys(kdmaOptions.kdmas).length > 0) { - const firstKDMA = Object.keys(kdmaOptions.kdmas)[0]; - const firstValue = Array.from(kdmaOptions.kdmas[firstKDMA])[0]; - correctedParams.kdmas = { [firstKDMA]: firstValue }; + // Try to restore state from URL, otherwise auto-pin first valid configuration + const restoredFromURL = await urlState.restoreFromURL(); + if (!restoredFromURL) { + // Auto-pin the first valid configuration if no pinned runs exist + if (appState.pinnedRuns.size === 0 && firstValidParams.scenario) { + await addColumn(firstValidParams); } - - return correctedParams; } - } - - // Fallback: return original parameters (should not happen with valid manifest) - console.warn('No valid parameter combination found, returning original parameters'); - return currentParams; - } - - // Correct parameters to be valid while preserving user preferences - function correctParametersToValid(currentParams, preservePreferences = true) { - const preferences = preservePreferences ? 
{ - llmPreferences: appState.llmPreferences - } : {}; - - return findValidParameterCombination(currentParams, preferences); } - function populateUIControls() { - // Initialize current run parameters with initial state - syncRunFromAppState(); - } - - // Handle LLM change for pinned runs - global for onclick access - window.handleRunLLMChange = async function(runId, newLLM) { + // Generic parameter change handler for simple cases + async function handleSimpleParameterChange(runId, parameter, value, options = {}) { await window.updatePinnedRunState({ runId, - parameter: 'llmBackbone', - value: newLLM, + parameter, + value, needsReload: true, - updateUI: false + updateUI: true, + ...options }); + } + + // Generic parameter change handler factory + const createParameterChangeHandler = (parameterName, options = {}) => { + return async function(runId, newValue) { + await handleSimpleParameterChange(runId, parameterName, newValue, options); + }; }; + // Simple parameter change handlers - global for onclick access + window.handleRunLLMChange = createParameterChangeHandler('llmBackbone', { updateUI: false }); + window.handleRunVariantChange = createParameterChangeHandler('runVariant'); + window.handleRunSceneChange = createParameterChangeHandler('scene'); + window.handleRunScenarioChange = createParameterChangeHandler('scenario'); + // Handle ADM type change for pinned runs - global for onclick access + // Special case: preserves LLM preferences per ADM type window.handleRunADMChange = async function(runId, newADM) { - console.log(`Changing ADM type for run ${runId} to ${newADM}`); - const run = appState.pinnedRuns.get(runId); - if (!run) { - console.warn(`Run ${runId} not found`); - return; - } // Initialize LLM preferences for this run if not present if (!run.llmPreferences) { @@ -768,157 +287,58 @@ document.addEventListener("DOMContentLoaded", () => { run.llmPreferences[run.admType] = run.llmBackbone; } - // Update ADM type with validation - const updatedParams = 
updateParameterForRun(runId, 'admType', newADM); + // Update ADM type with validation - this will also update available options + updateParameterForRun(runId, 'admType', newADM); // Try to restore LLM preference for the new ADM type - if (run.llmPreferences[newADM]) { - // Check if preferred LLM is valid for new ADM - const validOptions = getValidOptionsForConstraints({ - scenario: updatedParams.scenario, - admType: newADM - }); - - if (validOptions.llmBackbones.has(run.llmPreferences[newADM])) { - console.log(`Restoring LLM preference for ADM ${newADM}: ${run.llmPreferences[newADM]}`); - updateParameterForRun(runId, 'llmBackbone', run.llmPreferences[newADM]); - } - } - - // Reload data for this specific run - await reloadPinnedRun(runId); - - // Update URL state - urlState.updateURL(); - }; - - // Handle run variant change for pinned runs - global for onclick access - window.handleRunVariantChange = async function(runId, newVariant) { - console.log(`Changing run variant for run ${runId} to ${newVariant}`); - - const run = appState.pinnedRuns.get(runId); - if (!run) { - console.warn(`Run ${runId} not found`); - return; + if (run.llmPreferences[newADM] && run.availableOptions?.llms?.includes(run.llmPreferences[newADM])) { + updateParameterForRun(runId, 'llmBackbone', run.llmPreferences[newADM]); } - // Update run variant with validation - updateParameterForRun(runId, 'runVariant', newVariant === 'default' ? 
null : newVariant); - - // Reload data for this specific run - await reloadPinnedRun(runId); - - // Update URL state - urlState.updateURL(); - }; - - // Handle base scenario change for pinned runs - global for onclick access - window.handleRunBaseScenarioChange = async function(runId, newBaseScenario) { - console.log(`Changing base scenario for run ${runId} to ${newBaseScenario}`); - - const run = appState.pinnedRuns.get(runId); - if (!run) { - console.warn(`Run ${runId} not found`); - return; - } - - // Update base scenario with validation through central system - updateParameterForRun(runId, 'baseScenario', newBaseScenario); - - // After scenario change, validate and potentially reset KDMAs - await validateKDMAsForScenarioChange(runId); - - // Reload data for this specific run - await reloadPinnedRun(runId); - - // Update URL state - urlState.updateURL(); - }; - - // Handle specific scenario change for pinned runs - global for onclick access - window.handleRunSpecificScenarioChange = async function(runId, newScenario) { - console.log(`Changing specific scenario for run ${runId} to ${newScenario}`); - - // Update scenario with validation through central system - updateParameterForRun(runId, 'scenario', newScenario); - - // After scenario change, validate and potentially reset KDMAs - await validateKDMAsForScenarioChange(runId); - - // Reload data for this specific run - await reloadPinnedRun(runId); - - // Update URL state - urlState.updateURL(); - }; - - // Validate KDMAs after scenario change and reset if necessary - async function validateKDMAsForScenarioChange(runId) { - const run = appState.pinnedRuns.get(runId); - if (!run) return; - - // Check if current KDMA configuration is valid for the new scenario - const currentParams = getParametersForRun(runId); - const baseKey = buildExperimentKey(currentParams.admType, currentParams.llmBackbone, currentParams.kdmas); - - // Check if this combination exists for the current scenario - const experimentExists = 
Object.keys(manifest.experiment_keys || {}).some(key => { - if (key === baseKey || key.startsWith(baseKey + '_')) { - const experiment = manifest.experiment_keys[key]; - return experiment && experiment.scenarios && experiment.scenarios[currentParams.scenario]; - } - return false; + await window.updatePinnedRunState({ + runId, + needsReload: true, + updateUI: true }); + }; - if (!experimentExists) { - console.log(`Current KDMA configuration not valid for scenario ${currentParams.scenario}, resetting KDMAs`); - - // Get first valid KDMA combination for this scenario+ADM+LLM - const constraints = { - scenario: currentParams.scenario, - admType: currentParams.admType, - llmBackbone: currentParams.llmBackbone - }; - - const validOptions = getValidOptionsForConstraints(constraints); - - if (Object.keys(validOptions.kdmas).length > 0) { - // Build first valid KDMA combination - const newKDMAs = {}; - for (const [kdmaName, kdmaValues] of Object.entries(validOptions.kdmas)) { - if (kdmaValues.size > 0) { - newKDMAs[kdmaName] = Array.from(kdmaValues)[0]; - } - } - - console.log(`Resetting to valid KDMA configuration:`, newKDMAs); - - // Update both run state and column parameters - run.kdmaValues = newKDMAs; - currentParams.kdmas = newKDMAs; - columnParameters.set(runId, createParameterStructure(currentParams)); - - // Update comparison display to show new KDMA controls - updateComparisonDisplay(); - } - } - } // Handle adding KDMA to pinned run - global for onclick access - window.addKDMAToRun = function(runId) { + window.addKDMAToRun = async function(runId) { const run = appState.pinnedRuns.get(runId); - if (!run) return; const availableKDMAs = getValidKDMAsForRun(runId); const currentKDMAs = run.kdmaValues || {}; const maxKDMAs = getMaxKDMAsForRun(runId); + const minimumRequired = getMinimumRequiredKDMAs(runId); if (Object.keys(currentKDMAs).length >= maxKDMAs) { console.warn(`Cannot add KDMA: max limit (${maxKDMAs}) reached for run ${runId}`); return; } - // Find first 
available KDMA type + // If we have no KDMAs and need to add multiple at once for a valid combination + if (Object.keys(currentKDMAs).length === 0 && minimumRequired > 1) { + // Add a complete valid combination + const validCombinations = run.availableOptions?.kdmas?.validCombinations || []; + if (validCombinations.length > 0) { + // Find the first non-empty combination (skip unaligned empty combinations) + const firstNonEmptyCombination = validCombinations.find(combination => Object.keys(combination).length > 0); + + if (firstNonEmptyCombination) { + await updatePinnedRunState({ + runId, + parameter: 'kdmas', + value: { ...firstNonEmptyCombination }, + needsReload: true, + updateUI: true + }); + return; + } + } + } + + // Standard single-KDMA addition logic const availableTypes = Object.keys(availableKDMAs).filter(type => currentKDMAs[type] === undefined ); @@ -931,524 +351,116 @@ document.addEventListener("DOMContentLoaded", () => { const kdmaType = availableTypes[0]; const validValues = Array.from(availableKDMAs[kdmaType] || []); const initialValue = validValues.length > 0 ? 
validValues[0] : 0.0; - console.log(`Adding KDMA ${kdmaType} with initial value ${initialValue} to run ${runId}`); // Update KDMAs through the parameter validation system const newKDMAs = { ...currentKDMAs, [kdmaType]: initialValue }; - // Use the parameter update system to ensure validation - updateParameterForRun(runId, 'kdmas', newKDMAs); - - // Refresh the comparison display to show new KDMA control - updateComparisonDisplay(); - - // Reload experiment data for the new KDMA combination - reloadPinnedRun(runId); - - // Update URL state - urlState.updateURL(); - }; - - // Handle removing KDMA from pinned run - global for onclick access - window.removeKDMAFromRun = function(runId, kdmaType) { - const run = appState.pinnedRuns.get(runId); - if (!run) return; - - const currentKDMAs = { ...(run.kdmaValues || {}) }; - delete currentKDMAs[kdmaType]; - - // Use the parameter update system to ensure validation - updateParameterForRun(runId, 'kdmas', currentKDMAs); - - // Refresh the comparison display - updateComparisonDisplay(); - - // Reload experiment data for the new KDMA combination - reloadPinnedRun(runId); - - // Update URL state - urlState.updateURL(); + await updatePinnedRunState({ + runId, + parameter: 'kdmas', + value: newKDMAs, + needsReload: true, + updateUI: true + }); }; - // Handle KDMA type change for pinned run - global for onclick access - window.handleRunKDMATypeChange = function(runId, oldKdmaType, newKdmaType) { + // Helper function for KDMA updates + async function updateKDMAsForRun(runId, modifier, options = {}) { const run = appState.pinnedRuns.get(runId); if (!run) return; const currentKDMAs = { ...(run.kdmaValues || {}) }; - const currentValue = currentKDMAs[oldKdmaType]; - - // Remove old type and add new type - delete currentKDMAs[oldKdmaType]; - - // Get valid values for new type and adjust value if needed - const availableKDMAs = getValidKDMAsForRun(runId); - const validValues = availableKDMAs[newKdmaType] || []; - let newValue = 
currentValue; - - if (validValues.length > 0 && !validValues.includes(currentValue)) { - newValue = validValues[0]; // Use first valid value - } - - currentKDMAs[newKdmaType] = newValue; - - // Use the parameter update system to ensure validation - updateParameterForRun(runId, 'kdmas', currentKDMAs); + const updatedKDMAs = modifier(currentKDMAs); - // Refresh the comparison display - updateComparisonDisplay(); - - // Reload experiment data for the new KDMA combination - reloadPinnedRun(runId); - - // Update URL state - urlState.updateURL(); - }; + await updatePinnedRunState({ + runId, + parameter: 'kdmas', + value: updatedKDMAs, + needsReload: true, + updateUI: true, + ...options + }); + } - // Handle KDMA slider input for pinned run - global for onclick access - window.handleRunKDMASliderInput = function(runId, kdmaType, sliderElement) { + // Handle removing KDMA from pinned run - global for onclick access + window.removeKDMAFromRun = async function(runId, kdmaType) { const run = appState.pinnedRuns.get(runId); - if (!run) return; - - const rawValue = parseFloat(sliderElement.value); - - // Get valid values considering current KDMA constraints - const currentKDMAs = { ...(run.kdmaValues || {}) }; - - // Create a constraint that includes other KDMAs but NOT the one being changed - const constraintKDMAs = { ...currentKDMAs }; - delete constraintKDMAs[kdmaType]; // Remove the one we're changing - - const constraints = { - scenario: run.scenario, - admType: run.admType, - llmBackbone: run.llmBackbone - }; - - // Add other KDMAs as constraints if any exist - if (Object.keys(constraintKDMAs).length > 0) { - constraints.kdmas = constraintKDMAs; - } - - const validOptions = getValidOptionsForConstraints(constraints); - const validValues = Array.from(validOptions.kdmas[kdmaType] || []); - - // Snap to nearest valid value if we have valid values - let newValue = rawValue; - if (validValues.length > 0) { - newValue = validValues.reduce((closest, validValue) => - 
Math.abs(validValue - rawValue) < Math.abs(closest - rawValue) ? validValue : closest - ); - - // Update slider to show snapped value - if (newValue !== rawValue) { - sliderElement.value = newValue; - } - } - - // Update the display value immediately - const valueDisplay = document.getElementById(`kdma-value-${runId}-${kdmaType}`); - if (valueDisplay) { - valueDisplay.textContent = formatKDMAValue(newValue); - } - - currentKDMAs[kdmaType] = newValue; - - // Update the run state immediately to prevent bouncing - run.kdmaValues = currentKDMAs; - - // Update column parameters directly without validation - // since slider values are already validated - const params = getParametersForRun(runId); - params.kdmas = currentKDMAs; - columnParameters.set(runId, createParameterStructure(params)); + const kdmaOptions = run?.availableOptions?.kdmas; - // Debounce the reload to avoid too many requests while sliding - if (window.kdmaReloadTimeout) { - clearTimeout(window.kdmaReloadTimeout); - } - window.kdmaReloadTimeout = setTimeout(async () => { - await reloadPinnedRun(runId); - urlState.updateURL(); - }, 500); - }; - - // Internal function to load results without loading guard - async function loadResultsInternal() { - if (!appState.selectedScenario) { - // Message will be displayed in the table + await updateKDMAsForRun(runId, (kdmas) => { + const updated = { ...kdmas }; + delete updated[kdmaType]; - // Clear current data when no scenario - appState = updateCurrentData(appState, { - inputOutput: null, - inputOutputArray: null, - timing: null + // Check if the remaining combination is valid + const hasValidRemaining = kdmaOptions?.validCombinations?.some(combination => { + return KDMAUtils.deepEqual(updated, combination); }); - updateComparisonDisplay(); // Update table with no scenario state - return; - } - - const selectedKey = getSelectedKey(appState); - console.log( - "Attempting to load:", - selectedKey, - "Scenario:", - appState.selectedScenario, - ); - - // Handle new 
manifest structure with experiment_keys - const experiments = manifest.experiment_keys || manifest; - if ( - experiments[selectedKey] && - experiments[selectedKey].scenarios[appState.selectedScenario] - ) { - const dataPaths = experiments[selectedKey].scenarios[appState.selectedScenario]; - try { - const inputOutputArray = await (await fetch(dataPaths.input_output)).json(); - const timingData = await (await fetch(dataPaths.timing)).json(); - - // Extract the index from the scenario ID (e.g., "test_scenario_1-0" → 0) - const scenarioIndex = parseInt(appState.selectedScenario.split('-').pop()); - - // Get the specific element from each array using the index - const inputOutputItem = inputOutputArray[scenarioIndex]; - - // Helper function to format complex data structures cleanly - const formatValue = (value, depth = 0) => { - - if (value === null || value === undefined) { - return 'null'; - } - - if (typeof value === 'boolean') { - return `${value}`; - } - - if (typeof value === 'number') { - return `${value}`; - } - - if (typeof value === 'string') { - if (value.length > 100) { - return `
${value}
`; - } - return `${value}`; - } - - if (Array.isArray(value)) { - if (value.length === 0) { - return 'empty list'; - } - - let html = '
'; - value.forEach((item, index) => { - html += `
`; - html += `${index + 1}. `; - html += formatValue(item, depth + 1); - html += '
'; - }); - html += '
'; - return html; - } - - if (typeof value === 'object') { - const keys = Object.keys(value); - if (keys.length === 0) { - return 'empty object'; - } - - let html = '
'; - keys.forEach(key => { - html += `
`; - html += `${key}: `; - html += formatValue(value[key], depth + 1); - html += '
'; - }); - html += '
'; - return html; - } - - return String(value); - }; - - // Content will be displayed via the comparison table - - // Store current data for pinning - appState = updateCurrentData(appState, { - inputOutput: inputOutputItem, - inputOutputArray: inputOutputArray, - timing: timingData - }); - - - // Update comparison display (always-on table mode) - updateComparisonDisplay(); - } catch (error) { - console.error("Error fetching experiment data:", error); - // Error will be displayed in the table - - // Clear current data on error - appState = updateCurrentData(appState, { - inputOutput: null, - inputOutputArray: null, - timing: null - }); - updateComparisonDisplay(); // Update table with error state - } - } else { - // Try to find a fallback experiment key with run variant - let fallbackKey = null; - - // Look for keys that start with the base pattern - const availableKeys = Object.keys(experiments); - const basePattern = selectedKey; - - // First, try to find exact match with available variants - for (const key of availableKeys) { - if (key.startsWith(basePattern + '_') && experiments[key].scenarios[appState.selectedScenario]) { - fallbackKey = key; - break; - } - } - if (fallbackKey) { - console.log(`Using fallback key: ${fallbackKey} for requested key: ${selectedKey}`); - - // Auto-update the app state to use the run variant found - const variantSuffix = fallbackKey.substring(basePattern.length + 1); - if (variantSuffix) { - appState.selectedRunVariant = variantSuffix; - console.log(`Auto-selected run variant: ${variantSuffix}`); - } + // If remaining combination is not valid but empty combination is available, + // clear all KDMAs to reach the unaligned state + if (!hasValidRemaining) { + const hasEmptyOption = kdmaOptions?.validCombinations?.some(combination => { + return Object.keys(combination).length === 0; + }); - const dataPaths = experiments[fallbackKey].scenarios[appState.selectedScenario]; - try { - const inputOutputArray = await (await 
fetch(dataPaths.input_output)).json(); - const timingData = await (await fetch(dataPaths.timing)).json(); - - const scenarioIndex = parseInt(appState.selectedScenario.split('-').pop()); - const inputOutputItem = inputOutputArray[scenarioIndex]; - - appState = updateCurrentData(appState, { - inputOutput: inputOutputItem, - inputOutputArray: inputOutputArray, - timing: timingData - }); - - updateComparisonDisplay(); - return; - } catch (error) { - console.error("Error fetching fallback experiment data:", error); + if (hasEmptyOption) { + return {}; // Clear all KDMAs to reach unaligned state } } - // No data message will be displayed in the table - console.warn(`No data found for key: ${selectedKey}, scenario: ${appState.selectedScenario}`); - - // Clear current data when no data found - appState.currentInputOutput = null; - appState.currentTiming = null; - updateComparisonDisplay(); // Update table with no data state - } - } - - - // Pure function to load experiment data for any parameter combination - async function loadExperimentData(scenario, admType, llmBackbone, kdmas, runVariant = null) { - if (!scenario) { - return { - inputOutput: null, - inputOutputArray: null, - timing: null, - error: 'No scenario provided' - }; - } + return updated; // Normal removal + }); + }; - // Generate experiment key from parameters using shared utility - let experimentKey = buildExperimentKey(admType, llmBackbone, kdmas); + // Handle KDMA type change for pinned run - global for onclick access + window.handleRunKDMATypeChange = async function(runId, oldKdmaType, newKdmaType) { + const availableKDMAs = getValidKDMAsForRun(runId); - // Add run variant if provided - if (runVariant) { - experimentKey += `_${runVariant}`; - } - - console.log("Loading experiment data:", experimentKey, "Scenario:", scenario); - - // Handle new manifest structure with experiment_keys - const experiments = manifest.experiment_keys || manifest; - if ( - experiments[experimentKey] && - 
experiments[experimentKey].scenarios[scenario] - ) { - const dataPaths = experiments[experimentKey].scenarios[scenario]; - try { - const inputOutputArray = await (await fetch(dataPaths.input_output)).json(); - const timingData = await (await fetch(dataPaths.timing)).json(); - - // Extract the index from the scenario ID (e.g., "test_scenario_1-0" → 0) - const scenarioIndex = parseInt(scenario.split('-').pop()); - - // Get the specific element from each array using the index - const inputOutputItem = inputOutputArray[scenarioIndex]; - - return { - inputOutput: inputOutputItem, - inputOutputArray: inputOutputArray, - timing: timingData, - experimentKey: experimentKey, - error: null - }; - } catch (error) { - console.error("Error fetching experiment data:", error); - return { - inputOutput: null, - inputOutputArray: null, - timing: null, - experimentKey: experimentKey, - error: error.message - }; - } - } else { - // Try to find a fallback experiment key - let fallbackKey = null; + await updateKDMAsForRun(runId, (kdmas) => { + const updated = { ...kdmas }; + const currentValue = updated[oldKdmaType]; - // If a run variant was requested, try falling back to the base key (without run variant) - if (runVariant) { - const baseKey = buildExperimentKey(admType, llmBackbone, kdmas); - if (experiments[baseKey] && experiments[baseKey].scenarios[scenario]) { - fallbackKey = baseKey; - console.log(`Fallback: Using base key without run variant: ${fallbackKey} for requested key: ${experimentKey}`); - } - } + // Remove old type + delete updated[oldKdmaType]; - // If no fallback found yet, try to find any other variant for the same base - if (!fallbackKey) { - const availableKeys = Object.keys(experiments); - const baseKey = runVariant ? 
buildExperimentKey(admType, llmBackbone, kdmas) : experimentKey; - - // Look for keys that match the base pattern (either exact or with variants) - for (const key of availableKeys) { - if ((key === baseKey || key.startsWith(baseKey + '_')) && experiments[key].scenarios[scenario]) { - fallbackKey = key; - break; - } - } - } + // Get valid values for new type and adjust value if needed + const validValues = availableKDMAs[newKdmaType] || []; + let newValue = currentValue; - if (fallbackKey) { - console.log(`Using fallback key: ${fallbackKey} for requested key: ${experimentKey}`); - const dataPaths = experiments[fallbackKey].scenarios[scenario]; - try { - const inputOutputArray = await (await fetch(dataPaths.input_output)).json(); - const timingData = await (await fetch(dataPaths.timing)).json(); - - const scenarioIndex = parseInt(scenario.split('-').pop()); - const inputOutputItem = inputOutputArray[scenarioIndex]; - - return { - inputOutput: inputOutputItem, - inputOutputArray: inputOutputArray, - timing: timingData, - experimentKey: fallbackKey, // Return the actual key used - error: null - }; - } catch (error) { - console.error("Error fetching fallback experiment data:", error); - } + if (validValues.length > 0 && !validValues.some(v => Math.abs(v - newValue) < FLOATING_POINT_TOLERANCE)) { + newValue = validValues[0]; // Use first valid value } - // Generate debug information to help identify the issue - const similarKeys = Object.keys(experiments).filter(key => - key.startsWith(`${experimentKey.split('_')[0]}_${experimentKey.split('_')[1]}_`) - ); - - console.warn(`No data found for key: ${experimentKey}, scenario: ${scenario}`); - console.warn(`Available similar keys:`, similarKeys); - - return { - inputOutput: null, - inputOutputArray: null, - timing: null, - experimentKey: experimentKey, - error: `No experiment data found for ${experimentKey} with scenario ${scenario}` - }; - } - } - - // Function to load and display results for current run - async function 
loadResults() { - if (appState.isUpdatingProgrammatically) { - // Don't update results while we're in the middle of updating dropdowns - return; - } - - await loadResultsInternal(); - } + updated[newKdmaType] = newValue; + return updated; + }); + }; - // Pin current run to comparison - function pinCurrentRun() { - if (!appState.currentInputOutput) { - showNotification('No data to pin - load a configuration first', 'error'); - return; - } - - const runConfig = appState.createRunConfig(); + // Handle KDMA slider input for pinned run - global for onclick access + window.handleRunKDMASliderInput = async function(runId, kdmaType, sliderElement) { + const run = appState.pinnedRuns.get(runId); + if (!run) return; - // Store complete run data - const pinnedData = { - ...runConfig, - inputOutput: appState.currentInputOutput, - inputOutputArray: appState.currentInputOutputArray, - timing: appState.currentTiming, - loadStatus: 'loaded' - }; + const normalizedValue = KDMAUtils.normalizeValue(sliderElement.value); - appState.pinnedRuns.set(runConfig.id, pinnedData); - updateComparisonDisplay(); + // Update the display value immediately for responsiveness + const valueDisplay = document.getElementById(`kdma-value-${runId}-${kdmaType}`); + if (valueDisplay) { + valueDisplay.textContent = formatKDMAValue(normalizedValue); + } - // Update URL after pinning - urlState.updateURL(); - } + // Update the KDMA values with debouncing + await updateKDMAsForRun(runId, (kdmas) => ({ + ...kdmas, + [kdmaType]: normalizedValue + }), { + updateURL: true, + debounceMs: KDMA_SLIDER_DEBOUNCE_MS // Debounce to avoid too many requests while sliding + }); + }; - // Pin run from configuration (for URL restoration) - async function pinRunFromConfig(runConfig) { - // Set app state to match the configuration - appState.selectedBaseScenario = runConfig.baseScenario; - appState.selectedScenario = runConfig.scenario; - appState.selectedAdmType = runConfig.admType; - appState.selectedLLM = 
runConfig.llmBackbone; - appState.activeKDMAs = { ...runConfig.kdmaValues }; - - // Load the results for this configuration - try { - await loadResultsForConfig(runConfig); - - // Store complete run data - const pinnedData = { - ...runConfig, - inputOutput: appState.currentInputOutput, - inputOutputArray: appState.currentInputOutputArray, - timing: appState.currentTiming, - loadStatus: 'loaded' - }; - - appState.pinnedRuns.set(runConfig.id, pinnedData); - - } catch (error) { - console.warn('Failed to load data for pinned configuration:', error); - // Still add to pinned runs but mark as failed - const pinnedData = { - ...runConfig, - inputOutput: null, - timing: null, - loadStatus: 'error' - }; - appState.pinnedRuns.set(runConfig.id, pinnedData); - } - } - // Reload data for a specific pinned run after parameter changes (pure approach) async function reloadPinnedRun(runId) { const run = appState.pinnedRuns.get(runId); @@ -1459,54 +471,50 @@ document.addEventListener("DOMContentLoaded", () => { // Prevent concurrent reloads for the same run if (run.isReloading) { - console.log(`Skipping reload for run ${runId} - already in progress`); return; } - console.log(`Reloading data for run ${runId}`); - // Mark as reloading to prevent concurrent requests run.isReloading = true; // Show loading state run.loadStatus = 'loading'; - updateComparisonDisplay(); - + renderComparisonTable(); + // Get updated parameters from columnParameters const params = getParametersForRun(runId); + try { - // Load new data using pure function - no global state modification - const experimentData = await loadExperimentData( - params.scenario, - params.admType, - params.llmBackbone, - params.kdmas, - params.runVariant - ); + // Load new data using fetchRunData + const experimentData = await fetchRunData({ + scenario: params.scenario, + scene: params.scene, + admType: params.admType, + llmBackbone: params.llmBackbone, + kdmaValues: params.kdmas, + runVariant: params.runVariant + }); // Always 
update run parameters to reflect the intended state run.scenario = params.scenario; - run.baseScenario = params.baseScenario; + run.scene = params.scene; run.admType = params.admType; run.llmBackbone = params.llmBackbone; run.runVariant = params.runVariant; run.kdmaValues = { ...params.kdmas }; - if (experimentData.error) { - console.error(`Failed to load data for run ${runId}:`, experimentData.error); + if (!experimentData || !experimentData.inputOutput) { + console.error(`Failed to load data for run ${runId}: No data returned`); run.loadStatus = 'error'; - // Keep existing data but update parameters - run.experimentKey = experimentData.experimentKey || run.experimentKey; } else { // Update with new results run.experimentKey = experimentData.experimentKey; run.inputOutput = experimentData.inputOutput; run.inputOutputArray = experimentData.inputOutputArray; run.timing = experimentData.timing; + run.timing_s = experimentData.timing_s; run.loadStatus = 'loaded'; - - console.log(`Successfully reloaded run ${runId} with new data`); } } catch (error) { @@ -1514,7 +522,7 @@ document.addEventListener("DOMContentLoaded", () => { // Even on exception, update run parameters to reflect the intended state run.scenario = params.scenario; - run.baseScenario = params.baseScenario; + run.scene = params.scene; run.admType = params.admType; run.llmBackbone = params.llmBackbone; run.runVariant = params.runVariant; @@ -1525,48 +533,10 @@ document.addEventListener("DOMContentLoaded", () => { run.isReloading = false; } - // Re-render the comparison table (current run data is unaffected) - updateComparisonDisplay(); - } - - - // Load results for a specific configuration - async function loadResultsForConfig(config) { - // Temporarily set state to this config - const originalState = { - selectedBaseScenario: appState.selectedBaseScenario, - selectedScenario: appState.selectedScenario, - selectedAdmType: appState.selectedAdmType, - selectedLLM: appState.selectedLLM, - activeKDMAs: { 
...appState.activeKDMAs } - }; - - // Set state to the config - appState.selectedBaseScenario = config.baseScenario; - appState.selectedScenario = config.scenario; - appState.selectedAdmType = config.admType; - appState.selectedLLM = config.llmBackbone; - appState.activeKDMAs = { ...config.kdmaValues }; - - try { - // Load results using existing logic - await loadResults(); - } finally { - // Restore original state - appState.selectedBaseScenario = originalState.selectedBaseScenario; - appState.selectedScenario = originalState.selectedScenario; - appState.selectedAdmType = originalState.selectedAdmType; - appState.selectedLLM = originalState.selectedLLM; - appState.activeKDMAs = originalState.activeKDMAs; - } - } - - // Update the comparison display with current + pinned runs - function updateComparisonDisplay() { - // Always use table mode - this is the "Always-On Comparison Mode" renderComparisonTable(); } + // Render the comparison table with pinned runs only function renderComparisonTable() { const container = document.getElementById('runs-container'); @@ -1650,7 +620,7 @@ document.addEventListener("DOMContentLoaded", () => { const parameters = new Map(); // Configuration parameters - parameters.set("base_scenario", { type: "string", required: true }); + parameters.set("scene", { type: "string", required: true }); parameters.set("scenario", { type: "string", required: true }); parameters.set("scenario_state", { type: "longtext", required: false }); parameters.set("available_choices", { type: "choices", required: false }); @@ -1677,7 +647,7 @@ document.addEventListener("DOMContentLoaded", () => { if (!run) return 'N/A'; // Configuration parameters - if (paramName === 'base_scenario') return run.baseScenario || 'N/A'; + if (paramName === 'scene') return run.scene || 'N/A'; if (paramName === 'scenario') return run.scenario || 'N/A'; if (paramName === 'adm_type') return run.admType || 'N/A'; if (paramName === 'llm_backbone') return run.llmBackbone || 'N/A'; @@ 
-1702,222 +672,123 @@ document.addEventListener("DOMContentLoaded", () => { if (paramName === 'adm_decision' && run.inputOutput?.output && run.inputOutput?.input?.choices) { const choiceIndex = run.inputOutput.output.choice; const choices = run.inputOutput.input.choices; - if (typeof choiceIndex === 'number' && choices[choiceIndex]) { - return choices[choiceIndex].unstructured || choices[choiceIndex].action_id || 'N/A'; - } - return 'N/A'; - } - - // Justification - proper path using Pydantic model structure - if (paramName === 'justification' && run.inputOutput?.output?.action) { - return run.inputOutput.output.action.justification || 'N/A'; - } - - // Timing data - if (paramName === 'probe_time' && run.timing && run.scenario) { - try { - // Extract the scenario index from the scenario ID (e.g., "test_scenario_1-0" → 0) - const scenarioIndex = parseInt(run.scenario.split('-').pop()); - if (scenarioIndex >= 0 && run.timing.raw_times_s && run.timing.raw_times_s[scenarioIndex] !== undefined) { - return run.timing.raw_times_s[scenarioIndex].toFixed(2); - } - } catch (error) { - console.warn('Error getting individual probe time:', error); - } - return 'N/A'; - } - - // Raw Data - if (paramName === 'input_output_json') { - if (run.inputOutputArray && run.scenario) { - try { - // Extract the scenario index from the scenario ID (e.g., "test_scenario_1-0" → 0) - const scenarioIndex = parseInt(run.scenario.split('-').pop()); - - if (scenarioIndex >= 0 && Array.isArray(run.inputOutputArray) && run.inputOutputArray[scenarioIndex]) { - return run.inputOutputArray[scenarioIndex]; - } - } catch (error) { - console.warn('Error getting input/output JSON:', error); - } + if (typeof choiceIndex === 'number' && choices[choiceIndex]) { + return choices[choiceIndex].unstructured || choices[choiceIndex].action_id || 'N/A'; } return 'N/A'; } - return 'N/A'; - } - - // Create dropdown HTML for LLM selection in table cells - function createLLMDropdownForRun(runId, currentValue) { - const 
run = appState.pinnedRuns.get(runId); - if (!run) return escapeHtml(currentValue); - - const validOptions = getValidOptionsForConstraints({ - scenario: run.scenario, - admType: run.admType - }); - const validLLMs = Array.from(validOptions.llmBackbones).sort(); - - let html = `'; - - return html; - } - - // Create dropdown HTML for ADM type selection in table cells - function createADMDropdownForRun(runId, currentValue) { - const run = appState.pinnedRuns.get(runId); - if (!run) return escapeHtml(currentValue); + // Justification - proper path using Pydantic model structure + if (paramName === 'justification' && run.inputOutput?.output?.action) { + return run.inputOutput.output.action.justification || 'N/A'; + } - const validOptions = getValidOptionsForConstraints({ - scenario: run.scenario - }); - const validADMs = Array.from(validOptions.admTypes).sort(); + // Timing data - comes from scene timing_s in manifest + if (paramName === 'probe_time' && run.timing_s !== undefined && run.timing_s !== null) { + return run.timing_s.toFixed(2); + } - let html = `'; + // Raw Data - inputOutput is already the correct object for this scene + if (paramName === 'input_output_json' && run.inputOutput) { + return run.inputOutput; + } - return html; + return 'N/A'; } - // Create dropdown HTML for base scenario selection in table cells - function createBaseScenarioDropdownForRun(runId, currentValue) { - // Check if run exists - const run = appState.pinnedRuns.get(runId); - if (!run) return escapeHtml(currentValue); - - // For base scenario, we show all available base scenarios - const availableBaseScenarios = Array.from(appState.availableBaseScenarios).sort(); - - let html = `'; + // Generic dropdown creation function + function createDropdownForRun(runId, currentValue, options) { + const { + optionsPath, + cssClass, + onChangeHandler, + noOptionsMessage = null, + preCondition = null + } = options; - return html; - } - - // Create dropdown HTML for specific scenario selection in 
table cells - function createSpecificScenarioDropdownForRun(runId, currentValue) { - // Check if run exists const run = appState.pinnedRuns.get(runId); if (!run) return escapeHtml(currentValue); - const baseScenarioId = run.baseScenario; + // Check pre-condition if provided + if (preCondition && !preCondition(run)) { + return noOptionsMessage || HTML_NA_SPAN; + } - if (!baseScenarioId) { - return 'No base scenario'; + // Get options from the specified path in run.availableOptions + const availableOptions = optionsPath.split('.').reduce((obj, key) => obj?.[key], run.availableOptions); + if (!availableOptions || availableOptions.length === 0) { + return noOptionsMessage || HTML_NO_OPTIONS_SPAN; } - const matchingScenarios = Array.from(appState.availableScenarios).filter((scenarioId) => { - const extractedBase = scenarioId.replace(/-\d+$/, ""); - return extractedBase === baseScenarioId; - }); + const sortedOptions = [...availableOptions].sort(); - if (matchingScenarios.length === 0) { - return 'No scenarios available'; - } + // Always disable dropdowns when there are few options + const isDisabled = availableOptions.length <= 1; + const disabledAttr = isDisabled ? 'disabled' : ''; - let html = ``; + sortedOptions.forEach(option => { + const selected = option === currentValue ? 
'selected' : ''; + html += ``; }); html += ''; return html; } + // Dropdown configuration for different parameter types + const DROPDOWN_CONFIGS = { + llm: { + optionsPath: 'llms', + cssClass: CSS_TABLE_LLM_SELECT, + onChangeHandler: 'handleRunLLMChange' + }, + adm: { + optionsPath: 'admTypes', + cssClass: CSS_TABLE_ADM_SELECT, + onChangeHandler: 'handleRunADMChange' + }, + scene: { + optionsPath: 'scenes', + cssClass: CSS_TABLE_SCENARIO_SELECT, + onChangeHandler: 'handleRunSceneChange' + }, + scenario: { + optionsPath: 'scenarios', + cssClass: CSS_TABLE_SCENARIO_SELECT, + onChangeHandler: 'handleRunScenarioChange', + preCondition: (run) => run.scene, + noOptionsMessage: HTML_NO_SCENE_SPAN + } + }; + + // Generic dropdown creation factory + const createDropdownForParameter = (parameterType) => { + return (runId, currentValue) => { + const config = DROPDOWN_CONFIGS[parameterType]; + return createDropdownForRun(runId, currentValue, config); + }; + }; + + // Create dropdown functions using the factory + const createLLMDropdownForRun = createDropdownForParameter('llm'); + const createADMDropdownForRun = createDropdownForParameter('adm'); + const createSceneDropdownForRun = createDropdownForParameter('scene'); + const createSpecificScenarioDropdownForRun = createDropdownForParameter('scenario'); + // Create dropdown HTML for run variant selection in table cells function createRunVariantDropdownForRun(runId, currentValue) { const run = appState.pinnedRuns.get(runId); if (!run) return escapeHtml(currentValue); - // Use the run's actual runVariant instead of the passed currentValue - // This ensures we show the correct selection after parameter updates + // Use the run's actual runVariant to ensure correct selection after parameter updates const actualCurrentValue = run.runVariant; - - // Get available run variants for the current ADM+LLM+KDMA combination - // Use the same buildExperimentKey function that's used throughout the app - const baseKey = 
buildExperimentKey(run.admType, run.llmBackbone, run.kdmaValues); - - // Find all experiment keys that match this base pattern AND have data for the current scenario - const availableVariants = new Set(); - let hasExactMatch = false; - - for (const experimentKey of Object.keys(manifest.experiment_keys || {})) { - const experiment = manifest.experiment_keys[experimentKey]; - - // Only consider variants that have data for the current scenario - if (!experiment.scenarios[run.scenario]) { - continue; - } - - if (experimentKey === baseKey) { - hasExactMatch = true; - availableVariants.add('default'); - } else if (experimentKey.startsWith(baseKey + '_')) { - // Extract potential run variant from the key - const suffix = experimentKey.substring(baseKey.length + 1); // Remove base key and underscore - - // Only consider as run variant if it's NOT a KDMA extension - // KDMA extensions follow pattern: kdma-value (e.g., merit-0.0, affiliation-1.0) - // Run variants are typically words/phrases (e.g., greedy_w_cache, rerun) - const isKDMAExtension = /^[a-z_]+-(0\.?\d*|1\.0?)$/.test(suffix); - - if (!isKDMAExtension) { - availableVariants.add(suffix); - } - } - } - - // If no exact match for base key, don't add default option - // Just show available variants without auto-selection - - // Add default option only if base key exists without variant AND has data for current scenario - if (hasExactMatch) { - availableVariants.add('default'); - } - - // If no variants found, try to extract from the current run's experiment key - if (availableVariants.size === 0) { - // Try to extract run variant from the current experiment key being used - if (run.experimentKey && run.experimentKey.startsWith(baseKey + '_')) { - const extractedVariant = run.experimentKey.substring(baseKey.length + 1); - return escapeHtml(extractedVariant); - } - return escapeHtml(actualCurrentValue || 'N/A'); - } - - // If only one variant, show it without dropdown - if (availableVariants.size === 1) { - const 
variant = Array.from(availableVariants)[0]; - const displayValue = variant === 'default' ? '(default)' : variant; - return escapeHtml(displayValue); - } - - const sortedVariants = Array.from(availableVariants).sort(); - - let html = `'; - - return html; } // Get max KDMAs allowed for a specific run based on its constraints and current selections @@ -1925,91 +796,113 @@ document.addEventListener("DOMContentLoaded", () => { const run = appState.pinnedRuns.get(runId); if (!run) return 0; - // First check if we can add more KDMAs given current constraints - const currentKDMAs = run.kdmaValues || {}; - const currentCount = Object.keys(currentKDMAs).length; + const kdmaOptions = run.availableOptions?.kdmas; + if (!kdmaOptions || !kdmaOptions.validCombinations) { + return 1; // Default to at least 1 KDMA if no options available + } - // Try to see if adding one more KDMA is possible - const constraints = { - scenario: run.scenario, - admType: run.admType, - llmBackbone: run.llmBackbone - }; + // Find the maximum number of KDMAs in any valid combination + let maxKDMAs = 0; + kdmaOptions.validCombinations.forEach(combination => { + maxKDMAs = Math.max(maxKDMAs, Object.keys(combination).length); + }); - // If we have current KDMAs, include them as constraints - if (currentCount > 0) { - constraints.kdmas = { ...currentKDMAs }; + return Math.max(maxKDMAs, 1); // At least 1 KDMA should be possible + } + + // Get minimum required KDMAs for a run - if all combinations have the same count, return that count + function getMinimumRequiredKDMAs(runId) { + const run = appState.pinnedRuns.get(runId); + if (!run?.availableOptions?.kdmas?.validCombinations) { + return 1; // Default to 1 if no options available } - const validOptions = getValidOptionsForConstraints(constraints); - const availableTypes = Object.keys(validOptions.kdmas || {}).filter(type => - !currentKDMAs[type] - ); - - // If we can add more types, max is at least current + 1 - if (availableTypes.length > 0) { - return 
currentCount + 1; + const combinations = run.availableOptions.kdmas.validCombinations; + if (combinations.length === 0) { + return 1; } - // Otherwise, check what we actually have experimentally - const experiments = manifest.experiment_keys || manifest; - let maxKDMAs = currentCount; + // Filter out empty combinations (unaligned cases with 0 KDMAs) + const nonEmptyCombinations = combinations.filter(combination => Object.keys(combination).length > 0); - for (const expKey in experiments) { - if (expKey.startsWith(`${run.admType}_${run.llmBackbone}_`) && - experiments[expKey].scenarios && - experiments[expKey].scenarios[run.scenario]) { - - // Count KDMAs in this experiment key - const keyParts = expKey.split('_'); - let kdmaCount = 0; - for (let i = 2; i < keyParts.length; i++) { - if (keyParts[i].includes('-')) { - kdmaCount++; - } - } - maxKDMAs = Math.max(maxKDMAs, kdmaCount); - } + if (nonEmptyCombinations.length === 0) { + return 1; // Only empty combinations available } - return Math.max(maxKDMAs, 1); // At least 1 KDMA should be possible + // Get the count of KDMAs in each non-empty combination + const kdmaCounts = nonEmptyCombinations.map(combination => Object.keys(combination).length); + + // Check if all non-empty combinations have the same number of KDMAs + const firstCount = kdmaCounts[0]; + const allSameCount = kdmaCounts.every(count => count === firstCount); + + if (allSameCount && firstCount > 1) { + return firstCount; // All non-empty combinations require the same number > 1 + } + return 1; // Either mixed counts or all require 1, use single-add behavior } // Get valid KDMAs for a specific run function getValidKDMAsForRun(runId) { const run = appState.pinnedRuns.get(runId); - if (!run) return {}; - - // Include current KDMAs as constraints to ensure we only get valid combinations - const constraints = { - scenario: run.scenario, - admType: run.admType, - llmBackbone: run.llmBackbone - }; - - // If there are existing KDMAs, include them as constraints 
- if (run.kdmaValues && Object.keys(run.kdmaValues).length > 0) { - constraints.kdmas = { ...run.kdmaValues }; + if (!run?.availableOptions?.kdmas?.validCombinations) { + return {}; } - const validOptions = getValidOptionsForConstraints(constraints); + // Extract all available types and values from valid combinations + const availableOptions = {}; + run.availableOptions.kdmas.validCombinations.forEach(combination => { + Object.entries(combination).forEach(([kdmaType, value]) => { + if (!availableOptions[kdmaType]) { + availableOptions[kdmaType] = new Set(); + } + availableOptions[kdmaType].add(value); + }); + }); - return validOptions.kdmas; + return availableOptions; } - // Check if removing KDMAs is allowed for a run (i.e., experiments exist without KDMAs) - function canRemoveKDMAsForRun(runId) { + // Get valid KDMA types that can be selected for a specific run + function getValidKDMATypesForRun(runId, currentKdmaType, currentKDMAs) { const run = appState.pinnedRuns.get(runId); - if (!run) return false; + if (!run?.availableOptions?.kdmas?.validCombinations) { + return [currentKdmaType]; // Fallback to just current type + } - // Check if there are any experiments for this ADM/LLM combination without KDMAs - const experiments = manifest.experiment_keys || manifest; - const baseKey = `${run.admType}_${run.llmBackbone}`; + const validTypes = new Set([currentKdmaType]); // Always include current type + + // For each unused KDMA type, check if replacing current type would create valid combination + const availableKDMAs = getValidKDMAsForRun(runId); + Object.keys(availableKDMAs).forEach(kdmaType => { + // Skip if this type is already used (except current one we're replacing) + if (kdmaType !== currentKdmaType && currentKDMAs[kdmaType] !== undefined) { + return; + } + + // Test if this type can be used by checking valid combinations + const testKDMAs = { ...currentKDMAs }; + delete testKDMAs[currentKdmaType]; // Remove current type + + // If we're adding a different 
type, add it with any valid value + if (kdmaType !== currentKdmaType) { + const validValues = Array.from(availableKDMAs[kdmaType] || []); + if (validValues.length > 0) { + testKDMAs[kdmaType] = validValues[0]; // Use first valid value for testing + } + } + + // Check if this combination exists in validCombinations + const isValidCombination = run.availableOptions.kdmas.validCombinations.some(combination => { + return KDMAUtils.deepEqual(testKDMAs, combination); + }); + + if (isValidCombination) { + validTypes.add(kdmaType); + } + }); - // Look for experiments that match the base key exactly (no KDMAs) - return experiments.hasOwnProperty(baseKey) && - experiments[baseKey].scenarios && - experiments[baseKey].scenarios[run.scenario]; + return Array.from(validTypes).sort(); } // Check if a specific KDMA can be removed from a run @@ -2018,100 +911,83 @@ document.addEventListener("DOMContentLoaded", () => { if (!run) return false; const currentKDMAs = run.kdmaValues || {}; + const kdmaOptions = run.availableOptions?.kdmas; + if (!kdmaOptions || !kdmaOptions.validCombinations) { + return false; + } // Create a copy of current KDMAs without the one we want to remove const remainingKDMAs = { ...currentKDMAs }; delete remainingKDMAs[kdmaType]; - // If no KDMAs would remain, use the original canRemoveKDMAsForRun check - if (Object.keys(remainingKDMAs).length === 0) { - return canRemoveKDMAsForRun(runId); + // Check if the remaining KDMA combination exists in validCombinations + const hasValidRemaining = kdmaOptions.validCombinations.some(combination => { + return KDMAUtils.deepEqual(remainingKDMAs, combination); + }); + + if (hasValidRemaining) { + return true; // Normal case - remaining combination is valid + } + + // Special case: If empty combination {} is valid (unaligned case), + // allow removal of any KDMA (will result in clearing all KDMAs) + const hasEmptyOption = kdmaOptions.validCombinations.some(combination => { + return Object.keys(combination).length === 0; + 
}); + + if (hasEmptyOption) { + return true; } - // Check if experiments exist with the remaining KDMAs for this specific scenario - // We need to directly check the manifest instead of using getValidOptionsForConstraints - // because that function might be too permissive - return checkExperimentExistsForScenario(run.scenario, run.admType, run.llmBackbone, remainingKDMAs); + return false; } // Format KDMA value consistently across the application function formatKDMAValue(value) { - return typeof value === 'number' ? value.toFixed(1) : value; + return KDMAUtils.formatValue(value); } - // Generate experiment key from parameters (shared utility function) - function buildExperimentKey(admType, llmBackbone, kdmas) { - const kdmaParts = []; - Object.entries(kdmas || {}).forEach(([kdma, value]) => { - kdmaParts.push(`${kdma}-${formatKDMAValue(value)}`); - }); - const kdmaString = kdmaParts.sort().join("_"); - return kdmaString ? `${admType}_${llmBackbone}_${kdmaString}` : `${admType}_${llmBackbone}`; - } - - // Check if experiments exist for a specific scenario with given parameters - function checkExperimentExistsForScenario(scenario, admType, llmBackbone, kdmas) { - const experiments = manifest.experiment_keys || manifest; - - // Build the experiment key using shared utility - const experimentKey = buildExperimentKey(admType, llmBackbone, kdmas); - - // Check if this experiment exists and has the target scenario - if (experiments[experimentKey] && - experiments[experimentKey].scenarios && - experiments[experimentKey].scenarios[scenario]) { - return true; + // Check if we can add another KDMA given current KDMA values + function canAddKDMAToRun(runId, currentKDMAs) { + const run = appState.pinnedRuns.get(runId); + if (!run?.availableOptions?.kdmas?.validCombinations) { + return false; } - // If direct key lookup fails, try all possible orderings of KDMAs - // since the experiment keys might have different KDMA ordering - const kdmaKeys = Object.keys(kdmas || {}); - if 
(kdmaKeys.length > 1) { - const permutations = getKDMAPermutations(kdmaKeys); - for (const permutation of permutations) { - const reorderedKdmas = {}; - permutation.forEach(kdmaName => { - if (kdmas[kdmaName] !== undefined) { - reorderedKdmas[kdmaName] = kdmas[kdmaName]; - } - }); - - const altKey = buildExperimentKey(admType, llmBackbone, reorderedKdmas); - if (experiments[altKey] && - experiments[altKey].scenarios && - experiments[altKey].scenarios[scenario]) { - return true; - } - } + const currentKDMAEntries = Object.entries(currentKDMAs || {}); + const maxKDMAs = getMaxKDMAsForRun(runId); + + // First check if we're already at max + if (currentKDMAEntries.length >= maxKDMAs) { + return false; } - return false; - } - - // Generate all permutations of KDMA keys for experiment key lookup - function getKDMAPermutations(kdmaKeys) { - if (kdmaKeys.length <= 1) return [kdmaKeys]; - - const permutations = []; - for (let i = 0; i < kdmaKeys.length; i++) { - const rest = kdmaKeys.slice(0, i).concat(kdmaKeys.slice(i + 1)); - const restPermutations = getKDMAPermutations(rest); - for (const perm of restPermutations) { - permutations.push([kdmaKeys[i]].concat(perm)); + // Check if there are any valid combinations that: + // 1. Include all current KDMAs with their exact values + // 2. 
Have at least one additional KDMA + return run.availableOptions.kdmas.validCombinations.some(combination => { + + const combinationKeys = Object.keys(combination); + if (combinationKeys.length <= currentKDMAEntries.length) { + return false; } - } - return permutations; + + // Check if this combination includes all current KDMAs with matching values + return currentKDMAEntries.every(([kdmaType, value]) => { + return combination.hasOwnProperty(kdmaType) && + Math.abs(combination[kdmaType] - value) < FLOATING_POINT_TOLERANCE; + }); + }); } // Create KDMA controls HTML for table cells function createKDMAControlsForRun(runId, currentKDMAs) { const run = appState.pinnedRuns.get(runId); - if (!run) return 'N/A'; + if (!run) return HTML_NA_SPAN; - const maxKDMAs = getMaxKDMAsForRun(runId); const currentKDMAEntries = Object.entries(currentKDMAs || {}); - const canAddMore = currentKDMAEntries.length < maxKDMAs; + const canAddMore = canAddKDMAToRun(runId, currentKDMAs); let html = `
`; @@ -2121,22 +997,12 @@ document.addEventListener("DOMContentLoaded", () => { }); // Add button - always show but enable/disable based on availability - const availableKDMAs = getValidKDMAsForRun(runId); - const availableTypes = Object.keys(availableKDMAs).filter(type => - !currentKDMAs || currentKDMAs[type] === undefined - ); - - const canAdd = canAddMore && availableTypes.length > 0; - const disabledAttr = canAdd ? '' : 'disabled'; + const disabledAttr = canAddMore ? '' : 'disabled'; // Determine tooltip text for disabled state let tooltipText = ''; - if (!canAdd) { - if (!canAddMore) { - tooltipText = `title="Maximum KDMAs reached (${maxKDMAs})"`; - } else { - tooltipText = 'title="All available KDMA types have been added"'; - } + if (!canAddMore) { + tooltipText = 'title="No valid KDMA combinations available with current values"'; } html += ` +
`; + } + + // Format KDMA association bar for choice display + function formatKDMAAssociationBar(kdma, val) { + const percentage = Math.round(val * 100); + const color = val >= 0.7 ? '#28a745' : val >= 0.4 ? '#ffc107' : '#dc3545'; + return `
+ ${kdma} +
+
+
+ ${val.toFixed(2)} +
`; + } + + // Format single choice item with KDMA associations + function formatChoiceItem(choice) { + let html = `
+
${escapeHtml(choice.unstructured || choice.description || 'No description')}
`; + + // Add KDMA associations if available + if (choice.kdma_association) { + html += '
'; + html += '
KDMA Association Truth
'; + Object.entries(choice.kdma_association).forEach(([kdma, val]) => { + html += formatKDMAAssociationBar(kdma, val); + }); + html += '
'; + } + html += '
'; + return html; + } + + // Format choices array for display + function formatChoicesValue(choices) { + if (!Array.isArray(choices)) { + return escapeHtml(choices.toString()); + } + + let html = '
'; + choices.forEach((choice) => { + html += formatChoiceItem(choice); + }); + html += '
'; + return html; + } + + // Format KDMA values object for display + function formatKDMAValuesObject(kdmaObject) { + const kdmaEntries = Object.entries(kdmaObject); + if (kdmaEntries.length === 0) { + return HTML_NO_KDMAS_SPAN; } + let html = '
'; + kdmaEntries.forEach(([kdmaName, kdmaValue]) => { + html += `
+ ${escapeHtml(kdmaName)}: + ${formatKDMAValue(kdmaValue)} +
`; + }); + html += '
'; + return html; + } + + // Format values for display in table cells + function formatValue(value, type, paramName = '', runId = '') { if (value === null || value === undefined || value === 'N/A') { - return 'N/A'; + return HTML_NA_SPAN; } - // Special handling for editable parameters in pinned runs - if (runId !== 'current' && runId !== '') { - if (paramName === 'llm_backbone') { - return createLLMDropdownForRun(runId, value); - } - if (paramName === 'adm_type') { - return createADMDropdownForRun(runId, value); - } - if (paramName === 'base_scenario') { - return createBaseScenarioDropdownForRun(runId, value); - } - if (paramName === 'scenario') { - return createSpecificScenarioDropdownForRun(runId, value); - } - if (paramName === 'kdma_values') { - return createKDMAControlsForRun(runId, value); - } + // Handle dropdown parameters for pinned runs + if (runId !== '' && PARAMETER_DROPDOWN_HANDLERS[paramName]) { + return PARAMETER_DROPDOWN_HANDLERS[paramName](runId, value); } switch (type) { @@ -2234,21 +1192,9 @@ document.addEventListener("DOMContentLoaded", () => { return typeof value === 'number' ? value.toFixed(3) : value.toString(); case 'longtext': - if (typeof value === 'string' && value.length > 800) { - const truncated = value.substring(0, 800); - // Include runId for per-column state persistence + if (typeof value === 'string' && value.length > TEXT_PREVIEW_LENGTH) { const id = `text_${paramName}_${runId}_${type}`; - const isExpanded = expandableStates.text.get(id) || false; - - const shortDisplay = isExpanded ? 'none' : 'inline'; - const fullDisplay = isExpanded ? 'inline' : 'none'; - const buttonText = isExpanded ? 'Show Less' : 'Show More'; - - return `
- ${escapeHtml(truncated)}... - ${escapeHtml(value)} - -
`; + return createExpandableContent(value, id, true); } return escapeHtml(value.toString()); @@ -2256,75 +1202,14 @@ document.addEventListener("DOMContentLoaded", () => { return escapeHtml(value.toString()); case 'choices': - if (Array.isArray(value)) { - let choicesHtml = '
'; - value.forEach((choice) => { - choicesHtml += `
-
${escapeHtml(choice.unstructured || choice.description || 'No description')}
`; - - // Add KDMA associations if available - if (choice.kdma_association) { - choicesHtml += '
'; - choicesHtml += '
KDMA Association Truth
'; - Object.entries(choice.kdma_association).forEach(([kdma, val]) => { - const percentage = Math.round(val * 100); - const color = val >= 0.7 ? '#28a745' : val >= 0.4 ? '#ffc107' : '#dc3545'; - choicesHtml += `
- ${kdma} -
-
-
- ${val.toFixed(2)} -
`; - }); - choicesHtml += '
'; - } - choicesHtml += '
'; - }); - choicesHtml += '
'; - return choicesHtml; - } - return escapeHtml(value.toString()); + return formatChoicesValue(value); case 'kdma_values': - if (typeof value === 'object' && value !== null) { - const kdmaEntries = Object.entries(value); - if (kdmaEntries.length === 0) { - return 'No KDMAs'; - } - - let kdmaHtml = '
'; - kdmaEntries.forEach(([kdmaName, kdmaValue]) => { - kdmaHtml += `
- ${escapeHtml(kdmaName)}: - ${formatKDMAValue(kdmaValue)} -
`; - }); - kdmaHtml += '
'; - return kdmaHtml; - } - return 'N/A'; + return formatKDMAValuesObject(value); case 'object': - if (typeof value === 'object') { - // Include runId for per-column state persistence - const id = `object_${paramName}_${runId}_${type}`; - const isExpanded = expandableStates.objects.get(id) || false; - - const preview = getObjectPreview(value); - const fullJson = JSON.stringify(value, null, 2); - - const previewDisplay = isExpanded ? 'none' : 'inline'; - const fullDisplay = isExpanded ? 'block' : 'none'; - const buttonText = isExpanded ? 'Show Preview' : 'Show Details'; - - return `
- ${escapeHtml(preview)} -
${escapeHtml(fullJson)}
- -
`; - } - return escapeHtml(value.toString()); + const id = `object_${paramName}_${runId}_${type}`; + return createExpandableContent(value, id, false); default: return escapeHtml(value.toString()); @@ -2348,7 +1233,7 @@ document.addEventListener("DOMContentLoaded", () => { // Handle numeric comparison with floating point tolerance if (typeof val1 === 'number' && typeof val2 === 'number') { - return Math.abs(val1 - val2) < 0.001; + return Math.abs(val1 - val2) < FLOATING_POINT_TOLERANCE; } // Handle string comparison @@ -2366,88 +1251,99 @@ document.addEventListener("DOMContentLoaded", () => { } // Handle object comparison - if (typeof val1 === 'object' && typeof val2 === 'object') { - const keys1 = Object.keys(val1); - const keys2 = Object.keys(val2); - - if (keys1.length !== keys2.length) return false; - - for (const key of keys1) { - if (!keys2.includes(key)) return false; - if (!compareValues(val1[key], val2[key])) return false; - } - return true; + const keys1 = Object.keys(val1); + const keys2 = Object.keys(val2); + + if (keys1.length !== keys2.length) return false; + + for (const key of keys1) { + if (!keys2.includes(key)) return false; + if (!compareValues(val1[key], val2[key])) return false; + } + return true; + } + + // Add a column with specific parameters (no appState manipulation) + async function addColumn(params, options = {}) { + if (!params.scenario) { + console.warn('No scenario provided for addColumn'); + return; } + + // Create run config from parameters + const runConfig = createRunConfigFromParams(params); - return false; + // Fetch data for these parameters + const runData = await fetchRunData({ + scenario: params.scenario, + scene: params.scene, + admType: params.admType, + llmBackbone: params.llmBackbone, + runVariant: params.runVariant, + kdmaValues: params.kdmaValues + }); + + if (!runData || !runData.inputOutput) { + throw new Error('No data found for parameters'); + } + + // Store complete run data + const pinnedData = { + ...runConfig, + 
inputOutput: runData.inputOutput, + inputOutputArray: runData.inputOutputArray, + timing: runData.timing, + timing_s: runData.timing_s, + loadStatus: 'loaded' + }; + + appState.pinnedRuns.set(runConfig.id, pinnedData); + renderComparisonTable(); + + // Only update URL if not explicitly disabled (e.g., during batch restoration) + if (options.updateURL !== false) { + urlState.updateURL(); + } + + return runConfig.id; // Return the ID for reference } function getObjectPreview(obj) { - if (!obj || typeof obj !== 'object') return 'N/A'; + if (!obj) return 'N/A'; const keys = Object.keys(obj); if (keys.length === 0) return '{}'; - if (keys.length === 1 && typeof obj[keys[0]] !== 'object') { + if (keys.length === 1) { return `${keys[0]}: ${obj[keys[0]]}`; } return `{${keys.slice(0, 3).join(', ')}${keys.length > 3 ? '...' : ''}}`; } - // Add a new column by duplicating the rightmost column's parameters - async function addNewColumn() { - if (appState.pinnedRuns.size === 0) return; + // Copy the rightmost column's parameters to create a new column + async function copyColumn() { + if (appState.pinnedRuns.size === 0) { + console.warn('No columns to copy from'); + return; + } - // Get the rightmost (last) pinned run + // Get parameters from the rightmost (last) pinned run const pinnedRunsArray = Array.from(appState.pinnedRuns.values()); const lastRun = pinnedRunsArray[pinnedRunsArray.length - 1]; - // Temporarily update app state to match the last run's configuration - const originalState = { - selectedBaseScenario: appState.selectedBaseScenario, - selectedScenario: appState.selectedScenario, - selectedAdmType: appState.selectedAdmType, - selectedLLM: appState.selectedLLM, - activeKDMAs: { ...appState.activeKDMAs } + const params = { + scene: lastRun.scene, + scenario: lastRun.scenario, + admType: lastRun.admType, + llmBackbone: lastRun.llmBackbone, + runVariant: lastRun.runVariant, + kdmaValues: lastRun.kdmaValues, + availableScenarios: lastRun.availableOptions?.scenarios || 
[], + availableScenes: lastRun.availableOptions?.scenes || [], + availableAdmTypes: lastRun.availableOptions?.admTypes || [], + availableLLMs: lastRun.availableOptions?.llms || [] }; - appState.selectedBaseScenario = lastRun.baseScenario; - appState.selectedScenario = lastRun.scenario; - appState.selectedAdmType = lastRun.admType; - appState.selectedLLM = lastRun.llmBackbone; - appState.activeKDMAs = { ...lastRun.kdmaValues }; - - // Pin directly without duplicate checking since we want to allow duplicates for comparison - const runConfig = appState.createRunConfig(); - - try { - await loadResultsForConfig(runConfig); - - // Store complete run data - const pinnedData = { - ...runConfig, - inputOutput: appState.currentInputOutput, - inputOutputArray: appState.currentInputOutputArray, - timing: appState.currentTiming, - loadStatus: 'loaded' - }; - - appState.pinnedRuns.set(runConfig.id, pinnedData); - updateComparisonDisplay(); - urlState.updateURL(); - - } catch (error) { - console.warn('Failed to load data for new column:', error); - // Still add to pinned runs but mark as failed - const pinnedData = { - ...runConfig, - loadStatus: 'failed', - error: error.message - }; - appState.pinnedRuns.set(runConfig.id, pinnedData); - updateComparisonDisplay(); - } - - // Restore original app state - Object.assign(appState, originalState); + // Use the new addColumn function + return await addColumn(params); } // Toggle functions for expandable content @@ -2558,7 +1454,7 @@ document.addEventListener("DOMContentLoaded", () => { // Update UI if requested if (updateUI) { - updateComparisonDisplay(); + renderComparisonTable(); } // Update URL state if requested @@ -2605,25 +1501,10 @@ document.addEventListener("DOMContentLoaded", () => { // Make removePinnedRun globally accessible for onclick handlers window.removeRun = removeRun; - // Display name generation uses imported function - - function showNotification(message, type = 'info') { - const notification = 
document.createElement('div'); - notification.className = `notification notification-${type}`; - notification.textContent = message; - notification.style.cssText = ` - position: fixed; top: 20px; right: 20px; padding: 10px 20px; - background: ${type === 'error' ? '#f44336' : type === 'success' ? '#4caf50' : '#2196F3'}; - color: white; border-radius: 4px; z-index: 1000; - `; - document.body.appendChild(notification); - setTimeout(() => notification.remove(), 3000); - } - // Initialize static button event listeners const addColumnBtn = document.getElementById('add-column-btn'); if (addColumnBtn) { - addColumnBtn.addEventListener('click', addNewColumn); + addColumnBtn.addEventListener('click', copyColumn); } // Initial manifest fetch on page load diff --git a/align_browser/static/index.html b/align_browser/static/index.html index de7f611..6e64d70 100644 --- a/align_browser/static/index.html +++ b/align_browser/static/index.html @@ -28,12 +28,12 @@

Align System Experiments

- - Base Scenario - Scenario + + Scene + Scenario State diff --git a/align_browser/static/state.js b/align_browser/static/state.js index 70eeba1..d9b9931 100644 --- a/align_browser/static/state.js +++ b/align_browser/static/state.js @@ -1,141 +1,153 @@ // Functional State Management Module // Pure functions for managing application state without mutations +// Constants for KDMA processing +const KDMA_CONSTANTS = { + DECIMAL_PRECISION: 10, // For 1 decimal place normalization + DISPLAY_PRECISION: 1 // For display formatting +}; + +// KDMA Utility Functions +export const KDMAUtils = { + // Normalize KDMA value to 1 decimal place + normalizeValue: (value) => Math.round(parseFloat(value) * KDMA_CONSTANTS.DECIMAL_PRECISION) / KDMA_CONSTANTS.DECIMAL_PRECISION, + + // Format KDMA value for display (1 decimal place) + formatValue: (value) => typeof value === 'number' ? value.toFixed(KDMA_CONSTANTS.DISPLAY_PRECISION) : value, + + // Convert KDMA object to sorted array for serialization + toSortedArray: (kdmaObject) => { + return Object.entries(kdmaObject) + .map(([kdma, value]) => ({ kdma, value: KDMAUtils.normalizeValue(value) })) + .sort((a, b) => a.kdma.localeCompare(b.kdma)); + }, + + // Convert KDMA object to sorted array and serialize for keys + serializeToKey: (kdmaObject) => { + return JSON.stringify(KDMAUtils.toSortedArray(kdmaObject)); + }, + + // Convert KDMA object to key parts for experiment keys + toKeyParts: (kdmaObject) => { + return Object.entries(kdmaObject) + .map(([kdma, value]) => `${kdma}-${KDMAUtils.formatValue(value)}`) + .sort(); + }, + + // Deep equality comparison for objects + deepEqual: (obj1, obj2) => { + if (obj1 === obj2) return true; + if (!obj1 || !obj2) return obj1 === obj2; + + const keys1 = Object.keys(obj1); + const keys2 = Object.keys(obj2); + + if (keys1.length !== keys2.length) return false; + + return keys1.every(key => { + if (!keys2.includes(key)) return false; + const val1 = obj1[key]; + const val2 = obj2[key]; + + // Handle 
nested objects recursively + if (typeof val1 === 'object' && typeof val2 === 'object') { + return KDMAUtils.deepEqual(val1, val2); + } + + // Handle numeric comparison with normalization + if (typeof val1 === 'number' && typeof val2 === 'number') { + return KDMAUtils.normalizeValue(val1) === KDMAUtils.normalizeValue(val2); + } + + return val1 === val2; + }); + }, + + // Backwards compatibility alias + objectsEqual: function(obj1, obj2) { + return this.deepEqual(obj1, obj2); + } +}; + // Create initial empty state export function createInitialState() { return { // Data from manifest availableScenarios: [], - availableBaseScenarios: [], + availableScenes: [], availableAdmTypes: [], availableKDMAs: [], availableLLMs: [], - validCombinations: {}, - - // User selections - selectedBaseScenario: null, - selectedScenario: null, - selectedAdmType: null, - selectedLLM: null, - selectedRunVariant: null, - activeKDMAs: {}, - - // LLM preferences per ADM type for preservation - llmPreferences: {}, - - // UI state - isUpdatingProgrammatically: false, - isTransitioning: false, // Comparison state - pinnedRuns: new Map(), - currentInputOutput: null, - currentScores: null, - currentTiming: null, - currentInputOutputArray: null - }; -} - -// Pure state updaters (immutable) -export function updateUserSelections(state, updates) { - const newState = { ...state }; - - if (updates.baseScenario !== undefined) { - newState.selectedBaseScenario = updates.baseScenario; - } - if (updates.scenario !== undefined) { - newState.selectedScenario = updates.scenario; - } - if (updates.admType !== undefined) { - newState.selectedAdmType = updates.admType; - } - if (updates.llm !== undefined) { - newState.selectedLLM = updates.llm; - } - if (updates.runVariant !== undefined) { - newState.selectedRunVariant = updates.runVariant; - } - if (updates.kdmas !== undefined) { - newState.activeKDMAs = { ...updates.kdmas }; - } - - return newState; -} - -export function updateCurrentData(state, updates) { - 
return { - ...state, - currentInputOutput: updates.inputOutput !== undefined ? updates.inputOutput : state.currentInputOutput, - currentScores: updates.scores !== undefined ? updates.scores : state.currentScores, - currentTiming: updates.timing !== undefined ? updates.timing : state.currentTiming, - currentInputOutputArray: updates.inputOutputArray !== undefined ? updates.inputOutputArray : state.currentInputOutputArray + pinnedRuns: new Map() }; } -// Pure selectors (computed values) -export function getSelectedKey(state) { - const admType = state.selectedAdmType; - const llmBackbone = state.selectedLLM; - const runVariant = state.selectedRunVariant; - const kdmaParts = []; - Object.entries(state.activeKDMAs).forEach(([kdma, value]) => { - kdmaParts.push(`${kdma}-${value.toFixed(1)}`); - }); - - // Sort KDMA parts to match the key generation in build.py - const kdmaString = kdmaParts.sort().join("_"); - const baseKey = `${admType}_${llmBackbone}_${kdmaString}`; - - // Add run variant if present - if (runVariant && runVariant !== 'default') { - return `${baseKey}_${runVariant}`; - } - - return baseKey; -} // Generate a unique run ID export function generateRunId() { const timestamp = new Date().getTime(); - const random = Math.random().toString(36).substr(2, 9); + const random = Math.random().toString(36).substring(2, 11); return `run_${timestamp}_${random}`; } -// Generate display name for a run based on current state -export function generateDisplayName(state) { - const parts = []; - if (state.selectedAdmType) { - parts.push(state.selectedAdmType.replace(/_/g, ' ')); - } - if (state.selectedLLM) { - parts.push(state.selectedLLM.replace(/_/g, ' ')); - } - const kdmaKeys = Object.keys(state.activeKDMAs || {}); - if (kdmaKeys.length > 0) { - const kdmaStr = kdmaKeys.map(k => `${k}=${state.activeKDMAs[k]}`).join(', '); - parts.push(`(${kdmaStr})`); - } - const result = parts.join(' - ') || 'Unnamed Run'; - return result === '' ? 
'Unnamed Run' : result; -} // Create a run configuration factory function -export function createRunConfig(state) { +export function createRunConfig(params, availableKDMAs) { + // Create sophisticated KDMA structure that preserves permutation constraints + const kdmaStructure = { + validCombinations: [], // Array of valid KDMA combinations + availableTypes: new Set(), // All KDMA types that appear in any combination + typeValueMap: {} // kdmaType -> Set of all possible values for that type + }; + + if (availableKDMAs && Array.isArray(availableKDMAs)) { + // Store all valid combinations and build type/value maps + availableKDMAs.forEach(kdmaCombination => { + // Store the valid combination as object for unified usage + kdmaStructure.validCombinations.push({ ...kdmaCombination }); + + // Extract types and values for the maps + Object.entries(kdmaCombination).forEach(([kdma, value]) => { + if (kdma && value !== undefined) { + kdmaStructure.availableTypes.add(kdma); + if (!kdmaStructure.typeValueMap[kdma]) { + kdmaStructure.typeValueMap[kdma] = new Set(); + } + kdmaStructure.typeValueMap[kdma].add(value); + } + }); + }); + } + + // Generate experiment key directly + const kdmaParts = KDMAUtils.toKeyParts(params.kdmaValues || {}); + const kdmaString = kdmaParts.join("_"); + const experimentKey = `${params.admType}:${params.llmBackbone}:${kdmaString}:${params.runVariant}`; + return { id: generateRunId(), timestamp: new Date().toISOString(), - scenario: state.selectedScenario, - baseScenario: state.selectedBaseScenario, - admType: state.selectedAdmType, - llmBackbone: state.selectedLLM, - runVariant: state.selectedRunVariant || null, - kdmaValues: { ...state.activeKDMAs }, - experimentKey: getSelectedKey(state), - displayName: generateDisplayName(state), - loadStatus: 'pending' + scenario: params.scenario, + scene: params.scene, + admType: params.admType, + llmBackbone: params.llmBackbone, + runVariant: params.runVariant, + kdmaValues: { ...params.kdmaValues }, + 
experimentKey, + loadStatus: 'pending', + // Store available options at time of creation for dropdown population + availableOptions: { + scenarios: params.availableScenarios || [], + scenes: params.availableScenes || [], + admTypes: params.availableAdmTypes || [], + llms: params.availableLLMs || [], + kdmas: kdmaStructure // Sophisticated structure with constraint information + } }; } @@ -143,26 +155,22 @@ export function createRunConfig(state) { export function createParameterStructure(params = {}) { return { scenario: params.scenario || null, - baseScenario: params.baseScenario || null, + scene: params.scene || null, admType: params.admType || null, llmBackbone: params.llmBackbone || null, - runVariant: params.runVariant || null, + runVariant: params.runVariant || 'default', kdmas: params.kdmas || {} }; } // URL State Management Functions export function encodeStateToURL(state) { + const manifest = GlobalState.getManifest(); const urlState = { - baseScenario: state.selectedBaseScenario, - scenario: state.selectedScenario, - admType: state.selectedAdmType, - llm: state.selectedLLM, - runVariant: state.selectedRunVariant, - kdmas: state.activeKDMAs, + manifestCreatedAt: manifest?.generated_at, pinnedRuns: Array.from(state.pinnedRuns.values()).map(run => ({ scenario: run.scenario, - baseScenario: run.baseScenario, + scene: run.scene, admType: run.admType, llmBackbone: run.llmBackbone, runVariant: run.runVariant, @@ -186,7 +194,17 @@ export function decodeStateFromURL() { if (stateParam) { try { - return JSON.parse(atob(stateParam)); + const decodedState = JSON.parse(atob(stateParam)); + + // Validate manifest creation date if present + const currentManifest = GlobalState.getManifest(); + if (currentManifest && decodedState.manifestCreatedAt && + decodedState.manifestCreatedAt !== currentManifest.generated_at) { + console.warn('URL parameters are from a different manifest version, ignoring URL state'); + return null; + } + + return decodedState; } catch (e) { 
console.warn('Invalid URL state, using defaults:', e); return null; @@ -195,4 +213,248 @@ export function decodeStateFromURL() { return null; } +// Configuration for parameter validation system +const PARAMETER_CONFIG = { + // Priority order for parameter cascading + PRIORITY_ORDER: ['scenario', 'scene', 'kdma_values', 'adm', 'llm', 'run_variant'], + + // Parameters that require special handling + SPECIAL_COMPARISON_PARAMS: new Set(['kdma_values']) +}; +// Parameter update system with priority-based cascading +const updateParametersBase = (priorityOrder) => (manifest) => (currentParams, changes) => { + const newParams = { ...currentParams, ...changes }; + + // Helper to check if manifest entry matches current selection + const matchesCurrentSelection = (manifestEntry, excludeParam, currentSelection) => { + const excludeParamIndex = priorityOrder.indexOf(excludeParam); + + for (const param of priorityOrder) { + if (param === excludeParam) continue; + + const paramIndex = priorityOrder.indexOf(param); + + // Only apply constraints from higher priority parameters (already set) + // Lower priority parameters shouldn't constrain higher priority ones + if (paramIndex >= excludeParamIndex) { + continue; // Skip constraints from same or lower priority parameters + } + + // Only check constraint if the current selection has a non-null value for this parameter + if (currentSelection[param] !== null && currentSelection[param] !== undefined) { + // Special handling for parameters that need custom comparison + if (PARAMETER_CONFIG.SPECIAL_COMPARISON_PARAMS.has(param)) { + if (param === 'kdma_values') { + const manifestKdmas = manifestEntry[param]; + const selectionKdmas = currentSelection[param]; + + if (!KDMAUtils.deepEqual(manifestKdmas, selectionKdmas)) { + return false; + } + } + } else if (manifestEntry[param] !== currentSelection[param]) { + return false; + } + } + } + return true; + }; + + // Helper to get valid options for a parameter + const getValidOptionsFor = 
(parameterName, currentSelection) => { + const validEntries = manifest.filter(entry => + matchesCurrentSelection(entry, parameterName, currentSelection) + ); + const options = [...new Set(validEntries.map(entry => entry[parameterName]))]; + + return options; + }; + + // Find the highest priority parameter that changed + const changedParams = Object.keys(changes); + let highestChangedIndex; + + if (changedParams.length === 0) { + // No changes provided - validate/correct all parameters from the beginning + highestChangedIndex = -1; + } else { + highestChangedIndex = Math.min( + ...changedParams.map(param => priorityOrder.indexOf(param)) + ); + } + + // Check and potentially update parameters starting from the highest changed index + for (let i = highestChangedIndex + 1; i < priorityOrder.length; i++) { + const param = priorityOrder[i]; + const currentValue = newParams[param]; + const validOptions = getValidOptionsFor(param, newParams); + + // Only change if current value is invalid + let isValid = validOptions.includes(currentValue); + + // For special parameters, use custom comparison logic + if (PARAMETER_CONFIG.SPECIAL_COMPARISON_PARAMS.has(param) && !isValid) { + if (param === 'kdma_values') { + isValid = validOptions.some(option => { + return KDMAUtils.deepEqual(option, currentValue); + }); + } + } + + if (!isValid) { + const newValue = validOptions.length > 0 ? 
validOptions[0] : null; + newParams[param] = newValue; + } + } + + // Calculate available options for all parameters + const availableOptions = {}; + for (const param of priorityOrder) { + availableOptions[param] = getValidOptionsFor(param, newParams); + } + + return { + params: newParams, + options: availableOptions + }; +}; + +// Export updateParameters with priority order already curried +export const updateParameters = updateParametersBase(PARAMETER_CONFIG.PRIORITY_ORDER); + +// Global state encapsulation +const GlobalState = { + manifest: null, + parameterRunMap: new Map(), + + // Getters + getManifest: () => GlobalState.manifest, + getParameterRunMap: () => GlobalState.parameterRunMap, + + // Setters + setManifest: (newManifest) => { GlobalState.manifest = newManifest; }, + clearParameterRunMap: () => { GlobalState.parameterRunMap.clear(); }, + setParameterRun: (key, value) => { GlobalState.parameterRunMap.set(key, value); }, + getParameterRun: (key) => GlobalState.parameterRunMap.get(key), + + // State queries + hasManifest: () => GlobalState.manifest !== null, + isParameterRunMapEmpty: () => GlobalState.parameterRunMap.size === 0 +}; + +// Load and initialize manifest +export async function loadManifest() { + const response = await fetch("./data/manifest.json"); + const manifest = await response.json(); + GlobalState.setManifest(manifest); + + // Initialize updateParameters with the transformed manifest + const transformedManifest = transformManifestForUpdateParameters(manifest); + const updateAppParameters = updateParameters(transformedManifest); + + return { manifest, updateAppParameters }; +} + + +function resolveParametersToRun(params) { + if (GlobalState.isParameterRunMapEmpty()) { + console.warn('parameterRunMap is empty or not initialized'); + return undefined; + } + + const { scenario, scene, kdmaValues, admType, llmBackbone, runVariant } = params; + + const kdmaString = KDMAUtils.serializeToKey(kdmaValues || {}); + const mapKey = 
`${scenario}:${scene}:${kdmaString}:${admType}:${llmBackbone}:${runVariant}`; + + return GlobalState.getParameterRun(mapKey); +} + +export async function fetchRunData(params) { + const runInfo = resolveParametersToRun(params); + if (!runInfo) { + return undefined; + } + + try { + // Fetch both input/output and timing data + const [inputOutputResponse, timingResponse] = await Promise.all([ + fetch(runInfo.inputOutputPath), + fetch(runInfo.timingPath) + ]); + + const inputOutputArray = await inputOutputResponse.json(); + const timingData = await timingResponse.json(); + + + // Return complete data structure + return { + inputOutput: inputOutputArray[runInfo.sourceIndex], + inputOutputArray: inputOutputArray, + timing: timingData, + experimentKey: runInfo.experimentKey, + timing_s: runInfo.timing_s + }; + } catch (error) { + console.error('Error fetching run data:', error); + return undefined; + } +} + +// Transform hierarchical manifest to flat array for updateParameters +export function transformManifestForUpdateParameters(manifest) { + const entries = []; + + if (!manifest.experiments) { + console.warn('No experiments found in manifest'); + return entries; + } + + GlobalState.clearParameterRunMap(); + + for (const [experimentKey, experiment] of Object.entries(manifest.experiments)) { + + const { adm, llm, kdma_values, run_variant } = experiment.parameters; + + for (const [scenarioId, scenario] of Object.entries(experiment.scenarios)) { + + for (const [sceneId, sceneInfo] of Object.entries(scenario.scenes)) { + // Convert KDMA array to object format for unified usage + const kdmaObject = {}; + if (kdma_values && Array.isArray(kdma_values)) { + kdma_values.forEach(kdmaItem => { + if (kdmaItem.kdma && kdmaItem.value !== undefined) { + kdmaObject[kdmaItem.kdma] = KDMAUtils.normalizeValue(kdmaItem.value); + } + }); + } + + const entry = { + scenario: scenarioId, + scene: sceneId, + kdma_values: kdmaObject, + adm: adm.name, + llm: llm?.model_name || null, + run_variant: 
run_variant + }; + + entries.push(entry); + + const kdmaString = KDMAUtils.serializeToKey(kdmaObject); + const mapKey = `${scenarioId}:${sceneId}:${kdmaString}:${adm.name}:${llm?.model_name || null}:${run_variant}`; + + GlobalState.setParameterRun(mapKey, { + experimentKey, + sourceIndex: sceneInfo.source_index, + inputOutputPath: scenario.input_output.file, + timingPath: scenario.timing, + timing_s: sceneInfo.timing_s + }); + } + } + } + + + return entries; +} \ No newline at end of file diff --git a/align_browser/static/style.css b/align_browser/static/style.css index 520cfa0..065bfd0 100644 --- a/align_browser/static/style.css +++ b/align_browser/static/style.css @@ -143,7 +143,7 @@ footer { background: #fafafa; } -.parameter-row[data-category="base_scenario"], +.parameter-row[data-category="scene"], .parameter-row[data-category="scenario"], .parameter-row[data-category="adm_type"], .parameter-row[data-category="llm_backbone"] { diff --git a/align_browser/test_experiment_parser.py b/align_browser/test_experiment_parser.py index 2698e47..8d116e2 100644 --- a/align_browser/test_experiment_parser.py +++ b/align_browser/test_experiment_parser.py @@ -11,10 +11,8 @@ InputOutputFile, ScoresFile, ExperimentData, - GlobalManifest, - ScenarioManifest, - ExperimentSummary, ChunkedExperimentData, + Manifest, ) from align_browser.experiment_parser import ( parse_experiments_directory, @@ -92,7 +90,8 @@ def create_sample_timing_data(): "max_time_s": 0.0005, "raw_times_s": [0.0003, 0.0004, 0.0002], } - ] + ], + "raw_times_s": [0.0003, 0.0004, 0.0002], } @@ -125,7 +124,7 @@ def test_experiment_config_key_generation(): config = ExperimentConfig(**config_data) key = config.generate_key() - expected_key = "pipeline_random_llama3.3-70b_affiliation-0.5" + expected_key = "pipeline_random:llama3.3-70b:affiliation-0.5:default" assert key == expected_key @@ -140,8 +139,8 @@ def test_input_output_file_model(): try: input_output = InputOutputFile.from_file(temp_path) assert 
len(input_output.data) == 1 - assert input_output.first_scenario_id == "June2025-AF-train-0" - assert input_output.data[0].input.scenario_id == "June2025-AF-train-0" + assert input_output.first_scenario_id == "June2025-AF-train" + assert input_output.data[0].input.scenario_id == "June2025-AF-train" finally: temp_path.unlink() @@ -195,8 +194,8 @@ def test_experiment_data_from_directory(): # Test loading (no experiments_root, so no directory context) experiment = ExperimentData.from_directory(experiment_dir) - assert experiment.key == "pipeline_random_llama3.3-70b_affiliation-0.5" - assert experiment.scenario_id == "June2025-AF-train-0" + assert experiment.key == "pipeline_random:llama3.3-70b:affiliation-0.5:default" + assert experiment.scenario_id == "June2025-AF-train" assert experiment.config.adm.name == "pipeline_random" assert len(experiment.input_output.data) == 1 assert len(experiment.scores.data) == 1 @@ -263,7 +262,9 @@ def test_parse_experiments_directory(): # Test parsing experiments = parse_experiments_directory(experiments_root) assert len(experiments) == 1 - assert experiments[0].key == "pipeline_random_llama3.3-70b_affiliation-0.5" + assert ( + experiments[0].key == "pipeline_random:llama3.3-70b:affiliation-0.5:default" + ) def test_parse_experiments_directory_excludes_outdated(): @@ -314,7 +315,9 @@ def test_parse_experiments_directory_excludes_outdated(): # Test parsing - should only find the valid experiment, not the OUTDATED one experiments = parse_experiments_directory(experiments_root) assert len(experiments) == 1, f"Expected 1 experiment, found {len(experiments)}" - assert experiments[0].key == "pipeline_random_llama3.3-70b_affiliation-0.5" + assert ( + experiments[0].key == "pipeline_random:llama3.3-70b:affiliation-0.5:default" + ) # Verify the OUTDATED experiment was actually excluded experiment_paths = [str(exp.experiment_path) for exp in experiments] @@ -382,40 +385,31 @@ def test_run_variant_conflict_resolution(): manifest = 
build_manifest_from_experiments(experiments, experiments_root) # Verify that conflicts were resolved with run_variant in experiment keys - experiment_keys = list(manifest.experiment_keys.keys()) + experiment_keys = list(manifest.experiments.keys()) assert len(experiment_keys) == 2, ( f"Expected 2 unique experiment keys, got {len(experiment_keys)}" ) - # Check that experiment keys include run variants - # At least one key should contain a run variant - has_run_variant = any( - "_pipeline_test" in key or "_rerun" in key for key in experiment_keys - ) - assert has_run_variant, f"Expected run_variant in keys: {experiment_keys}" + # Check that experiment keys are hash-based + for key in experiment_keys: + assert key.startswith("exp_"), f"Expected hash-based key: {key}" - # Check that run_variant is in the config + # Check that run_variant is in the parameters + has_run_variant = False for exp_key in experiment_keys: - scenarios = manifest.experiment_keys[exp_key].scenarios - first_scenario = next(iter(scenarios.values())) - config = first_scenario.config - - # If key contains run variant, config should have run_variant field - if "_pipeline_test" in exp_key or "_rerun" in exp_key: - assert "run_variant" in config, ( - f"Expected run_variant in config for key {exp_key}" - ) - assert config["run_variant"] is not None, ( - f"run_variant should not be None for {exp_key}" - ) + parameters = manifest.experiments[exp_key].parameters + if parameters["run_variant"] and parameters["run_variant"] != "default": + has_run_variant = True + break + assert has_run_variant, "Expected at least one experiment with run_variant" # Verify scenario IDs remain unchanged (no directory context in scenario IDs) all_scenarios = [] - for scenario_manifest in manifest.experiment_keys.values(): - all_scenarios.extend(scenario_manifest.scenarios.keys()) + for experiment in manifest.experiments.values(): + all_scenarios.extend(experiment.scenarios.keys()) for scenario_id in all_scenarios: - assert 
scenario_id.startswith("June2025-AF-train-"), ( + assert scenario_id.startswith("June2025"), ( f"Scenario ID should not have directory context: {scenario_id}" ) @@ -427,48 +421,45 @@ def test_build_manifest_from_experiments(): experiments_root = temp_path / "experiments" experiments_root.mkdir() - # Create a mock experiment in the correct path structure + # Create a complete experiment structure for testing pipeline_dir = experiments_root / "test_pipeline" pipeline_dir.mkdir() experiment_dir = pipeline_dir / "test_experiment" experiment_dir.mkdir() + hydra_dir = experiment_dir / ".hydra" + hydra_dir.mkdir() - # Create mock experiment data (simplified) - from unittest.mock import Mock + # Create required files + config_data = create_sample_config_data() + with open(hydra_dir / "config.yaml", "w") as f: + yaml.dump(config_data, f) - mock_input_item = Mock() - mock_input_item.input.scenario_id = "test_scenario" + with open(experiment_dir / "input_output.json", "w") as f: + json.dump(create_sample_input_output_data(), f) - mock_input_output = Mock() - mock_input_output.data = [mock_input_item] + with open(experiment_dir / "scores.json", "w") as f: + json.dump(create_sample_scores_data(), f) - mock_experiment = Mock( - spec_set=[ - "key", - "scenario_id", - "experiment_path", - "input_output", - "scores", - "config", - ] - ) - mock_experiment.key = "test_key" - mock_experiment.scenario_id = "test_scenario" - mock_experiment.experiment_path = experiment_dir - mock_experiment.input_output = mock_input_output - mock_experiment.scores = None - mock_experiment.config.model_dump.return_value = {"test": "config"} + with open(experiment_dir / "timing.json", "w") as f: + json.dump(create_sample_timing_data(), f) - experiments = [mock_experiment] + # Load real experiment instead of using mocks + experiment = ExperimentData.from_directory(experiment_dir) + experiments = [experiment] manifest = build_manifest_from_experiments(experiments, experiments_root) - assert "test_key" in 
manifest.experiment_keys - assert "scenarios" in manifest.experiment_keys["test_key"].model_dump() - assert "test_scenario" in manifest.experiment_keys["test_key"].scenarios - assert manifest.experiment_keys["test_key"].scenarios[ - "test_scenario" - ].config == {"test": "config"} + # Check that at least one experiment was added (hash-based keys) + assert len(manifest.experiments) >= 1, "Should have at least one experiment" + + # Get first experiment key + first_key = list(manifest.experiments.keys())[0] + assert first_key.startswith("exp_"), f"Expected hash-based key: {first_key}" + + # Check experiment structure + experiment_obj = manifest.experiments[first_key] + assert "scenarios" in experiment_obj.model_dump() + assert len(experiment_obj.scenarios) >= 1, "Should have at least one scenario" def test_parse_real_experiments_if_available(): @@ -491,95 +482,6 @@ def test_parse_real_experiments_if_available(): print(f"✅ Real experiment validation passed: {first_exp.key}") -def test_experiment_summary_model(): - """Test ExperimentSummary model.""" - summary = ExperimentSummary( - input_output="data/test/input_output.json", - scores="data/test/scores.json", - timing="data/test/timing.json", - config={"test": "config"}, - ) - - assert summary.input_output == "data/test/input_output.json" - assert summary.scores == "data/test/scores.json" - assert summary.timing == "data/test/timing.json" - assert summary.config == {"test": "config"} - - -def test_scenario_manifest_model(): - """Test ScenarioManifest model.""" - manifest = ScenarioManifest() - - # Test adding scenarios - summary = ExperimentSummary( - input_output="data/test/input_output.json", - scores="data/test/scores.json", - timing="data/test/timing.json", - config={"test": "config"}, - ) - - manifest.scenarios["test_scenario"] = summary - assert "test_scenario" in manifest.scenarios - assert manifest.scenarios["test_scenario"] == summary - - -def test_global_manifest_model(): - """Test GlobalManifest model 
functionality.""" - with tempfile.TemporaryDirectory() as temp_dir: - temp_path = Path(temp_dir) - experiments_root = temp_path / "experiments" - experiments_root.mkdir() - - # Create a complete experiment structure for testing - pipeline_dir = experiments_root / "pipeline_test" - pipeline_dir.mkdir() - experiment_dir = pipeline_dir / "test_experiment" - experiment_dir.mkdir() - hydra_dir = experiment_dir / ".hydra" - hydra_dir.mkdir() - - # Create required files - config_data = create_sample_config_data() - with open(hydra_dir / "config.yaml", "w") as f: - yaml.dump(config_data, f) - - with open(experiment_dir / "input_output.json", "w") as f: - json.dump(create_sample_input_output_data(), f) - - with open(experiment_dir / "scores.json", "w") as f: - json.dump(create_sample_scores_data(), f) - - with open(experiment_dir / "timing.json", "w") as f: - json.dump(create_sample_timing_data(), f) - - # Test loading experiment - experiment = ExperimentData.from_directory(experiment_dir) - - # Test GlobalManifest - manifest = GlobalManifest() - manifest.add_experiment(experiment, experiments_root) - - # Test experiment count - assert manifest.get_experiment_count() == 1 - - # Test ADM types extraction - adm_types = manifest.get_adm_types() - assert "pipeline_random" in adm_types - - # Test LLM backbones extraction - llm_backbones = manifest.get_llm_backbones() - assert "llama3.3-70b" in llm_backbones - - # Test KDMA combinations extraction - kdma_combinations = manifest.get_kdma_combinations() - assert "affiliation-0.5" in kdma_combinations - - # Test experiment key structure - expected_key = "pipeline_random_llama3.3-70b_affiliation-0.5" - assert expected_key in manifest.experiment_keys - assert "June2025-AF-train-0" in manifest.experiment_keys[expected_key].scenarios - - def test_chunked_experiment_data_model(): """Test ChunkedExperimentData model.""" with tempfile.TemporaryDirectory() as temp_dir: @@ -632,29 +534,6 @@ def test_chunked_experiment_data_model(): assert 
scenario_chunk.metadata["count"] == 1 -def test_global_manifest_serialization(): - """Test that GlobalManifest can be properly serialized to JSON.""" - manifest = GlobalManifest() - manifest.metadata = { - "total_experiments": 0, - "adm_types": [], - "llm_backbones": [], - "kdma_combinations": [], - "generated_at": "2024-01-01T00:00:00", - } - - # Test serialization - manifest_dict = manifest.model_dump() - json_str = json.dumps(manifest_dict, indent=2) - - # Test deserialization - loaded_dict = json.loads(json_str) - loaded_manifest = GlobalManifest(**loaded_dict) - - assert loaded_manifest.metadata["total_experiments"] == 0 - assert loaded_manifest.metadata["generated_at"] == "2024-01-01T00:00:00" - - def test_end_to_end_build_process(): """Test the complete build process from experiments to output validation.""" import tempfile @@ -705,35 +584,34 @@ def test_end_to_end_build_process(): manifest_data = json.load(f) # Validate manifest structure using Pydantic - manifest = GlobalManifest(**manifest_data) + manifest = Manifest(**manifest_data) # Basic validation - assert manifest.get_experiment_count() > 0, ( - "Should have parsed some experiments" - ) - assert len(manifest.get_adm_types()) > 0, "Should have identified ADM types" - assert manifest.metadata["generated_at"] is not None, ( - "Should have generation timestamp" - ) + assert len(manifest.experiments) > 0, "Should have parsed some experiments" + assert len(manifest.indices.by_adm) > 0, "Should have identified ADM types" + assert manifest.generated_at is not None, "Should have generation timestamp" # Validate that experiment files exist - first_key = list(manifest.experiment_keys.keys())[0] - first_scenario = list(manifest.experiment_keys[first_key].scenarios.keys())[ - 0 - ] - experiment_summary = manifest.experiment_keys[first_key].scenarios[ + first_key = list(manifest.experiments.keys())[0] + first_scenario = list(manifest.experiments[first_key].scenarios.keys())[0] + experiment_summary = 
manifest.experiments[first_key].scenarios[ first_scenario ] # Check that referenced files actually exist - input_output_path = output_dir / experiment_summary.input_output - scores_path = output_dir / experiment_summary.scores + input_output_path = output_dir / experiment_summary.input_output.file + scores_path = ( + output_dir / experiment_summary.scores + if experiment_summary.scores + else None + ) timing_path = output_dir / experiment_summary.timing assert input_output_path.exists(), ( f"Input/output file should exist: {input_output_path}" ) - assert scores_path.exists(), f"Scores file should exist: {scores_path}" + if scores_path: + assert scores_path.exists(), f"Scores file should exist: {scores_path}" assert timing_path.exists(), f"Timing file should exist: {timing_path}" # Validate JSON files are valid @@ -744,22 +622,23 @@ def test_end_to_end_build_process(): ) assert len(input_output_data) > 0, "Input/output should have data" - with open(scores_path) as f: - scores_data = json.load(f) - assert isinstance(scores_data, list), "Scores should be a list" + if scores_path: + with open(scores_path) as f: + scores_data = json.load(f) + assert isinstance(scores_data, list), "Scores should be a list" with open(timing_path) as f: timing_data = json.load(f) assert "scenarios" in timing_data, "Timing should have scenarios" print( - f"✅ End-to-end build test passed with {manifest.get_experiment_count()} experiments" + f"✅ End-to-end build test passed with {len(manifest.experiments)} experiments" ) print( - f"✅ Found {len(manifest.get_adm_types())} ADM types: {', '.join(manifest.get_adm_types()[:3])}..." + f"✅ Found {len(manifest.indices.by_adm)} ADM types: {', '.join(list(manifest.indices.by_adm.keys())[:3])}..." ) print( - f"✅ Found {len(manifest.get_llm_backbones())} LLM backbones: {', '.join(manifest.get_llm_backbones()[:3])}..." + f"✅ Found {len(manifest.indices.by_llm)} LLM backbones: {', '.join(list(manifest.indices.by_llm.keys())[:3])}..." 
) except Exception as e: @@ -782,11 +661,7 @@ def run_all_tests(): test_parse_experiments_directory_excludes_outdated, test_run_variant_conflict_resolution, test_build_manifest_from_experiments, - test_experiment_summary_model, - test_scenario_manifest_model, - test_global_manifest_model, test_chunked_experiment_data_model, - test_global_manifest_serialization, test_end_to_end_build_process, test_parse_real_experiments_if_available, ] diff --git a/align_browser/test_frontend_real_data.py b/align_browser/test_frontend_real_data.py index ea7dc58..43de8d5 100644 --- a/align_browser/test_frontend_real_data.py +++ b/align_browser/test_frontend_real_data.py @@ -7,7 +7,11 @@ """ from playwright.sync_api import expect -from .conftest import wait_for_new_experiment_result, ensure_select_value +from .conftest import ( + wait_for_new_experiment_result, + ensure_select_value, + ensure_dropdown_selection, +) def wait_for_run_results_loaded(page, timeout=3000): @@ -294,16 +298,13 @@ def test_kdma_combination_default_value_issue(page, real_data_test_server): "document.querySelectorAll('.table-adm-select').length > 0", timeout=10000 ) - # Select pipeline_baseline ADM to enable KDMA functionality - adm_select = page.locator(".table-adm-select").first - adm_select.select_option("pipeline_baseline") - # Wait for UI to update after ADM selection - page.wait_for_load_state("networkidle") + ensure_dropdown_selection(page, ".table-adm-select", "pipeline_baseline", "ADM") - # Select a specific LLM known to have multi-KDMA experiments + # For LLM, we need to check what's actually available since it might vary llm_select = page.locator(".table-llm-select").first - llm_select.select_option("mistralai/Mistral-7B-Instruct-v0.3") - page.wait_for_load_state("networkidle") + current_llm = llm_select.input_value() + print(f"Using LLM: {current_llm}") + # Don't enforce a specific LLM since availability may vary with test data # Select June2025-AF-train scenario to get multi-KDMA support 
scenario_select = page.locator(".table-scenario-select").first @@ -385,6 +386,7 @@ def test_kdma_combination_default_value_issue(page, real_data_test_server): ) # Also check that the dropdowns don't go blank + adm_select = page.locator(".table-adm-select").first adm_select_value = adm_select.input_value() assert adm_select_value != "", "ADM select should not go blank after adding KDMA" diff --git a/align_browser/test_parsing.py b/align_browser/test_parsing.py index b05fd5d..206e35f 100644 --- a/align_browser/test_parsing.py +++ b/align_browser/test_parsing.py @@ -1,7 +1,7 @@ """Simple tests for experiment parsing using real experiment data.""" import sys -from align_browser.experiment_models import ExperimentData +from align_browser.experiment_models import ExperimentData, Manifest from align_browser.experiment_parser import ( parse_experiments_directory, build_manifest_from_experiments, @@ -49,28 +49,32 @@ def test_build_manifest(): manifest = build_manifest_from_experiments(experiments, experiments_root) print( - f"✅ Built manifest with {len(manifest.experiment_keys)} unique experiment configurations" + f"✅ Built manifest with {len(manifest.experiments)} unique experiment configurations" ) # Check manifest structure - for key, value in list(manifest.experiment_keys.items())[:3]: # Show first 3 - scenarios = value.get("scenarios", {}) + for key, value in list(manifest.experiments.items())[:3]: # Show first 3 + scenarios = value.scenarios print(f"📋 Config '{key}' has {len(scenarios)} scenarios") # Verify manifest structure assert manifest, "Empty manifest generated" + assert isinstance(manifest, Manifest), "Should return Manifest instance" - first_key = list(manifest.keys())[0] - first_entry = manifest[first_key] + if manifest.experiments: + first_key = list(manifest.experiments.keys())[0] + first_experiment = manifest.experiments[first_key] - assert "scenarios" in first_entry, "Manifest missing scenarios key" + assert hasattr(first_experiment, "scenarios"), 
"Experiment missing scenarios" + assert hasattr(first_experiment, "parameters"), "Experiment missing parameters" - first_scenario = list(first_entry["scenarios"].values())[0] - required_fields = ["input_output", "scores", "timing", "config"] + if first_experiment.scenarios: + first_scenario = list(first_experiment.scenarios.values())[0] + required_fields = ["input_output", "timing"] # scores is optional + + for field in required_fields: + assert hasattr(first_scenario, field), f"Scenario missing {field} field" - assert all(field in first_scenario for field in required_fields), ( - "Manifest missing required fields" - ) print("✅ Manifest structure is correct")