ITM-Kitware · ygefen · Jan 16, 2026 · Jan 30, 2026 · Feb 5, 2026 · Feb 5, 2026
diff --git a/align_system/algorithms/abstracts.py b/align_system/algorithms/abstracts.py
@@ -16,11 +16,11 @@ def choose_action(self,
 
 class StructuredInferenceEngine(ABC):
     @abstractmethod
-    def dialog_to_prompt(dialog: list[dict]) -> str:
+    def dialog_to_prompt(self, dialog: list[dict]) -> str:
         pass
 
     @abstractmethod
-    def run_inference(prompts: Union[str, list[str]],
+    def run_inference(self, prompts: Union[str, list[str]],
                       schema: str) -> Union[dict, list[dict]]:
         pass
 

diff --git a/align_system/algorithms/openai_inference_engine.py b/align_system/algorithms/openai_inference_engine.py
diff --git a/align_system/configs/experiment/openai_api/baseline.yaml b/align_system/configs/experiment/openai_api/baseline.yaml
@@ -0,0 +1,36 @@
+# @package _global_
+defaults:
+  - override /adm: pipeline_baseline
+  - override /interface: ta3
+  - override /inference_engine@adm.structured_inference_engine: openai_gpt5
+
+interface:
+  session_type: adept
+  training_session: full
+  username: "testrun-pipeline_baseline"
+  domain: "p2triage"
+
+adm:
+  step_definitions:
+    outlines_baseline:
+      scenario_description_template:
+        _target_: align_system.prompt_engineering.outlines_prompts.Phase2ScenarioDescription
+      prompt_template:
+        _target_: align_system.prompt_engineering.outlines_prompts.Phase2BaselinePrompt
+
+      enable_caching: true
+
+  instance:
+    steps:
+    # Reference the step instances we want to use in order
+    - ${ref:adm.step_definitions.format_choices}
+    - ${ref:adm.step_definitions.outlines_baseline}
+    # - ${ref:adm.step_definitions.action_parameter_completion}
+    - ${ref:adm.step_definitions.ensure_chosen_action}
+    - ${ref:adm.step_definitions.populate_choice_info}
+
+driver:
+  apply_action_filtering: false
+
+force_determinism: true
+align_to_target: false
diff --git a/align_system/configs/experiment/openai_api/comparative_regression.yaml b/align_system/configs/experiment/openai_api/comparative_regression.yaml
@@ -0,0 +1,34 @@
+# @package _global_
+defaults:
+  - override /adm: phase2_pipeline_fewshot_comparative_regression
+  - override /interface: ta3
+  - override /adm_component/alignment@adm.step_definitions.scalar_alignment: random_effects_tuple
+  - override /inference_engine@adm.structured_inference_engine: openai_gpt5
+
+interface:
+  session_type: adept
+  training_session: full
+  username: "testrun-pipeline_fewshot_comp_reg_loo"
+  domain: "p2triage"
+
+adm:
+  step_definitions:
+    regression_icl:
+      icl_generator_partial:
+        incontext_settings:
+          number: 20
+          leave_one_out_strategy: 'scenario_description' # LOO (leave-one-out) - remove for eval
+          datasets:
+            medical: /data/shared/samba/phase2_icl/Feb2026-MU-train_20251218.json
+            affiliation: /data/shared/samba/phase2_icl/Feb2026-AF-train_20251218.json
+            merit: /data/shared/samba/phase2_icl/Feb2026-MF-train_20251218.json
+            personal_safety: /data/shared/samba/phase2_icl/Feb2026-PS-train_20251218.json
+            search: /data/shared/samba/phase2_icl/Feb2026-SS-train_20251218.json
+    comparative_regression:
+      enable_caching: true
+
+driver:
+  apply_action_filtering: false
+
+force_determinism: true
+align_to_target: true
diff --git a/align_system/configs/experiment/openai_api/direct_regression_test.yaml b/align_system/configs/experiment/openai_api/direct_regression_test.yaml
@@ -0,0 +1,17 @@
+# @package _global_
+defaults:
+  - /alignment_target: "june2025/ADEPT-June2025-affiliation-1.0.yaml"
+  - override /adm: phase2_pipeline_direct_regression
+  - override /adm_component/alignment@adm.step_definitions.scalar_alignment: medical_urgency_weighted_scalar
+  - override /inference_engine@adm.structured_inference_engine: openai_gpt5
+  - override /interface: input_output_file
+
+interface:
+  state_hydration_domain: "p2triage"
+  input_output_filepath: ${test_data_dir}/July2025-AF-train_20250804_timing_subsample.json
+
+driver:
+  apply_action_filtering: false
+
+force_determinism: true
+align_to_target: true
diff --git a/align_system/configs/experiment/openai_api/full_train_run.yaml b/align_system/configs/experiment/openai_api/full_train_run.yaml
@@ -0,0 +1,23 @@
+# @package _global_
+defaults:
+  - override /adm: phase2_pipeline_direct_regression
+  - override /interface: ta3
+  - override /adm_component/alignment@adm.step_definitions.scalar_alignment: random_effects_tuple
+  - override /inference_engine@adm.structured_inference_engine: openai_gpt5
+
+interface:
+  session_type: adept
+  training_session: full
+  username: "testrun-pipeline_direct"
+  domain: "p2triage"
+
+adm:
+  step_definitions:
+    direct_regression:
+      enable_caching: true
+
+driver:
+  apply_action_filtering: false
+
+force_determinism: true
+align_to_target: true
diff --git a/align_system/configs/inference_engine/openai_gpt4.yaml b/align_system/configs/inference_engine/openai_gpt4.yaml
@@ -0,0 +1,17 @@
+_target_: align_system.algorithms.openai_inference_engine.OpenAIInferenceEngine
+
+# https://github.com/openai/openai-python/blob/main/src/openai/_client.py#L99
+model_name: gpt-4o
+temperature: 0.7
+top_p: 0.9
+max_tokens: 4096
+
+# Optional kwargs
+base_url: null       # Set to null for OpenAI cloud
+api_key: null        # Reads from OPENAI_API_KEY env var if null
+organization: null   # Reads from OPENAI_ORG_ID env var if null
+project: null        # Reads from OPENAI_PROJECT_ID env var if null
+webhook_secret: null # Reads from OPENAI_WEBHOOK_SECRET env var if null
+timeout: "NOT_GIVEN"
+# Optional unstable kwargs
+_strict_response_validation: True
diff --git a/align_system/configs/inference_engine/openai_gpt5.yaml b/align_system/configs/inference_engine/openai_gpt5.yaml
@@ -0,0 +1,14 @@
+_target_: align_system.algorithms.openai_inference_engine.OpenAIInferenceEngine
+
+# https://github.com/openai/openai-python/blob/main/src/openai/_client.py#L99
+model_name: gpt-5.2
+temperature: 0.7
+top_p: 0.9
+max_tokens: 4096
+
+# Optional kwargs
+base_url: null       # Set to null for OpenAI cloud
+api_key: null        # Reads from OPENAI_API_KEY env var if null
+timeout: "NOT_GIVEN"
+# Optional unstable kwargs
+_strict_response_validation: True
diff --git a/align_system/configs/inference_engine/vllm_openai_llama3.yaml b/align_system/configs/inference_engine/vllm_openai_llama3.yaml
@@ -0,0 +1,7 @@
+_target_: align_system.algorithms.openai_inference_engine.OpenAIInferenceEngine
+
+model_name: meta-llama/Llama-3.1-8B-Instruct
+base_url: http://localhost:8000/v1
+temperature: 0.3
+top_p: 0.9
+max_tokens: 4096
diff --git a/align_system/prompt_engineering/outlines_prompts.py b/align_system/prompt_engineering/outlines_prompts.py
@@ -1480,7 +1480,18 @@ def __call__(self, character, scenario_state):
         threat_unstructured = scenario_state['threat_state']['unstructured']
         char_unstructured = character['unstructured']
 
-        if m := re.match(f'{threat_unstructured}(.+){char_unstructured}', full_state_unstructured.replace("\n", " ")):
+        pattern = "{}(.+?){}".format(
+            threat_unstructured.replace("\n", " "),
+            char_unstructured.replace("\n", " "))
+
+        if m := re.match(pattern, full_state_unstructured.replace("\n", " ")):
             setup = m.group(1).strip()
+        else:
+            # FIXME: This fallback (pattern did not match, so only the
+            # character description is returned) is needed specifically for
+            # multi-attribute targets (and single-attribute scenarios)
+            # where we may need to run regression for Personal Safety on an
+            # Affiliation scenario (where the text structure is a bit different)
+            return f"  - {char_unstructured}"
 
         return f"{setup}\n  - {char_unstructured}"