From 2eda50bdbc06f2d30e43152e936beeb5487926e0 Mon Sep 17 00:00:00 2001
From: vvenglaturmsft
Date: Tue, 19 Aug 2025 07:23:07 -0400
Subject: [PATCH 01/20] initial

---
 src/bots/assistant_bot.py                | 182 +++++++++++++++--
 src/data_models/chat_context.py          |  20 +-
 src/data_models/chat_context_accessor.py |  13 +-
 src/group_chat.py                        |  52 +++--
 src/scenarios/default/config/agents.yaml |  47 +++++
 src/services/__init__.py                 |   0
 src/services/patient_context_analyzer.py | 201 +++++++++++++++++++
 src/services/patient_context_service.py  | 241 +++++++++++++++++++++++
 8 files changed, 720 insertions(+), 36 deletions(-)
 create mode 100644 src/services/__init__.py
 create mode 100644 src/services/patient_context_analyzer.py
 create mode 100644 src/services/patient_context_service.py

diff --git a/src/bots/assistant_bot.py b/src/bots/assistant_bot.py
index d7a6f66..d9ce036 100644
--- a/src/bots/assistant_bot.py
+++ b/src/bots/assistant_bot.py
@@ -2,8 +2,10 @@
 # Licensed under the MIT License.
 
 import asyncio
+import json
 import logging
 import os
+import json
 
 from botbuilder.core import MessageFactory, TurnContext
 from botbuilder.core.teams import TeamsActivityHandler
@@ -11,10 +13,17 @@
 from botbuilder.schema import Activity, ActivityTypes
 from semantic_kernel.agents import AgentGroupChat
 
+
+from semantic_kernel.contents import AuthorRole
+from services.patient_context_service import PATIENT_CONTEXT_PREFIX
+
 from data_models.app_context import AppContext
 from data_models.chat_context import ChatContext
 from errors import NotAuthorizedError
 from group_chat import create_group_chat
+from services.patient_context_service import PatientContextService
+from services.patient_context_analyzer import PatientContextAnalyzer
+
 
 logger = logging.getLogger(__name__)
 
@@ -32,10 +41,13 @@ def __init__(
         self.name = agent["name"]
         self.turn_contexts = turn_contexts
         self.adapters = adapters
-        self.adapters[self.name].on_turn_error = self.on_error  # add error handling
+        self.adapters[self.name].on_turn_error = self.on_error
         self.data_access = app_context.data_access
         self.root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 
+        analyzer = PatientContextAnalyzer(token_provider=app_context.cognitive_services_token_provider)
+        self.patient_context_service = PatientContextService(analyzer=analyzer)
+
     async def get_bot_context(
         self, conversation_id: str, bot_name: str, turn_context: TurnContext
     ):
@@ -89,20 +101,40 @@ async def on_message_activity(self, turn_context: TurnContext) -> None:
         chat_context_accessor = self.data_access.chat_context_accessor
         chat_artifact_accessor = self.data_access.chat_artifact_accessor
 
-        # Load chat context
         chat_ctx = await chat_context_accessor.read(conversation_id)
 
-        # Delete thread if user asks
-        if turn_context.activity.text.endswith("clear"):
-            # Add clear message to chat history
-            chat_ctx.chat_history.add_user_message(turn_context.activity.text.strip())
+        # Extract raw user text (without bot mention) once
+        raw_user_text = turn_context.remove_recipient_mention(turn_context.activity).strip()
+
+        # Full conversation clear (existing behavior)
+        if raw_user_text.endswith("clear"):
+            chat_ctx.chat_history.add_user_message(raw_user_text)
             await chat_context_accessor.archive(chat_ctx)
             await chat_artifact_accessor.archive(conversation_id)
             await turn_context.send_activity("Conversation cleared!")
            return
+
+        # Decide & apply patient context BEFORE building group chat
+        # decision = await self.patient_context_service.decide_and_apply(raw_user_text, chat_ctx)
+        # Decide & apply patient context BEFORE
building group chat + # Decide & apply patient context BEFORE building group chat + logger.info(f"🤖 BOT CONTEXT START - About to call patient context service") + logger.info(f"🤖 BOT CONTEXT - Conversation: {conversation_id} | Input: '{raw_user_text}'") + logger.info(f"🤖 BOT CONTEXT - Current patient before service: {getattr(chat_ctx, 'patient_id', None)}") + logger.info( + f"🤖 BOT CONTEXT - Known patients before service: {list(getattr(chat_ctx, 'patient_contexts', {}).keys())}") + + decision, timing = await self.patient_context_service.decide_and_apply(raw_user_text, chat_ctx) + + logger.info(f"🤖 BOT CONTEXT COMPLETE - Decision: {decision} | Timing: {timing}") + logger.info(f"🤖 BOT CONTEXT - Current patient after service: {getattr(chat_ctx, 'patient_id', None)}") + logger.info( + f"🤖 BOT CONTEXT - Known patients after service: {list(getattr(chat_ctx, 'patient_contexts', {}).keys())}") + logger.info(f"🤖 BOT CONTEXT - Total chat messages: {len(chat_ctx.chat_history.messages)}") + logger.info(f"Patient context decision: {decision} | Input: '{raw_user_text}' | Timing: {timing}") + agents = self.all_agents if len(chat_ctx.chat_history.messages) == 0: - # new conversation. Let's see which agents are available. async def is_part_of_conversation(agent): context = await self.get_bot_context(turn_context.activity.conversation.id, agent["name"], turn_context) typing_activity = Activity( @@ -118,24 +150,21 @@ async def is_part_of_conversation(agent): return True except Exception as e: logger.info(f"Failed to send typing activity to {agent['name']}: {e}") - # This happens if the agent is not part of the group chat. - # Remove the agent from the list of available agents return False part_of_conversation = await asyncio.gather(*(is_part_of_conversation(agent) for agent in self.all_agents)) - agents = [agent for agent, should_include in zip(self.all_agents, part_of_conversation) if should_include] + agents = [agent for agent, include in zip(self.all_agents, part_of_conversation) if include] (chat, chat_ctx) = create_group_chat(self.app_context, chat_ctx, participants=agents) - # Add user message to chat history - text = turn_context.remove_recipient_mention(turn_context.activity).strip() - text = f"{self.name}: {text}" - chat_ctx.chat_history.add_user_message(text) + # Add user message after context decision (no extra tagging here) + # chat_ctx.chat_history.add_user_message(f"{self.name}: {raw_user_text}") + user_with_ctx = self._append_pc_ctx(f"{self.name}: {raw_user_text}", chat_ctx) + chat_ctx.chat_history.add_user_message(user_with_ctx) chat.is_complete = False await self.process_chat(chat, chat_ctx, turn_context) - # Save chat context try: await chat_context_accessor.write(chat_ctx) except: @@ -169,6 +198,17 @@ async def process_chat( if response.content.strip() == "": continue + # msgText = self._append_links_to_msg(response.content, chat_ctx) + + # Add this code right before the existing `response.content = self._append_pc_ctx(response.content, chat_ctx)` line: + # Record active agent in PATIENT_CONTEXT_JSON + # try: + # self._set_system_pc_ctx_agent(chat_ctx, "active", response.name) + # except Exception as e: + # logger.info(f"Failed to set active agent in PC_CTX: {e}") + + # Attach current patient context snapshot to assistant output+ + response.content = self._append_pc_ctx(response.content, chat_ctx) msgText = self._append_links_to_msg(response.content, chat_ctx) msgText = await self.generate_sas_for_blob_urls(msgText, chat_ctx) @@ -217,3 +257,115 @@ async def generate_sas_for_blob_urls(self, 
msgText: str, chat_ctx: ChatContext) return msgText finally: chat_ctx.display_blob_urls = [] + + def _get_system_patient_context_json(self, chat_ctx: ChatContext) -> str | None: + """Extract the JSON payload from the current PATIENT_CONTEXT_JSON system message.""" + for msg in chat_ctx.chat_history.messages: + if msg.role == AuthorRole.SYSTEM: + # Handle both string content and itemized content + content = msg.content + if isinstance(content, str): + text = content + else: + # Try to extract from items if content is structured + items = getattr(msg, "items", None) or getattr(content, "items", None) + if items: + parts = [] + for item in items: + item_text = getattr(item, "text", None) or getattr(item, "content", None) + if item_text: + parts.append(str(item_text)) + text = "".join(parts) if parts else str(content) if content else "" + else: + text = str(content) if content else "" + + if text and text.startswith(PATIENT_CONTEXT_PREFIX): + # Extract JSON after "PATIENT_CONTEXT_JSON:" + json_part = text[len(PATIENT_CONTEXT_PREFIX):].strip() + if json_part.startswith(":"): + json_part = json_part[1:].strip() + return json_part if json_part else None + return None + + def _append_pc_ctx(self, base: str, chat_ctx: ChatContext) -> str: + logger.info(f"📋 PC_CTX APPEND START - Base message length: {len(base)}") + + # Avoid double-tagging + if "\nPC_CTX" in base or "\n*PT_CTX:*" in base: + logger.info(f"📋 PC_CTX APPEND - Already has PC_CTX, skipping") + return base + + # Get the actual injected system patient context JSON + json_payload = self._get_system_patient_context_json(chat_ctx) + logger.info(f"📋 PC_CTX APPEND - Retrieved JSON payload: {json_payload}") + + if not json_payload: + logger.info(f"📋 PC_CTX APPEND - No JSON payload found, not appending context.") + return base + + # Format the JSON payload into a simple, readable Markdown string + try: + obj = json.loads(json_payload) + + lines = ["\n\n---", "\n*PT_CTX:*"] + if obj.get("patient_id"): + lines.append(f"- **Patient ID:** `{obj['patient_id']}`") + if obj.get("conversation_id"): + lines.append(f"- **Conversation ID:** `{obj['conversation_id']}`") + + if obj.get("all_patient_ids"): + active_id = obj.get("patient_id") + ids_str = ", ".join(f"`{p}`{' (active)' if p == active_id else ''}" for p in obj["all_patient_ids"]) + lines.append(f"- **Session Patients:** {ids_str}") + + if obj.get("chat_summary"): + # Clean up summary for display + summary = obj['chat_summary'].replace('\n', ' ').strip() + if summary: + lines.append(f"- **Summary:** *{summary}*") + + if not obj.get("patient_id"): + lines.append("- *No active patient.*") + + # Only add the block if there's something to show besides the header + if len(lines) > 2: + formatted_text = "\n".join(lines) + result = f"{base}{formatted_text}" + logger.info(f"📋 PC_CTX APPEND - Successfully formatted as text, final length: {len(result)}") + return result + else: + logger.info(f"📋 PC_CTX APPEND - No relevant data to display.") + return base + + except json.JSONDecodeError as e: + logger.warning(f"📋 PC_CTX APPEND - JSON decode error: {e}, using raw payload") + # Fallback to raw if JSON is malformed, but keep it simple + return f"{base}\n\n---\n*PT_CTX (raw):* `{json_payload}`" + + def _append_pc_ctx_old(self, base: str, chat_ctx: ChatContext) -> str: + logger.info(f"📋 PC_CTX APPEND START - Base message length: {len(base)}") + + # Avoid double-tagging + if "\nPC_CTX" in base: + logger.info(f"📋 PC_CTX APPEND - Already has PC_CTX, skipping") + return base + + # Get the actual injected system 
patient context JSON
+        json_payload = self._get_system_patient_context_json(chat_ctx)
+        logger.info(f"📋 PC_CTX APPEND - Retrieved JSON payload: {json_payload}")
+
+        if not json_payload:
+            logger.info(f"📋 PC_CTX APPEND - No JSON payload found, adding empty marker")
+            return base + "\nPC_CTX (empty)"
+
+        # Pretty-print the actual system JSON
+        try:
+            obj = json.loads(json_payload)
+            pretty = json.dumps(obj, indent=2)
+            result = f"{base}\nPC_CTX\n{pretty}
" + logger.info(f"📋 PC_CTX APPEND - Successfully formatted JSON, final length: {len(result)}") + return result + except json.JSONDecodeError as e: + logger.warning(f"📋 PC_CTX APPEND - JSON decode error: {e}, using raw payload") + # Fallback to raw if JSON is malformed + return f"{base}\nPC_CTX {json_payload}" diff --git a/src/data_models/chat_context.py b/src/data_models/chat_context.py index 8a1d64c..9a23e02 100644 --- a/src/data_models/chat_context.py +++ b/src/data_models/chat_context.py @@ -2,19 +2,37 @@ # Licensed under the MIT license. import os +from dataclasses import dataclass, field +from typing import Dict, Any from semantic_kernel.contents.chat_history import ChatHistory +@dataclass +class PatientContext: + """ + Minimal per-patient context (future expansion point: facts, summary, provenance). + """ + patient_id: str + facts: Dict[str, Any] = field(default_factory=dict) # placeholder for future enrichment + + class ChatContext: def __init__(self, conversation_id: str): self.conversation_id = conversation_id self.chat_history = ChatHistory() + + # Active patient (single pointer) self.patient_id = None + + # All encountered patient contexts (allows switching back without re-extraction) + self.patient_contexts: Dict[str, PatientContext] = {} + + # Existing fields self.patient_data = [] self.display_blob_urls = [] self.display_image_urls = [] self.display_clinical_trials = [] self.output_data = [] self.root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - self.healthcare_agents = {} + self.healthcare_agents = {} \ No newline at end of file diff --git a/src/data_models/chat_context_accessor.py b/src/data_models/chat_context_accessor.py index 772ab3b..ef76159 100644 --- a/src/data_models/chat_context_accessor.py +++ b/src/data_models/chat_context_accessor.py @@ -10,7 +10,7 @@ from azure.storage.blob.aio import BlobServiceClient from semantic_kernel.contents.chat_history import ChatHistory -from data_models.chat_context import ChatContext +from data_models.chat_context import ChatContext, PatientContext logger = logging.getLogger(__name__) @@ -96,6 +96,12 @@ def serialize(chat_ctx: ChatContext) -> str: "chat_history": chat_ctx.chat_history.serialize(), "patient_id": chat_ctx.patient_id, "patient_data": chat_ctx.patient_data, + "patient_contexts": { + pid: { + "patient_id": pctx.patient_id, + "facts": pctx.facts + } for pid, pctx in chat_ctx.patient_contexts.items() + }, "display_blob_urls": chat_ctx.display_blob_urls, "display_clinical_trials": chat_ctx.display_clinical_trials, "output_data": chat_ctx.output_data, @@ -111,6 +117,11 @@ def deserialize(data_str: str) -> ChatContext: ctx = ChatContext(data["conversation_id"]) ctx.chat_history = ChatHistory.restore_chat_history(data["chat_history"]) ctx.patient_id = data["patient_id"] + for pid, stored in (data.get("patient_contexts") or {}).items(): + ctx.patient_contexts[pid] = PatientContext( + patient_id=stored.get("patient_id", pid), + facts=stored.get("facts", {}) or {} + ) ctx.patient_data = data["patient_data"] ctx.display_blob_urls = data["display_blob_urls"] ctx.display_clinical_trials = data["display_clinical_trials"] diff --git a/src/group_chat.py b/src/group_chat.py index 18563e5..9defa7c 100644 --- a/src/group_chat.py +++ b/src/group_chat.py @@ -28,6 +28,7 @@ from healthcare_agents import HealthcareAgent from healthcare_agents import config as healthcare_agent_config + DEFAULT_MODEL_TEMP = 0 DEFAULT_TOOL_TYPE = "function" @@ -169,30 +170,38 @@ def _create_agent(agent_config: dict): termination_function = 
KernelFunctionFromPrompt(
         function_name="termination",
         prompt=f"""
-    Determine if the conversation should end based on the most recent message.
-    You only have access to the last message in the conversation.
+    Determine if the conversation should end based on the most recent message only.
+    IMPORTANT: In the History, any leading "*AgentName*:" indicates the SPEAKER of the message, not the addressee.
 
-    Reply by giving your full reasoning, and the verdict. The verdict should be either "yes" or "no".
+    Reply with your full reasoning and a verdict that is exactly "yes" or "no".
 
-    You are part of a group chat with several AI agents and a user.
-    The agents are names are:
+    You are part of a group chat with several AI agents and a user.
+    The agent names are:
     {",".join([f"{agent['name']}" for agent in all_agents_config])}
 
-    If the most recent message is a question addressed to the user, return "yes".
-    If the question is addressed to "we" or "us", return "yes". For example, if the question is "Should we proceed?", return "yes".
-    If the question is addressed to another agent, return "no".
-    If it is a statement addressed to another agent, return "no".
-    Commands addressed to a specific agent should result in 'no' if there is clear identification of the agent.
-    Commands addressed to "you" or "User" should result in 'yes'.
-    If you are not certain, return "yes".
+    Return "yes" when the last message:
+    - asks the user a question (ends with "?" or uses "you"/"User"), OR
+    - invites the user to respond (e.g., "let us know", "how can we assist/help", "feel free to ask",
+      "what would you like", "should we", "can we", "would you like me to", "do you want me to"), OR
+    - addresses "we/us" as a decision/query to the user.
+
+    Return "no" when the last message:
+    - is a command or question to a specific agent by name, OR
+    - is a statement addressed to another agent.
+
+    Commands addressed to "you" or "User" => "yes".
+    If you are uncertain, return "yes".
+    Ignore any debug/metadata like "PC_CTX" or JSON blobs when deciding.
 
     EXAMPLES:
-    - "User, can you confirm the correct patient ID?" => "yes"
-    - "*ReportCreation*: Please compile the patient timeline. Let's proceed with *ReportCreation*." => "no" (ReportCreation is an agent)
-    - "*ReportCreation*, please proceed ..." => "no" (ReportCreation is an agent)
-    - "If you have any further questions or need assistance, feel free to ask." => "yes"
-    - "Let's proceed with Radiology." => "no" (Radiology is an agent)
-    - "*PatientStatus*, please use ..." => "no" (PatientStatus is an agent)
+    - "User, can you confirm the correct patient ID?" => "yes"
+    - "*ReportCreation*: Please compile the patient timeline. Let's proceed with *ReportCreation*." => "no" (ReportCreation is an agent)
+    - "*ReportCreation*, please proceed ..." => "no" (ReportCreation is an agent)
+    - "If you have any further questions or need assistance, feel free to ask." => "yes"
+    - "Let's proceed with Radiology." => "no" (Radiology is an agent)
+    - "*PatientStatus*, please use ..." => "no" (PatientStatus is an agent)
+    - "*Orchestrator*: Patient context is set to \"patient_4\". Please let us know how we can assist you with this patient today."
=> "yes" + History: {{{{$history}}}} """, @@ -208,6 +217,11 @@ def evaluate_termination(result): def evaluate_selection(result): logger.info(f"Selection function result: {result}") rule = ChatRule.model_validate_json(str(result.value[0])) + # Record next agent hint + try: + _set_pc_ctx_agent_field("next", rule.verdict) + except Exception as e: + logger.info(f"Failed to set next agent in PC_CTX: {e}") return rule.verdict if rule.verdict in [agent["name"] for agent in all_agents_config] else facilitator chat = AgentGroupChat( @@ -230,7 +244,7 @@ def evaluate_selection(result): result_parser=evaluate_termination, agent_variable_name="agents", history_variable_name="history", - maximum_iterations=30, + maximum_iterations=8, # Termination only looks at the last message history_reducer=ChatHistoryTruncationReducer( target_count=1, auto_reduce=True diff --git a/src/scenarios/default/config/agents.yaml b/src/scenarios/default/config/agents.yaml index 7ba19ed..06fc68f 100644 --- a/src/scenarios/default/config/agents.yaml +++ b/src/scenarios/default/config/agents.yaml @@ -26,6 +26,11 @@ When presenting the plan, ALWAYS specify the following rule: Each agent, after completing their task, should yield the chat back to you (Orchestrator). Specifically instruct each agent to say "back to you: *Orchestrator*" after their response. + Context rule - Patient Context: + - Before planning or delegating, read the SYSTEM message that begins with "PATIENT_CONTEXT_JSON:" and treat it as the active patient context. + - If the context is missing or unclear, ask PatientHistory to obtain/confirm the patient ID, then proceed. + - Remind other agents to use the current system patient context; they should not set/switch/clear it themselves. + facilitator: true description: | Your role is to moderate the discussion, present the order of participants, and facilitate the conversation. @@ -55,6 +60,13 @@ - Do not provide analysis or opinions on the data. - Do provide answers to questions about the patient's history and data. Use the tools at your disposal to answer those questions. 7. Yield back the chat. When requested, yield the chat back to *Orchestrator* by saying "back to you: *Orchestrator*" or "back to you: *PatientStatus*". + + Context rule - Patient Context: + - Before answering or calling tools, read the SYSTEM message that begins with "PATIENT_CONTEXT_JSON:" and use its patient_id. + - If the patient context is missing or unclear, ask the user for the patient ID and wait to proceed after it’s provided. Do not assume or invent one. + - When calling tools (e.g., load_patient_data, create_timeline), pass the patient_id from the SYSTEM patient context. + - Prefer the SYSTEM patient context over any "PC_CTX" audit lines. Do not attempt to set/switch/clear patient context yourself. + temperature: 0.0 tools: - name: patient_data @@ -70,6 +82,12 @@ For example, you can say: "I have used the CXRReportGen model to analyze the chest x-ray. Here are the findings." You will comment on whether those findings are consistent with the patient's medical history and other data. + + Context rule - Patient Context: + - Before answering or calling tools, read the SYSTEM message that begins with "PATIENT_CONTEXT_JSON:" and use its patient_id. + - Infer indications/history from the conversation but anchor on the current patient context. + - Prefer the SYSTEM patient context over any "PC_CTX" audit lines. Do not attempt to set/switch/clear patient context yourself. 
+ tools: - name: cxr_report_gen description: | @@ -93,6 +111,12 @@ If this information is not available, ask PatientHistory specifically for the missing information. DO: Ask PatientHistory. EXAMPLE: "*PatientHistory*, can you provide me with the patient's #BLANK?. Try to infer the information if not available". + + Context rule - Patient Context: + - Before answering, read the SYSTEM message that begins with "PATIENT_CONTEXT_JSON:" and use its patient_id. + - If key attributes are missing, request them for the current patient (not a different one). + - Prefer the SYSTEM patient context over any "PC_CTX" audit lines. Do not attempt to set/switch/clear patient context yourself. + description: | A PatientStatus agent. You provide current status of a patient using. **You provide**: current status of a patient. **You need**: age, staging, primary site, histology, biomarkers, treatment history, ecog performance status. This can be obtained by PatientHistory. @@ -100,12 +124,24 @@ instructions: | You are a board-certified medical oncologist writing a treatment-plan note. You will be provided with patient information ( demographics, stage, prior therapies, biomarkers, current status ) prepared by a clinical assistant. Your task is to produce a succint "Patient Summary" and "Treatment Plan" that is (1) continuous with what the patient is already receiving, (2) explicitly addresses next-step options at progression / response (e.g., maintenance vs. switch therapy), and (3) integrates every molecular or clinical detail provided. When writing the plan start with "Continue/Initiate/Modify" and clearly state whether you are continuing an existing regimen or starting something new. Cite all relevant biomarkers and comorbidities to justify targeted drugs or trials (e.g., "MET amplification → cabozantinib"). Include follow-up diagnostics / consults that are mentioned or clinically mandatory (MRI, CA-19-9, cardiology eval, ctDNA, etc.). Provide a progression-contingency line ("If progression, consider"). List maintenance strategy when appropriate. Do not invent allergies, symptoms, or medications; if key data are absent, state "Need:" rather than guessing. Output of "Treatment Plan" should include: Primary recommendation (continue vs initiate), Rationale (biomarker / guideline), Surveillance & consults, Progression-contingency options, Maintenance / supportive care. + + Context rule - Patient Context: + - Use the current patient from the SYSTEM "PATIENT_CONTEXT_JSON:" message as the authoritative context. + - If required details are missing, request them for the current patient. + - Prefer the SYSTEM patient context over any "PC_CTX" audit lines. Do not attempt to set/switch/clear patient context yourself. + description: | A Clinical Guidelines agent. You provide treatment recommendations. **You provide**: treatment recommendations. **You need**: patient status from PatientStatus. - name: ReportCreation instructions: | You are an AI agent that assemble tumor board word document using information previously prepared by other agents. Do not summarize the conversation or provide additional analysis. Use the full information provided by the other agents to assemble the tumor board content. You are provided a tool to export the tumor board as the content to a word doc. When user asks to create or export a word document, you must use the provided tool. + + Context rule - Patient Context: + - Assemble content for the currently active patient as defined by the SYSTEM "PATIENT_CONTEXT_JSON:" message. 
+ - If the context is missing, ask the user to provide/confirm the patient before exporting. + - Prefer the SYSTEM patient context over any "PC_CTX" audit lines. Do not attempt to set/switch/clear patient context yourself. + temperature: 0 tools: - name: content_export @@ -134,6 +170,12 @@ Only present clinical trials for which the patient is eligible. If follow up questions are asked, you may additionally explain why a specific trial is not suitable for the patient. Offer to present additional information about the trial to the user, at which point you can call the `display_more_information_about_a_trial` tool. + + Context rule - Patient Context: + - Use the patient from the SYSTEM "PATIENT_CONTEXT_JSON:" message when forming search criteria. + - If required attributes are missing, ask for them for the current patient. + - Prefer the SYSTEM patient context over any "PC_CTX" audit lines. Do not attempt to set/switch/clear patient context yourself. + tools: - name: clinical_trials description: | @@ -174,6 +216,11 @@ - Source ID: 78722 [The wisdom of programming](https://www.example.com/source2) + Context rule - Patient Context: + - When a question is patient-specific, align with the current patient in the SYSTEM "PATIENT_CONTEXT_JSON:" message. + - If not patient-specific, proceed normally. + - Prefer the SYSTEM patient context over any "PC_CTX" audit lines. Do not attempt to set/switch/clear patient context yourself. + graph_rag_url: "https://ncsls.azure-api.net/" graph_rag_index_name: "nsclc-index-360MB" tools: diff --git a/src/services/__init__.py b/src/services/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/services/patient_context_analyzer.py b/src/services/patient_context_analyzer.py new file mode 100644 index 0000000..f45b16b --- /dev/null +++ b/src/services/patient_context_analyzer.py @@ -0,0 +1,201 @@ +import json +import logging +import os +import time +from typing import Optional, Literal + +from semantic_kernel import Kernel +from semantic_kernel.connectors.ai.open_ai.services.azure_chat_completion import AzureChatCompletion +from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings +from semantic_kernel.contents import ChatHistory + +logger = logging.getLogger(__name__) + +AnalyzerAction = Literal["NONE", "CLEAR", "ACTIVATE_NEW", "SWITCH_EXISTING", "UNCHANGED"] + + +class PatientContextAnalyzer: + """ + Single LLM call decides patient context action and (if relevant) patient_id. 
+ """ + + def __init__( + self, + deployment_name: Optional[str] = None, + token_provider=None, + api_version: Optional[str] = None, + ): + self.deployment_name = ( + deployment_name + or os.getenv("PATIENT_CONTEXT_DECIDER_DEPLOYMENT_NAME") + or os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME") + ) + if not self.deployment_name: + raise ValueError("No deployment name for patient context analyzer.") + self.api_version = api_version or os.getenv("AZURE_OPENAI_API_VERSION") or "2024-10-21" + + logger.info(f"🔧 ANALYZER INIT - Deployment: {self.deployment_name} | API Version: {self.api_version}") + + self._kernel = Kernel() + self._kernel.add_service( + AzureChatCompletion( + service_id="default", + deployment_name=self.deployment_name, + api_version=self.api_version, + ad_token_provider=token_provider, + ) + ) + logger.info(f"🔧 ANALYZER INIT COMPLETE - Kernel and service configured") + + async def analyze( + self, + user_text: str, + prior_patient_id: Optional[str], + known_patient_ids: list[str], + ) -> tuple[AnalyzerAction, Optional[str], float]: + """ + Returns (action, patient_id, duration_sec) + patient_id is only non-null for ACTIVATE_NEW | SWITCH_EXISTING | UNCHANGED + """ + start_time = time.time() + logger.info(f"🔍 ANALYZER START - Input: '{user_text}' | Prior: {prior_patient_id} | Known: {known_patient_ids}") + + if not user_text: + duration = time.time() - start_time + logger.info(f"🔍 ANALYZER RESULT - Empty input | Action: NONE | Duration: {duration:.4f}s") + return "NONE", None, duration + + system_prompt = f""" +You manage patient context for a medical chat application. + +Inputs: +- prior_patient_id: {prior_patient_id if prior_patient_id else "null"} +- known_patient_ids: {known_patient_ids} + +Rules: +1. If user clearly asks to clear/reset/remove the patient context -> action "CLEAR", patient_id null. +2. If user mentions a patient ID anywhere in their message: + - Extract the most specific patient identifier (e.g., "patient_4", "patient_123", etc.) + - If identical to prior_patient_id -> "UNCHANGED" + - If in known_patient_ids and different -> "SWITCH_EXISTING" + - If not in known_patient_ids -> "ACTIVATE_NEW" +3. Normalize variants like "patient 6" or "patient id patient_6" to "patient_6". Be tolerant of typos like "patiend id". +4. Ignore vague references without an ID. +5. Output STRICT JSON ONLY. 
No extra text, no code fences: +{{ + "action": "", + "patient_id": "" +}} + +Examples: +- "switch to patient id patient_5" -> {{"action": "ACTIVATE_NEW", "patient_id": "patient_5"}} +- "switch to patient with patient id patient_4" -> {{"action": "ACTIVATE_NEW", "patient_id": "patient_4"}} +- "switch to patient 6" -> {{"action": "ACTIVATE_NEW", "patient_id": "patient_6"}} +- "clear patient context" -> {{"action": "CLEAR", "patient_id": null}} +""".strip() + + # Build chat history per current SK API + chat = ChatHistory() + # chat.add_message(AuthorRole.SYSTEM, system_prompt) + # chat.add_message(AuthorRole.USER, user_text) + + chat.add_system_message(system_prompt) + chat.add_user_message(user_text) + + logger.info(f"🔍 ANALYZER LLM CALL - Using chat_history with system prompt length: {len(system_prompt)}") + + try: + svc = self._kernel.get_service("default") + logger.info(f"🔍 ANALYZER LLM CALL - Service retrieved: {type(svc).__name__}") + + settings = PromptExecutionSettings( + service_id="default", + temperature=0.0, + top_p=0.0, + max_tokens=200, + # If model supports it, enforce JSON mode: + response_format={"type": "json_object"}, + ) + + llm_start = time.time() + result = await svc.get_chat_message_content(chat_history=chat, settings=settings) + llm_duration = time.time() - llm_start + logger.info(f"🔍 ANALYZER LLM CALL COMPLETE - LLM call took: {llm_duration:.4f}s") + + # Normalize result to a single string + if isinstance(result, list): + content = "".join([(getattr(c, "content", "") or "") for c in result]) + else: + content = getattr(result, "content", "") or "" + + content = content.strip() + logger.info(f"🔍 ANALYZER LLM RESPONSE - Raw content: '{content}'") + + if not content: + duration = time.time() - start_time + logger.warning("🔍 ANALYZER LLM RESPONSE - Empty content") + return "NONE", None, duration + + # Strip accidental code fences + if content.startswith("```"): + content = content.strip("`") + if "\n" in content: + content = content.split("\n", 1)[1].strip() + + try: + data = json.loads(content) + except json.JSONDecodeError as je: + duration = time.time() - start_time + logger.error(f"🔍 ANALYZER JSON ERROR - Failed to parse JSON: {je} | Content: '{content}'") + return "NONE", None, duration + + action = (data.get("action") or "").strip().upper() + pid = data.get("patient_id") + if pid is not None: + pid = str(pid).strip() + + if action not in {"NONE", "CLEAR", "ACTIVATE_NEW", "SWITCH_EXISTING", "UNCHANGED"}: + duration = time.time() - start_time + logger.error(f"🔍 ANALYZER VALIDATION ERROR - Invalid action: {action}") + return "NONE", None, duration + + duration = time.time() - start_time + logger.info(f"🔍 ANALYZER RESULT SUCCESS - Action: {action} | Patient ID: {pid} | Duration: {duration:.4f}s") + return action, pid, duration + + except Exception as e: + duration = time.time() - start_time + logger.error( + f"🔍 ANALYZER ERROR - Exception: {type(e).__name__}: {e} | Duration: {duration:.4f}s", exc_info=True) + return "NONE", None, duration + + # Add this method to the PatientContextAnalyzer class (around line 100) + + async def summarize_text(self, text: str, max_tokens: int = 200) -> str: + """ + Summarize the given chat text into a few concise bullets focused on patient context. + Returns a short plain-text summary. + """ + system_prompt = ( + "Summarize the following chat in 3-6 crisp bullets. " + "Focus only on patient context (ID(s), key requests, agent progress, next actions). " + "Avoid boilerplate. Keep it under ~80 words." 
+ ) + chat = ChatHistory() + chat.add_system_message(system_prompt) + chat.add_user_message(text[:8000]) # cap input for safety + + try: + svc = self._kernel.get_service("default") + settings = PromptExecutionSettings( + service_id="default", + temperature=0.0, + top_p=0.0, + max_tokens=max_tokens, + ) + result = await svc.get_chat_message_content(chat_history=chat, settings=settings) + content = getattr(result, "content", "") or "" + return content.strip() + except Exception as e: + logger.warning(f"🔍 ANALYZER SUMMARY ERROR - {e}") + return "" diff --git a/src/services/patient_context_service.py b/src/services/patient_context_service.py new file mode 100644 index 0000000..ed17abb --- /dev/null +++ b/src/services/patient_context_service.py @@ -0,0 +1,241 @@ +import json +import logging +import time +from typing import Literal, TypedDict + + +from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents import AuthorRole + +from data_models.chat_context import ChatContext, PatientContext +from services.patient_context_analyzer import PatientContextAnalyzer + +logger = logging.getLogger(__name__) + +PATIENT_CONTEXT_PREFIX = "PATIENT_CONTEXT_JSON:" +Decision = Literal["NONE", "UNCHANGED", "NEW_BLANK", "SWITCH_EXISTING", "CLEAR"] + + +class TimingInfo(TypedDict): + analyzer: float + service: float + + +class PatientContextService: + """ + LLM-only patient context manager. + Decides action + (optionally) patient_id via PatientContextAnalyzer, + maintains a single system message carrying current patient context JSON. + """ + + def _estimate_tokens(self, text: str) -> int: + """Rough estimate (~4 chars/token) to avoid new dependencies""" + return max(1, len(text) // 4) + + def __init__(self, analyzer: PatientContextAnalyzer): + self.analyzer = analyzer + logger.info(f"🏥 SERVICE INIT - PatientContextService initialized with analyzer: {type(analyzer).__name__}") + + async def decide_and_apply(self, user_text: str, chat_ctx: ChatContext) -> tuple[Decision, TimingInfo]: + service_start_time = time.time() + + logger.info(f"🏥 SERVICE START - Input: '{user_text}' | Conversation: {chat_ctx.conversation_id}") + logger.info( + f"🏥 SERVICE START - Current Patient: {chat_ctx.patient_id} | Known Patients: {list(chat_ctx.patient_contexts.keys())}") + logger.info(f"🏥 SERVICE START - Chat history messages: {len(chat_ctx.chat_history.messages)}") + + # Log current system messages + system_messages = [m for m in chat_ctx.chat_history.messages if m.role == AuthorRole.SYSTEM] + logger.info(f"🏥 SERVICE START - Current system messages: {len(system_messages)}") + for i, msg in enumerate(system_messages): + content = getattr(msg, 'content', '') + if isinstance(content, str) and content.startswith(PATIENT_CONTEXT_PREFIX): + logger.info(f"🏥 SERVICE START - System message {i}: {content}") + + action, pid, analyzer_duration = await self.analyzer.analyze( + user_text=user_text, + prior_patient_id=chat_ctx.patient_id, + known_patient_ids=list(chat_ctx.patient_contexts.keys()), + ) + + logger.info( + f"🏥 SERVICE ANALYZER RESULT - Action: {action} | Patient ID: {pid} | Analyzer Duration: {analyzer_duration:.4f}s") + + # Store original state for comparison + original_patient_id = chat_ctx.patient_id + original_patient_contexts = dict(chat_ctx.patient_contexts) + + decision: Decision = "NONE" + if action == "CLEAR": + logger.info(f"🏥 SERVICE CLEARING - Clearing patient context from: {chat_ctx.patient_id}") + self._clear(chat_ctx) + decision = "CLEAR" + logger.info(f"🏥 SERVICE CLEARED 
- Patient context cleared, now: {chat_ctx.patient_id}") + elif action in ("ACTIVATE_NEW", "SWITCH_EXISTING"): + logger.info(f"🏥 SERVICE ACTIVATING - Attempting to activate patient: {pid}") + decision = self._activate_patient(pid, chat_ctx) if pid else "NONE" + logger.info(f"🏥 SERVICE ACTIVATED - Result decision: {decision} | New patient: {chat_ctx.patient_id}") + elif action == "UNCHANGED": + logger.info(f"🏥 SERVICE UNCHANGED - Patient context unchanged, keeping: {chat_ctx.patient_id}") + decision = "UNCHANGED" + + # Log state changes + if original_patient_id != chat_ctx.patient_id: + logger.info( + f"🏥 SERVICE STATE CHANGE - Patient ID changed from '{original_patient_id}' to '{chat_ctx.patient_id}'") + + if original_patient_contexts != chat_ctx.patient_contexts: + logger.info( + f"🏥 SERVICE STATE CHANGE - Patient contexts changed from {list(original_patient_contexts.keys())} to {list(chat_ctx.patient_contexts.keys())}") + + service_duration = time.time() - service_start_time + timing: TimingInfo = {"analyzer": round(analyzer_duration, 4), "service": round(service_duration, 4)} + + # Generate LLM-based chat summary instead of excerpt + chat_summary = None + history_text = "\n".join( + str(getattr(m, "role", "")) + ": " + (m.content if isinstance(m.content, str) else str(m.content or "")) + for m in chat_ctx.chat_history.messages + if not (m.role == AuthorRole.SYSTEM and isinstance(m.content, str) and m.content.startswith(PATIENT_CONTEXT_PREFIX)) + )[:8000] + + if history_text.strip(): + try: + chat_summary = await self.analyzer.summarize_text(history_text) + logger.info(f"🏥 SERVICE SUMMARY - Generated chat summary: {len(chat_summary)} chars") + except Exception as e: + logger.warning(f"🏥 SERVICE SUMMARY - Failed to summarize: {e}") + chat_summary = "Chat summary unavailable" + + token_counts = { + "history_estimate": self._estimate_tokens(history_text), + "summary_estimate": self._estimate_tokens(chat_summary) if chat_summary else 0, + } + + logger.info(f"🏥 SERVICE SYSTEM MESSAGE - Updating system message for decision: {decision}") + + if decision == "CLEAR": + self._remove_system_message(chat_ctx) + logger.info(f"🏥 SERVICE SYSTEM MESSAGE - Removed system message for CLEAR decision") + else: + self._ensure_system_message(chat_ctx, timing, chat_summary, token_counts) + logger.info(f"🏥 SERVICE SYSTEM MESSAGE - Ensured system message for patient: {chat_ctx.patient_id}") + + # Log final state + final_system_messages = [m for m in chat_ctx.chat_history.messages if m.role == AuthorRole.SYSTEM] + logger.info(f"🏥 SERVICE FINAL - System messages after update: {len(final_system_messages)}") + for i, msg in enumerate(final_system_messages): + content = getattr(msg, 'content', '') + if isinstance(content, str) and content.startswith(PATIENT_CONTEXT_PREFIX): + logger.info(f"🏥 SERVICE FINAL - System message {i}: {content}") + + logger.info( + f"🏥 SERVICE COMPLETE - Final Decision: {decision} | Final Patient: {chat_ctx.patient_id} | Timing: {timing}") + return decision, timing + + # -------- Internal helpers -------- + + def _activate_patient(self, patient_id: str, chat_ctx: ChatContext) -> Decision: + logger.info( + f"🏥 SERVICE ACTIVATE START - Checking patient_id: '{patient_id}' | Current: '{chat_ctx.patient_id}'") + + if not patient_id: + logger.info(f"🏥 SERVICE ACTIVATE - No patient ID provided, returning NONE") + return "NONE" + + # Same patient + if patient_id == chat_ctx.patient_id: + logger.info(f"🏥 SERVICE ACTIVATE - Same patient '{patient_id}', returning UNCHANGED") + return "UNCHANGED" + + # 
Switch to existing + if patient_id in chat_ctx.patient_contexts: + logger.info(f"🏥 SERVICE ACTIVATE - Switching to existing patient: '{patient_id}'") + chat_ctx.patient_id = patient_id + logger.info(f"🏥 SERVICE ACTIVATE - Successfully switched to existing patient: '{patient_id}'") + return "SWITCH_EXISTING" + + # New blank patient context + logger.info(f"🏥 SERVICE ACTIVATE - Creating new patient context for: '{patient_id}'") + chat_ctx.patient_contexts[patient_id] = PatientContext(patient_id=patient_id) + chat_ctx.patient_id = patient_id + logger.info(f"🏥 SERVICE ACTIVATE - Successfully created new patient context for: '{patient_id}'") + logger.info(f"🏥 SERVICE ACTIVATE - All patient contexts now: {list(chat_ctx.patient_contexts.keys())}") + return "NEW_BLANK" + + def _clear(self, chat_ctx: ChatContext): + logger.info(f"🏥 SERVICE CLEAR - Clearing patient_id from: '{chat_ctx.patient_id}' to None") + chat_ctx.patient_id = None # retain historical contexts for potential reuse + logger.info( + f"🏥 SERVICE CLEAR - Patient ID cleared, contexts retained: {list(chat_ctx.patient_contexts.keys())}") + + def _remove_system_message(self, chat_ctx: ChatContext): + original_count = len(chat_ctx.chat_history.messages) + original_system_count = len([m for m in chat_ctx.chat_history.messages if m.role == AuthorRole.SYSTEM]) + + logger.info( + f"🏥 SERVICE REMOVE MSG START - Total messages: {original_count} | System messages: {original_system_count}") + + # Log what we're about to remove + to_remove = [] + for i, m in enumerate(chat_ctx.chat_history.messages): + if (m.role == AuthorRole.SYSTEM and + isinstance(m.content, str) and + m.content.startswith(PATIENT_CONTEXT_PREFIX)): + to_remove.append((i, m.content)) + + logger.info(f"🏥 SERVICE REMOVE MSG - Found {len(to_remove)} PATIENT_CONTEXT messages to remove") + for i, content in to_remove: + logger.info(f"🏥 SERVICE REMOVE MSG - Removing message {i}: {content}") + + chat_ctx.chat_history.messages = [ + m + for m in chat_ctx.chat_history.messages + if not ( + m.role == AuthorRole.SYSTEM + and isinstance(m.content, str) + and m.content.startswith(PATIENT_CONTEXT_PREFIX) + ) + ] # type: ignore + + new_count = len(chat_ctx.chat_history.messages) + new_system_count = len([m for m in chat_ctx.chat_history.messages if m.role == AuthorRole.SYSTEM]) + removed_count = original_count - new_count + + logger.info( + f"🏥 SERVICE REMOVE MSG COMPLETE - Removed {removed_count} messages | Total: {original_count}->{new_count} | System: {original_system_count}->{new_system_count}") + + def _ensure_system_message(self, chat_ctx: ChatContext, timing: TimingInfo, + chat_summary: str | None = None, + token_counts: dict | None = None): + logger.info( + f"🏥 SERVICE ENSURE MSG START - Patient: '{chat_ctx.patient_id}' | Conversation: '{chat_ctx.conversation_id}'") + + self._remove_system_message(chat_ctx) + + if not chat_ctx.patient_id: + logger.info(f"🏥 SERVICE ENSURE MSG - No patient ID, not adding system message") + return + + # Simplified payload without agent tracking and chat excerpt + payload = { + "conversation_id": chat_ctx.conversation_id, + "patient_id": chat_ctx.patient_id, + "all_patient_ids": list(chat_ctx.patient_contexts.keys()), + "timing_sec": timing, + "chat_summary": chat_summary, + "token_counts": token_counts or {}, + } + + line = f"{PATIENT_CONTEXT_PREFIX} {json.dumps(payload, separators=(',', ':'))}" + + logger.info(f"🏥 SERVICE ENSURE MSG - Creating system message: {line}") + + system_message = ChatMessageContent(role=AuthorRole.SYSTEM, content=line) + 
chat_ctx.chat_history.messages.insert(0, system_message) + + total_messages = len(chat_ctx.chat_history.messages) + system_messages = len([m for m in chat_ctx.chat_history.messages if m.role == AuthorRole.SYSTEM]) + + logger.info( + f"🏥 SERVICE ENSURE MSG COMPLETE - System message added at position 0 | Total messages: {total_messages} | System messages: {system_messages}") From 6a26598e43be07d03c933d1f0d462d98188596a8 Mon Sep 17 00:00:00 2001 From: vvenglaturmsft Date: Tue, 26 Aug 2025 12:39:30 +0000 Subject: [PATCH 02/20] updated --- src/group_chat.py | 48 +++++++++------ src/services/patient_context_analyzer.py | 8 ++- src/services/patient_context_service.py | 77 +++++++++++++++++++----- 3 files changed, 97 insertions(+), 36 deletions(-) diff --git a/src/group_chat.py b/src/group_chat.py index 9defa7c..a8ce08c 100644 --- a/src/group_chat.py +++ b/src/group_chat.py @@ -159,7 +159,11 @@ def _create_agent(agent_config: dict): - **Default to {facilitator}**: Always default to {facilitator}. If no other participant is specified, {facilitator} goes next. - **Use best judgment**: If the rules are unclear, use your best judgment to determine who should go next, for the natural flow of the conversation. - **Output**: Give the full reasoning for your choice and the verdict. The reasoning should include careful evaluation of each rule with an explanation. The verdict should be the name of the participant who should go next. + **IMPORTANT**: You must respond with valid JSON only. No other text, no code blocks, no explanations outside the JSON. + + **Output Format**: {{"verdict": "agent_name", "reasoning": "explanation"}} + + The verdict must be exactly one of: {", ".join([agent["name"] for agent in all_agents_config])} History: {{{{$history}}}} @@ -173,8 +177,6 @@ def _create_agent(agent_config: dict): Determine if the conversation should end based on the most recent message only. IMPORTANT: In the History, any leading "*AgentName*:" indicates the SPEAKER of the message, not the addressee. - Reply with your full reasoning and a verdict that is exactly "yes" or "no". - You are part of a group chat with several AI agents and a user. The agent names are: {",".join([f"{agent['name']}" for agent in all_agents_config])} @@ -193,14 +195,16 @@ def _create_agent(agent_config: dict): If you are uncertain, return "yes". Ignore any debug/metadata like "PC_CTX" or JSON blobs when deciding. + **IMPORTANT**: You must respond with valid JSON only. No other text, no code blocks, no explanations outside the JSON. + + **Output Format**: {{"verdict": "yes_or_no", "reasoning": "explanation"}} + + The verdict must be exactly "yes" or "no". + EXAMPLES: - - "User, can you confirm the correct patient ID?" => "yes" - - "*ReportCreation*: Please compile the patient timeline. Let's proceed with *ReportCreation*." => "no" (ReportCreation is an agent) - - "*ReportCreation*, please proceed ..." => "no" (ReportCreation is an agent) - - "If you have any further questions or need assistance, feel free to ask." => "yes" - - "Let's proceed with Radiology." => "no" (Radiology is an agent) - - "*PatientStatus*, please use ..." => "no" (PatientStatus is an agent) - - "*Orchestrator*: Patient context is set to \"patient_4\". Please let us know how we can assist you with this patient today." => "yes" + - "User, can you confirm the correct patient ID?" => {{"verdict": "yes", "reasoning": "Asks user a direct question"}} + - "*ReportCreation*: Please compile the patient timeline." 
=> {{"verdict": "no", "reasoning": "Command to specific agent ReportCreation"}} + - "If you have any further questions, feel free to ask." => {{"verdict": "yes", "reasoning": "Invites user to respond"}} History: {{{{$history}}}} @@ -211,18 +215,26 @@ def _create_agent(agent_config: dict): def evaluate_termination(result): logger.info(f"Termination function result: {result}") - rule = ChatRule.model_validate_json(str(result.value[0])) - return rule.verdict == "yes" + try: + rule = ChatRule.model_validate_json(str(result.value[0])) + should_terminate = rule.verdict == "yes" + logger.info(f"Termination function parsed successfully: {should_terminate}") + return should_terminate + except Exception as e: + logger.error(f"Termination function parsing error: {e}. Raw result: {result}") + return False # Fallback to continue conversation def evaluate_selection(result): logger.info(f"Selection function result: {result}") - rule = ChatRule.model_validate_json(str(result.value[0])) - # Record next agent hint try: - _set_pc_ctx_agent_field("next", rule.verdict) + rule = ChatRule.model_validate_json(str(result.value[0])) + selected_agent = rule.verdict if rule.verdict in [agent["name"] + for agent in all_agents_config] else facilitator + logger.info(f"Selection function parsed successfully: {selected_agent}") + return selected_agent except Exception as e: - logger.info(f"Failed to set next agent in PC_CTX: {e}") - return rule.verdict if rule.verdict in [agent["name"] for agent in all_agents_config] else facilitator + logger.error(f"Selection function parsing error: {e}. Raw result: {result}") + return facilitator # Fallback to facilitator chat = AgentGroupChat( agents=agents, @@ -244,7 +256,7 @@ def evaluate_selection(result): result_parser=evaluate_termination, agent_variable_name="agents", history_variable_name="history", - maximum_iterations=8, + maximum_iterations=20, # Termination only looks at the last message history_reducer=ChatHistoryTruncationReducer( target_count=1, auto_reduce=True diff --git a/src/services/patient_context_analyzer.py b/src/services/patient_context_analyzer.py index f45b16b..0f941a2 100644 --- a/src/services/patient_context_analyzer.py +++ b/src/services/patient_context_analyzer.py @@ -177,9 +177,11 @@ async def summarize_text(self, text: str, max_tokens: int = 200) -> str: Returns a short plain-text summary. """ system_prompt = ( - "Summarize the following chat in 3-6 crisp bullets. " - "Focus only on patient context (ID(s), key requests, agent progress, next actions). " - "Avoid boilerplate. Keep it under ~80 words." + "Summarize the following patient-specific conversation in 3-6 crisp bullets. " + "Focus ONLY on the current active patient (ignore any references to other patients). " + "Include: patient ID mentioned, key medical requests, agent actions completed, and immediate next steps. " + "Be specific and avoid generic phrases. Keep under 80 words total. " + "If multiple patients are mentioned, focus only on the most recent/active patient." 
) chat = ChatHistory() chat.add_system_message(system_prompt) diff --git a/src/services/patient_context_service.py b/src/services/patient_context_service.py index ed17abb..4f3db6b 100644 --- a/src/services/patient_context_service.py +++ b/src/services/patient_context_service.py @@ -91,24 +91,71 @@ async def decide_and_apply(self, user_text: str, chat_ctx: ChatContext) -> tuple service_duration = time.time() - service_start_time timing: TimingInfo = {"analyzer": round(analyzer_duration, 4), "service": round(service_duration, 4)} - # Generate LLM-based chat summary instead of excerpt + # Generate patient-specific LLM-based chat summary chat_summary = None - history_text = "\n".join( - str(getattr(m, "role", "")) + ": " + (m.content if isinstance(m.content, str) else str(m.content or "")) - for m in chat_ctx.chat_history.messages - if not (m.role == AuthorRole.SYSTEM and isinstance(m.content, str) and m.content.startswith(PATIENT_CONTEXT_PREFIX)) - )[:8000] - - if history_text.strip(): - try: - chat_summary = await self.analyzer.summarize_text(history_text) - logger.info(f"🏥 SERVICE SUMMARY - Generated chat summary: {len(chat_summary)} chars") - except Exception as e: - logger.warning(f"🏥 SERVICE SUMMARY - Failed to summarize: {e}") - chat_summary = "Chat summary unavailable" + if chat_ctx.patient_id: + # Find messages since the last patient context switch to current patient + patient_specific_messages = [] + found_current_patient_context = False + + # Go through messages in reverse to find current patient's conversation segment + for message in reversed(chat_ctx.chat_history.messages): + # Check if this is a system message with patient context JSON + if (message.role == AuthorRole.SYSTEM and + isinstance(message.content, str) and + message.content.startswith(PATIENT_CONTEXT_PREFIX)): + try: + json_content = message.content[len(PATIENT_CONTEXT_PREFIX):].strip() + payload = json.loads(json_content) + message_patient_id = payload.get("patient_id") + + if message_patient_id == chat_ctx.patient_id: + # Found a system message for current patient, mark the start + found_current_patient_context = True + continue + elif found_current_patient_context and message_patient_id != chat_ctx.patient_id: + # Found a system message for a different patient, stop collecting + break + except Exception as e: + logger.warning(f"🏥 SERVICE SUMMARY - Failed to parse system message JSON: {e}") + continue + else: + # Regular message - include it if we're in current patient's context + if found_current_patient_context: + patient_specific_messages.append(message) + + # If no patient context switch found, include recent messages (fallback) + if not found_current_patient_context: + logger.info(f"🏥 SERVICE SUMMARY - No patient context switch found, using recent messages") + patient_specific_messages = list(chat_ctx.chat_history.messages[-10:]) # Last 10 messages + + # Create summary from patient-specific messages only + if patient_specific_messages: + patient_specific_messages.reverse() # Back to chronological order + history_text = "\n".join( + str(getattr(m, "role", "")) + ": " + (m.content if isinstance(m.content, str) else str(m.content or "")) + for m in patient_specific_messages + if not (m.role == AuthorRole.SYSTEM and isinstance(m.content, str) and m.content.startswith(PATIENT_CONTEXT_PREFIX)) + )[:8000] + + if history_text.strip(): + try: + # LLM still does the summarization, but with patient-specific input + chat_summary = await self.analyzer.summarize_text(history_text) + logger.info( + f"🏥 SERVICE SUMMARY - 
Generated patient-specific summary for {chat_ctx.patient_id}: {len(chat_summary)} chars") + except Exception as e: + logger.warning(f"🏥 SERVICE SUMMARY - Failed to summarize: {e}") + chat_summary = f"Chat summary for {chat_ctx.patient_id} unavailable" + else: + logger.info(f"🏥 SERVICE SUMMARY - No relevant text found for patient {chat_ctx.patient_id}") + chat_summary = f"No recent activity for {chat_ctx.patient_id}" + else: + logger.info(f"🏥 SERVICE SUMMARY - No messages found for patient {chat_ctx.patient_id}") + chat_summary = f"New patient context for {chat_ctx.patient_id}" token_counts = { - "history_estimate": self._estimate_tokens(history_text), + "history_estimate": self._estimate_tokens(chat_summary) if chat_summary else 0, "summary_estimate": self._estimate_tokens(chat_summary) if chat_summary else 0, } From ec8c0880302d8d0685240351829ea7a88bc1ddd1 Mon Sep 17 00:00:00 2001 From: vvenglaturmsft Date: Tue, 26 Aug 2025 21:16:39 +0000 Subject: [PATCH 03/20] updated - minor fixes --- src/group_chat.py | 18 +++++------------- src/scenarios/default/config/agents.yaml | 10 ++++++---- src/services/patient_context_service.py | 20 ++++++++++++-------- 3 files changed, 23 insertions(+), 25 deletions(-) diff --git a/src/group_chat.py b/src/group_chat.py index a8ce08c..3339687 100644 --- a/src/group_chat.py +++ b/src/group_chat.py @@ -159,11 +159,7 @@ def _create_agent(agent_config: dict): - **Default to {facilitator}**: Always default to {facilitator}. If no other participant is specified, {facilitator} goes next. - **Use best judgment**: If the rules are unclear, use your best judgment to determine who should go next, for the natural flow of the conversation. - **IMPORTANT**: You must respond with valid JSON only. No other text, no code blocks, no explanations outside the JSON. - - **Output Format**: {{"verdict": "agent_name", "reasoning": "explanation"}} - - The verdict must be exactly one of: {", ".join([agent["name"] for agent in all_agents_config])} + Provide your reasoning and then the verdict. The verdict must be exactly one of: {", ".join([agent["name"] for agent in all_agents_config])} History: {{{{$history}}}} @@ -194,17 +190,13 @@ def _create_agent(agent_config: dict): Commands addressed to "you" or "User" => "yes". If you are uncertain, return "yes". Ignore any debug/metadata like "PC_CTX" or JSON blobs when deciding. - - **IMPORTANT**: You must respond with valid JSON only. No other text, no code blocks, no explanations outside the JSON. - - **Output Format**: {{"verdict": "yes_or_no", "reasoning": "explanation"}} - The verdict must be exactly "yes" or "no". + Provide your reasoning and then the verdict. The verdict must be exactly "yes" or "no". EXAMPLES: - - "User, can you confirm the correct patient ID?" => {{"verdict": "yes", "reasoning": "Asks user a direct question"}} - - "*ReportCreation*: Please compile the patient timeline." => {{"verdict": "no", "reasoning": "Command to specific agent ReportCreation"}} - - "If you have any further questions, feel free to ask." => {{"verdict": "yes", "reasoning": "Invites user to respond"}} + - "User, can you confirm the correct patient ID?" => verdict: "yes" (Asks user a direct question) + - "*ReportCreation*: Please compile the patient timeline." => verdict: "no" (Command to specific agent ReportCreation) + - "If you have any further questions, feel free to ask." 
=> verdict: "yes" (Invites user to respond) History: {{{{$history}}}} diff --git a/src/scenarios/default/config/agents.yaml b/src/scenarios/default/config/agents.yaml index 06fc68f..35a1f70 100644 --- a/src/scenarios/default/config/agents.yaml +++ b/src/scenarios/default/config/agents.yaml @@ -12,11 +12,13 @@ The following ai experts can help with answering queries about the user. {{aiAgents}} If during the course of the conversation, information is missing, think through who could best answer it, then ask that agent for the information. - 3. **Allow user to confirm**: Ask the user for confirmation on the plan. If the plan changes, inform the user and ask for confirmation again. If the plan progresses as expected, you can skip this step. - 4. **Explain the Purpose and Order**: At the beginning of the conversation, explain the plan and the expected order of participants. - 5. **Role Limitation**: Remember, your role is to moderate and facilitate, not to provide clinical recommendations or treatment plans. + 3. **Smart Plan Reuse**: Before creating a new plan, check if you've done this type of request before in this conversation. For identical requests (like "tumor board review"), say "I'll use the same approach as before:" and proceed without confirmation. For similar requests, briefly explain any changes and ask for confirmation. + 4. **Allow user to confirm**: Ask the user for confirmation on new plans. If the plan changes, inform the user and ask for confirmation again. If the plan progresses as expected, you can skip this step. + 5. **Explain the Purpose and Order**: At the beginning of the conversation, explain the plan and the expected order of participants. + 6. **Auto-create documents**: For tumor board reviews and comprehensive assessments, automatically proceed to ReportCreation after all clinical agents complete their work. Don't ask permission - just say "*ReportCreation*, please compile the information into a Word document." + 7. **Role Limitation**: Remember, your role is to moderate and facilitate, not to provide clinical recommendations or treatment plans. DON'T: Provide clinical recommendations or treatment plans. - 6. **Conclude the plan**: + 8. **Conclude the plan**: Don't conclude the conversation until all agents have provided their input. Instead, address the agents that have not yet provided their input. When all agents have provided their input, the plan has concluded, and the user's question has been answered, summarize the response in one or two sentences. Ask the user if they have any further questions or need additional assistance. diff --git a/src/services/patient_context_service.py b/src/services/patient_context_service.py index 4f3db6b..5748ada 100644 --- a/src/services/patient_context_service.py +++ b/src/services/patient_context_service.py @@ -112,22 +112,26 @@ async def decide_and_apply(self, user_text: str, chat_ctx: ChatContext) -> tuple if message_patient_id == chat_ctx.patient_id: # Found a system message for current patient, mark the start found_current_patient_context = True - continue + # This is the start of the last session for this patient. + # The messages collected so far are the correct ones. Stop. 
+ break elif found_current_patient_context and message_patient_id != chat_ctx.patient_id: # Found a system message for a different patient, stop collecting break except Exception as e: logger.warning(f"🏥 SERVICE SUMMARY - Failed to parse system message JSON: {e}") continue - else: - # Regular message - include it if we're in current patient's context - if found_current_patient_context: - patient_specific_messages.append(message) - # If no patient context switch found, include recent messages (fallback) + # Only collect messages once we are in the context of the current patient. + if found_current_patient_context: + patient_specific_messages.append(message) + + # If no patient context switch found, it's a new patient. if not found_current_patient_context: - logger.info(f"🏥 SERVICE SUMMARY - No patient context switch found, using recent messages") - patient_specific_messages = list(chat_ctx.chat_history.messages[-10:]) # Last 10 messages + logger.info( + f"🏥 SERVICE SUMMARY - No prior patient context found for {chat_ctx.patient_id}, treating as new.") + # Ensure message list is empty so we generate a "New patient" summary + patient_specific_messages = [] # Create summary from patient-specific messages only if patient_specific_messages: From 6d2c2dc8ce58ee00ab184bc32844cd693e943aef Mon Sep 17 00:00:00 2001 From: vvenglaturmsft Date: Wed, 27 Aug 2025 00:39:36 +0000 Subject: [PATCH 04/20] minor fix --- src/services/patient_context_analyzer.py | 11 +++++------ src/services/patient_context_service.py | 25 +++++------------------- 2 files changed, 10 insertions(+), 26 deletions(-) diff --git a/src/services/patient_context_analyzer.py b/src/services/patient_context_analyzer.py index 0f941a2..1618da2 100644 --- a/src/services/patient_context_analyzer.py +++ b/src/services/patient_context_analyzer.py @@ -171,17 +171,16 @@ async def analyze( # Add this method to the PatientContextAnalyzer class (around line 100) - async def summarize_text(self, text: str, max_tokens: int = 200) -> str: + async def summarize_text(self, text: str, patient_id: str, max_tokens: int = 200) -> str: """ Summarize the given chat text into a few concise bullets focused on patient context. Returns a short plain-text summary. """ system_prompt = ( - "Summarize the following patient-specific conversation in 3-6 crisp bullets. " - "Focus ONLY on the current active patient (ignore any references to other patients). " - "Include: patient ID mentioned, key medical requests, agent actions completed, and immediate next steps. " - "Be specific and avoid generic phrases. Keep under 80 words total. " - "If multiple patients are mentioned, focus only on the most recent/active patient." + f"Summarize the following conversation in 3-6 crisp bullets focusing SPECIFICALLY on patient '{patient_id}'. " + f"Include: key medical requests for {patient_id}, agent actions completed for {patient_id}, and immediate next steps for {patient_id}. " + f"IGNORE any mentions of other patients - focus ONLY on {patient_id}. " + f"Be specific and avoid generic phrases. Keep under 80 words total." 
) chat = ChatHistory() chat.add_system_message(system_prompt) diff --git a/src/services/patient_context_service.py b/src/services/patient_context_service.py index 5748ada..c260c50 100644 --- a/src/services/patient_context_service.py +++ b/src/services/patient_context_service.py @@ -96,7 +96,6 @@ async def decide_and_apply(self, user_text: str, chat_ctx: ChatContext) -> tuple if chat_ctx.patient_id: # Find messages since the last patient context switch to current patient patient_specific_messages = [] - found_current_patient_context = False # Go through messages in reverse to find current patient's conversation segment for message in reversed(chat_ctx.chat_history.messages): @@ -109,29 +108,15 @@ async def decide_and_apply(self, user_text: str, chat_ctx: ChatContext) -> tuple payload = json.loads(json_content) message_patient_id = payload.get("patient_id") - if message_patient_id == chat_ctx.patient_id: - # Found a system message for current patient, mark the start - found_current_patient_context = True - # This is the start of the last session for this patient. - # The messages collected so far are the correct ones. Stop. - break - elif found_current_patient_context and message_patient_id != chat_ctx.patient_id: - # Found a system message for a different patient, stop collecting + # If we find a context message for a *different* patient, + # that's the boundary of the current patient's conversation. + if message_patient_id != chat_ctx.patient_id: break except Exception as e: logger.warning(f"🏥 SERVICE SUMMARY - Failed to parse system message JSON: {e}") continue - # Only collect messages once we are in the context of the current patient. - if found_current_patient_context: - patient_specific_messages.append(message) - - # If no patient context switch found, it's a new patient. 
- if not found_current_patient_context: - logger.info( - f"🏥 SERVICE SUMMARY - No prior patient context found for {chat_ctx.patient_id}, treating as new.") - # Ensure message list is empty so we generate a "New patient" summary - patient_specific_messages = [] + patient_specific_messages.append(message) # Create summary from patient-specific messages only if patient_specific_messages: @@ -145,7 +130,7 @@ async def decide_and_apply(self, user_text: str, chat_ctx: ChatContext) -> tuple if history_text.strip(): try: # LLM still does the summarization, but with patient-specific input - chat_summary = await self.analyzer.summarize_text(history_text) + chat_summary = await self.analyzer.summarize_text(history_text, chat_ctx.patient_id) logger.info( f"🏥 SERVICE SUMMARY - Generated patient-specific summary for {chat_ctx.patient_id}: {len(chat_summary)} chars") except Exception as e: From 34368117857173a19f8e32b5863a6764ee23305c Mon Sep 17 00:00:00 2001 From: vvenglaturmsft Date: Wed, 27 Aug 2025 16:20:13 +0000 Subject: [PATCH 05/20] commit updated --- src/bots/assistant_bot.py | 28 ---------------------------- src/group_chat.py | 2 +- 2 files changed, 1 insertion(+), 29 deletions(-) diff --git a/src/bots/assistant_bot.py b/src/bots/assistant_bot.py index d9ce036..f3e16f5 100644 --- a/src/bots/assistant_bot.py +++ b/src/bots/assistant_bot.py @@ -341,31 +341,3 @@ def _append_pc_ctx(self, base: str, chat_ctx: ChatContext) -> str: logger.warning(f"📋 PC_CTX APPEND - JSON decode error: {e}, using raw payload") # Fallback to raw if JSON is malformed, but keep it simple return f"{base}\n\n---\n*PT_CTX (raw):* `{json_payload}`" - - def _append_pc_ctx_old(self, base: str, chat_ctx: ChatContext) -> str: - logger.info(f"📋 PC_CTX APPEND START - Base message length: {len(base)}") - - # Avoid double-tagging - if "\nPC_CTX" in base: - logger.info(f"📋 PC_CTX APPEND - Already has PC_CTX, skipping") - return base - - # Get the actual injected system patient context JSON - json_payload = self._get_system_patient_context_json(chat_ctx) - logger.info(f"📋 PC_CTX APPEND - Retrieved JSON payload: {json_payload}") - - if not json_payload: - logger.info(f"📋 PC_CTX APPEND - No JSON payload found, adding empty marker") - return base + "\nPC_CTX (empty)" - - # Pretty-print the actual system JSON - try: - obj = json.loads(json_payload) - pretty = json.dumps(obj, indent=2) - result = f"{base}\nPC_CTX\n
{pretty}
" - logger.info(f"📋 PC_CTX APPEND - Successfully formatted JSON, final length: {len(result)}") - return result - except json.JSONDecodeError as e: - logger.warning(f"📋 PC_CTX APPEND - JSON decode error: {e}, using raw payload") - # Fallback to raw if JSON is malformed - return f"{base}\nPC_CTX {json_payload}" diff --git a/src/group_chat.py b/src/group_chat.py index 3339687..7059106 100644 --- a/src/group_chat.py +++ b/src/group_chat.py @@ -248,7 +248,7 @@ def evaluate_selection(result): result_parser=evaluate_termination, agent_variable_name="agents", history_variable_name="history", - maximum_iterations=20, + maximum_iterations=30, # Termination only looks at the last message history_reducer=ChatHistoryTruncationReducer( target_count=1, auto_reduce=True From 8e62471f27384cdf9ccd1392e0161d821f46df38 Mon Sep 17 00:00:00 2001 From: vvenglaturmsft Date: Wed, 27 Aug 2025 18:18:36 +0000 Subject: [PATCH 06/20] cleaned up a few log messges --- src/bots/assistant_bot.py | 32 +--- src/group_chat.py | 10 +- src/services/patient_context_analyzer.py | 215 ++++++++++------------- src/services/patient_context_service.py | 117 ++---------- 4 files changed, 123 insertions(+), 251 deletions(-) diff --git a/src/bots/assistant_bot.py b/src/bots/assistant_bot.py index f3e16f5..e197a3b 100644 --- a/src/bots/assistant_bot.py +++ b/src/bots/assistant_bot.py @@ -115,23 +115,11 @@ async def on_message_activity(self, turn_context: TurnContext) -> None: return # Decide & apply patient context BEFORE building group chat - # decision = await self.patient_context_service.decide_and_apply(raw_user_text, chat_ctx) - # Decide & apply patient context BEFORE building group chat - # Decide & apply patient context BEFORE building group chat - logger.info(f"🤖 BOT CONTEXT START - About to call patient context service") - logger.info(f"🤖 BOT CONTEXT - Conversation: {conversation_id} | Input: '{raw_user_text}'") - logger.info(f"🤖 BOT CONTEXT - Current patient before service: {getattr(chat_ctx, 'patient_id', None)}") - logger.info( - f"🤖 BOT CONTEXT - Known patients before service: {list(getattr(chat_ctx, 'patient_contexts', {}).keys())}") + logger.info(f"Processing patient context for conversation: {conversation_id}") decision, timing = await self.patient_context_service.decide_and_apply(raw_user_text, chat_ctx) - logger.info(f"🤖 BOT CONTEXT COMPLETE - Decision: {decision} | Timing: {timing}") - logger.info(f"🤖 BOT CONTEXT - Current patient after service: {getattr(chat_ctx, 'patient_id', None)}") - logger.info( - f"🤖 BOT CONTEXT - Known patients after service: {list(getattr(chat_ctx, 'patient_contexts', {}).keys())}") - logger.info(f"🤖 BOT CONTEXT - Total chat messages: {len(chat_ctx.chat_history.messages)}") - logger.info(f"Patient context decision: {decision} | Input: '{raw_user_text}' | Timing: {timing}") + logger.info(f"Patient context decision: {decision} | Patient: {chat_ctx.patient_id} | Timing: {timing}") agents = self.all_agents if len(chat_ctx.chat_history.messages) == 0: @@ -149,7 +137,7 @@ async def is_part_of_conversation(agent): await context.send_activity(typing_activity) return True except Exception as e: - logger.info(f"Failed to send typing activity to {agent['name']}: {e}") + logger.debug(f"Failed to send typing activity to {agent['name']}: {e}") return False part_of_conversation = await asyncio.gather(*(is_part_of_conversation(agent) for agent in self.all_agents)) @@ -158,7 +146,6 @@ async def is_part_of_conversation(agent): (chat, chat_ctx) = create_group_chat(self.app_context, chat_ctx, participants=agents) 
# Add user message after context decision (no extra tagging here) - # chat_ctx.chat_history.add_user_message(f"{self.name}: {raw_user_text}") user_with_ctx = self._append_pc_ctx(f"{self.name}: {raw_user_text}", chat_ctx) chat_ctx.chat_history.add_user_message(user_with_ctx) @@ -288,19 +275,16 @@ def _get_system_patient_context_json(self, chat_ctx: ChatContext) -> str | None: return None def _append_pc_ctx(self, base: str, chat_ctx: ChatContext) -> str: - logger.info(f"📋 PC_CTX APPEND START - Base message length: {len(base)}") + """Append patient context information to the message for display.""" # Avoid double-tagging if "\nPC_CTX" in base or "\n*PT_CTX:*" in base: - logger.info(f"📋 PC_CTX APPEND - Already has PC_CTX, skipping") return base # Get the actual injected system patient context JSON json_payload = self._get_system_patient_context_json(chat_ctx) - logger.info(f"📋 PC_CTX APPEND - Retrieved JSON payload: {json_payload}") if not json_payload: - logger.info(f"📋 PC_CTX APPEND - No JSON payload found, not appending context.") return base # Format the JSON payload into a simple, readable Markdown string @@ -330,14 +314,12 @@ def _append_pc_ctx(self, base: str, chat_ctx: ChatContext) -> str: # Only add the block if there's something to show besides the header if len(lines) > 2: formatted_text = "\n".join(lines) - result = f"{base}{formatted_text}" - logger.info(f"📋 PC_CTX APPEND - Successfully formatted as text, final length: {len(result)}") - return result + logger.debug(f"Appended patient context to message | Patient: {obj.get('patient_id')}") + return f"{base}{formatted_text}" else: - logger.info(f"📋 PC_CTX APPEND - No relevant data to display.") return base except json.JSONDecodeError as e: - logger.warning(f"📋 PC_CTX APPEND - JSON decode error: {e}, using raw payload") + logger.warning(f"Failed to parse patient context JSON: {e}") # Fallback to raw if JSON is malformed, but keep it simple return f"{base}\n\n---\n*PT_CTX (raw):* `{json_payload}`" diff --git a/src/group_chat.py b/src/group_chat.py index 7059106..0ef1956 100644 --- a/src/group_chat.py +++ b/src/group_chat.py @@ -206,26 +206,24 @@ def _create_agent(agent_config: dict): agents = [_create_agent(agent) for agent in all_agents_config] def evaluate_termination(result): - logger.info(f"Termination function result: {result}") try: rule = ChatRule.model_validate_json(str(result.value[0])) should_terminate = rule.verdict == "yes" - logger.info(f"Termination function parsed successfully: {should_terminate}") + logger.debug(f"Termination decision: {should_terminate}") return should_terminate except Exception as e: - logger.error(f"Termination function parsing error: {e}. Raw result: {result}") + logger.error(f"Termination function error: {e}") return False # Fallback to continue conversation def evaluate_selection(result): - logger.info(f"Selection function result: {result}") try: rule = ChatRule.model_validate_json(str(result.value[0])) selected_agent = rule.verdict if rule.verdict in [agent["name"] for agent in all_agents_config] else facilitator - logger.info(f"Selection function parsed successfully: {selected_agent}") + logger.debug(f"Selected agent: {selected_agent}") return selected_agent except Exception as e: - logger.error(f"Selection function parsing error: {e}. 
Raw result: {result}") + logger.error(f"Selection function error: {e}") return facilitator # Fallback to facilitator chat = AgentGroupChat( diff --git a/src/services/patient_context_analyzer.py b/src/services/patient_context_analyzer.py index 1618da2..c51c6c1 100644 --- a/src/services/patient_context_analyzer.py +++ b/src/services/patient_context_analyzer.py @@ -34,7 +34,7 @@ def __init__( raise ValueError("No deployment name for patient context analyzer.") self.api_version = api_version or os.getenv("AZURE_OPENAI_API_VERSION") or "2024-10-21" - logger.info(f"🔧 ANALYZER INIT - Deployment: {self.deployment_name} | API Version: {self.api_version}") + logger.info(f"PatientContextAnalyzer initialized with deployment: {self.deployment_name}") self._kernel = Kernel() self._kernel.add_service( @@ -45,158 +45,133 @@ def __init__( ad_token_provider=token_provider, ) ) - logger.info(f"🔧 ANALYZER INIT COMPLETE - Kernel and service configured") async def analyze( - self, - user_text: str, - prior_patient_id: Optional[str], - known_patient_ids: list[str], + self, user_text: str, prior_patient_id: Optional[str], known_patient_ids: list[str] ) -> tuple[AnalyzerAction, Optional[str], float]: - """ - Returns (action, patient_id, duration_sec) - patient_id is only non-null for ACTIVATE_NEW | SWITCH_EXISTING | UNCHANGED - """ start_time = time.time() - logger.info(f"🔍 ANALYZER START - Input: '{user_text}' | Prior: {prior_patient_id} | Known: {known_patient_ids}") - if not user_text: + logger.debug(f"Analyzing user input for patient context | Prior: {prior_patient_id}") + + if not user_text or not user_text.strip(): duration = time.time() - start_time - logger.info(f"🔍 ANALYZER RESULT - Empty input | Action: NONE | Duration: {duration:.4f}s") + logger.debug(f"Empty input received | Duration: {duration:.4f}s") return "NONE", None, duration + # Existing system prompt and LLM call logic... system_prompt = f""" -You manage patient context for a medical chat application. - -Inputs: -- prior_patient_id: {prior_patient_id if prior_patient_id else "null"} -- known_patient_ids: {known_patient_ids} - -Rules: -1. If user clearly asks to clear/reset/remove the patient context -> action "CLEAR", patient_id null. -2. If user mentions a patient ID anywhere in their message: - - Extract the most specific patient identifier (e.g., "patient_4", "patient_123", etc.) - - If identical to prior_patient_id -> "UNCHANGED" - - If in known_patient_ids and different -> "SWITCH_EXISTING" - - If not in known_patient_ids -> "ACTIVATE_NEW" -3. Normalize variants like "patient 6" or "patient id patient_6" to "patient_6". Be tolerant of typos like "patiend id". -4. Ignore vague references without an ID. -5. Output STRICT JSON ONLY. 
No extra text, no code fences: -{{ - "action": "", - "patient_id": "" -}} - -Examples: -- "switch to patient id patient_5" -> {{"action": "ACTIVATE_NEW", "patient_id": "patient_5"}} -- "switch to patient with patient id patient_4" -> {{"action": "ACTIVATE_NEW", "patient_id": "patient_4"}} -- "switch to patient 6" -> {{"action": "ACTIVATE_NEW", "patient_id": "patient_6"}} -- "clear patient context" -> {{"action": "CLEAR", "patient_id": null}} -""".strip() - - # Build chat history per current SK API - chat = ChatHistory() - # chat.add_message(AuthorRole.SYSTEM, system_prompt) - # chat.add_message(AuthorRole.USER, user_text) - - chat.add_system_message(system_prompt) - chat.add_user_message(user_text) - - logger.info(f"🔍 ANALYZER LLM CALL - Using chat_history with system prompt length: {len(system_prompt)}") +You are a patient context analyzer for healthcare conversations. + +TASK: Analyze user input and decide the appropriate patient context action. + +ACTIONS: +- NONE: No patient context needed (general questions, greetings, system commands) +- CLEAR: User wants to clear/reset patient context +- ACTIVATE_NEW: User mentions a new patient ID not in known_patient_ids +- SWITCH_EXISTING: User wants to switch to a different known patient +- UNCHANGED: Continue with current patient context + +CURRENT STATE: +- Prior patient ID: {prior_patient_id} +- Known patient IDs: {known_patient_ids} + +RULES: +1. Extract patient_id ONLY if action is ACTIVATE_NEW or SWITCH_EXISTING +2. Patient IDs are typically "patient_X" format or explicit medical record numbers +3. For CLEAR/NONE/UNCHANGED, set patient_id to null +4. Prioritize explicit patient mentions over implicit context + +RESPONSE FORMAT (JSON only): +{{"action": "ACTION_NAME", "patient_id": "extracted_id_or_null", "reasoning": "brief_explanation"}} + +USER INPUT: {user_text} +""" try: + chat_history = ChatHistory() + chat_history.add_system_message(system_prompt) + chat_history.add_user_message(user_text) + svc = self._kernel.get_service("default") - logger.info(f"🔍 ANALYZER LLM CALL - Service retrieved: {type(svc).__name__}") + llm_start = time.time() - settings = PromptExecutionSettings( - service_id="default", - temperature=0.0, - top_p=0.0, - max_tokens=200, - # If model supports it, enforce JSON mode: - response_format={"type": "json_object"}, + results = await svc.get_chat_message_contents( + chat_history=chat_history, + settings=PromptExecutionSettings( + max_tokens=150, + temperature=0.1, + response_format={"type": "json_object"} + ), ) - llm_start = time.time() - result = await svc.get_chat_message_content(chat_history=chat, settings=settings) llm_duration = time.time() - llm_start - logger.info(f"🔍 ANALYZER LLM CALL COMPLETE - LLM call took: {llm_duration:.4f}s") - # Normalize result to a single string - if isinstance(result, list): - content = "".join([(getattr(c, "content", "") or "") for c in result]) - else: - content = getattr(result, "content", "") or "" - - content = content.strip() - logger.info(f"🔍 ANALYZER LLM RESPONSE - Raw content: '{content}'") + if not results: + raise ValueError("No LLM response received") + content = results[0].content if not content: + logger.warning("Empty LLM response content") duration = time.time() - start_time - logger.warning("🔍 ANALYZER LLM RESPONSE - Empty content") return "NONE", None, duration - # Strip accidental code fences - if content.startswith("```"): - content = content.strip("`") - if "\n" in content: - content = content.split("\n", 1)[1].strip() - try: - data = json.loads(content) - 
except json.JSONDecodeError as je: - duration = time.time() - start_time - logger.error(f"🔍 ANALYZER JSON ERROR - Failed to parse JSON: {je} | Content: '{content}'") - return "NONE", None, duration + parsed = json.loads(content) + action = parsed.get("action", "NONE") + pid = parsed.get("patient_id") - action = (data.get("action") or "").strip().upper() - pid = data.get("patient_id") - if pid is not None: - pid = str(pid).strip() + # Validation + valid_actions = ["NONE", "CLEAR", "ACTIVATE_NEW", "SWITCH_EXISTING", "UNCHANGED"] + if action not in valid_actions: + logger.error(f"Invalid action from LLM: {action}") + action = "NONE" + pid = None - if action not in {"NONE", "CLEAR", "ACTIVATE_NEW", "SWITCH_EXISTING", "UNCHANGED"}: duration = time.time() - start_time - logger.error(f"🔍 ANALYZER VALIDATION ERROR - Invalid action: {action}") - return "NONE", None, duration + logger.info( + f"Patient context analysis complete | Action: {action} | Patient: {pid} | Duration: {duration:.4f}s") + return action, pid, duration - duration = time.time() - start_time - logger.info(f"🔍 ANALYZER RESULT SUCCESS - Action: {action} | Patient ID: {pid} | Duration: {duration:.4f}s") - return action, pid, duration + except json.JSONDecodeError as je: + logger.error(f"Failed to parse LLM JSON response: {je}") + duration = time.time() - start_time + return "NONE", None, duration except Exception as e: duration = time.time() - start_time - logger.error( - f"🔍 ANALYZER ERROR - Exception: {type(e).__name__}: {e} | Duration: {duration:.4f}s", exc_info=True) + logger.error(f"Patient context analysis failed: {e} | Duration: {duration:.4f}s") return "NONE", None, duration - # Add this method to the PatientContextAnalyzer class (around line 100) - - async def summarize_text(self, text: str, patient_id: str, max_tokens: int = 200) -> str: - """ - Summarize the given chat text into a few concise bullets focused on patient context. - Returns a short plain-text summary. - """ - system_prompt = ( - f"Summarize the following conversation in 3-6 crisp bullets focusing SPECIFICALLY on patient '{patient_id}'. " - f"Include: key medical requests for {patient_id}, agent actions completed for {patient_id}, and immediate next steps for {patient_id}. " - f"IGNORE any mentions of other patients - focus ONLY on {patient_id}. " - f"Be specific and avoid generic phrases. Keep under 80 words total." - ) - chat = ChatHistory() - chat.add_system_message(system_prompt) - chat.add_user_message(text[:8000]) # cap input for safety - + async def summarize_text(self, text: str, patient_id: str) -> str: + """Generate a patient-specific summary of conversation text.""" try: + system_prompt = f""" +You are summarizing a healthcare conversation for patient {patient_id}. + +Create a concise summary focusing on: +- Key medical information discussed +- Treatment decisions or recommendations +- Important patient updates +- Relevant test results or findings + +Keep the summary under 200 words and patient-focused. 
+ +TEXT TO SUMMARIZE: +{text} +""" + + chat_history = ChatHistory() + chat_history.add_system_message(system_prompt) + chat_history.add_user_message("Please summarize this conversation.") + svc = self._kernel.get_service("default") - settings = PromptExecutionSettings( - service_id="default", - temperature=0.0, - top_p=0.0, - max_tokens=max_tokens, + results = await svc.get_chat_message_contents( + chat_history=chat_history, + settings=PromptExecutionSettings(max_tokens=300, temperature=0.3), ) - result = await svc.get_chat_message_content(chat_history=chat, settings=settings) - content = getattr(result, "content", "") or "" - return content.strip() + + return results[0].content if results and results[0].content else f"Summary unavailable for {patient_id}" + except Exception as e: - logger.warning(f"🔍 ANALYZER SUMMARY ERROR - {e}") - return "" + logger.warning(f"Failed to generate summary: {e}") + return f"Summary generation failed for {patient_id}" diff --git a/src/services/patient_context_service.py b/src/services/patient_context_service.py index c260c50..76b098b 100644 --- a/src/services/patient_context_service.py +++ b/src/services/patient_context_service.py @@ -3,7 +3,6 @@ import time from typing import Literal, TypedDict - from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents import AuthorRole @@ -34,23 +33,12 @@ def _estimate_tokens(self, text: str) -> int: def __init__(self, analyzer: PatientContextAnalyzer): self.analyzer = analyzer - logger.info(f"🏥 SERVICE INIT - PatientContextService initialized with analyzer: {type(analyzer).__name__}") + logger.info(f"PatientContextService initialized") async def decide_and_apply(self, user_text: str, chat_ctx: ChatContext) -> tuple[Decision, TimingInfo]: service_start_time = time.time() - logger.info(f"🏥 SERVICE START - Input: '{user_text}' | Conversation: {chat_ctx.conversation_id}") - logger.info( - f"🏥 SERVICE START - Current Patient: {chat_ctx.patient_id} | Known Patients: {list(chat_ctx.patient_contexts.keys())}") - logger.info(f"🏥 SERVICE START - Chat history messages: {len(chat_ctx.chat_history.messages)}") - - # Log current system messages - system_messages = [m for m in chat_ctx.chat_history.messages if m.role == AuthorRole.SYSTEM] - logger.info(f"🏥 SERVICE START - Current system messages: {len(system_messages)}") - for i, msg in enumerate(system_messages): - content = getattr(msg, 'content', '') - if isinstance(content, str) and content.startswith(PATIENT_CONTEXT_PREFIX): - logger.info(f"🏥 SERVICE START - System message {i}: {content}") + logger.info(f"Patient context decision for '{user_text}' | Current patient: {chat_ctx.patient_id}") action, pid, analyzer_duration = await self.analyzer.analyze( user_text=user_text, @@ -58,35 +46,23 @@ async def decide_and_apply(self, user_text: str, chat_ctx: ChatContext) -> tuple known_patient_ids=list(chat_ctx.patient_contexts.keys()), ) - logger.info( - f"🏥 SERVICE ANALYZER RESULT - Action: {action} | Patient ID: {pid} | Analyzer Duration: {analyzer_duration:.4f}s") + logger.info(f"Analyzer result: {action} | Patient ID: {pid}") # Store original state for comparison original_patient_id = chat_ctx.patient_id - original_patient_contexts = dict(chat_ctx.patient_contexts) decision: Decision = "NONE" if action == "CLEAR": - logger.info(f"🏥 SERVICE CLEARING - Clearing patient context from: {chat_ctx.patient_id}") self._clear(chat_ctx) decision = "CLEAR" - logger.info(f"🏥 SERVICE CLEARED - Patient context cleared, now: {chat_ctx.patient_id}") elif 
action in ("ACTIVATE_NEW", "SWITCH_EXISTING"): - logger.info(f"🏥 SERVICE ACTIVATING - Attempting to activate patient: {pid}") decision = self._activate_patient(pid, chat_ctx) if pid else "NONE" - logger.info(f"🏥 SERVICE ACTIVATED - Result decision: {decision} | New patient: {chat_ctx.patient_id}") elif action == "UNCHANGED": - logger.info(f"🏥 SERVICE UNCHANGED - Patient context unchanged, keeping: {chat_ctx.patient_id}") decision = "UNCHANGED" - # Log state changes + # Log state changes only if they occurred if original_patient_id != chat_ctx.patient_id: - logger.info( - f"🏥 SERVICE STATE CHANGE - Patient ID changed from '{original_patient_id}' to '{chat_ctx.patient_id}'") - - if original_patient_contexts != chat_ctx.patient_contexts: - logger.info( - f"🏥 SERVICE STATE CHANGE - Patient contexts changed from {list(original_patient_contexts.keys())} to {list(chat_ctx.patient_contexts.keys())}") + logger.info(f"Patient context changed: '{original_patient_id}' -> '{chat_ctx.patient_id}'") service_duration = time.time() - service_start_time timing: TimingInfo = {"analyzer": round(analyzer_duration, 4), "service": round(service_duration, 4)} @@ -113,7 +89,7 @@ async def decide_and_apply(self, user_text: str, chat_ctx: ChatContext) -> tuple if message_patient_id != chat_ctx.patient_id: break except Exception as e: - logger.warning(f"🏥 SERVICE SUMMARY - Failed to parse system message JSON: {e}") + logger.warning(f"Failed to parse system message JSON: {e}") continue patient_specific_messages.append(message) @@ -131,98 +107,52 @@ async def decide_and_apply(self, user_text: str, chat_ctx: ChatContext) -> tuple try: # LLM still does the summarization, but with patient-specific input chat_summary = await self.analyzer.summarize_text(history_text, chat_ctx.patient_id) - logger.info( - f"🏥 SERVICE SUMMARY - Generated patient-specific summary for {chat_ctx.patient_id}: {len(chat_summary)} chars") + logger.debug(f"Generated summary for {chat_ctx.patient_id}") except Exception as e: - logger.warning(f"🏥 SERVICE SUMMARY - Failed to summarize: {e}") + logger.warning(f"Failed to summarize: {e}") chat_summary = f"Chat summary for {chat_ctx.patient_id} unavailable" - else: - logger.info(f"🏥 SERVICE SUMMARY - No relevant text found for patient {chat_ctx.patient_id}") - chat_summary = f"No recent activity for {chat_ctx.patient_id}" - else: - logger.info(f"🏥 SERVICE SUMMARY - No messages found for patient {chat_ctx.patient_id}") - chat_summary = f"New patient context for {chat_ctx.patient_id}" token_counts = { "history_estimate": self._estimate_tokens(chat_summary) if chat_summary else 0, "summary_estimate": self._estimate_tokens(chat_summary) if chat_summary else 0, } - logger.info(f"🏥 SERVICE SYSTEM MESSAGE - Updating system message for decision: {decision}") - if decision == "CLEAR": self._remove_system_message(chat_ctx) - logger.info(f"🏥 SERVICE SYSTEM MESSAGE - Removed system message for CLEAR decision") else: self._ensure_system_message(chat_ctx, timing, chat_summary, token_counts) - logger.info(f"🏥 SERVICE SYSTEM MESSAGE - Ensured system message for patient: {chat_ctx.patient_id}") - - # Log final state - final_system_messages = [m for m in chat_ctx.chat_history.messages if m.role == AuthorRole.SYSTEM] - logger.info(f"🏥 SERVICE FINAL - System messages after update: {len(final_system_messages)}") - for i, msg in enumerate(final_system_messages): - content = getattr(msg, 'content', '') - if isinstance(content, str) and content.startswith(PATIENT_CONTEXT_PREFIX): - logger.info(f"🏥 SERVICE FINAL - System message 
{i}: {content}") - - logger.info( - f"🏥 SERVICE COMPLETE - Final Decision: {decision} | Final Patient: {chat_ctx.patient_id} | Timing: {timing}") + + logger.info(f"Patient context decision complete: {decision} | Patient: {chat_ctx.patient_id}") return decision, timing # -------- Internal helpers -------- def _activate_patient(self, patient_id: str, chat_ctx: ChatContext) -> Decision: - logger.info( - f"🏥 SERVICE ACTIVATE START - Checking patient_id: '{patient_id}' | Current: '{chat_ctx.patient_id}'") - if not patient_id: - logger.info(f"🏥 SERVICE ACTIVATE - No patient ID provided, returning NONE") return "NONE" # Same patient if patient_id == chat_ctx.patient_id: - logger.info(f"🏥 SERVICE ACTIVATE - Same patient '{patient_id}', returning UNCHANGED") return "UNCHANGED" # Switch to existing if patient_id in chat_ctx.patient_contexts: - logger.info(f"🏥 SERVICE ACTIVATE - Switching to existing patient: '{patient_id}'") chat_ctx.patient_id = patient_id - logger.info(f"🏥 SERVICE ACTIVATE - Successfully switched to existing patient: '{patient_id}'") + logger.info(f"Switched to existing patient: {patient_id}") return "SWITCH_EXISTING" # New blank patient context - logger.info(f"🏥 SERVICE ACTIVATE - Creating new patient context for: '{patient_id}'") chat_ctx.patient_contexts[patient_id] = PatientContext(patient_id=patient_id) chat_ctx.patient_id = patient_id - logger.info(f"🏥 SERVICE ACTIVATE - Successfully created new patient context for: '{patient_id}'") - logger.info(f"🏥 SERVICE ACTIVATE - All patient contexts now: {list(chat_ctx.patient_contexts.keys())}") + logger.info(f"Created new patient context: {patient_id}") return "NEW_BLANK" def _clear(self, chat_ctx: ChatContext): - logger.info(f"🏥 SERVICE CLEAR - Clearing patient_id from: '{chat_ctx.patient_id}' to None") + logger.info(f"Clearing patient context: {chat_ctx.patient_id}") chat_ctx.patient_id = None # retain historical contexts for potential reuse - logger.info( - f"🏥 SERVICE CLEAR - Patient ID cleared, contexts retained: {list(chat_ctx.patient_contexts.keys())}") def _remove_system_message(self, chat_ctx: ChatContext): original_count = len(chat_ctx.chat_history.messages) - original_system_count = len([m for m in chat_ctx.chat_history.messages if m.role == AuthorRole.SYSTEM]) - - logger.info( - f"🏥 SERVICE REMOVE MSG START - Total messages: {original_count} | System messages: {original_system_count}") - - # Log what we're about to remove - to_remove = [] - for i, m in enumerate(chat_ctx.chat_history.messages): - if (m.role == AuthorRole.SYSTEM and - isinstance(m.content, str) and - m.content.startswith(PATIENT_CONTEXT_PREFIX)): - to_remove.append((i, m.content)) - - logger.info(f"🏥 SERVICE REMOVE MSG - Found {len(to_remove)} PATIENT_CONTEXT messages to remove") - for i, content in to_remove: - logger.info(f"🏥 SERVICE REMOVE MSG - Removing message {i}: {content}") chat_ctx.chat_history.messages = [ m @@ -234,23 +164,16 @@ def _remove_system_message(self, chat_ctx: ChatContext): ) ] # type: ignore - new_count = len(chat_ctx.chat_history.messages) - new_system_count = len([m for m in chat_ctx.chat_history.messages if m.role == AuthorRole.SYSTEM]) - removed_count = original_count - new_count - - logger.info( - f"🏥 SERVICE REMOVE MSG COMPLETE - Removed {removed_count} messages | Total: {original_count}->{new_count} | System: {original_system_count}->{new_system_count}") + removed_count = original_count - len(chat_ctx.chat_history.messages) + if removed_count > 0: + logger.debug(f"Removed {removed_count} patient context system messages") def 
_ensure_system_message(self, chat_ctx: ChatContext, timing: TimingInfo, chat_summary: str | None = None, token_counts: dict | None = None): - logger.info( - f"🏥 SERVICE ENSURE MSG START - Patient: '{chat_ctx.patient_id}' | Conversation: '{chat_ctx.conversation_id}'") - self._remove_system_message(chat_ctx) if not chat_ctx.patient_id: - logger.info(f"🏥 SERVICE ENSURE MSG - No patient ID, not adding system message") return # Simplified payload without agent tracking and chat excerpt @@ -265,13 +188,7 @@ def _ensure_system_message(self, chat_ctx: ChatContext, timing: TimingInfo, line = f"{PATIENT_CONTEXT_PREFIX} {json.dumps(payload, separators=(',', ':'))}" - logger.info(f"🏥 SERVICE ENSURE MSG - Creating system message: {line}") - system_message = ChatMessageContent(role=AuthorRole.SYSTEM, content=line) chat_ctx.chat_history.messages.insert(0, system_message) - total_messages = len(chat_ctx.chat_history.messages) - system_messages = len([m for m in chat_ctx.chat_history.messages if m.role == AuthorRole.SYSTEM]) - - logger.info( - f"🏥 SERVICE ENSURE MSG COMPLETE - System message added at position 0 | Total messages: {total_messages} | System messages: {system_messages}") + logger.debug(f"Added patient context system message for {chat_ctx.patient_id}") From 93119bca725653495d280708d28857ede0ec991a Mon Sep 17 00:00:00 2001 From: vvenglaturmsft Date: Fri, 29 Aug 2025 03:30:17 +0000 Subject: [PATCH 07/20] minor updates - history messages --- src/scenarios/default/config/agents.yaml | 2 +- src/services/patient_context_analyzer.py | 17 +++++---- src/services/patient_context_service.py | 44 +++++++++++++++++++----- 3 files changed, 44 insertions(+), 19 deletions(-) diff --git a/src/scenarios/default/config/agents.yaml b/src/scenarios/default/config/agents.yaml index 35a1f70..ff3c41b 100644 --- a/src/scenarios/default/config/agents.yaml +++ b/src/scenarios/default/config/agents.yaml @@ -36,7 +36,7 @@ facilitator: true description: | Your role is to moderate the discussion, present the order of participants, and facilitate the conversation. - + - name: PatientHistory instructions: | You are an AI agent tasked with loading and presenting patient data. Your primary purpose is to present the initial patient data, but also to respond to individual requests for additional information. diff --git a/src/services/patient_context_analyzer.py b/src/services/patient_context_analyzer.py index c51c6c1..3f5aac1 100644 --- a/src/services/patient_context_analyzer.py +++ b/src/services/patient_context_analyzer.py @@ -146,17 +146,16 @@ async def summarize_text(self, text: str, patient_id: str) -> str: """Generate a patient-specific summary of conversation text.""" try: system_prompt = f""" -You are summarizing a healthcare conversation for patient {patient_id}. +You are a clinical summarization assistant. Your ONLY task is to summarize the provided text for patient '{patient_id}'. -Create a concise summary focusing on: -- Key medical information discussed -- Treatment decisions or recommendations -- Important patient updates -- Relevant test results or findings +**CRITICAL RULES:** +1. **FOCUS EXCLUSIVELY ON `{patient_id}`**: Ignore all information, notes, or mentions related to any other patient. +2. **DO NOT BLEND PATIENTS**: If the text mentions other patients (e.g., 'patient_4', 'patient_12'), you must NOT include them in the summary. +3. **BE CONCISE**: Create a short, bulleted list of 3-5 key points. +4. 
**NO FABRICATION**: If there is no relevant information for `{patient_id}` in the text, respond with "No specific information was discussed for patient {patient_id} in this segment." -Keep the summary under 200 words and patient-focused. - -TEXT TO SUMMARIZE: +Summarize the following text: +--- {text} """ diff --git a/src/services/patient_context_service.py b/src/services/patient_context_service.py index 76b098b..7ede001 100644 --- a/src/services/patient_context_service.py +++ b/src/services/patient_context_service.py @@ -152,21 +152,47 @@ def _clear(self, chat_ctx: ChatContext): chat_ctx.patient_id = None # retain historical contexts for potential reuse def _remove_system_message(self, chat_ctx: ChatContext): - original_count = len(chat_ctx.chat_history.messages) + """ + Removes only the system message(s) for the *currently active* patient. + This preserves the system messages from other patients, which act as crucial + boundaries for the conversation history slicing logic. + """ + if not chat_ctx.patient_id: + # If there's no active patient, there's nothing to remove. + return + + current_patient_id = chat_ctx.patient_id + messages_to_keep = [] + removed_count = 0 - chat_ctx.chat_history.messages = [ - m - for m in chat_ctx.chat_history.messages - if not ( + for m in chat_ctx.chat_history.messages: + if ( m.role == AuthorRole.SYSTEM and isinstance(m.content, str) and m.content.startswith(PATIENT_CONTEXT_PREFIX) - ) - ] # type: ignore + ): + try: + # Extract patient_id from the message payload + json_content = m.content[len(PATIENT_CONTEXT_PREFIX):].strip() + payload = json.loads(json_content) + message_patient_id = payload.get("patient_id") + + # If the message is for the current patient, we skip it (i.e., remove it) + if message_patient_id == current_patient_id: + removed_count += 1 + continue + except (json.JSONDecodeError, KeyError): + # If parsing fails, keep the message to be safe + pass + + # Keep all other messages + messages_to_keep.append(m) - removed_count = original_count - len(chat_ctx.chat_history.messages) if removed_count > 0: - logger.debug(f"Removed {removed_count} patient context system messages") + logger.debug( + f"Removed {removed_count} prior context system message(s) for current patient '{current_patient_id}'.") + + chat_ctx.chat_history.messages = messages_to_keep def _ensure_system_message(self, chat_ctx: ChatContext, timing: TimingInfo, chat_summary: str | None = None, From 66b233e3f5f117ecc3b07ee7c60642a8070fc906 Mon Sep 17 00:00:00 2001 From: vvenglaturmsft Date: Mon, 1 Sep 2025 12:17:08 +0000 Subject: [PATCH 08/20] minor fix for context creep --- src/bots/assistant_bot.py | 14 ++++-- src/scenarios/default/config/agents.yaml | 64 +++++++++++++----------- src/services/patient_context_analyzer.py | 28 +++++++++++ src/services/patient_context_service.py | 8 ++- 4 files changed, 78 insertions(+), 36 deletions(-) diff --git a/src/bots/assistant_bot.py b/src/bots/assistant_bot.py index e197a3b..dacdaff 100644 --- a/src/bots/assistant_bot.py +++ b/src/bots/assistant_bot.py @@ -302,11 +302,17 @@ def _append_pc_ctx(self, base: str, chat_ctx: ChatContext) -> str: ids_str = ", ".join(f"`{p}`{' (active)' if p == active_id else ''}" for p in obj["all_patient_ids"]) lines.append(f"- **Session Patients:** {ids_str}") - if obj.get("chat_summary"): - # Clean up summary for display - summary = obj['chat_summary'].replace('\n', ' ').strip() - if summary: + summary_raw = obj.get("chat_summary", "") + if summary_raw and summary_raw.strip(): + # Check if it's the default "no 
specific information" message + if "No specific information was discussed" in summary_raw: + lines.append(f"- **Summary:** *Building patient context...*") + else: + # Clean up summary for display + summary = summary_raw.replace('\n', ' ').strip() lines.append(f"- **Summary:** *{summary}*") + else: + lines.append(f"- **Summary:** *Building patient context...*") if not obj.get("patient_id"): lines.append("- *No active patient.*") diff --git a/src/scenarios/default/config/agents.yaml b/src/scenarios/default/config/agents.yaml index ff3c41b..c5c7035 100644 --- a/src/scenarios/default/config/agents.yaml +++ b/src/scenarios/default/config/agents.yaml @@ -1,37 +1,41 @@ - name: Orchestrator instructions: | - You are an AI agent facilitating a discussion between group of AI agent experts and the user. You are not to make clinical recommendations or treatment plans. Follow these guidelines: - - 1. **Moderate the Discussion**: - Your primary role is to facilitate the discussion and ensure a smooth flow of conversation among the participants. - When a question is asked, think through who could best answer it. Formulate a plan and present it to the user. - Rely on other agents to provide missing information. First ask the agent what information they need to answer a question. - When asking the user for information, mention the user explicitly. "*User*, can you provide me with the patient's #BLANK?" - When addressing an agent, mention the agent explicitly. "*PatientHistory*, proceed with #BLANK." - 2. **Participants**: - The following ai experts can help with answering queries about the user. - {{aiAgents}} - If during the course of the conversation, information is missing, think through who could best answer it, then ask that agent for the information. - 3. **Smart Plan Reuse**: Before creating a new plan, check if you've done this type of request before in this conversation. For identical requests (like "tumor board review"), say "I'll use the same approach as before:" and proceed without confirmation. For similar requests, briefly explain any changes and ask for confirmation. - 4. **Allow user to confirm**: Ask the user for confirmation on new plans. If the plan changes, inform the user and ask for confirmation again. If the plan progresses as expected, you can skip this step. - 5. **Explain the Purpose and Order**: At the beginning of the conversation, explain the plan and the expected order of participants. - 6. **Auto-create documents**: For tumor board reviews and comprehensive assessments, automatically proceed to ReportCreation after all clinical agents complete their work. Don't ask permission - just say "*ReportCreation*, please compile the information into a Word document." - 7. **Role Limitation**: Remember, your role is to moderate and facilitate, not to provide clinical recommendations or treatment plans. - DON'T: Provide clinical recommendations or treatment plans. - 8. **Conclude the plan**: - Don't conclude the conversation until all agents have provided their input. Instead, address the agents that have not yet provided their input. - When all agents have provided their input, the plan has concluded, and the user's question has been answered, summarize the response in one or two sentences. - Ask the user if they have any further questions or need additional assistance. - For follow up questions, formulate a new plan and suggest the order of participants. - + You are an AI agent facilitating a discussion between a group of AI agent experts and the user. 
You are not to make clinical recommendations or treatment plans. Follow these guidelines: + + **CORE DIRECTIVES (Follow in this exact order):** + + 1. **Identify Current Patient**: First, read the SYSTEM message starting with "PATIENT_CONTEXT_JSON:" to identify the current `patient_id`. All subsequent actions concern THIS patient only. + + 2. **Check for Recently Completed Work (for this patient)**: Next, read the `chat_summary` within that same JSON message. + - If the summary indicates the user's request has already been completed for the **current patient** (e.g., "A tumor board review was generated"), you MUST inform the user and STOP. Do not create a new plan. + - Example Response: "Based on our recent conversation, a tumor board review for `patient_X` has already been completed. Would you like me to generate a new one?" + + 3. **Plan Generation and Reuse Logic**: If the work isn't already done for the current patient, decide on a plan. + - **First Plan of the Session**: If this is the very first time you are creating a plan in this entire conversation (for any patient), you **MUST** present the plan and ask the user for confirmation before proceeding. + - **Reusing a Confirmed Plan**: If the user has confirmed a plan for a similar task (like a "tumor board review") at any point earlier in this conversation (even for a different patient), you can reuse that plan's structure. + - **Transparency is Key**: When reusing a plan, you **MUST** announce it. Say something like: "This is a similar request to one we've handled before. I will reuse the same plan to proceed." Then, execute the plan directly without asking for confirmation. + + 4. **Moderate the Discussion**: + - When creating a new plan, explain the purpose and order of agents. + - Keep track of which agents have completed their tasks. + - When asking the user for information, mention the user explicitly. "*User*, can you provide me with the patient's #BLANK?" + - When addressing an agent, mention the agent explicitly. "*PatientHistory*, proceed with #BLANK." + + 5. **Participants**: + The following AI experts can help with answering queries. + {{aiAgents}} + + 6. **Role Limitation**: Remember, your role is to moderate and facilitate. + - DON'T: Provide clinical recommendations or treatment plans. + + 7. **Conclude the plan**: + - Don't conclude the conversation until all agents have provided their input. + - When all agents have provided their input and the user's question has been answered, summarize the response in one or two sentences. + - Ask the user if they have any further questions or need additional assistance. + **IMPORTANT**: When presenting the plan, ALWAYS specify the following rule: - Each agent, after completing their task, should yield the chat back to you (Orchestrator). Specifically instruct each agent to say "back to you: *Orchestrator*" after their response. - - Context rule - Patient Context: - - Before planning or delegating, read the SYSTEM message that begins with "PATIENT_CONTEXT_JSON:" and treat it as the active patient context. - - If the context is missing or unclear, ask PatientHistory to obtain/confirm the patient ID, then proceed. - - Remind other agents to use the current system patient context; they should not set/switch/clear it themselves. + Each agent, after completing their task, should yield the chat back to you (Orchestrator). Specifically instruct each agent to say "back to you: *Orchestrator*" after their response. 
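For reference, the `PATIENT_CONTEXT_JSON:` system message these instructions tell the Orchestrator to read is emitted by `PatientContextService._ensure_system_message` as the prefix followed by a compact JSON payload. A representative value is sketched below; the IDs and summary text are invented, and the keys are limited to the ones the `_append_pc_ctx` display helper actually reads.

```python
# Illustrative only -- the IDs and summary text below are made up.
example_system_message = (
    "PATIENT_CONTEXT_JSON: "
    '{"patient_id":"patient_4",'
    '"conversation_id":"19:abc123",'
    '"all_patient_ids":["patient_4","patient_7"],'
    '"chat_summary":"- Tumor board review requested for patient_4"}'
)
```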
facilitator: true description: | diff --git a/src/services/patient_context_analyzer.py b/src/services/patient_context_analyzer.py index 3f5aac1..1c6306e 100644 --- a/src/services/patient_context_analyzer.py +++ b/src/services/patient_context_analyzer.py @@ -174,3 +174,31 @@ async def summarize_text(self, text: str, patient_id: str) -> str: except Exception as e: logger.warning(f"Failed to generate summary: {e}") return f"Summary generation failed for {patient_id}" + + # Add this method to the PatientContextAnalyzer class + + def reset_kernel(self): + """Reset the kernel and service instance to prevent LLM state contamination between patients.""" + try: + if hasattr(self, '_kernel') and self._kernel: + # Store current configuration + current_deployment = self.deployment_name + current_api_version = self.api_version + + # Create fresh kernel instance + self._kernel = Kernel() + + # Re-add the service with same configuration + self._kernel.add_service( + AzureChatCompletion( + service_id="default", + deployment_name=current_deployment, + api_version=current_api_version, + ad_token_provider=None, # Adjust if you use token provider + ) + ) + + logger.info("Kernel reset to prevent patient context contamination") + + except Exception as e: + logger.warning(f"Error during kernel reset: {e}") diff --git a/src/services/patient_context_service.py b/src/services/patient_context_service.py index 7ede001..e329ecf 100644 --- a/src/services/patient_context_service.py +++ b/src/services/patient_context_service.py @@ -60,9 +60,13 @@ async def decide_and_apply(self, user_text: str, chat_ctx: ChatContext) -> tuple elif action == "UNCHANGED": decision = "UNCHANGED" - # Log state changes only if they occurred if original_patient_id != chat_ctx.patient_id: - logger.info(f"Patient context changed: '{original_patient_id}' -> '{chat_ctx.patient_id}'") + logger.warning( + f"Patient context changed: '{original_patient_id}' -> '{chat_ctx.patient_id}'. " + "Resetting analyzer kernel to prevent context leak." + ) + if hasattr(self.analyzer, "reset_kernel"): + self.analyzer.reset_kernel() service_duration = time.time() - service_start_time timing: TimingInfo = {"analyzer": round(analyzer_duration, 4), "service": round(service_duration, 4)} From eeb13050217bd20953b1c0adbbdcfab04e9b98e7 Mon Sep 17 00:00:00 2001 From: vvenglaturmsft Date: Mon, 1 Sep 2025 20:30:04 +0000 Subject: [PATCH 09/20] minor update --- src/scenarios/default/config/agents.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/scenarios/default/config/agents.yaml b/src/scenarios/default/config/agents.yaml index c5c7035..06a6b52 100644 --- a/src/scenarios/default/config/agents.yaml +++ b/src/scenarios/default/config/agents.yaml @@ -28,11 +28,11 @@ 6. **Role Limitation**: Remember, your role is to moderate and facilitate. - DON'T: Provide clinical recommendations or treatment plans. - 7. **Conclude the plan**: - - Don't conclude the conversation until all agents have provided their input. - - When all agents have provided their input and the user's question has been answered, summarize the response in one or two sentences. - - Ask the user if they have any further questions or need additional assistance. - + 7. **Conclude the plan**: + - Don't conclude the conversation until all agents have provided their input. Instead, address the agents that have not yet provided their input. + - When all agents have provided their input and the user's question has been answered, summarize the response in one or two sentences. 
+ - Ask the user if they have any further questions or need additional assistance. + **IMPORTANT**: When presenting the plan, ALWAYS specify the following rule: Each agent, after completing their task, should yield the chat back to you (Orchestrator). Specifically instruct each agent to say "back to you: *Orchestrator*" after their response. @@ -40,7 +40,7 @@ facilitator: true description: | Your role is to moderate the discussion, present the order of participants, and facilitate the conversation. - + - name: PatientHistory instructions: | You are an AI agent tasked with loading and presenting patient data. Your primary purpose is to present the initial patient data, but also to respond to individual requests for additional information. From cc1c140d53b98ae5886364236aaeea6fc39a78ce Mon Sep 17 00:00:00 2001 From: vvenglaturmsft Date: Wed, 3 Sep 2025 13:16:19 +0000 Subject: [PATCH 10/20] updated webinterface as well --- src/routes/api/chats.py | 223 +++++++++++++++++++++++++++++++++++----- 1 file changed, 196 insertions(+), 27 deletions(-) diff --git a/src/routes/api/chats.py b/src/routes/api/chats.py index 10fb08d..bcf6fb7 100644 --- a/src/routes/api/chats.py +++ b/src/routes/api/chats.py @@ -11,12 +11,18 @@ from fastapi.responses import JSONResponse from pydantic import BaseModel +from semantic_kernel.contents import AuthorRole, ChatMessageContent +from services.patient_context_service import PatientContextService, PATIENT_CONTEXT_PREFIX +from services.patient_context_analyzer import PatientContextAnalyzer + from data_models.app_context import AppContext import group_chat logger = logging.getLogger(__name__) # Custom JSON encoder that handles datetime + + class DateTimeEncoder(json.JSONEncoder): def default(self, obj: Any) -> Any: if isinstance(obj, datetime): @@ -24,12 +30,15 @@ def default(self, obj: Any) -> Any: return super().default(obj) # Pydantic models for request/response + + class MessageRequest(BaseModel): content: str sender: str mentions: Optional[List[str]] = None channelData: Optional[Dict] = None + class Message(BaseModel): id: str content: str @@ -37,7 +46,7 @@ class Message(BaseModel): timestamp: datetime isBot: bool mentions: Optional[List[str]] = None - + def dict(self, *args, **kwargs): # Override dict method to handle datetime serialization d = super().dict(*args, **kwargs) @@ -46,19 +55,24 @@ def dict(self, *args, **kwargs): d['timestamp'] = d['timestamp'].isoformat() return d + class MessageResponse(BaseModel): message: Message error: Optional[str] = None + class MessagesResponse(BaseModel): messages: List[Message] error: Optional[str] = None + class AgentsResponse(BaseModel): agents: List[str] error: Optional[str] = None # Create a helper function to create JSON responses with datetime handling + + def create_json_response(content, headers=None): """Create a JSONResponse with proper datetime handling.""" return JSONResponse( @@ -67,17 +81,140 @@ def create_json_response(content, headers=None): encoder=DateTimeEncoder ) + def chats_routes(app_context: AppContext): router = APIRouter() - + # Extract needed values from app_context agent_config = app_context.all_agent_configs data_access = app_context.data_access - + + # Initialize patient context service + analyzer = PatientContextAnalyzer(token_provider=app_context.cognitive_services_token_provider) + patient_context_service = PatientContextService(analyzer=analyzer) + # Find the facilitator agent facilitator_agent = next((agent for agent in agent_config if agent.get("facilitator")), agent_config[0]) facilitator = 
facilitator_agent["name"] - + + def _append_pc_ctx_system(chat_history_messages: List[ChatMessageContent], patient_context: str) -> None: + """Append patient context to chat history at position 0 (system message).""" + if len(chat_history_messages) > 0 and chat_history_messages[0].role == AuthorRole.SYSTEM: + # Update existing system message + existing_content = chat_history_messages[0].content + if PATIENT_CONTEXT_PREFIX not in existing_content: + chat_history_messages[0].content = f"{existing_content}\n\n{patient_context}" + else: + # Insert new system message at position 0 + system_message = ChatMessageContent( + role=AuthorRole.SYSTEM, + content=patient_context + ) + chat_history_messages.insert(0, system_message) + + def _get_system_patient_context_json(chat_context) -> str | None: + """Extract the JSON payload from the current PATIENT_CONTEXT_JSON system message.""" + # Fix: Use .messages instead of .history + for msg in chat_context.chat_history.messages: + if msg.role == AuthorRole.SYSTEM: + # Handle both string content and itemized content + content = msg.content + if isinstance(content, str): + text = content + else: + # Try to extract from items if content is structured + items = getattr(msg, "items", None) or getattr(content, "items", None) + if items: + parts = [] + for item in items: + item_text = getattr(item, "text", None) or getattr(item, "content", None) + if item_text: + parts.append(str(item_text)) + text = "".join(parts) if parts else str(content) if content else "" + else: + text = str(content) if content else "" + + if text and text.startswith(PATIENT_CONTEXT_PREFIX): + # Extract JSON after "PATIENT_CONTEXT_JSON:" + json_part = text[len(PATIENT_CONTEXT_PREFIX):].strip() + if json_part.startswith(":"): + json_part = json_part[1:].strip() + return json_part if json_part else None + return None + + def _append_pc_ctx_display(base: str, chat_context) -> str: + """Append patient context information to the message for display.""" + + # Avoid double-tagging + if "\nPC_CTX" in base or "\n*PT_CTX:*" in base: + return base + + # Get the actual injected system patient context JSON + json_payload = _get_system_patient_context_json(chat_context) + + if not json_payload: + return base + + # Format the JSON payload into a simple, readable Markdown string + try: + obj = json.loads(json_payload) + + lines = ["\n\n---", "\n*PT_CTX:*"] + if obj.get("patient_id"): + lines.append(f"- **Patient ID:** `{obj['patient_id']}`") + if obj.get("conversation_id"): + lines.append(f"- **Conversation ID:** `{obj['conversation_id']}`") + + if obj.get("all_patient_ids"): + active_id = obj.get("patient_id") + ids_str = ", ".join(f"`{p}`{' (active)' if p == active_id else ''}" for p in obj["all_patient_ids"]) + lines.append(f"- **Session Patients:** {ids_str}") + + summary_raw = obj.get("chat_summary", "") + if summary_raw and summary_raw.strip(): + # Check if it's the default "no specific information" message + if "No specific information was discussed" in summary_raw: + lines.append(f"- **Summary:** *Building patient context...*") + else: + # Clean up summary for display + summary = summary_raw.replace('\n', ' ').strip() + lines.append(f"- **Summary:** *{summary}*") + else: + lines.append(f"- **Summary:** *Building patient context...*") + + if not obj.get("patient_id"): + lines.append("- *No active patient.*") + + # Only add the block if there's something to show besides the header + if len(lines) > 2: + formatted_text = "\n".join(lines) + logger.debug(f"Appended patient context to message | Patient: 
{obj.get('patient_id')}") + return f"{base}{formatted_text}" + else: + return base + + except json.JSONDecodeError as e: + logger.warning(f"Failed to parse patient context JSON: {e}") + # Fallback to raw if JSON is malformed, but keep it simple + return f"{base}\n\n---\n*PT_CTX (raw):* `{json_payload}`" + + def _format_patient_context_json(patient_context: str) -> str: + """Convert patient context to JSON format for system message.""" + return json.dumps({ + "patient_context": patient_context, + "instruction": "Use this patient context to provide relevant responses. Always consider the patient's current medical status, history, and any active conditions when responding." + }, indent=2) + + async def _handle_clear_command(content: str, chat_context) -> bool: + """Handle patient context clear commands.""" + content_lower = content.lower().strip() + if content_lower in ["clear", "clear patient", "clear context", "clear patient context"]: + # Clear patient context + chat_context.patient_context = None + logger.info("Patient context cleared via WebSocket clear command") + return True + return False + @router.get("/api/agents", response_model=AgentsResponse) async def get_available_agents(): """ @@ -86,7 +223,7 @@ async def get_available_agents(): try: # Extract agent names from the agent_config agent_names = [agent["name"] for agent in agent_config if "name" in agent] - + # Return the list of agent names return JSONResponse( content={"agents": agent_names, "error": None} @@ -97,87 +234,119 @@ async def get_available_agents(): content={"agents": [], "error": str(e)}, status_code=500 ) - + @router.websocket("/api/ws/chats/{chat_id}/messages") async def websocket_chat_endpoint(websocket: WebSocket, chat_id: str): """WebSocket endpoint for streaming chat messages""" try: await websocket.accept() logger.info(f"WebSocket connection established for chat: {chat_id}") - + # Wait for the first message from the client client_message = await websocket.receive_json() logger.info(f"Received message over WebSocket: {client_message}") - + # Extract message content, sender and mentions content = client_message.get("content", "") sender = client_message.get("sender", "User") mentions = client_message.get("mentions", []) - + # Try to read existing chat context or create a new one if it doesn't exist try: chat_context = await data_access.chat_context_accessor.read(chat_id) except: # If the chat doesn't exist, create a new one chat_context = await data_access.chat_context_accessor.create_new(chat_id) - + + # Handle clear commands + if await _handle_clear_command(content, chat_context): + # Send confirmation message + clear_message = Message( + id=str(uuid.uuid4()), + content="Patient context has been cleared.", + sender="System", + timestamp=datetime.now(timezone.utc), + isBot=True, + mentions=[] + ) + await websocket.send_json(clear_message.dict()) + await websocket.send_json({"type": "done"}) + + # Save updated context + await data_access.chat_context_accessor.write(chat_context) + return + # Add user message to history chat_context.chat_history.add_user_message(content) - + + # Apply patient context using the service - FIX: Use correct method signature + try: + decision, timing = await patient_context_service.decide_and_apply( + content, # user_text parameter + chat_context # chat_ctx parameter + ) + + logger.info(f"Patient context decision: {decision}, timing: {timing}") + + except Exception as e: + logger.warning(f"Error applying patient context to WebSocket message: {e}") + # Continue without patient context 
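# --- Editor's illustrative sketch (not part of the patch) ---------------------
# _get_system_patient_context_json and _append_pc_ctx_display above assume the chat
# history carries a system message of the form "<PATIENT_CONTEXT_PREFIX><json>".
# Based on the fields read above and the agents.yaml example later in this series,
# the prefix is assumed here to resolve to "PATIENT_CONTEXT_JSON:" and the payload
# to look roughly like (example values only):
#
#   PATIENT_CONTEXT_JSON:{"patient_id": "patient_4",
#                         "conversation_id": "conv_001",
#                         "all_patient_ids": ["patient_2", "patient_4"],
#                         "chat_summary": "Reviewed imaging findings for patient_4."}
#
# which _append_pc_ctx_display would render under the assistant reply as:
#
#   ---
#   *PT_CTX:*
#   - **Patient ID:** `patient_4`
#   - **Conversation ID:** `conv_001`
#   - **Session Patients:** `patient_2`, `patient_4` (active)
#   - **Summary:** *Reviewed imaging findings for patient_4.*
# -------------------------------------------------------------------------------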
+ # Create group chat instance chat, chat_context = group_chat.create_group_chat(app_context, chat_context) - + # Process the message - determine target agent based on mentions target_agent_name = facilitator # Default to facilitator agent - + if mentions and len(mentions) > 0: # Use the first mentioned agent target_agent_name = mentions[0] # Find the agent by name target_agent = next( - (agent for agent in chat.agents if agent.name.lower() == target_agent_name.lower()), + (agent for agent in chat.agents if agent.name.lower() == target_agent_name.lower()), chat.agents[0] # Fallback to first agent ) - + logger.info(f"Using agent: {target_agent.name} to respond to WebSocket message") - - + # Check if the agent is the facilitator if target_agent.name == facilitator: target_agent = None # Force facilitator mode when target is the facilitator - + response_sent = False - + # Get responses from the target agent async for response in chat.invoke(agent=target_agent): # Skip responses with no content if not response or not response.content: continue - + + # Add patient context display to response content + response_content_with_pc = _append_pc_ctx_display(response.content, chat_context) + # Create bot response message for each response bot_message = Message( id=str(uuid.uuid4()), - content=response.content, + content=response_content_with_pc, # Use content with PC_CTX display sender=response.name, timestamp=datetime.now(timezone.utc), isBot=True, mentions=[] ) - + # Convert to dict for JSON serialization message_dict = bot_message.dict() - + # Send message over WebSocket await websocket.send_json(message_dict) - + # Save chat context after all messages are processed await data_access.chat_context_accessor.write(chat_context) - + # Send done signal await websocket.send_json({"type": "done"}) - - + except WebSocketDisconnect: logger.info(f"WebSocket client disconnected from chat: {chat_id}") except Exception as e: From d71812f03545da42f8e9476a93cbd82c1ad9d4e2 Mon Sep 17 00:00:00 2001 From: vvenglaturmsft Date: Thu, 4 Sep 2025 01:54:19 +0000 Subject: [PATCH 11/20] fix active context --- src/scenarios/default/config/agents.yaml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/scenarios/default/config/agents.yaml b/src/scenarios/default/config/agents.yaml index 06a6b52..20458e3 100644 --- a/src/scenarios/default/config/agents.yaml +++ b/src/scenarios/default/config/agents.yaml @@ -15,6 +15,19 @@ - **Reusing a Confirmed Plan**: If the user has confirmed a plan for a similar task (like a "tumor board review") at any point earlier in this conversation (even for a different patient), you can reuse that plan's structure. - **Transparency is Key**: When reusing a plan, you **MUST** announce it. Say something like: "This is a similar request to one we've handled before. I will reuse the same plan to proceed." Then, execute the plan directly without asking for confirmation. + **BEHAVIORAL EXAMPLE (Patient Context Handling):** + This is an example of how to correctly answer a direct question about the active patient by strictly following the `PATIENT_CONTEXT_JSON` message. + --- + *PREVIOUS CONVERSATION was about patient_A.* + **USER:** @Orchestrator get me the report for patient_B + **ORCHESTRATOR:** *...starts process for patient_B...* + *(The latest SYSTEM message is now: PATIENT_CONTEXT_JSON:{"patient_id": "patient_B", ...})* + **USER:** @Orchestrator who is the active patient? 
+ **CORRECT REASONING (Internal Thought):** My memory suggests we were just talking about patient_A, but the latest `PATIENT_CONTEXT_JSON` message clearly states the `patient_id` is "patient_B". The JSON is the single source of truth, so I must use "patient_B". + **CORRECT RESPONSE:** The current active patient is patient_B. + --- + *You MUST follow this reasoning. The `PATIENT_CONTEXT_JSON` is the only source of truth.* + 4. **Moderate the Discussion**: - When creating a new plan, explain the purpose and order of agents. - Keep track of which agents have completed their tasks. From 9c0d04b4ebbf0bdb8145c6f0d5edbf5918cd2991 Mon Sep 17 00:00:00 2001 From: vvenglaturmsft Date: Thu, 18 Sep 2025 15:39:11 +0000 Subject: [PATCH 12/20] updated - individual patient context files --- infra/main.bicep | 11 +- scripts/generateTeamsApp.ps1 | 4 +- scripts/generateTeamsApp.sh | 8 +- scripts/uploadPatientData.ps1 | 2 +- scripts/uploadPatientData.sh | 19 +- src/app.py | 30 +- src/bots/assistant_bot.py | 200 +++++-- src/data_models/chat_context.py | 15 +- src/data_models/chat_context_accessor.py | 291 +++++++--- src/data_models/data_access.py | 15 +- src/data_models/patient_context_accessor.py | 117 ++++ src/data_models/patient_context_models.py | 137 +++++ src/evaluation/chat_simulator.py | 51 +- src/group_chat.py | 277 +++++++++- src/healthcare_agents/agent.py | 40 +- src/magentic_chat.py | 2 +- src/requirements.txt | 2 +- src/routes/api/chats.py | 381 ++++++------- src/scenarios/default/config/agents.yaml | 98 ++-- src/services/patient_context_analyzer.py | 252 +++++---- src/services/patient_context_service.py | 573 +++++++++++++++----- teamsApp/manifest.json | 18 +- 22 files changed, 1842 insertions(+), 701 deletions(-) create mode 100644 src/data_models/patient_context_accessor.py create mode 100644 src/data_models/patient_context_models.py diff --git a/infra/main.bicep b/infra/main.bicep index fe2b9c6..6bd13c6 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -34,13 +34,13 @@ param appPlanName string = '' param appName string = '' @description('Gen AI model name and version to deploy') -@allowed(['gpt-4o;2024-08-06', 'gpt-4.1;2025-04-14']) +@allowed(['gpt-4o;2024-08-06','gpt-4.1;2025-04-14','gpt-5;2025-08-07','gpt-5-mini;2025-08-07','gpt-5-nano;2025-08-07','model-router;2025-08-07']) param model string @description('Tokens per minute capacity for the model. Units of 1000 (capacity = 100 means 100K tokens per minute)') param modelCapacity int // https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/deployment-types -@description('Specify the deployment type of the model. Only allow deployment types where data processing and data storage is within the specified Azure geography.') -@allowed(['Standard', 'DataZoneStandard']) +@description('Specify the deployment type of the model. "Standard" & "DataZoneStandard" only allow data processing and data storage within the specified Azure geography. GPT5 only supports "GlobalStandard" as of now. Please be aware that this can lead to data storage and data processing outside of your azure region!') +@allowed(['DataZoneStandard', 'Standard','GlobalStandard']) param modelSku string @description('Location to deploy AI Services') @@ -194,8 +194,9 @@ var names = { app: !empty(appName) ? appName : '${abbrs.webSitesAppService}${environmentName}-${uniqueSuffix}' aiServices: !empty(aiServicesName) ? aiServicesName : '${abbrs.cognitiveServicesAccounts}${environmentName}-${uniqueSuffix}' aiHub: !empty(aiHubName) ? 
aiHubName : '${abbrs.cognitiveServicesAccounts}hub-${environmentName}-${uniqueSuffix}' - storage: !empty(storageName) ? storageName : replace(replace('${abbrs.storageStorageAccounts}${environmentName}${uniqueSuffix}', '-', ''), '_', '') - appStorage: !empty(appStorageName) ? appStorageName : replace(replace('${abbrs.storageStorageAccounts}app${environmentName}${uniqueSuffix}', '-', ''), '_', '') + // Modified to next two lines to lower case the string per azure storage account naming conventions. This was causing a deployment error + storage: toLower(!empty(storageName) ? storageName : replace(replace('${abbrs.storageStorageAccounts}${environmentName}${uniqueSuffix}', '-', ''), '_', '')) + appStorage: toLower(!empty(appStorageName) ? appStorageName : replace(replace('${abbrs.storageStorageAccounts}app${environmentName}${uniqueSuffix}', '-', ''), '_', '')) keyVault: !empty(keyVaultName) ? keyVaultName : '${abbrs.keyVaultVaults}${environmentName}-${uniqueSuffix}' appInsights: !empty(appInsightsName) ? appInsightsName : '${abbrs.insightsComponents}${environmentName}-${uniqueSuffix}' ahdsWorkspaceName: replace('ahds${environmentName}${uniqueSuffix}', '-', '') diff --git a/scripts/generateTeamsApp.ps1 b/scripts/generateTeamsApp.ps1 index 4649211..139edf7 100755 --- a/scripts/generateTeamsApp.ps1 +++ b/scripts/generateTeamsApp.ps1 @@ -60,7 +60,9 @@ foreach ($bot in $azureBotsContent) { $manifestContent.description.full = $bot.name $manifestContent.icons.color = $bot.name + ".png" $manifestContent.icons.outline = $bot.name + ".png" - + $manifestContent.webApplicationInfo.id = $bot.botId + $manifestContent.webApplicationInfo.resource = "api://botid-$($bot.botId)" + # Define the new manifest file path $newManifestFilePath = Join-Path -Path $botOutputDirectory -ChildPath "manifest.json" diff --git a/scripts/generateTeamsApp.sh b/scripts/generateTeamsApp.sh index 380e446..fd5b24a 100755 --- a/scripts/generateTeamsApp.sh +++ b/scripts/generateTeamsApp.sh @@ -37,8 +37,8 @@ fi # Ensure the output directory is created mkdir -p "$output" -scriptDirectory=$(dirname "$(readlink -f "$0")") -rootDirectory=$(dirname "$scriptDirectory") +scriptDirectory="$(cd "$(dirname "$0")" && pwd)" +rootDirectory="$(dirname "$scriptDirectory")" azure_bots=$(azd env get-value AZURE_BOTS) @@ -78,7 +78,9 @@ echo "$azureBotsContent" | while IFS= read -r bot; do .description.short = $botName | .description.full = $botName | .icons.outline = ($botName + ".png") | - .icons.color = ($botName + ".png") + .icons.color = ($botName + ".png") | + .webApplicationInfo.id = $botId | + .webApplicationInfo.resource = "api://botid-$botId" ') # Define the new manifest file path diff --git a/scripts/uploadPatientData.ps1 b/scripts/uploadPatientData.ps1 index 341a7bb..c0689d1 100644 --- a/scripts/uploadPatientData.ps1 +++ b/scripts/uploadPatientData.ps1 @@ -77,4 +77,4 @@ Get-ChildItem -Path $localFolderPath | ForEach-Object { Write-Output "Uploading patient data from $path" az storage blob upload-batch --account-name $storageAccountName --destination "$containerName/$patientFolder" --source $path --auth-mode login --overwrite true } -} +} \ No newline at end of file diff --git a/scripts/uploadPatientData.sh b/scripts/uploadPatientData.sh index 6836dfd..324d2db 100755 --- a/scripts/uploadPatientData.sh +++ b/scripts/uploadPatientData.sh @@ -9,6 +9,11 @@ if [ "$AZURE_PRINCIPAL_TYPE" == "ServicePrincipal" ]; then exit 0 fi +if ! command -v az &> /dev/null; then + echo "Azure CLI (az) is not installed. Please install it first." 
+ exit 1 +fi + # Check if user is logged in if ! az account show &>/dev/null; then echo "You are not logged in to Azure. Please log in and try again." @@ -16,8 +21,8 @@ if ! az account show &>/dev/null; then fi # Define the script and root directories -scriptDirectory=$(dirname "$(realpath "$0")") -rootDirectory=$(dirname "$scriptDirectory") +scriptDirectory="$(cd "$(dirname "$0")" && pwd)" +rootDirectory="$(dirname "$scriptDirectory")" # Define the path to your .env file envFilePath="$rootDirectory/src/.env" @@ -34,7 +39,7 @@ if [ "$CLINICAL_NOTES_SOURCE" == "fhir" ]; then echo "CLINICAL_NOTES_SOURCE is set to \"fhir\". Uploading patient data to FHIR service..." # Check if Python is installed - pythonVersion=$(python -V 2>&1 | grep -Po '(?<=Python )(.+)') + pythonVersion=$(python -V 2>&1 | awk '{print $2}') if [[ -z "$pythonVersion" ]]; then echo "Python version 3.12 or higher is required. Please install Python and try again." exit 1 @@ -57,6 +62,14 @@ if [ "$CLINICAL_NOTES_SOURCE" == "fhir" ]; then # Run the Python script to upload patient data to FHIR service echo " Uploading FHIR resources into the FHIR service..." + + # Get tenant ID from current Azure CLI context + tenantId=$(az account show --query tenantId -o tsv 2>/dev/null) + if [ -z "$tenantId" ]; then + echo "Unable to determine tenant ID from current Azure CLI context." + exit 1 + fi + authToken=$(az account get-access-token --resource "$FHIR_SERVICE_ENDPOINT" --tenant "$tenantId" --query accessToken -o tsv) if [ $? -ne 0 ]; then echo "Failed to obtain access token for FHIR service. If you're running from a device outside of your organization, such as Github Codespace, you'll need to obtain the access token from an approved device by your organization." diff --git a/src/app.py b/src/app.py index 9fc0c6b..ad936cd 100644 --- a/src/app.py +++ b/src/app.py @@ -31,14 +31,13 @@ load_dotenv(".env") -# Setup default logging and minimum log level severity for your environment that you want to consume +# Setup default logging and minimum log level severity log_level = logging.INFO setup_logging(log_level=log_level) def create_app_context(): - '''Create the application context for commonly used object used in application.''' - + """Create the application context for commonly used objects in the application.""" # Load agent configuration scenario = os.getenv("SCENARIO") agent_config = load_agent_config(scenario) @@ -67,7 +66,10 @@ def create_app( bots: dict, app_context: AppContext, ) -> FastAPI: + """Create the FastAPI application with all routes and middleware.""" app = FastAPI() + + # Add API routes app.include_router(messages_routes(adapters, bots)) app.include_router(chats_routes(app_context)) app.include_router(user_routes()) @@ -79,17 +81,17 @@ def create_app( # Serve static files from the React build directory static_files_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static") if os.path.exists(static_files_path): - app.mount("/static", StaticFiles(directory=os.path.join(static_files_path, "static")), name="static") + app.mount("/static", StaticFiles(directory=static_files_path), name="static") - # Mount assets directory for Vite-generated assets like /assets/index-abc123.js - assets_path = os.path.join(static_files_path, "static", "assets") + # Mount assets directory for Vite-generated assets + assets_path = os.path.join(static_files_path, "assets") if os.path.exists(assets_path): app.mount("/assets", StaticFiles(directory=assets_path), name="assets") # Add a route for the root URL to serve index.html 
@app.get("/") async def serve_root(): - index_path = os.path.join(static_files_path, "static", "index.html") + index_path = os.path.join(static_files_path, "index.html") if os.path.exists(index_path): return FileResponse(index_path) return {"detail": "React app not built yet"} @@ -97,7 +99,7 @@ async def serve_root(): # Add a catch-all route to serve index.html for client-side routing @app.get("/{full_path:path}") async def serve_react_app(full_path: str): - index_path = os.path.join(static_files_path, "static", "index.html") + index_path = os.path.join(static_files_path, "index.html") if os.path.exists(index_path): return FileResponse(index_path) return {"detail": "React app not built yet"} @@ -105,11 +107,11 @@ async def serve_react_app(full_path: str): return app +# Initialize application context app_context = create_app_context() # Setup Application Insights logging -setup_app_insights_logging(credential=app_context.credential, - log_level=log_level) +setup_app_insights_logging(credential=app_context.credential, log_level=log_level) # Create Teams specific objects adapters = { @@ -117,22 +119,28 @@ async def serve_react_app(full_path: str): DefaultConfig(botId=agent["bot_id"]))).use(ShowTypingMiddleware()).use(AccessControlMiddleware()) for agent in app_context.all_agent_configs } + bot_config = { "adapters": adapters, "app_context": app_context, "turn_contexts": {} } + bots = { agent["name"]: AssistantBot(agent, **bot_config) if agent["name"] != "magentic" else MagenticBot(agent, **bot_config) for agent in app_context.all_agent_configs } +# Create applications teams_app = create_app(bots, app_context) fast_mcp_app, lifespan = create_fast_mcp_app(app_context) +# Main application with routing app = Starlette( routes=[ Mount('/mcp', app=fast_mcp_app), Mount('/', teams_app), - ], lifespan=lifespan) + ], + lifespan=lifespan +) diff --git a/src/bots/assistant_bot.py b/src/bots/assistant_bot.py index dacdaff..d203154 100644 --- a/src/bots/assistant_bot.py +++ b/src/bots/assistant_bot.py @@ -5,7 +5,7 @@ import json import logging import os -import json +from datetime import datetime, timezone from botbuilder.core import MessageFactory, TurnContext from botbuilder.core.teams import TeamsActivityHandler @@ -13,12 +13,12 @@ from botbuilder.schema import Activity, ActivityTypes from semantic_kernel.agents import AgentGroupChat - -from semantic_kernel.contents import AuthorRole +from semantic_kernel.contents import AuthorRole, ChatMessageContent, TextContent from services.patient_context_service import PATIENT_CONTEXT_PREFIX from data_models.app_context import AppContext from data_models.chat_context import ChatContext + from errors import NotAuthorizedError from group_chat import create_group_chat from services.patient_context_service import PatientContextService @@ -45,8 +45,13 @@ def __init__( self.data_access = app_context.data_access self.root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + # Add patient context service analyzer = PatientContextAnalyzer(token_provider=app_context.cognitive_services_token_provider) - self.patient_context_service = PatientContextService(analyzer=analyzer) + self.patient_context_service = PatientContextService( + analyzer=analyzer, + registry_accessor=app_context.data_access.patient_context_registry_accessor, + context_accessor=app_context.data_access.chat_context_accessor + ) async def get_bot_context( self, conversation_id: str, bot_name: str, turn_context: TurnContext @@ -56,7 +61,6 @@ async def get_bot_context( if bot_name not in 
self.turn_contexts[conversation_id]: context = await self.create_turn_context(bot_name, turn_context) - self.turn_contexts[conversation_id][bot_name] = context return self.turn_contexts[conversation_id][bot_name] @@ -96,33 +100,143 @@ async def logic(context: TurnContext): return context + async def _handle_clear_command(self, content: str, chat_ctx: ChatContext, conversation_id: str) -> bool: + """Handle patient context clear commands - aligned with web interface.""" + content_lower = content.lower().strip() + if content_lower in ["clear", "clear patient", "clear context", "clear patient context"]: + logger.info(f"Processing clear command for conversation: {conversation_id}") + + # Archive everything before clearing (same as web interface) + timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H-%M-%S-%f") + archive_folder = f"archive/{timestamp}" + + try: + logger.info(f"Starting archive to folder: {archive_folder}") + + # Archive session context (this creates the archive folder structure) + await self.data_access.chat_context_accessor.archive_to_folder(conversation_id, None, archive_folder) + logger.info(f"Archived session context to {archive_folder}") + + # Archive ALL patient contexts (not just from chat_ctx.patient_contexts) + # We need to get the list from the registry like the web interface does + try: + patient_registry, _ = await self.patient_context_service.registry_accessor.read_registry(conversation_id) + if patient_registry: + for patient_id in patient_registry.keys(): + await self.data_access.chat_context_accessor.archive_to_folder(conversation_id, patient_id, archive_folder) + logger.info(f"Archived patient context for {patient_id} to {archive_folder}") + except Exception as registry_error: + logger.warning(f"Could not read registry for archiving patient contexts: {registry_error}") + # Fallback: use patient_contexts from chat_ctx if available + if hasattr(chat_ctx, 'patient_contexts') and chat_ctx.patient_contexts: + for patient_id in chat_ctx.patient_contexts.keys(): + await self.data_access.chat_context_accessor.archive_to_folder(conversation_id, patient_id, archive_folder) + logger.info(f"Archived patient context for {patient_id} to {archive_folder} (fallback)") + + # Archive patient registry (this renames it, doesn't create folder structure) + await self.patient_context_service.registry_accessor.archive_registry(conversation_id) + logger.info(f"Archived patient registry for {conversation_id}") + + # Clear chat context (same as web interface) + chat_ctx.patient_context = None + if hasattr(chat_ctx, 'patient_contexts'): + chat_ctx.patient_contexts.clear() + chat_ctx.chat_history.clear() + chat_ctx.patient_id = None + + # Save the cleared context + await self.data_access.chat_context_accessor.write(chat_ctx) + logger.info(f"Saved cleared context for {conversation_id}") + + logger.info(f"Successfully archived and cleared all contexts to {archive_folder}") + return True + + except Exception as e: + logger.error(f"Failed to archive contexts during clear: {e}") + # Still clear the context even if archiving fails + chat_ctx.patient_context = None + if hasattr(chat_ctx, 'patient_contexts'): + chat_ctx.patient_contexts.clear() + chat_ctx.chat_history.clear() + chat_ctx.patient_id = None + + # Save the cleared context + try: + await self.data_access.chat_context_accessor.write(chat_ctx) + logger.info(f"Saved cleared context after archive failure") + except Exception as save_error: + logger.error(f"Failed to save cleared context: {save_error}") + + return True + + return 
False + async def on_message_activity(self, turn_context: TurnContext) -> None: conversation_id = turn_context.activity.conversation.id chat_context_accessor = self.data_access.chat_context_accessor - chat_artifact_accessor = self.data_access.chat_artifact_accessor - - chat_ctx = await chat_context_accessor.read(conversation_id) + chat_artifact_accessor = self.data_access.chat_artifact_accessor # Main branch addition # Extract raw user text (without bot mention) once raw_user_text = turn_context.remove_recipient_mention(turn_context.activity).strip() - # Full conversation clear (existing behavior) - if raw_user_text.endswith("clear"): - chat_ctx.chat_history.add_user_message(raw_user_text) - await chat_context_accessor.archive(chat_ctx) + # STEP 1: Load session context first + try: + chat_ctx = await chat_context_accessor.read(conversation_id, None) + if not chat_ctx: + chat_ctx = ChatContext(conversation_id) + logger.info(f"Created new session context for: {conversation_id}") + else: + logger.info(f"Loaded existing session context for: {conversation_id}") + except Exception as e: + logger.error(f"Failed to load session context: {e}") + chat_ctx = ChatContext(conversation_id) + + # STEP 1.5: Handle clear commands (main branch logic enhanced with patient context) + if await self._handle_clear_command(raw_user_text, chat_ctx, conversation_id): + # Also archive chat artifacts (main branch functionality) await chat_artifact_accessor.archive(conversation_id) await turn_context.send_activity("Conversation cleared!") return - # Decide & apply patient context BEFORE building group chat - logger.info(f"Processing patient context for conversation: {conversation_id}") - + # STEP 2: Patient context decision and application decision, timing = await self.patient_context_service.decide_and_apply(raw_user_text, chat_ctx) logger.info(f"Patient context decision: {decision} | Patient: {chat_ctx.patient_id} | Timing: {timing}") + # STEP 3: Handle special decision outcomes + if decision == "CLEAR": + # This should now be handled by _handle_clear_command above, but keep as fallback + await chat_artifact_accessor.archive(conversation_id) + await turn_context.send_activity("All contexts have been archived and cleared. How can I assist you today?") + return + elif decision == "NEEDS_PATIENT_ID": + await turn_context.send_activity( + "I need a patient ID to proceed. Please provide the patient ID in the format 'patient_X' " + "(e.g., '@Orchestrator start tumor board review for patient_4')." + ) + return + elif decision == "RESTORED_FROM_STORAGE": + logger.info(f"Restored patient context from storage: {chat_ctx.patient_id}") + + # NEW: If active patient exists, load ONLY that patient's isolated context file + if chat_ctx.patient_id: + try: + # Load the patient-specific file (isolated history) + isolated_ctx = await chat_context_accessor.read(conversation_id, chat_ctx.patient_id) + if isolated_ctx and isolated_ctx.chat_history.messages: + # Replace with isolated chat history + chat_ctx.chat_history = isolated_ctx.chat_history + logger.info( + f"Loaded isolated history for {chat_ctx.patient_id} ({len(isolated_ctx.chat_history.messages)} messages)") + else: + logger.info(f"No existing history for {chat_ctx.patient_id}, starting fresh") + except Exception as e: + logger.debug(f"Could not load isolated context for {chat_ctx.patient_id}: {e}") + + # STEP 4: Continue with normal group chat processing agents = self.all_agents if len(chat_ctx.chat_history.messages) == 0: + # new conversation. 
Let's see which agents are available. async def is_part_of_conversation(agent): context = await self.get_bot_context(turn_context.activity.conversation.id, agent["name"], turn_context) typing_activity = Activity( @@ -137,24 +251,28 @@ async def is_part_of_conversation(agent): await context.send_activity(typing_activity) return True except Exception as e: - logger.debug(f"Failed to send typing activity to {agent['name']}: {e}") + logger.info(f"Failed to send typing activity to {agent['name']}: {e}") + # This happens if the agent is not part of the group chat. + # Remove the agent from the list of available agents return False part_of_conversation = await asyncio.gather(*(is_part_of_conversation(agent) for agent in self.all_agents)) - agents = [agent for agent, include in zip(self.all_agents, part_of_conversation) if include] + agents = [agent for agent, should_include in zip(self.all_agents, part_of_conversation) if should_include] (chat, chat_ctx) = create_group_chat(self.app_context, chat_ctx, participants=agents) - # Add user message after context decision (no extra tagging here) - user_with_ctx = self._append_pc_ctx(f"{self.name}: {raw_user_text}", chat_ctx) - chat_ctx.chat_history.add_user_message(user_with_ctx) + # Add user message with patient context + user_message_with_context = self._append_pc_ctx(f"{self.name}: {raw_user_text}", chat_ctx) + chat_ctx.chat_history.add_user_message(user_message_with_context) chat.is_complete = False await self.process_chat(chat, chat_ctx, turn_context) + # Save chat context try: await chat_context_accessor.write(chat_ctx) - except: + logger.info(f"Saved context for conversation: {conversation_id} | Patient: {chat_ctx.patient_id}") + except Exception as e: logger.exception("Failed to save chat context.") async def on_error(self, context: TurnContext, error: Exception): @@ -185,17 +303,20 @@ async def process_chat( if response.content.strip() == "": continue - # msgText = self._append_links_to_msg(response.content, chat_ctx) + # Add patient context to response + response_with_context = self._append_pc_ctx(response.content, chat_ctx) - # Add this code right before the existing `response.content = self._append_pc_ctx(response.content, chat_ctx)` line: - # Record active agent in PATIENT_CONTEXT_JSON - # try: - # self._set_system_pc_ctx_agent(chat_ctx, "active", response.name) - # except Exception as e: - # logger.info(f"Failed to set active agent in PC_CTX: {e}") + # Update response properly with ChatMessageContent v2 format + if hasattr(response, 'items') and response.items: + response.items[0].text = response_with_context + else: + # If no items structure, recreate with proper format + response = ChatMessageContent( + role=response.role, + items=[TextContent(text=response_with_context)], + name=getattr(response, 'name', None) + ) - # Attach current patient context snapshot to assistant output+ - response.content = self._append_pc_ctx(response.content, chat_ctx) msgText = self._append_links_to_msg(response.content, chat_ctx) msgText = await self.generate_sas_for_blob_urls(msgText, chat_ctx) @@ -213,7 +334,8 @@ async def process_chat( def _append_links_to_msg(self, msgText: str, chat_ctx: ChatContext) -> str: # Add patient data links to response try: - image_urls = chat_ctx.display_image_urls + # Handle both main branch format (direct access) and patient context format (getattr) + image_urls = getattr(chat_ctx, 'display_image_urls', []) clinical_trial_urls = chat_ctx.display_clinical_trials # Display loaded images @@ -232,7 +354,9 @@ def 
_append_links_to_msg(self, msgText: str, chat_ctx: ChatContext) -> str: return msgText finally: - chat_ctx.display_image_urls = [] + # Handle both formats for cleanup + if hasattr(chat_ctx, 'display_image_urls'): + chat_ctx.display_image_urls = [] chat_ctx.display_clinical_trials = [] async def generate_sas_for_blob_urls(self, msgText: str, chat_ctx: ChatContext) -> str: @@ -302,18 +426,6 @@ def _append_pc_ctx(self, base: str, chat_ctx: ChatContext) -> str: ids_str = ", ".join(f"`{p}`{' (active)' if p == active_id else ''}" for p in obj["all_patient_ids"]) lines.append(f"- **Session Patients:** {ids_str}") - summary_raw = obj.get("chat_summary", "") - if summary_raw and summary_raw.strip(): - # Check if it's the default "no specific information" message - if "No specific information was discussed" in summary_raw: - lines.append(f"- **Summary:** *Building patient context...*") - else: - # Clean up summary for display - summary = summary_raw.replace('\n', ' ').strip() - lines.append(f"- **Summary:** *{summary}*") - else: - lines.append(f"- **Summary:** *Building patient context...*") - if not obj.get("patient_id"): lines.append("- *No active patient.*") diff --git a/src/data_models/chat_context.py b/src/data_models/chat_context.py index 9a23e02..ea06c17 100644 --- a/src/data_models/chat_context.py +++ b/src/data_models/chat_context.py @@ -3,7 +3,7 @@ import os from dataclasses import dataclass, field -from typing import Dict, Any +from typing import Dict, Any, Optional from semantic_kernel.contents.chat_history import ChatHistory @@ -11,10 +11,10 @@ @dataclass class PatientContext: """ - Minimal per-patient context (future expansion point: facts, summary, provenance). + Minimal per-patient context for patient isolation. """ patient_id: str - facts: Dict[str, Any] = field(default_factory=dict) # placeholder for future enrichment + facts: Dict[str, Any] = field(default_factory=dict) class ChatContext: @@ -22,17 +22,16 @@ def __init__(self, conversation_id: str): self.conversation_id = conversation_id self.chat_history = ChatHistory() - # Active patient (single pointer) + # Patient context fields self.patient_id = None - - # All encountered patient contexts (allows switching back without re-extraction) self.patient_contexts: Dict[str, PatientContext] = {} + self.workflow_summary: Optional[str] = None - # Existing fields + # Legacy fields (preserved for compatibility) self.patient_data = [] self.display_blob_urls = [] self.display_image_urls = [] self.display_clinical_trials = [] self.output_data = [] + self.healthcare_agents = {} self.root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - self.healthcare_agents = {} \ No newline at end of file diff --git a/src/data_models/chat_context_accessor.py b/src/data_models/chat_context_accessor.py index ef76159..ae26211 100644 --- a/src/data_models/chat_context_accessor.py +++ b/src/data_models/chat_context_accessor.py @@ -8,123 +8,278 @@ from azure.core.exceptions import ResourceNotFoundError from azure.storage.blob.aio import BlobServiceClient -from semantic_kernel.contents.chat_history import ChatHistory +from semantic_kernel.contents import ChatMessageContent, AuthorRole, TextContent from data_models.chat_context import ChatContext, PatientContext logger = logging.getLogger(__name__) +# Current schema version for migration support +CURRENT_SCHEMA_VERSION = 2 + class ChatContextAccessor: """ - Accessor for reading and writing chat context to Azure Blob Storage. 
+ Hybrid context accessor - supports both session-only and patient-specific contexts. ChatContext lifecycle: + **Session Context (no patient isolation):** 1. User sends a message to Agent. - 2. Agent load ChatContext from blob storage using conversation_id. - - If found, it reads the existing ChatContext from blob storage. - - Otherwise, it creates a new ChatContext with the given conversation_id. - 2. Agent sends responses to User. - 3. Save ChatContext to blob storage as `chat_context.json`. - 4. Repeat steps 1-3 for the entire conversation. - 5. User sends a "clear" message. - 6. Archive ChatHistory to the blob storage. - - Append the "clear" message to chat history. - - Save ChatContext to `{datetime}_chat_context.json`. - - Delete `chat_context.json` + 2. Agent loads ChatContext from blob storage using conversation_id only. + - File: `{conversation_id}/session_context.json` + - If found, reads existing ChatContext; otherwise creates new one. + 3. Agent processes message and sends responses to User. + 4. Save ChatContext to `session_context.json`. + 5. Repeat steps 1-4 for the entire conversation. + 6. User sends a "clear" message. + 7. Archive ChatContext: + - Save to `{timestamp}_session_archived.json` + - Delete original `session_context.json` + + **Patient Context (with patient isolation):** + 1. User mentions a patient ID or system detects patient context. + 2. Agent loads ChatContext using conversation_id AND patient_id. + - File: `{conversation_id}/patient_{patient_id}_context.json` + - If found, reads existing patient-specific context; otherwise creates new one. + 3. Agent processes message with patient context isolation. + 4. Save ChatContext to `patient_{patient_id}_context.json`. + 5. Repeat steps 1-4 for patient-specific conversation. + 6. When switching patients or clearing: + - Archive current patient context to `{timestamp}_patient_{patient_id}_archived.json` + - Delete original patient context file. 
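(Editor's illustrative sketch, not part of the patch: with the naming scheme described
above, one conversation in the "chat-sessions" container would accumulate blobs along
these lines; the conversation id and patient id are made-up examples, the registry file
comes from the PatientContextRegistryAccessor added later in this patch, and note that
get_blob_path prepends "patient_" to ids that already use the patient_X format.)

    conv_001/session_context.json
    conv_001/patient_patient_4_context.json
    conv_001/patient_context_registry.json
    conv_001/20250918T153900_session_archived.json
    conv_001/20250918T153900_patient_patient_4_archived.json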
+ + Key functionality: + - Patient isolation: separate files for each patient (patient_{id}_context.json) + - Session context: shared conversation state (session_context.json) + - Automatic patient context detection and switching + - Chat history isolation per patient + - Migration support for legacy files + - Backward compatibility with main branch structure """ - def __init__(self, blob_service_client: BlobServiceClient, container_name: str = "chat-sessions",): + def __init__( + self, + blob_service_client: BlobServiceClient, + container_name: str = "chat-sessions", + cognitive_services_token_provider=None, + ): self.blob_service_client = blob_service_client self.container_client = blob_service_client.get_container_client(container_name) + self.cognitive_services_token_provider = cognitive_services_token_provider - def get_blob_path(self, conversation_id: str) -> str: - return f"{conversation_id}/chat_context.json" + def get_blob_path(self, conversation_id: str, patient_id: str = None) -> str: + """Get blob path for patient-specific or session context.""" + if patient_id: + return f"{conversation_id}/patient_{patient_id}_context.json" + return f"{conversation_id}/session_context.json" - async def read(self, conversation_id: str) -> ChatContext: - """Read the chat context for a given conversation ID.""" + async def read(self, conversation_id: str, patient_id: str = None) -> ChatContext: + """Read chat context for conversation/patient.""" start = time() try: - blob_path = self.get_blob_path(conversation_id) + blob_path = self.get_blob_path(conversation_id, patient_id) blob_client = self.container_client.get_blob_client(blob_path) blob = await blob_client.download_blob() blob_str = await blob.readall() decoded_str = blob_str.decode("utf-8") - return self.deserialize(decoded_str) - except: - return ChatContext(conversation_id) + context = self.deserialize(decoded_str) + + # Ensure patient context is properly set up + if patient_id: + context.patient_id = patient_id + if patient_id not in context.patient_contexts: + context.patient_contexts[patient_id] = PatientContext(patient_id=patient_id) + else: + context.patient_id = None + + return context + + except ResourceNotFoundError: + logger.info(f"Creating new context for {conversation_id}/{patient_id or 'session'}") + context = ChatContext(conversation_id) + if patient_id: + context.patient_id = patient_id + context.patient_contexts[patient_id] = PatientContext(patient_id=patient_id) + return context + except Exception as e: + logger.warning(f"Failed to read context for {conversation_id}/{patient_id or 'session'}: {e}") + context = ChatContext(conversation_id) + if patient_id: + context.patient_id = patient_id + context.patient_contexts[patient_id] = PatientContext(patient_id=patient_id) + return context finally: - logger.info(f"Read ChatContext for {conversation_id}. Duration: {time() - start}s") + logger.info( + f"Read ChatContext for {conversation_id}/{patient_id or 'session'}. 
Duration: {time() - start}s" + ) async def write(self, chat_ctx: ChatContext) -> None: - """Write the chat context for a given conversation ID.""" + """Write chat context to appropriate file.""" start = time() try: - blob_path = self.get_blob_path(chat_ctx.conversation_id) + blob_path = self.get_blob_path(chat_ctx.conversation_id, chat_ctx.patient_id) blob_client = self.container_client.get_blob_client(blob_path) blob_str = self.serialize(chat_ctx) await blob_client.upload_blob(blob_str, overwrite=True) finally: - logger.info(f"Wrote ChatContext for {chat_ctx.conversation_id}. Duration: {time() - start}s") + logger.info( + f"Wrote ChatContext for {chat_ctx.conversation_id}/{chat_ctx.patient_id or 'session'}. Duration: {time() - start}s" + ) async def archive(self, chat_ctx: ChatContext) -> None: - """Archive the chat context for a given conversation ID by renaming the blob.""" + """Archive chat context with timestamp.""" start = time() try: - # Archive the chat context timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S") - archive_blob_path = f"{chat_ctx.conversation_id}/{timestamp}_chat_context.json" + if chat_ctx.patient_id: + archive_blob_path = f"{chat_ctx.conversation_id}/{timestamp}_patient_{chat_ctx.patient_id}_archived.json" + else: + archive_blob_path = f"{chat_ctx.conversation_id}/{timestamp}_session_archived.json" + archive_blob_str = self.serialize(chat_ctx) await self.container_client.upload_blob(archive_blob_path, archive_blob_str, overwrite=True) - # Delete the original chat context - blob_path = self.get_blob_path(chat_ctx.conversation_id) + blob_path = self.get_blob_path(chat_ctx.conversation_id, chat_ctx.patient_id) await self.container_client.delete_blob(blob_path) except ResourceNotFoundError: - # If the blob is not found, it means it has already been deleted or never existed. - pass + pass # File already deleted or never existed finally: - logger.info(f"Archive ran for {chat_ctx.conversation_id}. Duration: {time() - start}s") + logger.info( + f"Archived ChatContext for {chat_ctx.conversation_id}/{chat_ctx.patient_id or 'session'}. Duration: {time() - start}s" + ) + + async def archive_to_folder(self, conversation_id: str, patient_id: str, archive_folder: str) -> None: + """Archive context to specific folder structure.""" + start = time() + try: + current_blob_path = self.get_blob_path(conversation_id, patient_id) + try: + blob_client = self.container_client.get_blob_client(current_blob_path) + blob = await blob_client.download_blob() + blob_str = await blob.readall() + + timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S") + if patient_id: + archive_blob_path = "%s/%s/%s_patient_%s_archived.json" % ( + archive_folder, conversation_id, timestamp, patient_id) + else: + archive_blob_path = "%s/%s/%s_session_archived.json" % (archive_folder, conversation_id, timestamp) + + await self.container_client.upload_blob(archive_blob_path, blob_str, overwrite=True) + await blob_client.delete_blob() + + logger.info("Archived context to %s", archive_blob_path) + except ResourceNotFoundError: + logger.warning("No context found to archive for %s/%s", conversation_id, patient_id or 'session') + except Exception as e: + logger.error("Failed to archive context for %s/%s: %s", conversation_id, patient_id or 'session', e) + finally: + logger.info("Archive operation for %s/%s completed. 
Duration: %ss", + conversation_id, patient_id or 'session', time() - start) @staticmethod def serialize(chat_ctx: ChatContext) -> str: - """Serialize the chat context to a string.""" - return json.dumps( - { - "conversation_id": chat_ctx.conversation_id, - "chat_history": chat_ctx.chat_history.serialize(), - "patient_id": chat_ctx.patient_id, - "patient_data": chat_ctx.patient_data, - "patient_contexts": { - pid: { - "patient_id": pctx.patient_id, - "facts": pctx.facts - } for pid, pctx in chat_ctx.patient_contexts.items() - }, - "display_blob_urls": chat_ctx.display_blob_urls, - "display_clinical_trials": chat_ctx.display_clinical_trials, - "output_data": chat_ctx.output_data, - "healthcare_agents": chat_ctx.healthcare_agents, - }, - indent=2, - ) + """Serialize chat context to JSON.""" + # Extract chat history with proper schema + chat_messages = [] + for msg in chat_ctx.chat_history.messages: + if hasattr(msg, 'items') and msg.items: + content = msg.items[0].text if hasattr(msg.items[0], 'text') else str(msg.items[0]) + else: + content = str(msg.content) if hasattr(msg, 'content') else "" + + chat_messages.append({ + "role": msg.role.value, + "content": content, + "name": getattr(msg, 'name', None) + }) + + # Build patient contexts + patient_contexts = {} + for pid, pctx in chat_ctx.patient_contexts.items(): + patient_contexts[pid] = { + "patient_id": pctx.patient_id, + "facts": pctx.facts, + } + + data = { + "schema_version": CURRENT_SCHEMA_VERSION, + "conversation_id": chat_ctx.conversation_id, + "patient_id": chat_ctx.patient_id, + "patient_contexts": patient_contexts, + "workflow_summary": getattr(chat_ctx, 'workflow_summary', None), + "chat_history": chat_messages, + "patient_data": chat_ctx.patient_data, + "display_blob_urls": chat_ctx.display_blob_urls, + "display_image_urls": getattr(chat_ctx, 'display_image_urls', []), + "display_clinical_trials": chat_ctx.display_clinical_trials, + "output_data": chat_ctx.output_data, + "healthcare_agents": chat_ctx.healthcare_agents, + } + return json.dumps(data, indent=2, default=str) @staticmethod def deserialize(data_str: str) -> ChatContext: - """Deserialize the chat context from a string.""" + """Deserialize chat context from JSON with migration support.""" data = json.loads(data_str) - ctx = ChatContext(data["conversation_id"]) - ctx.chat_history = ChatHistory.restore_chat_history(data["chat_history"]) - ctx.patient_id = data["patient_id"] - for pid, stored in (data.get("patient_contexts") or {}).items(): - ctx.patient_contexts[pid] = PatientContext( - patient_id=stored.get("patient_id", pid), - facts=stored.get("facts", {}) or {} + schema_version = data.get("schema_version", 1) # Default to v1 for legacy files + + context = ChatContext(data["conversation_id"]) + context.patient_id = data.get("patient_id") + + # Restore patient contexts + for pid, pc_data in data.get("patient_contexts", {}).items(): + context.patient_contexts[pid] = PatientContext( + patient_id=pc_data["patient_id"], + facts=pc_data.get("facts", {}), + ) + + context.workflow_summary = data.get("workflow_summary") + + # Process chat history with migration support + for msg_data in data.get("chat_history", []): + # Skip messages with insufficient data + if "role" not in msg_data: + logger.warning(f"Skipping message with no role: {msg_data.keys()}") + continue + + role = AuthorRole(msg_data["role"]) + name = msg_data.get("name") + + # Handle both legacy (v1) and new (v2) formats + if "content" in msg_data: + content_str = msg_data["content"] + elif "items" in msg_data and 
msg_data["items"]: + # Legacy v1 format with items array + content_str = msg_data["items"][0].get("text", "") + else: + logger.warning(f"Skipping message with no content: {msg_data}") + continue + + # Skip tool messages with empty content (can't reconstruct) + if role == AuthorRole.TOOL and not content_str: + logger.warning(f"Skipping empty tool message") + continue + + msg = ChatMessageContent( + role=role, + items=[TextContent(text=str(content_str))], ) - ctx.patient_data = data["patient_data"] - ctx.display_blob_urls = data["display_blob_urls"] - ctx.display_clinical_trials = data["display_clinical_trials"] - ctx.output_data = data["output_data"] - ctx.healthcare_agents = data.get("healthcare_agents", {}) - return ctx + if name: + msg.name = name + context.chat_history.messages.append(msg) + + # Restore other fields + context.patient_data = data.get("patient_data", []) + context.display_blob_urls = data.get("display_blob_urls", []) + context.display_image_urls = data.get("display_image_urls", []) + context.display_clinical_trials = data.get("display_clinical_trials", []) + context.output_data = data.get("output_data", []) + context.healthcare_agents = data.get("healthcare_agents", {}) + + # Log migration info + if schema_version < CURRENT_SCHEMA_VERSION: + logger.info(f"Migrated context from schema v{schema_version} to v{CURRENT_SCHEMA_VERSION}") + + return context diff --git a/src/data_models/data_access.py b/src/data_models/data_access.py index cabcffc..e5bfa14 100644 --- a/src/data_models/data_access.py +++ b/src/data_models/data_access.py @@ -16,9 +16,12 @@ from data_models.fabric.fabric_clinical_note_accessor import FabricClinicalNoteAccessor from data_models.fhir.fhir_clinical_note_accessor import FhirClinicalNoteAccessor from data_models.image_accessor import ImageAccessor +from data_models.patient_context_accessor import PatientContextRegistryAccessor +from azure.identity.aio import get_bearer_token_provider logger = logging.getLogger(__name__) + class UserDelegationKeyDelegate: def __init__(self, blob_service_client: BlobServiceClient): self.blob_service_client = blob_service_client @@ -81,19 +84,20 @@ async def get_blob_sas_url( @dataclass(frozen=True) class DataAccess: - """ Data access layer for the application. """ + """Data access layer for the application.""" blob_sas_delegate: BlobSasDelegate chat_artifact_accessor: ChatArtifactAccessor chat_context_accessor: ChatContextAccessor clinical_note_accessor: ClinicalNoteAccessor image_accessor: ImageAccessor + patient_context_registry_accessor: PatientContextRegistryAccessor def create_data_access( blob_service_client: BlobServiceClient, credential: AsyncTokenCredential ) -> DataAccess: - """ Factory function to create a DataAccess object. 
""" + """Factory function to create a DataAccess object.""" # Create clinical note accessor based on the source clinical_notes_source = os.getenv("CLINICAL_NOTES_SOURCE") if clinical_notes_source == "fhir": @@ -113,7 +117,12 @@ def create_data_access( return DataAccess( blob_sas_delegate=BlobSasDelegate(blob_service_client), chat_artifact_accessor=ChatArtifactAccessor(blob_service_client), - chat_context_accessor=ChatContextAccessor(blob_service_client), + chat_context_accessor=ChatContextAccessor( + blob_service_client, + cognitive_services_token_provider=get_bearer_token_provider( + credential, "https://cognitiveservices.azure.com/.default") + ), clinical_note_accessor=clinical_note_accessor, image_accessor=ImageAccessor(blob_service_client), + patient_context_registry_accessor=PatientContextRegistryAccessor(blob_service_client), ) diff --git a/src/data_models/patient_context_accessor.py b/src/data_models/patient_context_accessor.py new file mode 100644 index 0000000..dcc4a1d --- /dev/null +++ b/src/data_models/patient_context_accessor.py @@ -0,0 +1,117 @@ +import json +import logging +from datetime import datetime, timezone +from time import time +from typing import Dict, Optional, Tuple + +from azure.core.exceptions import ResourceNotFoundError +from azure.storage.blob.aio import BlobServiceClient + +logger = logging.getLogger(__name__) + + +class PatientContextRegistryAccessor: + """ + Manages patient context registry JSON files in blob storage. + Tracks which patients have been encountered in each conversation session. + """ + + def __init__(self, blob_service_client: BlobServiceClient, container_name: str = "chat-sessions"): + self.blob_service_client = blob_service_client + self.container_client = blob_service_client.get_container_client(container_name) + + def get_registry_blob_path(self, conversation_id: str) -> str: + """Get blob path for patient context registry file.""" + return f"{conversation_id}/patient_context_registry.json" + + async def _write_json_to_blob(self, blob_path: str, data: dict) -> None: + """Write JSON data to blob storage.""" + json_str = json.dumps(data, indent=2) + blob_client = self.container_client.get_blob_client(blob_path) + await blob_client.upload_blob(json_str, overwrite=True) + + async def read_registry(self, conversation_id: str) -> Tuple[Dict[str, Dict], Optional[str]]: + """Read patient context registry. Returns (patient_registry, active_patient_id).""" + start = time() + try: + blob_path = self.get_registry_blob_path(conversation_id) + blob_client = self.container_client.get_blob_client(blob_path) + blob = await blob_client.download_blob() + blob_str = await blob.readall() + decoded_str = blob_str.decode("utf-8") + registry_data = json.loads(decoded_str) + + logger.info(f"Read patient context registry for {conversation_id}. 
Duration: {time() - start}s") + return registry_data.get("patient_registry", {}), registry_data.get("active_patient_id") + + except ResourceNotFoundError: + logger.info(f"No existing patient context registry for {conversation_id}") + return {}, None + except Exception as e: + logger.warning(f"Failed to read patient context registry for {conversation_id}: {e}") + return {}, None + + async def write_registry(self, conversation_id: str, patient_registry: Dict[str, Dict], active_patient_id: str = None): + """Write patient context registry to blob storage.""" + try: + registry_data = { + "conversation_id": conversation_id, + "active_patient_id": active_patient_id, + "patient_registry": patient_registry, + "last_updated": datetime.utcnow().isoformat() + } + + blob_path = self.get_registry_blob_path(conversation_id) + await self._write_json_to_blob(blob_path, registry_data) + logger.info(f"Wrote patient registry for conversation {conversation_id}") + + except Exception as e: + logger.error(f"Failed to write patient registry: {e}") + raise + + async def update_patient_registry(self, conversation_id: str, patient_id: str, registry_entry: Dict, active_patient_id: str = None) -> None: + """Update registry entry for a specific patient in the conversation.""" + current_registry, current_active = await self.read_registry(conversation_id) + current_registry[patient_id] = { + **registry_entry, + "last_updated": datetime.now(timezone.utc).isoformat() + } + # Use provided active_patient_id or keep current + final_active = active_patient_id if active_patient_id is not None else current_active + await self.write_registry(conversation_id, current_registry, final_active) + + async def archive_registry(self, conversation_id: str) -> None: + """Archive patient context registry before clearing.""" + start = time() + try: + # Read current registry + current_registry, active_patient_id = await self.read_registry(conversation_id) + if not current_registry: + logger.info("No patient context registry to archive for %s", conversation_id) + return + + # Create archive + timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S") + archive_blob_path = "%s/%s_patient_context_registry_archived.json" % (conversation_id, timestamp) + + archive_data = { + "conversation_id": conversation_id, + "archived_at": datetime.now(timezone.utc).isoformat(), + "active_patient_id": active_patient_id, + "patient_registry": current_registry + } + + await self._write_json_to_blob(archive_blob_path, archive_data) + + # Clear current registry by deleting the blob + try: + blob_path = self.get_registry_blob_path(conversation_id) + await self.container_client.delete_blob(blob_path) + logger.info("Cleared patient context registry for %s", conversation_id) + except ResourceNotFoundError: + logger.info("No patient context registry to clear for %s", conversation_id) + + logger.info("Archived patient context registry for %s. Duration: %ss", conversation_id, time() - start) + except Exception as e: + logger.error("Failed to archive patient context registry for %s: %s", conversation_id, e) + raise diff --git a/src/data_models/patient_context_models.py b/src/data_models/patient_context_models.py new file mode 100644 index 0000000..36631cf --- /dev/null +++ b/src/data_models/patient_context_models.py @@ -0,0 +1,137 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. 
+ +from typing import Optional, List, Literal +from semantic_kernel.kernel_pydantic import KernelBaseModel +from pydantic import BaseModel, Field + +# Update this to match your current analyzer actions +AnalyzerAction = Literal["NONE", "CLEAR", "ACTIVATE_NEW", "SWITCH_EXISTING", "UNCHANGED"] + + +class TimingInfo(BaseModel): + """Timing information for patient context operations.""" + analyzer: float = Field(0.0, description="Time spent in analyzer (seconds)") + storage_fallback: float = Field(0.0, description="Time spent in storage fallback (seconds)") + service: float = Field(0.0, description="Total service processing time (seconds)") + + +class PatientContextDecision(KernelBaseModel): + """ + Structured output for patient context analysis decisions. + Uses KernelBaseModel for proper JSON schema generation with Semantic Kernel. + """ + action: AnalyzerAction = Field( + ..., + description="The action to take: NONE, CLEAR, ACTIVATE_NEW, SWITCH_EXISTING, or UNCHANGED" + ) + patient_id: Optional[str] = Field( + None, + description="The patient ID if the action involves a specific patient (format: patient_X)" + ) + reasoning: str = Field( + ..., + description="Brief explanation of why this decision was made (max 50 words)" + ) + + +class WorkflowStep(BaseModel): + """A single step in the workflow plan.""" + agent: str = Field(..., description="Name of the agent responsible for this step") + task: str = Field(..., description="Description of the task to be performed") + status: str = Field("pending", description="Status: pending, in_progress, completed") + + +class WorkflowSummary(KernelBaseModel): + """ + Structured workflow summary for tracking conversation progress. + Uses KernelBaseModel for proper JSON schema generation with Semantic Kernel. + """ + patient_id: Optional[str] = Field(None, description="The patient ID this workflow relates to (if applicable)") + objective: str = Field(..., description="The main objective of this workflow") + steps: List[WorkflowStep] = Field(..., description="List of workflow steps") + current_step: int = Field(0, description="Index of the current step being executed") + reasoning: str = Field(..., description="Summary of the workflow reasoning") + + +class PatientContextSystemMessage(KernelBaseModel): + """System message payload for patient context.""" + conversation_id: str = Field(..., description="Conversation identifier") + patient_id: str = Field(..., description="Active patient identifier") + all_patient_ids: List[str] = Field(..., description="All known patient IDs in this session") + timing_sec: TimingInfo = Field(..., description="Timing information for this operation") + + +class PatientRegistryEntry(BaseModel): + """Registry entry for patient tracking.""" + patient_id: str = Field(..., description="Patient identifier") + created_at: str = Field(..., description="ISO timestamp when patient was created") + last_accessed: str = Field(..., description="ISO timestamp when patient was last accessed") + message_count: int = Field(0, description="Number of messages for this patient") +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. 
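These models subclass KernelBaseModel so they can be handed to Semantic Kernel as a response_format and validated back with Pydantic. A minimal sketch of that round trip for PatientContextDecision follows, assuming an AzureChatCompletion service configured as in the kernels created later in this patch; the function name and prompt text are illustrative only.

    # Sketch only: obtain a PatientContextDecision via structured output.
    from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.azure_chat_prompt_execution_settings import (
        AzureChatPromptExecutionSettings,
    )
    from semantic_kernel.connectors.ai.open_ai.services.azure_chat_completion import AzureChatCompletion
    from semantic_kernel.contents import ChatHistory

    from data_models.patient_context_models import PatientContextDecision


    async def classify(user_text: str, service: AzureChatCompletion) -> PatientContextDecision:
        history = ChatHistory()
        history.add_system_message("Decide the patient-context action for the user message.")
        history.add_user_message(user_text)

        settings = AzureChatPromptExecutionSettings(
            temperature=0.0,
            response_format=PatientContextDecision,  # JSON schema generated from the model above
        )
        results = await service.get_chat_message_contents(chat_history=history, settings=settings)
        # The model replies with JSON matching the schema; validate it back into the Pydantic model.
        return PatientContextDecision.model_validate_json(str(results[0].content))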
diff --git a/src/evaluation/chat_simulator.py b/src/evaluation/chat_simulator.py index 59fc60f..829772c 100644 --- a/src/evaluation/chat_simulator.py +++ b/src/evaluation/chat_simulator.py @@ -51,11 +51,6 @@ def setup(self, patient_id: str, initial_query: str, followup_questions: list[st self.followup_asked = False async def generate_user_message(self, chat_history: ChatHistory) -> str: - if not self.followup_asked and self.followup_questions: - self.followup_asked = True - if self.followup_questions: - next_question = self.followup_questions.pop(0) - return f"Orchestrator: {next_question}" if not
self.followup_asked and self.followup_questions: self.followup_asked = True if self.followup_questions: @@ -72,7 +67,7 @@ def __init__(self): self.chat_history = ChatHistory() self.chat_completion_service = AzureChatCompletion( deployment_name=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"], - api_version="2024-12-01-preview", + api_version="2025-04-01-preview", endpoint=os.environ["AZURE_OPENAI_ENDPOINT"], ) @@ -142,7 +137,7 @@ def _extract_new_messages(self, chat_history: ChatHistory) -> list[ChatMessageCo ChatHistory chat_history: The chat history containing messages from the user and agents. Returns: - str: String representation of the latest messages. + list[ChatMessageContent]: List of new messages since the last user message. """ last_user_message_idx = -1 for i, message in enumerate(chat_history.messages): @@ -156,7 +151,7 @@ def _transform_chat_history(self, messages: list[ChatMessageContent]) -> str: Transforms the chat history into a format suitable for the LLM simulation. Args: - ChatHistory chat_history: The chat history containing messages from the user and agents. + messages: List of chat messages to transform. Returns: str: String representation of the chat history. @@ -173,6 +168,9 @@ class ChatSimulator: """ Class to simulate a chat with a group of agents. + Handles patient context isolation during simulation by ensuring proper + patient context setup and cleanup between simulation runs. + Attributes: simulated_user: The simulated user to interact with the agents. group_chat_kwargs: Additional arguments for the group chat. @@ -183,7 +181,6 @@ class ChatSimulator: Can be loaded from a CSV file with `load_initial_queries`. followup_questions: Optional list of follow-up questions for the simulation. Can be loaded from a CSV file with `load_initial_queries`. - group_followups: Whether to group follow-up questions by initial query. trial_count: Number of trials for each initial query. max_turns: Maximum number of turns in the conversation. output_folder_path: Path to the folder where chat history will be saved. @@ -242,7 +239,18 @@ def setup_group_chat(self, chat_id: str, **kwargs) -> None: kwargs: Additional arguments to be passed to the group chat creation function. """ if "chat_ctx" not in kwargs: - kwargs["chat_ctx"] = ChatContext(chat_id) + # Create chat context with patient context support + chat_context = ChatContext(chat_id) + # Extract patient ID from chat_id if it follows pattern + if "patient_" in chat_id: + parts = chat_id.split("_") + for i, part in enumerate(parts): + if part == "patient" and i + 1 < len(parts): + patient_id = f"patient_{parts[i + 1]}" + chat_context.patient_id = patient_id + logging.debug(f"Set patient context for simulation: {patient_id}") + break + kwargs["chat_ctx"] = chat_context self.group_chat, self.chat_context = create_group_chat(**kwargs) return self @@ -265,6 +273,7 @@ def load_initial_queries( initial_queries_column: Name of the column containing initial queries. followup_column: Name of the column containing follow-up questions. delimiter: Delimiter used in the CSV file (default is comma). + group_followups: Whether to group follow-up questions by initial query. Returns: self: Returns the instance for method chaining. 
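The patient-id extraction inside setup_group_chat above is easy to misread in diff form; a standalone sketch of the same parsing rule is given below. The helper name and the sample ids are illustrative only.

    def extract_patient_id(chat_id: str) -> str | None:
        """Return 'patient_<n>' if the chat id embeds a patient marker, else None."""
        parts = chat_id.split("_")
        for i, part in enumerate(parts):
            if part == "patient" and i + 1 < len(parts):
                return f"patient_{parts[i + 1]}"
        return None


    assert extract_patient_id("sim_patient_4_0_ab12cd34") == "patient_4"
    assert extract_patient_id("plain-conversation-id") is None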
@@ -313,14 +322,16 @@ async def simulate_chats(self): f"Setting up simulated user with initial query: {initial_query} and followups: {followup_questions}" ) - self.setup_group_chat(checkpoint_key, **self.group_chat_kwargs) + # Create chat ID that includes patient information for context isolation + chat_id = f"sim_{patient_id}_{trial}_{checkpoint_key[:8]}" + self.setup_group_chat(chat_id, **self.group_chat_kwargs) await self.chat(patient_id, initial_query, followup_questions, self.max_turns) self.save(f"chat_context_trial{trial}_{checkpoint_key}.json", save_readable_history=self.save_readable_history) except Exception as e: logging.error( - f"Error during conversation with initial query: {initial_query} and followup: {followup_questions[0]}: {e}") + f"Error during conversation with initial query: {initial_query} and followup: {followup_questions[0] if followup_questions else 'None'}: {e}") if self.raise_errors: raise e else: @@ -357,7 +368,7 @@ async def chat(self, patient_id: str, initial_query: str, followup_questions: li try: new_user_message = await self.simulated_user.generate_user_message(self.group_chat.history) except Exception as e: - print(f"Error generating user message: {e}") + logging.error(f"Error generating user message: {e}") break if self.simulated_user.is_complete: @@ -414,16 +425,18 @@ def save(self, output_filename: str = None, save_readable_history: bool = False) output_filename ) - with open(output_file_path, 'w') as f: + with open(output_file_path, 'w', encoding="utf-8") as f: # Save the chat history to a file f.write(group_chat_context) if save_readable_history: messages = chat_history_to_readable_text(self.group_chat.history) readable_filename = output_file_path.replace(".json", "_readable.txt") - with open(readable_filename, 'w') as f: + with open(readable_filename, 'w', encoding="utf-8") as f: f.write(messages) + logging.info(f"Saved simulation results to {output_file_path}") + return self def _print_message(self, message: ChatMessageContent): @@ -449,6 +462,7 @@ def _process_csv_content( patients_id_column: Name of the column containing patient IDs. initial_queries_column: Name of the column containing initial queries. followup_column: Name of the column containing follow-up questions. + group_followups: Whether to group follow-up questions by initial query. Raises: ValueError: If the specified columns are not found in the CSV file. 
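To make the per-trial isolation in simulate_chats above concrete, here is a small sketch of how such a chat id could be assembled. The digest mirrors _generate_chat_unique_id further below; the patient id, query, and simulated-user class name are placeholders.

    import hashlib

    # Placeholder inputs for illustration only.
    patient_id = "patient_4"
    trial = 0
    initial_query = "start tumor board review"
    followup_questions: list[str] = []
    simulated_user_cls = "SimulatedUser"  # stands in for type(self.simulated_user).__name__

    # Stable digest of the scenario inputs (mirrors _generate_chat_unique_id below).
    checkpoint_key = hashlib.sha256(
        f"{patient_id}{initial_query}{''.join(followup_questions)}{simulated_user_cls}".encode()
    ).hexdigest()

    # Patient-scoped, per-trial chat id (mirrors the assignment in simulate_chats above).
    chat_id = f"sim_{patient_id}_{trial}_{checkpoint_key[:8]}"
    print(chat_id)  # sim_patient_4_0_<first 8 hex chars of the digest>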
@@ -457,7 +471,7 @@ def _process_csv_content( raise ValueError(f"Column '{initial_queries_column}' not found in the CSV file.") if patients_id_column not in reader.fieldnames: - raise ValueError(f"Columns '{patients_id_column}' not found in the CSV file.") + raise ValueError(f"Column '{patients_id_column}' not found in the CSV file.") followup_column_available = followup_column is not None and followup_column in reader.fieldnames @@ -507,10 +521,13 @@ def _load_checkpoint(self) -> set: def _save_checkpoint(self, query: str): """Save a completed query to the checkpoint file.""" - with open(self.checkpoint_file, "w+", encoding="utf-8") as f: + with open(self.checkpoint_file, "a", encoding="utf-8") as f: f.write(f"{query}\n") def _generate_chat_unique_id(self, patient_id: str, initial_query: str, followup_questions: list[str]) -> str: """Generate a unique ID for the chat based on patient ID, initial query, and follow-up questions.""" return hashlib.sha256( f"{patient_id}{initial_query}{"".join(followup_questions)}{type(self.simulated_user).__name__}".encode()).hexdigest() + + +logger = logging.getLogger(__name__) diff --git a/src/group_chat.py b/src/group_chat.py index 0ef1956..2a891b9 100644 --- a/src/group_chat.py +++ b/src/group_chat.py @@ -4,11 +4,12 @@ import importlib import logging import os -from typing import Any, Awaitable, Callable, Tuple +from typing import Any, Awaitable, Callable, Tuple, override from pydantic import BaseModel from semantic_kernel import Kernel from semantic_kernel.agents import AgentGroupChat, ChatCompletionAgent +from semantic_kernel.agents.channels.chat_history_channel import ChatHistoryChannel from semantic_kernel.agents.strategies.selection.kernel_function_selection_strategy import \ KernelFunctionSelectionStrategy from semantic_kernel.agents.strategies.termination.kernel_function_termination_strategy import \ @@ -18,13 +19,18 @@ AzureChatPromptExecutionSettings from semantic_kernel.connectors.ai.open_ai.services.azure_chat_completion import AzureChatCompletion from semantic_kernel.connectors.openapi_plugin import OpenAPIFunctionExecutionParameters +from semantic_kernel.contents.chat_history import ChatHistory +from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.history_reducer.chat_history_truncation_reducer import ChatHistoryTruncationReducer from semantic_kernel.functions.kernel_function_from_prompt import KernelFunctionFromPrompt from semantic_kernel.kernel import Kernel, KernelArguments +from semantic_kernel.contents import AuthorRole, ChatMessageContent +from semantic_kernel.contents import TextContent from data_models.app_context import AppContext from data_models.chat_context import ChatContext from data_models.plugin_configuration import PluginConfiguration +from data_models.patient_context_models import WorkflowSummary from healthcare_agents import HealthcareAgent from healthcare_agents import config as healthcare_agent_config @@ -35,7 +41,35 @@ logger = logging.getLogger(__name__) +class CustomHistoryChannel(ChatHistoryChannel): + @override + async def receive(self, history: list[ChatMessageContent],) -> None: + await super().receive(history) + for message in history[:-1]: + await self.thread.on_new_message(message) + + +class CustomChatCompletionAgent(ChatCompletionAgent): + """Custom ChatCompletionAgent to override the create_channel method.""" + + @override + async def create_channel( + self, chat_history: ChatHistory | None = None, thread_id: str | None = None + ) -> CustomHistoryChannel: 
+ from semantic_kernel.agents.chat_completion.chat_completion_agent import ChatHistoryAgentThread + + CustomHistoryChannel.model_rebuild() + thread = ChatHistoryAgentThread(chat_history=chat_history, thread_id=thread_id) + + if thread.id is None: + await thread.create() + + messages = [message async for message in thread.get_messages()] + return CustomHistoryChannel(messages=messages, thread=thread) + + class ChatRule(BaseModel): + """Structured output model for group chat selection and termination decisions.""" verdict: str reasoning: str @@ -44,27 +78,195 @@ def create_auth_callback(chat_ctx: ChatContext) -> Callable[..., Awaitable[Any]] """ Creates an authentication callback for the plugin configuration. - :param chat_ctx: The chat context to be used in the authentication. - :return: A callable that returns an authentication token. + Args: + chat_ctx: The chat context to be used in the authentication. + + Returns: + A callable that returns an authentication token. """ - # TODO - get key or secret from Azure Key Vault for OpenAPI services. - # Send the conversation ID as a header to the OpenAPI service. return lambda: {'conversation-id': chat_ctx.conversation_id, } +def inject_workflow_summary(chat_ctx: ChatContext) -> None: + """Inject workflow summary if available.""" + if (hasattr(chat_ctx, 'workflow_summary') and + chat_ctx.workflow_summary and + chat_ctx.patient_id): + + # Check if already injected + for msg in chat_ctx.chat_history.messages: + if (msg.role == AuthorRole.SYSTEM and + isinstance(msg.content, str) and + "WORKFLOW_SUMMARY:" in msg.content): + return + + # Inject summary with proper items initialization + summary_message = ChatMessageContent( + role=AuthorRole.SYSTEM, + items=[TextContent(text=f"WORKFLOW_SUMMARY: {chat_ctx.workflow_summary}")] + ) + chat_ctx.chat_history.messages.insert(1, summary_message) + logger.info(f"Injected workflow summary for patient {chat_ctx.patient_id}") + + +async def generate_workflow_summary( + chat_ctx: ChatContext, + kernel: Kernel, + patient_id: str, + objective: str +) -> WorkflowSummary: + """ + Generate structured workflow summary using WorkflowSummary model. + This implements structured output for workflow planning. + + Args: + chat_ctx: The chat context for conversation history + kernel: Semantic kernel instance for LLM interaction + patient_id: The patient identifier + objective: The main workflow objective + + Returns: + WorkflowSummary: Structured workflow with agent assignments and tasks + """ + + # Build context from chat history + recent_messages = chat_ctx.chat_history.messages[-10:] if len( + chat_ctx.chat_history.messages) > 10 else chat_ctx.chat_history.messages + context = "\n".join([f"{msg.role}: {msg.content}" for msg in recent_messages]) + + workflow_prompt = f""" + You are a healthcare workflow coordinator. Analyze the conversation and create a structured workflow summary. + + CONTEXT: + - Patient ID: {patient_id} + - Objective: {objective} + - Recent conversation: {context} + + Create a workflow with specific steps for each agent to follow. Each step should: + 1. Assign a specific agent (PatientHistory, ClinicalGuidelines, MedicalResearch, etc.) + 2. Define a clear task for that agent + 3. Set appropriate status (pending, in_progress, completed) + + Focus on the main healthcare objective and break it into logical agent-specific steps. + Keep reasoning concise and actionable. 
+ """ + + try: + chat_history = ChatHistory() + chat_history.add_system_message(workflow_prompt) + + # Use structured output for workflow planning + execution_settings = AzureChatPromptExecutionSettings( + service_id="default", + max_tokens=500, + temperature=0.2, + response_format=WorkflowSummary, # This generates the JSON schema automatically + ) + + svc = kernel.get_service("default") + results = await svc.get_chat_message_contents( + chat_history=chat_history, + settings=execution_settings, + ) + + if not results or not results[0].content: + logger.warning("No workflow summary generated") + # Fallback workflow + from data_models.patient_context_models import WorkflowStep + return WorkflowSummary( + patient_id=patient_id, + objective=objective, + steps=[ + WorkflowStep(agent="Orchestrator", task="Coordinate healthcare workflow", status="pending") + ], + current_step=0, + reasoning="Fallback workflow due to generation failure" + ) + + content = results[0].content + + # Parse structured response + if isinstance(content, str): + try: + workflow = WorkflowSummary.model_validate_json(content) + except Exception as e: + logger.error(f"Failed to parse workflow summary: {e}") + # Return fallback + from data_models.patient_context_models import WorkflowStep + return WorkflowSummary( + patient_id=patient_id, + objective=objective, + steps=[WorkflowStep(agent="Orchestrator", task="Coordinate workflow", status="pending")], + current_step=0, + reasoning=f"Parse error: {str(e)[:30]}..." + ) + elif isinstance(content, dict): + try: + workflow = WorkflowSummary.model_validate(content) + except Exception as e: + logger.error(f"Failed to validate workflow summary: {e}") + from data_models.patient_context_models import WorkflowStep + return WorkflowSummary( + patient_id=patient_id, + objective=objective, + steps=[WorkflowStep(agent="Orchestrator", task="Coordinate workflow", status="pending")], + current_step=0, + reasoning=f"Validation error: {str(e)[:30]}..." + ) + else: + logger.warning(f"Unexpected workflow response type: {type(content)}") + from data_models.patient_context_models import WorkflowStep + return WorkflowSummary( + patient_id=patient_id, + objective=objective, + steps=[WorkflowStep(agent="Orchestrator", task="Coordinate workflow", status="pending")], + current_step=0, + reasoning="Unexpected response format" + ) + + logger.info(f"Generated workflow summary with {len(workflow.steps)} steps for patient {patient_id}") + return workflow + + except Exception as e: + logger.error(f"Workflow summary generation failed: {e}") + from data_models.patient_context_models import WorkflowStep + return WorkflowSummary( + patient_id=patient_id, + objective=objective, + steps=[WorkflowStep(agent="Orchestrator", task="Coordinate workflow", status="pending")], + current_step=0, + reasoning=f"Generation error: {str(e)[:30]}..." + ) + + def create_group_chat( app_ctx: AppContext, chat_ctx: ChatContext, participants: list[dict] = None ) -> Tuple[AgentGroupChat, ChatContext]: + """ + Create a multi-agent group chat with structured output strategies. 
+ + Args: + app_ctx: Application context containing shared resources + chat_ctx: Chat context for conversation state + participants: Optional list of participant configurations + + Returns: + Tuple of AgentGroupChat instance and updated ChatContext + """ participant_configs = participants or app_ctx.all_agent_configs participant_names = [cfg.get("name") for cfg in participant_configs] logger.info(f"Creating group chat with participants: {participant_names}") - # Remove magentic agent from the list of agents. In the future, we could add agent type to deal with agents that should not be included in the Semantic Kernel group chat. + # Inject workflow summary before creating agents + inject_workflow_summary(chat_ctx) + + # Remove magentic agent from the list of agents all_agents_config = [ agent for agent in participant_configs if agent.get("name") != "magentic" ] def _create_kernel_with_chat_completion() -> Kernel: + """Create a kernel instance with Azure OpenAI chat completion service.""" kernel = Kernel() kernel.add_service( AzureChatCompletion( @@ -77,6 +279,7 @@ def _create_kernel_with_chat_completion() -> Kernel: return kernel def _create_agent(agent_config: dict): + """Create an agent instance based on configuration.""" agent_kernel = _create_kernel_with_chat_completion() plugin_config = PluginConfiguration( kernel=agent_kernel, @@ -98,7 +301,6 @@ def _create_agent(agent_config: dict): tool_module = importlib.import_module(f"scenarios.{scenario}.tools.{tool_name}") agent_kernel.add_plugin(tool_module.create_plugin(plugin_config), plugin_name=tool_name) # Add OpenAPI tools - # See https://github.com/Azure-Samples/healthcare-agent-orchestrator/blob/main/docs/agent_development.md#agent-with-a-openapi-plugin-example elif tool_type == "openapi": openapi_document_path = tool.get("openapi_document_path") server_url_override = tool.get("server_url_override") @@ -123,21 +325,52 @@ def _create_agent(agent_config: dict): instructions = instructions.replace( "{{aiAgents}}", "\n\t\t".join([f"- {agent['name']}: {agent["description"]}" for agent in all_agents_config])) - return (ChatCompletionAgent(service_id="default", - kernel=agent_kernel, - name=agent_config["name"], - instructions=instructions, - arguments=arguments) if not is_healthcare_agent else + return (CustomChatCompletionAgent(kernel=agent_kernel, + name=agent_config["name"], + instructions=instructions, + description=agent_config.get("description", ""), + arguments=arguments) if not is_healthcare_agent else HealthcareAgent(name=agent_config["name"], chat_ctx=chat_ctx, app_ctx=app_ctx)) - settings = AzureChatPromptExecutionSettings( - function_choice_behavior=FunctionChoiceBehavior.Auto(), temperature=DEFAULT_MODEL_TEMP, seed=42, response_format=ChatRule) - arguments = KernelArguments(settings=settings) + # Create kernel for orchestrator functions + orchestrator_kernel = _create_kernel_with_chat_completion() + # Find facilitator agent facilitator_agent = next((agent for agent in all_agents_config if agent.get("facilitator")), all_agents_config[0]) facilitator = facilitator_agent["name"] + + # Structured output for selection/termination decisions + settings = AzureChatPromptExecutionSettings( + function_choice_behavior=FunctionChoiceBehavior.Auto(), + temperature=DEFAULT_MODEL_TEMP, + seed=42, + response_format=ChatRule + ) + arguments = KernelArguments(settings=settings) + + async def create_workflow_summary_if_needed(): + """Generate workflow summary for new patient workflows.""" + if chat_ctx.patient_id and not hasattr(chat_ctx, 
'workflow_summary'): + # Determine objective from recent conversation + objective = "Provide comprehensive healthcare assistance" + if len(chat_ctx.chat_history.messages) > 0: + last_msg = chat_ctx.chat_history.messages[-1].content + if isinstance(last_msg, str) and len(last_msg) > 10: + objective = f"Address user request: {last_msg[:100]}..." + + workflow = await generate_workflow_summary( + chat_ctx=chat_ctx, + kernel=orchestrator_kernel, + patient_id=chat_ctx.patient_id, + objective=objective + ) + + # Store workflow summary in chat context + chat_ctx.workflow_summary = workflow.model_dump_json() + logger.info(f"Generated new workflow summary for patient {chat_ctx.patient_id}") + selection_function = KernelFunctionFromPrompt( function_name="selection", prompt=f""" @@ -149,6 +382,8 @@ def _create_agent(agent_config: dict): 2. **General Rules**: - **{facilitator} Always Starts**: {facilitator} always goes first to formulate a plan. If the only message is from the user, {facilitator} goes next. + - **Check Workflow Progress**: Look for WORKFLOW_SUMMARY messages to understand what stage of the process we're in + - **Avoid Repetition**: If an agent has already completed their task (according to workflow summary), don't select them again unless specifically requested - **Interactions between agents**: Agents may talk among themselves. If an agent requires information from another agent, that agent should go next. EXAMPLE: "*agent_name*, please provide ..." then agent_name goes next. @@ -203,24 +438,27 @@ def _create_agent(agent_config: dict): """, prompt_execution_settings=settings ) + agents = [_create_agent(agent) for agent in all_agents_config] def evaluate_termination(result): + """Evaluate termination decision from structured output.""" try: rule = ChatRule.model_validate_json(str(result.value[0])) should_terminate = rule.verdict == "yes" - logger.debug(f"Termination decision: {should_terminate}") + logger.debug(f"Termination decision: {should_terminate} | Reasoning: {rule.reasoning}") return should_terminate except Exception as e: logger.error(f"Termination function error: {e}") return False # Fallback to continue conversation def evaluate_selection(result): + """Evaluate agent selection from structured output.""" try: rule = ChatRule.model_validate_json(str(result.value[0])) selected_agent = rule.verdict if rule.verdict in [agent["name"] for agent in all_agents_config] else facilitator - logger.debug(f"Selected agent: {selected_agent}") + logger.debug(f"Selected agent: {selected_agent} | Reasoning: {rule.reasoning}") return selected_agent except Exception as e: logger.error(f"Selection function error: {e}") @@ -231,7 +469,7 @@ def evaluate_selection(result): chat_history=chat_ctx.chat_history, selection_strategy=KernelFunctionSelectionStrategy( function=selection_function, - kernel=_create_kernel_with_chat_completion(), + kernel=orchestrator_kernel, result_parser=evaluate_selection, agent_variable_name="agents", history_variable_name="history", @@ -242,7 +480,7 @@ def evaluate_selection(result): agent for agent in agents if agent.name == facilitator ], # Only facilitator decides if the conversation ends function=termination_function, - kernel=_create_kernel_with_chat_completion(), + kernel=orchestrator_kernel, result_parser=evaluate_termination, agent_variable_name="agents", history_variable_name="history", @@ -255,4 +493,5 @@ def evaluate_selection(result): ), ) + logger.info(f"Group chat created successfully with {len(agents)} agents") return (chat, chat_ctx) diff --git 
a/src/healthcare_agents/agent.py b/src/healthcare_agents/agent.py index e05a5dd..b3b1dba 100644 --- a/src/healthcare_agents/agent.py +++ b/src/healthcare_agents/agent.py @@ -9,7 +9,7 @@ from azure.keyvault.secrets.aio import SecretClient from semantic_kernel.agents.agent import Agent from semantic_kernel.agents.channels.agent_channel import AgentChannel -from semantic_kernel.contents import AuthorRole, ChatMessageContent +from semantic_kernel.contents import AuthorRole, ChatMessageContent, TextContent from semantic_kernel.exceptions import AgentInvokeException from data_models.app_context import AppContext @@ -26,12 +26,12 @@ class HealthcareAgentChannel(AgentChannel): def __init__(self): super().__init__() self.history: list[ChatMessageContent] = [] - logger.debug("HealthcareAgentChannel initialized.") + logger.debug("HealthcareAgentChannel initialized") @override async def receive(self, history: list[ChatMessageContent]) -> None: for message in history: - logger.debug("[history] Received message: %s", message.content) + logger.debug("Received message: %s", message.content) if message.content.strip() != "": self.history.append(message) @@ -39,8 +39,10 @@ async def receive(self, history: list[ChatMessageContent]) -> None: async def invoke(self, agent: "HealthcareAgent") -> AsyncIterable[tuple[bool, ChatMessageContent]]: logger.debug("Invoking agent: %s, with user input: %s", agent.name, self.history[-1].content) user_input = self.history[-1].content - user_message = ChatMessageContent(role=AuthorRole.USER, - content=user_input) + user_message = ChatMessageContent( + role=AuthorRole.USER, + items=[TextContent(text=str(user_input))] + ) self.history.append(user_message) if agent.client: @@ -49,7 +51,8 @@ async def invoke(self, agent: "HealthcareAgent") -> AsyncIterable[tuple[bool, Ch response_message = ChatMessageContent( role=AuthorRole.ASSISTANT, name=agent.name, - content=response_dict.get("text", "")) + items=[TextContent(text=response_dict.get("text", ""))] + ) self.history.append(response_message) yield True, response_message else: @@ -57,17 +60,17 @@ async def invoke(self, agent: "HealthcareAgent") -> AsyncIterable[tuple[bool, Ch @override async def invoke_stream(self, agent: "HealthcareAgent", history: "list[ChatMessageContent]"): - raise NotImplementedError("invoke_stream is not implemented yet.") + raise NotImplementedError("invoke_stream is not implemented yet") @override async def get_history(self) -> AsyncIterable[ChatMessageContent]: - logger.debug("Getting history from HealthcareAgentChannel.") + logger.debug("Getting history from HealthcareAgentChannel") for message in reversed(self.history): yield message @override async def reset(self) -> None: - logger.debug("Resetting HealthcareAgentChannel.") + logger.debug("Resetting HealthcareAgentChannel") self.history.clear() @@ -88,14 +91,14 @@ def __init__(self, self._client: HealthcareAgentServiceClient = None if not name: - raise ValueError("Agent name is required.") + raise ValueError("Agent name is required") if not chat_ctx: - raise ValueError("Chat context is required.") + raise ValueError("Chat context is required") if not app_ctx: - raise ValueError("Application context is required.") + raise ValueError("Application context is required") # Initialize the HealthcareAgentServiceClient - logger.debug("Initializing HealthcareAgentServiceClient.") + logger.debug("Initializing HealthcareAgentServiceClient") self._client: HealthcareAgentServiceClient = HealthcareAgentServiceClient( agent_name=name, chat_ctx=chat_ctx, @@ -109,18 
+112,19 @@ def __init__(self, retry_delay=config.retry_delay, timeout=config.timeout ) + # Restore conversation ID if it exists if name in self._chat_ctx.healthcare_agents: self._client.set_conversation_id( self._chat_ctx.healthcare_agents[name].get("conversation_id", None)) - logger.debug(f"HealthcareAgent initialized: {name}") + logger.debug("HealthcareAgent initialized: %s", name) @property def client(self): return self._client async def create_channel(self) -> AgentChannel: - logger.debug("Creating HealthcareAgentChannel.") + logger.debug("Creating HealthcareAgentChannel") return HealthcareAgentChannel() @override @@ -131,7 +135,7 @@ async def get_response(self, message: str) -> ChatMessageContent: return ChatMessageContent( role=AuthorRole.ASSISTANT, name=self.name, - content=response_dict.get("text", "") + items=[TextContent(text=response_dict.get("text", ""))] ) @override @@ -140,14 +144,14 @@ async def invoke(self, *args, **kwargs) -> AsyncIterable[ChatMessageContent]: message = kwargs.get("message") logger.debug("Invoking HealthcareAgent with message: %s", message) if not message: - raise AgentInvokeException("Message is required to invoke the agent.") + raise AgentInvokeException("Message is required to invoke the agent") response = await self.get_response(message) yield response @override async def invoke_stream(self, *args, **kwargs) -> AsyncIterable[ChatMessageContent]: """Invoke the agent as a stream.""" - raise NotImplementedError("invoke_stream is not implemented.") + raise NotImplementedError("invoke_stream is not implemented") async def get_attachments(self) -> list[dict]: """Get the attachments from the conversation history.""" diff --git a/src/magentic_chat.py b/src/magentic_chat.py index 4789675..75f8651 100644 --- a/src/magentic_chat.py +++ b/src/magentic_chat.py @@ -24,7 +24,7 @@ def create_magentic_chat(chat: AgentGroupChat, app_context: AppContext, input_fu agent_config = app_context.all_agent_configs az_model_client = AzureOpenAIChatCompletionClient( azure_deployment=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"], - model="gpt-4o", + model=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"], api_version="2024-10-21", azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"], azure_ad_token_provider=app_context.cognitive_services_token_provider, diff --git a/src/requirements.txt b/src/requirements.txt index bf50ef4..bcd1d50 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -7,7 +7,7 @@ botbuilder-core==4.17.0 botbuilder-dialogs==4.17.0 botbuilder-integration-aiohttp==4.17.0 python-dotenv==1.0.1 -semantic-kernel==1.20.0 +semantic-kernel==1.29.0 numpy==1.26.4 opencv-python-headless==4.11.0.86 matplotlib==3.10.1 diff --git a/src/routes/api/chats.py b/src/routes/api/chats.py index bcf6fb7..64a3a83 100644 --- a/src/routes/api/chats.py +++ b/src/routes/api/chats.py @@ -11,26 +11,25 @@ from fastapi.responses import JSONResponse from pydantic import BaseModel -from semantic_kernel.contents import AuthorRole, ChatMessageContent +from semantic_kernel.contents import AuthorRole, ChatMessageContent, TextContent from services.patient_context_service import PatientContextService, PATIENT_CONTEXT_PREFIX from services.patient_context_analyzer import PatientContextAnalyzer from data_models.app_context import AppContext + import group_chat logger = logging.getLogger(__name__) -# Custom JSON encoder that handles datetime - class DateTimeEncoder(json.JSONEncoder): + """Custom JSON encoder that handles datetime objects.""" + def default(self, obj: Any) -> Any: if isinstance(obj, datetime): 
return obj.isoformat() return super().default(obj) -# Pydantic models for request/response - class MessageRequest(BaseModel): content: str @@ -48,9 +47,8 @@ class Message(BaseModel): mentions: Optional[List[str]] = None def dict(self, *args, **kwargs): - # Override dict method to handle datetime serialization + """Override dict method to handle datetime serialization.""" d = super().dict(*args, **kwargs) - # Convert datetime to ISO format string if isinstance(d.get('timestamp'), datetime): d['timestamp'] = d['timestamp'].isoformat() return d @@ -70,8 +68,6 @@ class AgentsResponse(BaseModel): agents: List[str] error: Optional[str] = None -# Create a helper function to create JSON responses with datetime handling - def create_json_response(content, headers=None): """Create a JSONResponse with proper datetime handling.""" @@ -89,32 +85,20 @@ def chats_routes(app_context: AppContext): agent_config = app_context.all_agent_configs data_access = app_context.data_access - # Initialize patient context service + # Initialize patient context service with both accessors analyzer = PatientContextAnalyzer(token_provider=app_context.cognitive_services_token_provider) - patient_context_service = PatientContextService(analyzer=analyzer) + patient_context_service = PatientContextService( + analyzer=analyzer, + registry_accessor=app_context.data_access.patient_context_registry_accessor, + context_accessor=app_context.data_access.chat_context_accessor + ) # Find the facilitator agent facilitator_agent = next((agent for agent in agent_config if agent.get("facilitator")), agent_config[0]) facilitator = facilitator_agent["name"] - def _append_pc_ctx_system(chat_history_messages: List[ChatMessageContent], patient_context: str) -> None: - """Append patient context to chat history at position 0 (system message).""" - if len(chat_history_messages) > 0 and chat_history_messages[0].role == AuthorRole.SYSTEM: - # Update existing system message - existing_content = chat_history_messages[0].content - if PATIENT_CONTEXT_PREFIX not in existing_content: - chat_history_messages[0].content = f"{existing_content}\n\n{patient_context}" - else: - # Insert new system message at position 0 - system_message = ChatMessageContent( - role=AuthorRole.SYSTEM, - content=patient_context - ) - chat_history_messages.insert(0, system_message) - def _get_system_patient_context_json(chat_context) -> str | None: """Extract the JSON payload from the current PATIENT_CONTEXT_JSON system message.""" - # Fix: Use .messages instead of .history for msg in chat_context.chat_history.messages: if msg.role == AuthorRole.SYSTEM: # Handle both string content and itemized content @@ -144,7 +128,6 @@ def _get_system_patient_context_json(chat_context) -> str | None: def _append_pc_ctx_display(base: str, chat_context) -> str: """Append patient context information to the message for display.""" - # Avoid double-tagging if "\nPC_CTX" in base or "\n*PT_CTX:*" in base: return base @@ -161,26 +144,15 @@ def _append_pc_ctx_display(base: str, chat_context) -> str: lines = ["\n\n---", "\n*PT_CTX:*"] if obj.get("patient_id"): - lines.append(f"- **Patient ID:** `{obj['patient_id']}`") + lines.append("- **Patient ID:** `%s`" % obj['patient_id']) if obj.get("conversation_id"): - lines.append(f"- **Conversation ID:** `{obj['conversation_id']}`") + lines.append("- **Conversation ID:** `%s`" % obj['conversation_id']) if obj.get("all_patient_ids"): active_id = obj.get("patient_id") - ids_str = ", ".join(f"`{p}`{' (active)' if p == active_id else ''}" for p in 
obj["all_patient_ids"]) - lines.append(f"- **Session Patients:** {ids_str}") - - summary_raw = obj.get("chat_summary", "") - if summary_raw and summary_raw.strip(): - # Check if it's the default "no specific information" message - if "No specific information was discussed" in summary_raw: - lines.append(f"- **Summary:** *Building patient context...*") - else: - # Clean up summary for display - summary = summary_raw.replace('\n', ' ').strip() - lines.append(f"- **Summary:** *{summary}*") - else: - lines.append(f"- **Summary:** *Building patient context...*") + ids_str = ", ".join("`%s`%s" % (p, ' (active)' if p == active_id else '') + for p in obj["all_patient_ids"]) + lines.append("- **Session Patients:** %s" % ids_str) if not obj.get("patient_id"): lines.append("- *No active patient.*") @@ -188,174 +160,217 @@ def _append_pc_ctx_display(base: str, chat_context) -> str: # Only add the block if there's something to show besides the header if len(lines) > 2: formatted_text = "\n".join(lines) - logger.debug(f"Appended patient context to message | Patient: {obj.get('patient_id')}") - return f"{base}{formatted_text}" + return "%s%s" % (base, formatted_text) else: return base except json.JSONDecodeError as e: - logger.warning(f"Failed to parse patient context JSON: {e}") + logger.warning("Failed to parse patient context JSON: %s", e) # Fallback to raw if JSON is malformed, but keep it simple - return f"{base}\n\n---\n*PT_CTX (raw):* `{json_payload}`" - - def _format_patient_context_json(patient_context: str) -> str: - """Convert patient context to JSON format for system message.""" - return json.dumps({ - "patient_context": patient_context, - "instruction": "Use this patient context to provide relevant responses. Always consider the patient's current medical status, history, and any active conditions when responding." 
- }, indent=2) + return "%s\n\n---\n*PT_CTX (raw):* `%s`" % (base, json_payload) async def _handle_clear_command(content: str, chat_context) -> bool: """Handle patient context clear commands.""" content_lower = content.lower().strip() if content_lower in ["clear", "clear patient", "clear context", "clear patient context"]: - # Clear patient context - chat_context.patient_context = None - logger.info("Patient context cleared via WebSocket clear command") - return True + logger.info("Processing clear command for conversation: %s", chat_context.conversation_id) + + # Archive everything before clearing + timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H-%M-%S-%f") + archive_folder = "archive/%s" % timestamp + + try: + logger.info("Starting archive to folder: %s", archive_folder) + + # Archive session context + await data_access.chat_context_accessor.archive_to_folder(chat_context.conversation_id, None, archive_folder) + logger.info("Archived session context to %s", archive_folder) + + # Archive ALL patient contexts from registry + try: + patient_registry, _ = await patient_context_service.registry_accessor.read_registry(chat_context.conversation_id) + if patient_registry: + for patient_id in patient_registry.keys(): + await data_access.chat_context_accessor.archive_to_folder(chat_context.conversation_id, patient_id, archive_folder) + logger.info("Archived patient context for %s to %s", patient_id, archive_folder) + except Exception as registry_error: + logger.warning("Could not read registry for archiving patient contexts: %s", registry_error) + # Fallback: use patient_contexts from chat_context if available + if hasattr(chat_context, 'patient_contexts') and chat_context.patient_contexts: + for patient_id in chat_context.patient_contexts.keys(): + await data_access.chat_context_accessor.archive_to_folder(chat_context.conversation_id, patient_id, archive_folder) + logger.info("Archived patient context for %s to %s (fallback)", patient_id, archive_folder) + + # Archive patient registry + await patient_context_service.registry_accessor.archive_registry(chat_context.conversation_id) + logger.info("Archived patient registry for %s", chat_context.conversation_id) + + # Clear chat context + chat_context.patient_context = None + if hasattr(chat_context, 'patient_contexts'): + chat_context.patient_contexts.clear() + chat_context.chat_history.messages.clear() + chat_context.patient_id = None + + logger.info("Successfully archived and cleared all contexts to %s", archive_folder) + return True + + except Exception as e: + logger.error("Failed to archive contexts during clear: %s", e) + # Still clear the context even if archiving fails + chat_context.patient_context = None + if hasattr(chat_context, 'patient_contexts'): + chat_context.patient_contexts.clear() + chat_context.chat_history.messages.clear() + chat_context.patient_id = None + return True + return False @router.get("/api/agents", response_model=AgentsResponse) async def get_available_agents(): - """ - Returns a list of all available agents that can be mentioned in messages. 
- """ + """Returns a list of all available agents that can be mentioned in messages.""" try: - # Extract agent names from the agent_config - agent_names = [agent["name"] for agent in agent_config if "name" in agent] - - # Return the list of agent names - return JSONResponse( - content={"agents": agent_names, "error": None} - ) + agent_names = [agent["name"] for agent in agent_config] + return AgentsResponse(agents=agent_names) except Exception as e: - logger.exception(f"Error getting available agents: {e}") - return JSONResponse( - content={"agents": [], "error": str(e)}, - status_code=500 - ) + logger.error("Error getting agents: %s", e) + return AgentsResponse(agents=[], error=str(e)) @router.websocket("/api/ws/chats/{chat_id}/messages") async def websocket_chat_endpoint(websocket: WebSocket, chat_id: str): - """WebSocket endpoint for streaming chat messages""" - try: - await websocket.accept() - logger.info(f"WebSocket connection established for chat: {chat_id}") - - # Wait for the first message from the client - client_message = await websocket.receive_json() - logger.info(f"Received message over WebSocket: {client_message}") - - # Extract message content, sender and mentions - content = client_message.get("content", "") - sender = client_message.get("sender", "User") - mentions = client_message.get("mentions", []) - - # Try to read existing chat context or create a new one if it doesn't exist - try: - chat_context = await data_access.chat_context_accessor.read(chat_id) - except: - # If the chat doesn't exist, create a new one - chat_context = await data_access.chat_context_accessor.create_new(chat_id) - - # Handle clear commands - if await _handle_clear_command(content, chat_context): - # Send confirmation message - clear_message = Message( - id=str(uuid.uuid4()), - content="Patient context has been cleared.", - sender="System", - timestamp=datetime.now(timezone.utc), - isBot=True, - mentions=[] - ) - await websocket.send_json(clear_message.dict()) - await websocket.send_json({"type": "done"}) - - # Save updated context - await data_access.chat_context_accessor.write(chat_context) - return - - # Add user message to history - chat_context.chat_history.add_user_message(content) - - # Apply patient context using the service - FIX: Use correct method signature - try: - decision, timing = await patient_context_service.decide_and_apply( - content, # user_text parameter - chat_context # chat_ctx parameter - ) - - logger.info(f"Patient context decision: {decision}, timing: {timing}") - - except Exception as e: - logger.warning(f"Error applying patient context to WebSocket message: {e}") - # Continue without patient context - - # Create group chat instance - chat, chat_context = group_chat.create_group_chat(app_context, chat_context) - - # Process the message - determine target agent based on mentions - target_agent_name = facilitator # Default to facilitator agent + """WebSocket endpoint with patient isolation support.""" + await websocket.accept() + logger.info("WebSocket connection established for chat: %s", chat_id) - if mentions and len(mentions) > 0: - # Use the first mentioned agent - target_agent_name = mentions[0] - - # Find the agent by name - target_agent = next( - (agent for agent in chat.agents if agent.name.lower() == target_agent_name.lower()), - chat.agents[0] # Fallback to first agent - ) - - logger.info(f"Using agent: {target_agent.name} to respond to WebSocket message") - - # Check if the agent is the facilitator - if target_agent.name == facilitator: - target_agent = None # 
Force facilitator mode when target is the facilitator - - response_sent = False + try: + while True: + data = await websocket.receive_json() + content = data.get("content", "").strip() - # Get responses from the target agent - async for response in chat.invoke(agent=target_agent): - # Skip responses with no content - if not response or not response.content: + if not content: + await websocket.send_json({"error": "Empty message content"}) continue - # Add patient context display to response content - response_content_with_pc = _append_pc_ctx_display(response.content, chat_context) - - # Create bot response message for each response - bot_message = Message( - id=str(uuid.uuid4()), - content=response_content_with_pc, # Use content with PC_CTX display - sender=response.name, - timestamp=datetime.now(timezone.utc), - isBot=True, - mentions=[] - ) + try: + # STEP 1: Load session context + chat_context = await data_access.chat_context_accessor.read(chat_id, None) + logger.info("Loaded session context for: %s", chat_id) + + # STEP 2: Handle clear commands BEFORE patient context processing + if await _handle_clear_command(content, chat_context): + clear_message = Message( + id=str(uuid.uuid4()), + content="The conversation has been cleared. How can I assist you today?", + sender="Orchestrator", + timestamp=datetime.now(timezone.utc), + isBot=True, + mentions=[] + ) + await websocket.send_json(clear_message.dict()) + await websocket.send_json({"type": "done"}) + + # Save to appropriate context file + await data_access.chat_context_accessor.write(chat_context) + continue + + # STEP 3: Patient context decision and application + try: + decision, timing = await patient_context_service.decide_and_apply(content, chat_context) + logger.info("Patient context decision: %s | Patient: %s", decision, chat_context.patient_id) + except Exception as e: + logger.warning("Error applying patient context: %s", e) + decision = "NONE" + + # STEP 4: Handle special decision outcomes + if decision == "NEEDS_PATIENT_ID": + error_message = Message( + id=str(uuid.uuid4()), + content="I need a patient ID to proceed. 
Please provide the patient ID in the format 'patient_X' (e.g., 'start tumor board review for patient_4').", + sender="Orchestrator", + timestamp=datetime.now(timezone.utc), + isBot=True, + mentions=[] + ) + await websocket.send_json(error_message.dict()) + await websocket.send_json({"type": "done"}) + continue + + # STEP 5: If active patient exists, load ONLY that patient's isolated context file + if chat_context.patient_id: + try: + isolated_ctx = await data_access.chat_context_accessor.read(chat_id, chat_context.patient_id) + if isolated_ctx and isolated_ctx.chat_history.messages: + # Replace with isolated chat history + chat_context.chat_history = isolated_ctx.chat_history + logger.info("Loaded isolated history for %s (%s messages)", + chat_context.patient_id, len(isolated_ctx.chat_history.messages)) + else: + logger.info("No existing history for %s, starting fresh", chat_context.patient_id) + except Exception as e: + logger.debug("Could not load isolated context for %s: %s", chat_context.patient_id, e) + + # STEP 6: Create group chat and add user message + chat, chat_context = group_chat.create_group_chat(app_context, chat_context) + + # Add user message to chat history + user_message = ChatMessageContent( + role=AuthorRole.USER, + items=[TextContent(text=content)] + ) + chat_context.chat_history.add_message(user_message) + + # STEP 7: Get target agent from message + target_agent_name = facilitator + if ":" in content: + mentioned = content.split(":", 1)[0].strip() + if any(agent.name.lower() == mentioned.lower() for agent in chat.agents): + target_agent_name = mentioned + + target_agent = next( + (agent for agent in chat.agents if agent.name.lower() == target_agent_name.lower()), + chat.agents[0] + ) + + logger.info("Using agent: %s", target_agent.name) + + if target_agent.name == facilitator: + target_agent = None + + # STEP 8: Get responses + async for response in chat.invoke(agent=target_agent): + if not response or not response.content: + continue + + response_content_with_pc = _append_pc_ctx_display(response.content, chat_context) + + bot_message = Message( + id=str(uuid.uuid4()), + content=response_content_with_pc, + sender=response.name, + timestamp=datetime.now(timezone.utc), + isBot=True, + mentions=[] + ) + await websocket.send_json(bot_message.dict()) + + # STEP 9: Save to appropriate context file (patient-specific OR session-only) + await data_access.chat_context_accessor.write(chat_context) + logger.info("Saved context for conversation: %s | Patient: %s", chat_id, chat_context.patient_id) + + except Exception as e: + logger.error("Error in WebSocket chat: %s", e) + await websocket.send_json({"error": str(e)}) - # Convert to dict for JSON serialization - message_dict = bot_message.dict() - - # Send message over WebSocket - await websocket.send_json(message_dict) - - # Save chat context after all messages are processed - await data_access.chat_context_accessor.write(chat_context) - - # Send done signal - await websocket.send_json({"type": "done"}) + await websocket.send_json({"type": "done"}) except WebSocketDisconnect: - logger.info(f"WebSocket client disconnected from chat: {chat_id}") + logger.info("WebSocket disconnected for chat: %s", chat_id) except Exception as e: - logger.exception(f"Error in WebSocket chat: {e}") + logger.error("WebSocket error: %s", e) try: - # Try to send error message to client await websocket.send_json({"error": str(e)}) - await websocket.send_json({"type": "done"}) - except: + except Exception: pass return router diff --git 
a/src/scenarios/default/config/agents.yaml b/src/scenarios/default/config/agents.yaml index 20458e3..bae958c 100644 --- a/src/scenarios/default/config/agents.yaml +++ b/src/scenarios/default/config/agents.yaml @@ -1,51 +1,59 @@ - name: Orchestrator instructions: | - You are an AI agent facilitating a discussion between a group of AI agent experts and the user. You are not to make clinical recommendations or treatment plans. Follow these guidelines: - - **CORE DIRECTIVES (Follow in this exact order):** - - 1. **Identify Current Patient**: First, read the SYSTEM message starting with "PATIENT_CONTEXT_JSON:" to identify the current `patient_id`. All subsequent actions concern THIS patient only. - - 2. **Check for Recently Completed Work (for this patient)**: Next, read the `chat_summary` within that same JSON message. - - If the summary indicates the user's request has already been completed for the **current patient** (e.g., "A tumor board review was generated"), you MUST inform the user and STOP. Do not create a new plan. - - Example Response: "Based on our recent conversation, a tumor board review for `patient_X` has already been completed. Would you like me to generate a new one?" - - 3. **Plan Generation and Reuse Logic**: If the work isn't already done for the current patient, decide on a plan. - - **First Plan of the Session**: If this is the very first time you are creating a plan in this entire conversation (for any patient), you **MUST** present the plan and ask the user for confirmation before proceeding. - - **Reusing a Confirmed Plan**: If the user has confirmed a plan for a similar task (like a "tumor board review") at any point earlier in this conversation (even for a different patient), you can reuse that plan's structure. - - **Transparency is Key**: When reusing a plan, you **MUST** announce it. Say something like: "This is a similar request to one we've handled before. I will reuse the same plan to proceed." Then, execute the plan directly without asking for confirmation. - - **BEHAVIORAL EXAMPLE (Patient Context Handling):** - This is an example of how to correctly answer a direct question about the active patient by strictly following the `PATIENT_CONTEXT_JSON` message. - --- - *PREVIOUS CONVERSATION was about patient_A.* - **USER:** @Orchestrator get me the report for patient_B - **ORCHESTRATOR:** *...starts process for patient_B...* - *(The latest SYSTEM message is now: PATIENT_CONTEXT_JSON:{"patient_id": "patient_B", ...})* - **USER:** @Orchestrator who is the active patient? - **CORRECT REASONING (Internal Thought):** My memory suggests we were just talking about patient_A, but the latest `PATIENT_CONTEXT_JSON` message clearly states the `patient_id` is "patient_B". The JSON is the single source of truth, so I must use "patient_B". - **CORRECT RESPONSE:** The current active patient is patient_B. - --- - *You MUST follow this reasoning. The `PATIENT_CONTEXT_JSON` is the only source of truth.* - - 4. **Moderate the Discussion**: - - When creating a new plan, explain the purpose and order of agents. - - Keep track of which agents have completed their tasks. - - When asking the user for information, mention the user explicitly. "*User*, can you provide me with the patient's #BLANK?" - - When addressing an agent, mention the agent explicitly. "*PatientHistory*, proceed with #BLANK." - - 5. **Participants**: - The following AI experts can help with answering queries. - {{aiAgents}} - - 6. **Role Limitation**: Remember, your role is to moderate and facilitate. 
- - DON'T: Provide clinical recommendations or treatment plans. - - 7. **Conclude the plan**: - - Don't conclude the conversation until all agents have provided their input. Instead, address the agents that have not yet provided their input. - - When all agents have provided their input and the user's question has been answered, summarize the response in one or two sentences. - - Ask the user if they have any further questions or need additional assistance. + You are an AI agent facilitating a discussion between a group of AI agent experts and the user. You are not to make clinical recommendations or treatment plans. Follow these guidelines: + + **Patient Context Awareness**: + When you receive a message with `PATIENT_CONTEXT_JSON`, extract the patient_id and use it to maintain patient-specific conversations. + IMPORTANT: Always check the actual chat history in the current conversation to determine what has been discussed with this patient. + Do not assume previous actions based on patient metadata alone - review the current chat history to understand the context. + + **Conversation State Tracking**: + Before responding, always review the chat history to understand: + - What was the original request/goal? + - Which agents have already provided their input? + - What is the next logical step in the plan? + - If the user says "proceed", "continue", or similar, determine what the next step should be based on the current progress. + - If a task has been completed (like report creation), do NOT restart the process unless explicitly asked. + + **CRITICAL**: Do not restart completed processes. If ReportCreation has already created a report, the tumor board review is complete. + + 1. **Moderate the Discussion**: + Your primary role is to facilitate the discussion and ensure a smooth flow of conversation among the participants. + When a question is asked, think through who could best answer it. Formulate a plan and present it to the user. + Rely on other agents to provide missing information. First ask the agent what information they need to answer a question. + When asking the user for information, mention the user explicitly. "*User*, can you provide me with the patient's #BLANK?" + When addressing an agent, mention the agent explicitly. "*PatientHistory*, proceed with #BLANK." + 2. **Participants**: + The following AI experts can help with answering queries from the user. + {{aiAgents}} + If, during the course of the conversation, information is missing, think through who could best answer it, then ask that agent explicitly for + the information by mentioning the agent. Only ask the user for plan confirmation! + When an agent has provided their input, acknowledge it and move on to the next agent in the plan. + + 3. **Handle User Commands**: + - When the user says "proceed", "continue", or confirms to move forward, check the chat history to see what has been completed and what should happen next. + - Do NOT repeat previous agent responses or ask the same questions again. + - Do NOT restart completed processes. + - Move to the next logical step in your plan based on what has already been accomplished. + + 4. **Allow user to confirm**: When you create a plan with step-by-step execution, ask the user for confirmation on the plan. If the plan changes, + inform the user and ask for confirmation again. Stick to the confirmed plan; as the plan progresses as expected, you can skip the confirmation step. + + 5.
**Explain the Purpose and Order**: At the beginning of the conversation, explain the plan and the expected order of participants. + Please think hard about the order of the individual agents called. For example, the current status and the historical background should be clarified + early in the discussion such that the other agents can make use of that knowledge. Treatment recommendation and research agents should be called later + in the discussion. Report creation should always happen in the end. + + 6. **Role Limitation**: Remember, your role is to moderate and facilitate, not to provide clinical recommendations or treatment plans. + DON'T: Provide clinical recommendations or treatment plans. Please only call ONE agent at a time. + + 7. **Conclude the plan**: + Don't conclude the conversation until all agents have provided their input. Instead, address the agents that have not yet provided their input. + When all agents have provided their input, the plan has concluded, and the user's question has been answered, summarize the response in one or two sentences. + Ask the user if they have any further questions or need additional assistance. + For follow up questions, formulate a new plan and suggest the order of participants. + **IMPORTANT**: When presenting the plan, ALWAYS specify the following rule: Each agent, after completing their task, should yield the chat back to you (Orchestrator). Specifically instruct each agent to say "back to you: *Orchestrator*" after their response. diff --git a/src/services/patient_context_analyzer.py b/src/services/patient_context_analyzer.py index 1c6306e..8721620 100644 --- a/src/services/patient_context_analyzer.py +++ b/src/services/patient_context_analyzer.py @@ -1,13 +1,17 @@ -import json import logging import os import time -from typing import Optional, Literal +from typing import Optional, Literal, Tuple from semantic_kernel import Kernel from semantic_kernel.connectors.ai.open_ai.services.azure_chat_completion import AzureChatCompletion -from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings +from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.azure_chat_prompt_execution_settings import ( + AzureChatPromptExecutionSettings, +) from semantic_kernel.contents import ChatHistory +from semantic_kernel.functions import kernel_function + +from data_models.patient_context_models import PatientContextDecision logger = logging.getLogger(__name__) @@ -16,7 +20,8 @@ class PatientContextAnalyzer: """ - Single LLM call decides patient context action and (if relevant) patient_id. + Patient context analyzer using Semantic Kernel structured output with JSON schema. + Follows Microsoft best practices for structured LLM outputs. 
""" def __init__( @@ -33,172 +38,203 @@ def __init__( if not self.deployment_name: raise ValueError("No deployment name for patient context analyzer.") self.api_version = api_version or os.getenv("AZURE_OPENAI_API_VERSION") or "2024-10-21" + self._token_provider = token_provider logger.info(f"PatientContextAnalyzer initialized with deployment: {self.deployment_name}") self._kernel = Kernel() self._kernel.add_service( AzureChatCompletion( - service_id="default", + service_id="patient_context_analyzer", deployment_name=self.deployment_name, api_version=self.api_version, ad_token_provider=token_provider, ) ) - async def analyze( - self, user_text: str, prior_patient_id: Optional[str], known_patient_ids: list[str] - ) -> tuple[AnalyzerAction, Optional[str], float]: - start_time = time.time() - - logger.debug(f"Analyzing user input for patient context | Prior: {prior_patient_id}") + @kernel_function( + description="Analyze user input for patient context decisions", + name="analyze_patient_context", + ) + async def analyze_decision( + self, + user_text: str, + prior_patient_id: Optional[str] = None, + known_patient_ids: Optional[list[str]] = None, + ) -> PatientContextDecision: + """ + Analyze user input and return structured patient context decision. + + Args: + user_text: The user's input message + prior_patient_id: Current active patient ID (if any) + known_patient_ids: List of known patient IDs in this session + + Returns: + PatientContextDecision: Structured decision with action, patient_id, and reasoning + """ + if known_patient_ids is None: + known_patient_ids = [] if not user_text or not user_text.strip(): - duration = time.time() - start_time - logger.debug(f"Empty input received | Duration: {duration:.4f}s") - return "NONE", None, duration + return PatientContextDecision( + action="NONE", + patient_id=None, + reasoning="Empty or whitespace user input; no action needed." + ) - # Existing system prompt and LLM call logic... - system_prompt = f""" -You are a patient context analyzer for healthcare conversations. + system_prompt = f"""You are a patient context analyzer for healthcare conversations. TASK: Analyze user input and decide the appropriate patient context action. -ACTIONS: +AVAILABLE ACTIONS: - NONE: No patient context needed (general questions, greetings, system commands) -- CLEAR: User wants to clear/reset patient context -- ACTIVATE_NEW: User mentions a new patient ID not in known_patient_ids +- CLEAR: User wants to clear/reset all patient context +- ACTIVATE_NEW: User mentions a new patient ID not in the known patient list - SWITCH_EXISTING: User wants to switch to a different known patient - UNCHANGED: Continue with current patient context CURRENT STATE: -- Prior patient ID: {prior_patient_id} +- Active patient ID: {prior_patient_id or "None"} - Known patient IDs: {known_patient_ids} -RULES: +ANALYSIS RULES: 1. Extract patient_id ONLY if action is ACTIVATE_NEW or SWITCH_EXISTING -2. Patient IDs are typically "patient_X" format or explicit medical record numbers -3. For CLEAR/NONE/UNCHANGED, set patient_id to null +2. Patient IDs typically follow "patient_X" format or are explicit medical record numbers +3. For CLEAR/NONE/UNCHANGED actions, set patient_id to null 4. Prioritize explicit patient mentions over implicit context +5. 
Keep reasoning brief and specific (max 50 words) -RESPONSE FORMAT (JSON only): -{{"action": "ACTION_NAME", "patient_id": "extracted_id_or_null", "reasoning": "brief_explanation"}} - -USER INPUT: {user_text} -""" +Respond with a structured JSON object matching the required schema.""" try: chat_history = ChatHistory() chat_history.add_system_message(system_prompt) - chat_history.add_user_message(user_text) + chat_history.add_user_message(f"User input: {user_text}") + + # Use AzureChatPromptExecutionSettings with response_format for structured output + execution_settings = AzureChatPromptExecutionSettings( + service_id="patient_context_analyzer", + max_tokens=200, + temperature=0.1, + response_format=PatientContextDecision, # This generates the JSON schema automatically + ) - svc = self._kernel.get_service("default") - llm_start = time.time() + svc = self._kernel.get_service("patient_context_analyzer") results = await svc.get_chat_message_contents( chat_history=chat_history, - settings=PromptExecutionSettings( - max_tokens=150, - temperature=0.1, - response_format={"type": "json_object"} - ), + settings=execution_settings, ) - llm_duration = time.time() - llm_start - - if not results: - raise ValueError("No LLM response received") + if not results or not results[0].content: + logger.warning("No response from patient context analyzer") + return PatientContextDecision( + action="NONE", + patient_id=None, + reasoning="No response from analyzer; defaulting to NONE." + ) + # Parse the structured response content = results[0].content - if not content: - logger.warning("Empty LLM response content") - duration = time.time() - start_time - return "NONE", None, duration - - try: - parsed = json.loads(content) - action = parsed.get("action", "NONE") - pid = parsed.get("patient_id") - - # Validation - valid_actions = ["NONE", "CLEAR", "ACTIVATE_NEW", "SWITCH_EXISTING", "UNCHANGED"] - if action not in valid_actions: - logger.error(f"Invalid action from LLM: {action}") - action = "NONE" - pid = None - - duration = time.time() - start_time - logger.info( - f"Patient context analysis complete | Action: {action} | Patient: {pid} | Duration: {duration:.4f}s") - return action, pid, duration - - except json.JSONDecodeError as je: - logger.error(f"Failed to parse LLM JSON response: {je}") - duration = time.time() - start_time - return "NONE", None, duration - except Exception as e: - duration = time.time() - start_time - logger.error(f"Patient context analysis failed: {e} | Duration: {duration:.4f}s") - return "NONE", None, duration - - async def summarize_text(self, text: str, patient_id: str) -> str: - """Generate a patient-specific summary of conversation text.""" - try: - system_prompt = f""" -You are a clinical summarization assistant. Your ONLY task is to summarize the provided text for patient '{patient_id}'. - -**CRITICAL RULES:** -1. **FOCUS EXCLUSIVELY ON `{patient_id}`**: Ignore all information, notes, or mentions related to any other patient. -2. **DO NOT BLEND PATIENTS**: If the text mentions other patients (e.g., 'patient_4', 'patient_12'), you must NOT include them in the summary. -3. **BE CONCISE**: Create a short, bulleted list of 3-5 key points. -4. **NO FABRICATION**: If there is no relevant information for `{patient_id}` in the text, respond with "No specific information was discussed for patient {patient_id} in this segment." 
+ # Handle both string and already-parsed responses + if isinstance(content, str): + try: + decision = PatientContextDecision.model_validate_json(content) + except Exception as e: + logger.error(f"Failed to parse structured response: {e}") + return PatientContextDecision( + action="NONE", + patient_id=None, + reasoning=f"Parse error: {str(e)[:30]}..." + ) + elif isinstance(content, dict): + try: + decision = PatientContextDecision.model_validate(content) + except Exception as e: + logger.error(f"Failed to validate structured response: {e}") + return PatientContextDecision( + action="NONE", + patient_id=None, + reasoning=f"Validation error: {str(e)[:30]}..." + ) + else: + logger.warning(f"Unexpected response type: {type(content)}") + return PatientContextDecision( + action="NONE", + patient_id=None, + reasoning="Unexpected response format; defaulting to NONE." + ) -Summarize the following text: ---- -{text} -""" + logger.info( + f"Patient context decision: {decision.action} | " + f"Patient: {decision.patient_id} | " + f"Reasoning: {decision.reasoning}" + ) - chat_history = ChatHistory() - chat_history.add_system_message(system_prompt) - chat_history.add_user_message("Please summarize this conversation.") + return decision - svc = self._kernel.get_service("default") - results = await svc.get_chat_message_contents( - chat_history=chat_history, - settings=PromptExecutionSettings(max_tokens=300, temperature=0.3), + except Exception as e: + logger.error(f"Patient context analysis failed: {e}") + return PatientContextDecision( + action="NONE", + patient_id=None, + reasoning=f"Analysis error: {str(e)[:30]}..." ) - return results[0].content if results and results[0].content else f"Summary unavailable for {patient_id}" + # Wrapper for backward compatibility - returns timing info + async def analyze_with_timing( + self, + user_text: str, + prior_patient_id: Optional[str], + known_patient_ids: list[str], + ) -> Tuple[PatientContextDecision, float]: + """ + Analyze with timing information for backward compatibility. 
+ """ + start_time = time.time() - except Exception as e: - logger.warning(f"Failed to generate summary: {e}") - return f"Summary generation failed for {patient_id}" + decision = await self.analyze_decision( + user_text=user_text, + prior_patient_id=prior_patient_id, + known_patient_ids=known_patient_ids, + ) + + duration = time.time() - start_time + return decision, duration - # Add this method to the PatientContextAnalyzer class + # Legacy wrapper (for existing callers) + async def analyze( + self, + user_text: str, + prior_patient_id: Optional[str], + known_patient_ids: list[str], + ) -> tuple[AnalyzerAction, Optional[str], float]: + """Legacy wrapper - use analyze_decision() for new code.""" + decision, duration = await self.analyze_with_timing( + user_text, prior_patient_id, known_patient_ids + ) + return decision.action, decision.patient_id, duration def reset_kernel(self): """Reset the kernel and service instance to prevent LLM state contamination between patients.""" try: - if hasattr(self, '_kernel') and self._kernel: - # Store current configuration + if hasattr(self, "_kernel") and self._kernel: current_deployment = self.deployment_name current_api_version = self.api_version + token_provider = self._token_provider - # Create fresh kernel instance self._kernel = Kernel() - - # Re-add the service with same configuration self._kernel.add_service( AzureChatCompletion( - service_id="default", + service_id="patient_context_analyzer", deployment_name=current_deployment, api_version=current_api_version, - ad_token_provider=None, # Adjust if you use token provider + ad_token_provider=token_provider, ) ) - logger.info("Kernel reset to prevent patient context contamination") - + logger.info("Kernel reset completed for patient context isolation") except Exception as e: logger.warning(f"Error during kernel reset: {e}") diff --git a/src/services/patient_context_service.py b/src/services/patient_context_service.py index e329ecf..81164f4 100644 --- a/src/services/patient_context_service.py +++ b/src/services/patient_context_service.py @@ -1,168 +1,439 @@ import json import logging +import re import time -from typing import Literal, TypedDict +from datetime import datetime, timezone # Add timezone here +from typing import Literal from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents import AuthorRole +from semantic_kernel.contents import TextContent from data_models.chat_context import ChatContext, PatientContext +from data_models.patient_context_models import TimingInfo, PatientContextSystemMessage from services.patient_context_analyzer import PatientContextAnalyzer logger = logging.getLogger(__name__) -PATIENT_CONTEXT_PREFIX = "PATIENT_CONTEXT_JSON:" -Decision = Literal["NONE", "UNCHANGED", "NEW_BLANK", "SWITCH_EXISTING", "CLEAR"] - - -class TimingInfo(TypedDict): - analyzer: float - service: float +PATIENT_CONTEXT_PREFIX = "PATIENT_CONTEXT_JSON" +PATIENT_ID_PATTERN = re.compile(r"^patient_[0-9]+$") +Decision = Literal["NONE", "UNCHANGED", "NEW_BLANK", "SWITCH_EXISTING", + "CLEAR", "RESTORED_FROM_STORAGE", "NEEDS_PATIENT_ID"] class PatientContextService: """ - LLM-only patient context manager. - Decides action + (optionally) patient_id via PatientContextAnalyzer, - maintains a single system message carrying current patient context JSON. + Simplified patient context manager: + 1. Use analyzer to detect explicit patient IDs + 2. Fall back to storage if analyzer returns NONE + 3. Simple file-based patient isolation + 4. 
Kernel reset on patient switches """ - def _estimate_tokens(self, text: str) -> int: - """Rough estimate (~4 chars/token) to avoid new dependencies""" - return max(1, len(text) // 4) - - def __init__(self, analyzer: PatientContextAnalyzer): + def __init__(self, analyzer: PatientContextAnalyzer, registry_accessor=None, context_accessor=None): self.analyzer = analyzer - logger.info(f"PatientContextService initialized") + self.registry_accessor = registry_accessor + self.context_accessor = context_accessor + logger.info(f"PatientContextService initialized with storage fallback: {registry_accessor is not None}") async def decide_and_apply(self, user_text: str, chat_ctx: ChatContext) -> tuple[Decision, TimingInfo]: service_start_time = time.time() + # Skip analyzer for very short messages that are likely agent handoffs + if user_text and len(user_text.strip()) <= 15 and not any( + word in user_text.lower() for word in ["patient", "clear", "switch"] + ): + logger.info(f"Skipping analyzer for short handoff message: '{user_text}'") + + if not chat_ctx.patient_id: + fallback_start = time.time() + restored = await self._try_restore_from_storage(chat_ctx) + fallback_duration = time.time() - fallback_start + decision = "RESTORED_FROM_STORAGE" if restored else "NONE" + else: + fallback_duration = 0.0 + decision = "UNCHANGED" + + timing = TimingInfo( + analyzer=0.0, + storage_fallback=fallback_duration, + service=time.time() - service_start_time, + ) + return decision, timing + logger.info(f"Patient context decision for '{user_text}' | Current patient: {chat_ctx.patient_id}") - action, pid, analyzer_duration = await self.analyzer.analyze( + # STEP 1: Run the analyzer with structured output + decision_model, analyzer_duration = await self.analyzer.analyze_with_timing( user_text=user_text, prior_patient_id=chat_ctx.patient_id, known_patient_ids=list(chat_ctx.patient_contexts.keys()), ) - logger.info(f"Analyzer result: {action} | Patient ID: {pid}") + action = decision_model.action + pid = decision_model.patient_id - # Store original state for comparison - original_patient_id = chat_ctx.patient_id + logger.info( + f"Analyzer decision: {action} | Patient ID: {pid} | " + f"Reasoning: {decision_model.reasoning}" + ) + + # STEP 2: Handle analyzer results + fallback_duration = 0.0 - decision: Decision = "NONE" if action == "CLEAR": - self._clear(chat_ctx) - decision = "CLEAR" + await self._archive_all_and_recreate(chat_ctx) + timing = TimingInfo( + analyzer=analyzer_duration, + storage_fallback=0.0, + service=time.time() - service_start_time, + ) + return "CLEAR", timing + elif action in ("ACTIVATE_NEW", "SWITCH_EXISTING"): - decision = self._activate_patient(pid, chat_ctx) if pid else "NONE" + if not pid or not PATIENT_ID_PATTERN.match(pid): + logger.warning(f"Invalid patient ID from analyzer: {pid}") + decision = "NEEDS_PATIENT_ID" + else: + decision = await self._activate_patient_with_registry(pid, chat_ctx) + + elif action == "NONE": + fb_start = time.time() + if not chat_ctx.patient_id: + restored = await self._try_restore_from_storage(chat_ctx) + decision = "RESTORED_FROM_STORAGE" if restored else "NONE" + else: + decision = "UNCHANGED" + fallback_duration = time.time() - fb_start + elif action == "UNCHANGED": decision = "UNCHANGED" - - if original_patient_id != chat_ctx.patient_id: - logger.warning( - f"Patient context changed: '{original_patient_id}' -> '{chat_ctx.patient_id}'. " - "Resetting analyzer kernel to prevent context leak." 
- ) - if hasattr(self.analyzer, "reset_kernel"): - self.analyzer.reset_kernel() + else: + decision = "NONE" service_duration = time.time() - service_start_time - timing: TimingInfo = {"analyzer": round(analyzer_duration, 4), "service": round(service_duration, 4)} + timing = TimingInfo( + analyzer=analyzer_duration, + storage_fallback=fallback_duration, + service=service_duration, + ) - # Generate patient-specific LLM-based chat summary - chat_summary = None if chat_ctx.patient_id: - # Find messages since the last patient context switch to current patient - patient_specific_messages = [] - - # Go through messages in reverse to find current patient's conversation segment - for message in reversed(chat_ctx.chat_history.messages): - # Check if this is a system message with patient context JSON - if (message.role == AuthorRole.SYSTEM and - isinstance(message.content, str) and - message.content.startswith(PATIENT_CONTEXT_PREFIX)): - try: - json_content = message.content[len(PATIENT_CONTEXT_PREFIX):].strip() - payload = json.loads(json_content) - message_patient_id = payload.get("patient_id") - - # If we find a context message for a *different* patient, - # that's the boundary of the current patient's conversation. - if message_patient_id != chat_ctx.patient_id: - break - except Exception as e: - logger.warning(f"Failed to parse system message JSON: {e}") - continue - - patient_specific_messages.append(message) - - # Create summary from patient-specific messages only - if patient_specific_messages: - patient_specific_messages.reverse() # Back to chronological order - history_text = "\n".join( - str(getattr(m, "role", "")) + ": " + (m.content if isinstance(m.content, str) else str(m.content or "")) - for m in patient_specific_messages - if not (m.role == AuthorRole.SYSTEM and isinstance(m.content, str) and m.content.startswith(PATIENT_CONTEXT_PREFIX)) - )[:8000] - - if history_text.strip(): - try: - # LLM still does the summarization, but with patient-specific input - chat_summary = await self.analyzer.summarize_text(history_text, chat_ctx.patient_id) - logger.debug(f"Generated summary for {chat_ctx.patient_id}") - except Exception as e: - logger.warning(f"Failed to summarize: {e}") - chat_summary = f"Chat summary for {chat_ctx.patient_id} unavailable" - - token_counts = { - "history_estimate": self._estimate_tokens(chat_summary) if chat_summary else 0, - "summary_estimate": self._estimate_tokens(chat_summary) if chat_summary else 0, - } - - if decision == "CLEAR": - self._remove_system_message(chat_ctx) - else: - self._ensure_system_message(chat_ctx, timing, chat_summary, token_counts) + await self._ensure_system_message(chat_ctx, timing) - logger.info(f"Patient context decision complete: {decision} | Patient: {chat_ctx.patient_id}") return decision, timing - # -------- Internal helpers -------- + async def set_explicit_patient_context(self, patient_id: str, chat_ctx: ChatContext) -> bool: + if not patient_id or not PATIENT_ID_PATTERN.match(patient_id): + logger.warning(f"Invalid patient ID format: {patient_id}") + return False + + if chat_ctx.patient_id and patient_id != chat_ctx.patient_id: + logger.info(f"Resetting kernel for explicit patient switch: {chat_ctx.patient_id} -> {patient_id}") + self.analyzer.reset_kernel() + + restored = await self._try_restore_specific_patient(patient_id, chat_ctx) + if not restored: + chat_ctx.patient_contexts[patient_id] = PatientContext(patient_id=patient_id) + logger.info(f"Created new patient context: {patient_id}") + + chat_ctx.patient_id = patient_id + 
timing = TimingInfo(analyzer=0.0, storage_fallback=0.0, service=0.0) + await self._ensure_system_message(chat_ctx, timing) + + if self.registry_accessor: + try: + await self._update_registry_storage(chat_ctx) + except Exception as e: + logger.warning(f"Failed to update registry storage: {e}") + + return True + + async def _ensure_system_message(self, chat_ctx: ChatContext, timing: TimingInfo): + """Ensure system message with patient context data using structured model.""" + self._remove_system_message(chat_ctx) + + if not chat_ctx.patient_id: + return + + # Get all session patients from registry + all_patient_ids = list(chat_ctx.patient_contexts.keys()) + if self.registry_accessor: + try: + patient_registry, _ = await self.registry_accessor.read_registry(chat_ctx.conversation_id) + if patient_registry: + all_patient_ids = list(patient_registry.keys()) + logger.debug(f"Using patient registry for system message: {all_patient_ids}") + except Exception as e: + logger.warning(f"Failed to read patient registry for system message: {e}") + + # Use structured model for system message + payload = PatientContextSystemMessage( + conversation_id=chat_ctx.conversation_id, + patient_id=chat_ctx.patient_id, + all_patient_ids=all_patient_ids, + timing_sec=timing, + ) + + # Fix: Remove separators parameter - Pydantic doesn't support it + line = f"{PATIENT_CONTEXT_PREFIX}: {payload.model_dump_json()}" + system_message = ChatMessageContent( + role=AuthorRole.SYSTEM, + items=[TextContent(text=line)] + ) + chat_ctx.chat_history.messages.insert(0, system_message) + logger.debug( + f"Added structured patient context system message for {chat_ctx.patient_id} " + f"with {len(all_patient_ids)} session patients" + ) + + async def _try_restore_from_storage(self, chat_ctx: ChatContext) -> bool: + """Try to restore patient context from storage files.""" + logger.info(f"Attempting storage fallback for conversation: {chat_ctx.conversation_id}") + + # Priority 1: Check patient registry file (session registry) + if self.registry_accessor: + try: + patient_registry, active_patient_id = await self.registry_accessor.read_registry(chat_ctx.conversation_id) + + if patient_registry and active_patient_id: + logger.info(f"Found {len(patient_registry)} patients. 
Active: {active_patient_id}") + + # Restore all patient metadata from registry + for patient_id, registry_entry in patient_registry.items(): + chat_ctx.patient_contexts[patient_id] = PatientContext( + patient_id=patient_id, + facts=registry_entry.get("facts", {}) + ) + logger.info(f"Restored patient {patient_id} metadata") + + # Set active patient and load their isolated chat history + if active_patient_id in patient_registry: + chat_ctx.patient_id = active_patient_id + + # Load isolated chat history for active patient + if self.context_accessor: + try: + restored_chat_ctx = await self.context_accessor.read(chat_ctx.conversation_id, active_patient_id) + if restored_chat_ctx and hasattr(restored_chat_ctx, 'chat_history'): + # Clear current history and load patient-specific history + chat_ctx.chat_history.messages.clear() + chat_ctx.chat_history.messages.extend(restored_chat_ctx.chat_history.messages) + logger.info(f"Loaded isolated chat history for: {active_patient_id}") + except Exception as e: + logger.warning(f"Failed to load patient-specific chat history: {e}") + + logger.info(f"Restored active patient: {active_patient_id}") + return True + except Exception as e: + logger.warning(f"Failed to read patient registry: {e}") + + # Priority 2: Check session context + if self.context_accessor: + try: + restored_ctx = await self.context_accessor.read(chat_ctx.conversation_id) + if restored_ctx and restored_ctx.patient_id: + chat_ctx.patient_id = restored_ctx.patient_id + chat_ctx.patient_contexts = restored_ctx.patient_contexts or {} + chat_ctx.chat_history = restored_ctx.chat_history or chat_ctx.chat_history + logger.info(f"Restored session context: {restored_ctx.patient_id}") + return True + except Exception as e: + logger.warning(f"Failed to read session context: {e}") + + logger.info("No patient context found in storage") + return False + +# Replace the incomplete _archive_all_and_recreate method with this complete implementation: + + async def _archive_all_and_recreate(self, chat_ctx: ChatContext) -> None: + """Archive all files to blob storage and recreate fresh files.""" + logger.info("Archiving all contexts to blob storage for conversation: %s", chat_ctx.conversation_id) + + # Kernel reset for complete context clear + if chat_ctx.patient_id: + logger.info("Resetting kernel for complete context clear") + self.analyzer.reset_kernel() + + archive_failures = [] + + # Get ALL patients from registry + all_patient_ids = list(chat_ctx.patient_contexts.keys()) + + # Try to get the complete list from the patient registry + if self.registry_accessor: + try: + patient_registry, _ = await self.registry_accessor.read_registry(chat_ctx.conversation_id) + if patient_registry: + all_patient_ids = list(patient_registry.keys()) + logger.info("Found %s patients in registry to archive: %s", len(all_patient_ids), all_patient_ids) + else: + logger.warning("No patient registry found for archival") + except Exception as e: + logger.warning("Failed to read patient registry for archival: %s", e) + + # Create timestamped archive folder + timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H-%M-%S-%f") + archive_folder = "archive/%s" % timestamp + + try: + logger.info("Starting archive to folder: %s", archive_folder) + + # Archive session context (main conversation) + try: + await self.context_accessor.archive_to_folder(chat_ctx.conversation_id, None, archive_folder) + logger.info("Archived session context to %s", archive_folder) + except Exception as e: + logger.warning("Failed to archive session context: 
%s", e) + archive_failures.append("session") + + # Archive ALL patient contexts from registry + for patient_id in all_patient_ids: + try: + await self.context_accessor.archive_to_folder(chat_ctx.conversation_id, patient_id, archive_folder) + logger.info("Archived patient context for %s to %s", patient_id, archive_folder) + except Exception as e: + logger.warning("Failed to archive patient context for %s: %s", patient_id, e) + archive_failures.append(patient_id) + + # Archive patient registry + if self.registry_accessor: + try: + await self.registry_accessor.archive_registry(chat_ctx.conversation_id) + logger.info("Archived patient registry for %s", chat_ctx.conversation_id) + except Exception as e: + logger.warning("Failed to archive patient registry: %s", e) + archive_failures.append("registry") + + # Report archive status + if archive_failures: + logger.warning("Some archives failed: %s", archive_failures) + else: + logger.info("Successfully archived all contexts to %s", archive_folder) + + except Exception as e: + logger.error("Critical failure during archive process: %s", e) + + # Clear memory only after archival attempt (even if some failed) + chat_ctx.patient_id = None + chat_ctx.patient_contexts.clear() + chat_ctx.chat_history.messages.clear() + self._remove_system_message(chat_ctx) + + logger.info("Archival complete - memory cleared for fresh start") - def _activate_patient(self, patient_id: str, chat_ctx: ChatContext) -> Decision: + async def _activate_patient_with_registry(self, patient_id: str, chat_ctx: ChatContext) -> Decision: + """Activate patient and load from registry if available.""" if not patient_id: - return "NONE" + return "NEEDS_PATIENT_ID" # Same patient if patient_id == chat_ctx.patient_id: return "UNCHANGED" - # Switch to existing + # Kernel reset when switching patients + if chat_ctx.patient_id and patient_id != chat_ctx.patient_id: + logger.info(f"Resetting kernel for patient switch: {chat_ctx.patient_id} -> {patient_id}") + self.analyzer.reset_kernel() + + # Load registry metadata for all patients + if self.registry_accessor: + try: + patient_registry, _ = await self.registry_accessor.read_registry(chat_ctx.conversation_id) + if patient_registry: + # Load metadata for all patients from registry + for pid, registry_entry in patient_registry.items(): + if pid not in chat_ctx.patient_contexts: + chat_ctx.patient_contexts[pid] = PatientContext( + patient_id=pid, + facts=registry_entry.get("facts", {}) + ) + except Exception as e: + logger.warning(f"Failed to load patient registry: {e}") + + # Check if we have registry data for this patient + if self.registry_accessor: + try: + patient_registry, _ = await self.registry_accessor.read_registry(chat_ctx.conversation_id) + if patient_id in patient_registry: + registry_entry = patient_registry[patient_id] + if patient_id not in chat_ctx.patient_contexts: + chat_ctx.patient_contexts[patient_id] = PatientContext( + patient_id=patient_id, + facts=registry_entry.get("facts", {}) + ) + + chat_ctx.patient_id = patient_id + + # Load isolated chat history for this patient + if self.context_accessor: + try: + restored_chat_ctx = await self.context_accessor.read(chat_ctx.conversation_id, patient_id) + if restored_chat_ctx and hasattr(restored_chat_ctx, 'chat_history'): + # Clear current history and load patient-specific history + chat_ctx.chat_history.messages.clear() + chat_ctx.chat_history.messages.extend(restored_chat_ctx.chat_history.messages) + logger.info(f"Loaded isolated chat history for: {patient_id}") + except Exception 
as e: + logger.warning(f"Failed to load patient-specific chat history: {e}") + + logger.info(f"Switched to existing patient from registry: {patient_id}") + # CRITICAL: Update registry to mark this patient as currently active + await self._update_registry_storage(chat_ctx) + + return "SWITCH_EXISTING" + except Exception as e: + logger.warning(f"Failed to check registry for {patient_id}: {e}") + + # Switch to existing in memory - PRESERVE CHAT HISTORY if patient_id in chat_ctx.patient_contexts: chat_ctx.patient_id = patient_id - logger.info(f"Switched to existing patient: {patient_id}") + logger.info(f"Switched to existing patient (preserving chat history): {patient_id}") + # Update registry when switching to existing patient + await self._update_registry_storage(chat_ctx) return "SWITCH_EXISTING" - # New blank patient context + # New blank patient context - PRESERVE CHAT HISTORY chat_ctx.patient_contexts[patient_id] = PatientContext(patient_id=patient_id) chat_ctx.patient_id = patient_id - logger.info(f"Created new patient context: {patient_id}") + logger.info(f"Created new patient context (preserving chat history): {patient_id}") + + # CRITICAL: Update registry storage for new patient + await self._update_registry_storage(chat_ctx) + return "NEW_BLANK" - def _clear(self, chat_ctx: ChatContext): - logger.info(f"Clearing patient context: {chat_ctx.patient_id}") - chat_ctx.patient_id = None # retain historical contexts for potential reuse + async def _update_registry_storage(self, chat_ctx: ChatContext): + """Update registry storage for current patient.""" + if not self.registry_accessor or not chat_ctx.patient_id: + return + + current_patient = chat_ctx.patient_contexts.get(chat_ctx.patient_id) + if not current_patient: + logger.warning(f"No patient context found for {chat_ctx.patient_id}") + return + + # Simple registry entry + registry_entry = { + "patient_id": chat_ctx.patient_id, + "facts": current_patient.facts, + "conversation_id": chat_ctx.conversation_id + } + + try: + await self.registry_accessor.update_patient_registry( + chat_ctx.conversation_id, + chat_ctx.patient_id, + registry_entry, + chat_ctx.patient_id # Set as active patient + ) + logger.info(f"Updated registry storage for {chat_ctx.patient_id}") + except Exception as e: + logger.warning(f"Failed to update registry storage: {e}") def _remove_system_message(self, chat_ctx: ChatContext): - """ - Removes only the system message(s) for the *currently active* patient. - This preserves the system messages from other patients, which act as crucial - boundaries for the conversation history slicing logic. - """ + """Remove patient context system messages.""" if not chat_ctx.patient_id: - # If there's no active patient, there's nothing to remove. 
return current_patient_id = chat_ctx.patient_id @@ -170,55 +441,57 @@ def _remove_system_message(self, chat_ctx: ChatContext): removed_count = 0 for m in chat_ctx.chat_history.messages: - if ( - m.role == AuthorRole.SYSTEM - and isinstance(m.content, str) - and m.content.startswith(PATIENT_CONTEXT_PREFIX) - ): - try: - # Extract patient_id from the message payload - json_content = m.content[len(PATIENT_CONTEXT_PREFIX):].strip() - payload = json.loads(json_content) - message_patient_id = payload.get("patient_id") - - # If the message is for the current patient, we skip it (i.e., remove it) - if message_patient_id == current_patient_id: - removed_count += 1 - continue - except (json.JSONDecodeError, KeyError): - # If parsing fails, keep the message to be safe - pass - - # Keep all other messages + if (m.role == AuthorRole.SYSTEM and m.items and len(m.items) > 0): + content_str = m.items[0].text if hasattr(m.items[0], 'text') else str(m.items[0]) + if content_str.startswith(PATIENT_CONTEXT_PREFIX): + try: + json_content = content_str[len(PATIENT_CONTEXT_PREFIX):].strip() + if json_content.startswith(":"): + json_content = json_content[1:].strip() + payload = json.loads(json_content) + if payload.get("patient_id") == current_patient_id: + removed_count += 1 + continue # Skip this message (remove it) + except Exception: + pass # Keep malformed messages + messages_to_keep.append(m) if removed_count > 0: - logger.debug( - f"Removed {removed_count} prior context system message(s) for current patient '{current_patient_id}'.") + logger.debug(f"Removed {removed_count} system messages for {current_patient_id}") chat_ctx.chat_history.messages = messages_to_keep - def _ensure_system_message(self, chat_ctx: ChatContext, timing: TimingInfo, - chat_summary: str | None = None, - token_counts: dict | None = None): - self._remove_system_message(chat_ctx) - - if not chat_ctx.patient_id: - return - - # Simplified payload without agent tracking and chat excerpt - payload = { - "conversation_id": chat_ctx.conversation_id, - "patient_id": chat_ctx.patient_id, - "all_patient_ids": list(chat_ctx.patient_contexts.keys()), - "timing_sec": timing, - "chat_summary": chat_summary, - "token_counts": token_counts or {}, - } - - line = f"{PATIENT_CONTEXT_PREFIX} {json.dumps(payload, separators=(',', ':'))}" - - system_message = ChatMessageContent(role=AuthorRole.SYSTEM, content=line) - chat_ctx.chat_history.messages.insert(0, system_message) - - logger.debug(f"Added patient context system message for {chat_ctx.patient_id}") + async def _try_restore_specific_patient(self, patient_id: str, chat_ctx: ChatContext) -> bool: + """Try to restore specific patient from storage.""" + # Try registry storage first + if self.registry_accessor: + try: + patient_registry, _ = await self.registry_accessor.read_registry(chat_ctx.conversation_id) + if patient_id in patient_registry: + registry_entry = patient_registry[patient_id] + chat_ctx.patient_contexts[patient_id] = PatientContext( + patient_id=patient_id, + facts=registry_entry.get("facts", {}) + ) + logger.info(f"Restored {patient_id} from registry storage") + return True + except Exception as e: + logger.warning(f"Failed to restore {patient_id} from registry: {e}") + + # Try patient-specific context file + if self.context_accessor: + try: + stored_ctx = await self.context_accessor.read(chat_ctx.conversation_id, patient_id) + if stored_ctx and patient_id in stored_ctx.patient_contexts: + stored_context = stored_ctx.patient_contexts[patient_id] + chat_ctx.patient_contexts[patient_id] = 
PatientContext( + patient_id=patient_id, + facts=getattr(stored_context, 'facts', {}) + ) + logger.info(f"Restored {patient_id} from patient-specific context") + return True + except Exception as e: + logger.warning(f"Failed to restore {patient_id} from context: {e}") + + return False diff --git a/teamsApp/manifest.json b/teamsApp/manifest.json index 45f5695..078f82b 100644 --- a/teamsApp/manifest.json +++ b/teamsApp/manifest.json @@ -10,7 +10,7 @@ "developer": { "name": "Microsoft", "mpnId": "", - "websiteUrl": "https://microsoft.comg", + "websiteUrl": "https://microsoft.com", "privacyUrl": "https://microsoft.com", "termsOfUseUrl": "https://microsoft.com" }, @@ -61,16 +61,6 @@ }, "authorization": { "permissions": { - "resourceSpecific": [ - { - "name": "ChatMessage.Send.Chat", - "type": "Application" - }, - { - "name": "ChatMessage.Read.Chat", - "type": "Application" - } - ] } }, "activities": { @@ -80,5 +70,9 @@ "privateChannels", "sharedChannels" ], - "configurableTabs": [] + "configurableTabs": [], + "webApplicationInfo": { + "id": "", + "resource": "" + } } \ No newline at end of file From f42ff3f16c22373f27e9bff5c3326052806d4b8b Mon Sep 17 00:00:00 2001 From: vvenglaturmsft Date: Thu, 18 Sep 2025 16:20:17 +0000 Subject: [PATCH 13/20] updated - align with main --- democlient/package.json | 2 +- docs/evaluation.md | 2 +- src/evaluation/chat_simulator.py | 45 ++++++------------- src/magentic_chat.py | 2 +- .../default/config/healthcare_agents.yaml | 2 +- .../default/tools/clinical_trials.py | 2 +- .../tools/content_export/content_export.py | 9 +++- src/scenarios/default/tools/patient_data.py | 1 - src/utils/model_utils.py | 28 ++++++++++++ 9 files changed, 55 insertions(+), 38 deletions(-) create mode 100644 src/utils/model_utils.py diff --git a/democlient/package.json b/democlient/package.json index 3c1a032..61fd72b 100644 --- a/democlient/package.json +++ b/democlient/package.json @@ -24,7 +24,7 @@ "@types/react-dom": "^18.2.0", "@types/uuid": "^10.0.0", "@vitejs/plugin-react": "^4.6.0", - "vite": "^6.3.5" + "vite": "^6.3.6" }, "scripts": { "dev": "vite", diff --git a/docs/evaluation.md b/docs/evaluation.md index 4d2e0d4..0d716af 100644 --- a/docs/evaluation.md +++ b/docs/evaluation.md @@ -164,7 +164,7 @@ from evaluation.metrics.agent_selection import AgentSelectionEvaluator llm_service = AzureChatCompletion( deployment_name=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"], - api_version="2024-12-01-preview", + api_version="2025-04-01-preview", endpoint=os.environ["AZURE_OPENAI_ENDPOINT"], ) diff --git a/src/evaluation/chat_simulator.py b/src/evaluation/chat_simulator.py index 829772c..e6ed44f 100644 --- a/src/evaluation/chat_simulator.py +++ b/src/evaluation/chat_simulator.py @@ -51,6 +51,11 @@ def setup(self, patient_id: str, initial_query: str, followup_questions: list[st self.followup_asked = False async def generate_user_message(self, chat_history: ChatHistory) -> str: + if not self.followup_asked and self.followup_questions: + self.followup_asked = True + if self.followup_questions: + next_question = self.followup_questions.pop(0) + return f"Orchestrator: {next_question}" if not self.followup_asked and self.followup_questions: self.followup_asked = True if self.followup_questions: @@ -137,7 +142,7 @@ def _extract_new_messages(self, chat_history: ChatHistory) -> list[ChatMessageCo ChatHistory chat_history: The chat history containing messages from the user and agents. Returns: - list[ChatMessageContent]: List of new messages since the last user message. 
+ str: String representation of the latest messages. """ last_user_message_idx = -1 for i, message in enumerate(chat_history.messages): @@ -151,7 +156,7 @@ def _transform_chat_history(self, messages: list[ChatMessageContent]) -> str: Transforms the chat history into a format suitable for the LLM simulation. Args: - messages: List of chat messages to transform. + ChatHistory chat_history: The chat history containing messages from the user and agents. Returns: str: String representation of the chat history. @@ -168,9 +173,6 @@ class ChatSimulator: """ Class to simulate a chat with a group of agents. - Handles patient context isolation during simulation by ensuring proper - patient context setup and cleanup between simulation runs. - Attributes: simulated_user: The simulated user to interact with the agents. group_chat_kwargs: Additional arguments for the group chat. @@ -181,6 +183,7 @@ class ChatSimulator: Can be loaded from a CSV file with `load_initial_queries`. followup_questions: Optional list of follow-up questions for the simulation. Can be loaded from a CSV file with `load_initial_queries`. + group_followups: Whether to group follow-up questions by initial query. trial_count: Number of trials for each initial query. max_turns: Maximum number of turns in the conversation. output_folder_path: Path to the folder where chat history will be saved. @@ -239,18 +242,7 @@ def setup_group_chat(self, chat_id: str, **kwargs) -> None: kwargs: Additional arguments to be passed to the group chat creation function. """ if "chat_ctx" not in kwargs: - # Create chat context with patient context support - chat_context = ChatContext(chat_id) - # Extract patient ID from chat_id if it follows pattern - if "patient_" in chat_id: - parts = chat_id.split("_") - for i, part in enumerate(parts): - if part == "patient" and i + 1 < len(parts): - patient_id = f"patient_{parts[i + 1]}" - chat_context.patient_id = patient_id - logging.debug(f"Set patient context for simulation: {patient_id}") - break - kwargs["chat_ctx"] = chat_context + kwargs["chat_ctx"] = ChatContext(chat_id) self.group_chat, self.chat_context = create_group_chat(**kwargs) return self @@ -273,7 +265,6 @@ def load_initial_queries( initial_queries_column: Name of the column containing initial queries. followup_column: Name of the column containing follow-up questions. delimiter: Delimiter used in the CSV file (default is comma). - group_followups: Whether to group follow-up questions by initial query. Returns: self: Returns the instance for method chaining. 
@@ -322,16 +313,14 @@ async def simulate_chats(self): f"Setting up simulated user with initial query: {initial_query} and followups: {followup_questions}" ) - # Create chat ID that includes patient information for context isolation - chat_id = f"sim_{patient_id}_{trial}_{checkpoint_key[:8]}" - self.setup_group_chat(chat_id, **self.group_chat_kwargs) + self.setup_group_chat(checkpoint_key, **self.group_chat_kwargs) await self.chat(patient_id, initial_query, followup_questions, self.max_turns) self.save(f"chat_context_trial{trial}_{checkpoint_key}.json", save_readable_history=self.save_readable_history) except Exception as e: logging.error( - f"Error during conversation with initial query: {initial_query} and followup: {followup_questions[0] if followup_questions else 'None'}: {e}") + f"Error during conversation with initial query: {initial_query} and followup: {followup_questions[0]}: {e}") if self.raise_errors: raise e else: @@ -368,7 +357,7 @@ async def chat(self, patient_id: str, initial_query: str, followup_questions: li try: new_user_message = await self.simulated_user.generate_user_message(self.group_chat.history) except Exception as e: - logging.error(f"Error generating user message: {e}") + print(f"Error generating user message: {e}") break if self.simulated_user.is_complete: @@ -435,8 +424,6 @@ def save(self, output_filename: str = None, save_readable_history: bool = False) with open(readable_filename, 'w', encoding="utf-8") as f: f.write(messages) - logging.info(f"Saved simulation results to {output_file_path}") - return self def _print_message(self, message: ChatMessageContent): @@ -462,7 +449,6 @@ def _process_csv_content( patients_id_column: Name of the column containing patient IDs. initial_queries_column: Name of the column containing initial queries. followup_column: Name of the column containing follow-up questions. - group_followups: Whether to group follow-up questions by initial query. Raises: ValueError: If the specified columns are not found in the CSV file. 
@@ -471,7 +457,7 @@ def _process_csv_content( raise ValueError(f"Column '{initial_queries_column}' not found in the CSV file.") if patients_id_column not in reader.fieldnames: - raise ValueError(f"Column '{patients_id_column}' not found in the CSV file.") + raise ValueError(f"Columns '{patients_id_column}' not found in the CSV file.") followup_column_available = followup_column is not None and followup_column in reader.fieldnames @@ -521,13 +507,10 @@ def _load_checkpoint(self) -> set: def _save_checkpoint(self, query: str): """Save a completed query to the checkpoint file.""" - with open(self.checkpoint_file, "a", encoding="utf-8") as f: + with open(self.checkpoint_file, "w+", encoding="utf-8") as f: f.write(f"{query}\n") def _generate_chat_unique_id(self, patient_id: str, initial_query: str, followup_questions: list[str]) -> str: """Generate a unique ID for the chat based on patient ID, initial query, and follow-up questions.""" return hashlib.sha256( f"{patient_id}{initial_query}{"".join(followup_questions)}{type(self.simulated_user).__name__}".encode()).hexdigest() - - -logger = logging.getLogger(__name__) diff --git a/src/magentic_chat.py b/src/magentic_chat.py index 75f8651..fc5982f 100644 --- a/src/magentic_chat.py +++ b/src/magentic_chat.py @@ -25,7 +25,7 @@ def create_magentic_chat(chat: AgentGroupChat, app_context: AppContext, input_fu az_model_client = AzureOpenAIChatCompletionClient( azure_deployment=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"], model=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"], - api_version="2024-10-21", + api_version="2025-04-01-preview", azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"], azure_ad_token_provider=app_context.cognitive_services_token_provider, ) diff --git a/src/scenarios/default/config/healthcare_agents.yaml b/src/scenarios/default/config/healthcare_agents.yaml index 099f8a6..a85be7b 100644 --- a/src/scenarios/default/config/healthcare_agents.yaml +++ b/src/scenarios/default/config/healthcare_agents.yaml @@ -41,7 +41,7 @@ * End every reply with 'back to you: '. * When you finish answering, end with **"back to you: **"**, where is the agent that asked you (e.g., *PatientStatus* or *Orchestrator*). 
- temperature: 0.0 + temperature: 0 tools: - name: patient_data description: | diff --git a/src/scenarios/default/tools/clinical_trials.py b/src/scenarios/default/tools/clinical_trials.py index f889763..071e260 100644 --- a/src/scenarios/default/tools/clinical_trials.py +++ b/src/scenarios/default/tools/clinical_trials.py @@ -76,7 +76,7 @@ def __init__(self, kernel: Kernel, chat_ctx: ChatContext): self.chat_completion_service = AzureChatCompletion( service_id="reasoning-model", deployment_name=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME_REASONING_MODEL"], - api_version="2024-12-01-preview", + api_version="2025-04-01-preview", endpoint=os.environ["AZURE_OPENAI_REASONING_MODEL_ENDPOINT"], ) diff --git a/src/scenarios/default/tools/content_export/content_export.py b/src/scenarios/default/tools/content_export/content_export.py index d82b8cd..7e835fe 100644 --- a/src/scenarios/default/tools/content_export/content_export.py +++ b/src/scenarios/default/tools/content_export/content_export.py @@ -24,6 +24,7 @@ from data_models.plugin_configuration import PluginConfiguration from data_models.tumor_board_summary import ClinicalSummary, ClinicalTrial from routes.patient_data.patient_data_routes import get_chat_artifacts_url +from utils.model_utils import model_supports_temperature from .timeline_image import create_timeline_images_by_height @@ -204,7 +205,13 @@ async def _get_clinical_summary(self, patient_timeline: PatientTimeline, max_ent # Generate timeline # https://devblogs.microsoft.com/semantic-kernel/using-json-schema-for-structured-output-in-python-for-openai-models/ - settings = AzureChatPromptExecutionSettings(temperature=0.0, response_format=ClinicalSummary) + if model_supports_temperature(): + temperature = 0.0 + logger.info(f"Using temperature setting: {temperature}") + else: + temperature = None + logger.info("Model does not support temperature setting") + settings = AzureChatPromptExecutionSettings(temperature=temperature, response_format=ClinicalSummary) chat_completion_service = self.kernel.get_service(service_id="default") chat_resp = await chat_completion_service.get_chat_message_content(chat_history=chat_history, settings=settings) diff --git a/src/scenarios/default/tools/patient_data.py b/src/scenarios/default/tools/patient_data.py index 6250d8c..0075875 100644 --- a/src/scenarios/default/tools/patient_data.py +++ b/src/scenarios/default/tools/patient_data.py @@ -214,6 +214,5 @@ async def process_prompt(self, patient_id: str, prompt: str) -> str: def _get_chat_prompt_exec_settings(response_format) -> AzureChatPromptExecutionSettings: return AzureChatPromptExecutionSettings( response_format=response_format, - temperature=0.0, seed=42 ) diff --git a/src/utils/model_utils.py b/src/utils/model_utils.py new file mode 100644 index 0000000..e1b03e8 --- /dev/null +++ b/src/utils/model_utils.py @@ -0,0 +1,28 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +import os + + +def model_supports_temperature() -> bool: + """ + Check if the configured model supports the temperature parameter. + + Note: + The deployment name is read from the AZURE_OPENAI_DEPLOYMENT_NAME environment variable; the function takes no arguments. + + Returns: + bool: True if the model supports temperature, False if it's a non-temperature/reasoning model that doesn't.
+ """ + non_temp_models = {"o1", "o1-mini", "o3", "o3-mini", "o3-pro", + "o4-mini", "gpt-5", "gpt-5-mini", "gpt-5-nano", "DeepSeek-R1"} + + model_name = os.environ.get("AZURE_OPENAI_DEPLOYMENT_NAME", "") + + # Check if the model name contains any of the non-temperature model names + model = model_name.lower() + for non_temp_model in non_temp_models: + if non_temp_model.lower() in model: + return False + + return True From 826b16cb2342a586ca1f4f130488c4f5cfb210fc Mon Sep 17 00:00:00 2001 From: vvenglaturmsft Date: Fri, 19 Sep 2025 01:59:25 +0000 Subject: [PATCH 14/20] updated --- src/group_chat.py | 96 +++++++++++------------------------------------ 1 file changed, 21 insertions(+), 75 deletions(-) diff --git a/src/group_chat.py b/src/group_chat.py index 7385b89..449e92d 100644 --- a/src/group_chat.py +++ b/src/group_chat.py @@ -4,13 +4,12 @@ import importlib import logging import os -from typing import Any, Awaitable, Callable, Tuple, override, override +from typing import Any, Awaitable, Callable, Tuple, override from pydantic import BaseModel from semantic_kernel import Kernel from semantic_kernel.agents import AgentGroupChat, ChatCompletionAgent from semantic_kernel.agents.channels.chat_history_channel import ChatHistoryChannel -from semantic_kernel.agents.channels.chat_history_channel import ChatHistoryChannel from semantic_kernel.agents.strategies.selection.kernel_function_selection_strategy import \ KernelFunctionSelectionStrategy from semantic_kernel.agents.strategies.termination.kernel_function_termination_strategy import \ @@ -22,8 +21,6 @@ from semantic_kernel.connectors.openapi_plugin import OpenAPIFunctionExecutionParameters from semantic_kernel.contents.chat_history import ChatHistory from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents.chat_history import ChatHistory -from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.history_reducer.chat_history_truncation_reducer import ChatHistoryTruncationReducer from semantic_kernel.functions.kernel_function_from_prompt import KernelFunctionFromPrompt from semantic_kernel.kernel import Kernel, KernelArguments @@ -91,53 +88,6 @@ async def auth_callback(): return {'conversation-id': chat_ctx.conversation_id} return auth_callback -# Need to introduce a CustomChatCompletionAgent and a CustomHistoryChannel because of issue https://github.com/microsoft/semantic-kernel/issues/12095 - - -class CustomHistoryChannel(ChatHistoryChannel): - @override - async def receive(self, history: list[ChatMessageContent],) -> None: - await super().receive(history) - - for message in history[:-1]: - await self.thread.on_new_message(message) - - -async def create_channel( - self, chat_history: ChatHistory | None = None, thread_id: str | None = None -) -> CustomHistoryChannel: - """Create a ChatHistoryChannel. - - Args: - chat_history: The chat history for the channel. If None, a new ChatHistory instance will be created. - thread_id: The ID of the thread. If None, a new thread will be created. - - Returns: - An instance of AgentChannel. 
- """ - from semantic_kernel.agents.chat_completion.chat_completion_agent import ChatHistoryAgentThread - - CustomHistoryChannel.model_rebuild() - - thread = ChatHistoryAgentThread(chat_history=chat_history, thread_id=thread_id) - - if thread.id is None: - await thread.create() - - messages = [message async for message in thread.get_messages()] - - return CustomHistoryChannel(messages=messages, thread=thread) - - -class CustomChatCompletionAgent(ChatCompletionAgent): - """Custom ChatCompletionAgent to override the create_channel method.""" - - @override - async def create_channel( - self, chat_history: ChatHistory | None = None, thread_id: str | None = None - ) -> CustomHistoryChannel: - return await create_channel(self, chat_history, thread_id) - def inject_workflow_summary(chat_ctx: ChatContext) -> None: """Inject workflow summary if available.""" @@ -158,7 +108,7 @@ def inject_workflow_summary(chat_ctx: ChatContext) -> None: items=[TextContent(text=f"WORKFLOW_SUMMARY: {chat_ctx.workflow_summary}")] ) chat_ctx.chat_history.messages.insert(1, summary_message) - logger.info(f"Injected workflow summary for patient {chat_ctx.patient_id}") + logger.info("Injected workflow summary for patient %s", chat_ctx.patient_id) async def generate_workflow_summary( @@ -242,7 +192,7 @@ async def generate_workflow_summary( try: workflow = WorkflowSummary.model_validate_json(content) except Exception as e: - logger.error(f"Failed to parse workflow summary: {e}") + logger.error("Failed to parse workflow summary: %s", e) # Return fallback from data_models.patient_context_models import WorkflowStep return WorkflowSummary( @@ -256,7 +206,7 @@ async def generate_workflow_summary( try: workflow = WorkflowSummary.model_validate(content) except Exception as e: - logger.error(f"Failed to validate workflow summary: {e}") + logger.error("Failed to validate workflow summary: %s", e) from data_models.patient_context_models import WorkflowStep return WorkflowSummary( patient_id=patient_id, @@ -266,7 +216,7 @@ async def generate_workflow_summary( reasoning=f"Validation error: {str(e)[:30]}..." 
) else: - logger.warning(f"Unexpected workflow response type: {type(content)}") + logger.warning("Unexpected workflow response type: %s", type(content)) from data_models.patient_context_models import WorkflowStep return WorkflowSummary( patient_id=patient_id, @@ -276,11 +226,11 @@ async def generate_workflow_summary( reasoning="Unexpected response format" ) - logger.info(f"Generated workflow summary with {len(workflow.steps)} steps for patient {patient_id}") + logger.info("Generated workflow summary with %d steps for patient %s", len(workflow.steps), patient_id) return workflow except Exception as e: - logger.error(f"Workflow summary generation failed: {e}") + logger.error("Workflow summary generation failed: %s", e) from data_models.patient_context_models import WorkflowStep return WorkflowSummary( patient_id=patient_id, @@ -307,7 +257,7 @@ def create_group_chat( """ participant_configs = participants or app_ctx.all_agent_configs participant_names = [cfg.get("name") for cfg in participant_configs] - logger.info(f"Creating group chat with participants: {participant_names}") + logger.info("Creating group chat with participants: %s", participant_names) # Inject workflow summary before creating agents inject_workflow_summary(chat_ctx) @@ -371,18 +321,19 @@ def _create_agent(agent_config: dict): if model_supports_temperature(): temperature = agent_config.get("temperature", DEFAULT_MODEL_TEMP) - logger.info(f"Setting model temperature for agent {agent_config['name']} to {temperature}") + logger.info("Setting model temperature for agent %s to %s", agent_config['name'], temperature) else: temperature = None - logger.info( - f"Model does not support temperature. Setting temperature to None for agent {agent_config['name']}") + logger.info("Model does not support temperature. Setting temperature to None for agent %s", + agent_config['name']) + settings = AzureChatPromptExecutionSettings( function_choice_behavior=FunctionChoiceBehavior.Auto(), seed=42, temperature=temperature) arguments = KernelArguments(settings=settings) instructions = agent_config.get("instructions") if agent_config.get("facilitator") and instructions: instructions = instructions.replace( - "{{aiAgents}}", "\n\t\t".join([f"- {agent['name']}: {agent["description"]}" for agent in all_agents_config])) + "{{aiAgents}}", "\n\t\t".join([f"- {agent['name']}: {agent['description']}" for agent in all_agents_config])) return (CustomChatCompletionAgent(kernel=agent_kernel, name=agent_config["name"], @@ -393,13 +344,8 @@ def _create_agent(agent_config: dict): chat_ctx=chat_ctx, app_ctx=app_ctx)) - if model_supports_temperature(): - settings = AzureChatPromptExecutionSettings( - function_choice_behavior=FunctionChoiceBehavior.Auto(), seed=42, temperature=0, response_format=ChatRule) - else: - settings = AzureChatPromptExecutionSettings( - function_choice_behavior=FunctionChoiceBehavior.Auto(), seed=42, response_format=ChatRule) - arguments = KernelArguments(settings=settings) + # Create kernel for orchestrator functions (THIS WAS MISSING!) 
+ orchestrator_kernel = _create_kernel_with_chat_completion() # Find facilitator agent facilitator_agent = next((agent for agent in all_agents_config if agent.get("facilitator")), all_agents_config[0]) @@ -433,7 +379,7 @@ async def create_workflow_summary_if_needed(): # Store workflow summary in chat context chat_ctx.workflow_summary = workflow.model_dump_json() - logger.info(f"Generated new workflow summary for patient {chat_ctx.patient_id}") + logger.info("Generated new workflow summary for patient %s", chat_ctx.patient_id) selection_function = KernelFunctionFromPrompt( function_name="selection", @@ -510,10 +456,10 @@ def evaluate_termination(result): try: rule = ChatRule.model_validate_json(str(result.value[0])) should_terminate = rule.verdict == "yes" - logger.debug(f"Termination decision: {should_terminate} | Reasoning: {rule.reasoning}") + logger.debug("Termination decision: %s | Reasoning: %s", should_terminate, rule.reasoning) return should_terminate except Exception as e: - logger.error(f"Termination function error: {e}") + logger.error("Termination function error: %s", e) return False # Fallback to continue conversation def evaluate_selection(result): @@ -522,10 +468,10 @@ def evaluate_selection(result): rule = ChatRule.model_validate_json(str(result.value[0])) selected_agent = rule.verdict if rule.verdict in [agent["name"] for agent in all_agents_config] else facilitator - logger.debug(f"Selected agent: {selected_agent} | Reasoning: {rule.reasoning}") + logger.debug("Selected agent: %s | Reasoning: %s", selected_agent, rule.reasoning) return selected_agent except Exception as e: - logger.error(f"Selection function error: {e}") + logger.error("Selection function error: %s", e) return facilitator # Fallback to facilitator chat = AgentGroupChat( @@ -557,5 +503,5 @@ def evaluate_selection(result): ), ) - logger.info(f"Group chat created successfully with {len(agents)} agents") + logger.info("Group chat created successfully with %d agents", len(agents)) return (chat, chat_ctx) From ffad20fb0bf6b4af0a35a1592ae4b9ca62d9d53a Mon Sep 17 00:00:00 2001 From: vvenglaturmsft Date: Sat, 20 Sep 2025 21:57:58 +0000 Subject: [PATCH 15/20] updated - made some changes to patient context storage --- src/data_models/chat_context_accessor.py | 39 ++--- src/services/patient_context_service.py | 208 ++++++++++++----------- 2 files changed, 120 insertions(+), 127 deletions(-) diff --git a/src/data_models/chat_context_accessor.py b/src/data_models/chat_context_accessor.py index ae26211..1253e8d 100644 --- a/src/data_models/chat_context_accessor.py +++ b/src/data_models/chat_context_accessor.py @@ -195,19 +195,13 @@ def serialize(chat_ctx: ChatContext) -> str: "name": getattr(msg, 'name', None) }) - # Build patient contexts - patient_contexts = {} - for pid, pctx in chat_ctx.patient_contexts.items(): - patient_contexts[pid] = { - "patient_id": pctx.patient_id, - "facts": pctx.facts, - } + # REMOVED: patient_contexts serialization - use registry instead! 
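+        # Patient metadata is persisted by PatientContextRegistryAccessor in
+        # patient_context_registry.json rather than in this file; illustrative shape
+        # (see docs/patient_context.md added later in this series):
+        #   {"active_patient_id": "patient_15",
+        #    "patient_registry": {"patient_15": {"patient_id": "patient_15", "facts": {}}}}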
data = { "schema_version": CURRENT_SCHEMA_VERSION, "conversation_id": chat_ctx.conversation_id, "patient_id": chat_ctx.patient_id, - "patient_contexts": patient_contexts, + # REMOVED: "patient_contexts": patient_contexts, "workflow_summary": getattr(chat_ctx, 'workflow_summary', None), "chat_history": chat_messages, "patient_data": chat_ctx.patient_data, @@ -223,43 +217,37 @@ def serialize(chat_ctx: ChatContext) -> str: def deserialize(data_str: str) -> ChatContext: """Deserialize chat context from JSON with migration support.""" data = json.loads(data_str) - schema_version = data.get("schema_version", 1) # Default to v1 for legacy files + schema_version = data.get("schema_version", 1) context = ChatContext(data["conversation_id"]) context.patient_id = data.get("patient_id") - # Restore patient contexts - for pid, pc_data in data.get("patient_contexts", {}).items(): - context.patient_contexts[pid] = PatientContext( - patient_id=pc_data["patient_id"], - facts=pc_data.get("facts", {}), - ) + # REMOVED: patient_contexts restoration - load from registry instead! + # Legacy support for old files that still have patient_contexts + if "patient_contexts" in data: + logger.info("Found legacy patient_contexts in context file - consider migrating to registry-only") context.workflow_summary = data.get("workflow_summary") - # Process chat history with migration support + # Process chat history (unchanged) for msg_data in data.get("chat_history", []): - # Skip messages with insufficient data if "role" not in msg_data: - logger.warning(f"Skipping message with no role: {msg_data.keys()}") + logger.warning("Skipping message with no role: %s", msg_data.keys()) continue role = AuthorRole(msg_data["role"]) name = msg_data.get("name") - # Handle both legacy (v1) and new (v2) formats if "content" in msg_data: content_str = msg_data["content"] elif "items" in msg_data and msg_data["items"]: - # Legacy v1 format with items array content_str = msg_data["items"][0].get("text", "") else: - logger.warning(f"Skipping message with no content: {msg_data}") + logger.warning("Skipping message with no content: %s", msg_data) continue - # Skip tool messages with empty content (can't reconstruct) if role == AuthorRole.TOOL and not content_str: - logger.warning(f"Skipping empty tool message") + logger.warning("Skipping empty tool message") continue msg = ChatMessageContent( @@ -270,7 +258,7 @@ def deserialize(data_str: str) -> ChatContext: msg.name = name context.chat_history.messages.append(msg) - # Restore other fields + # Restore other fields (unchanged) context.patient_data = data.get("patient_data", []) context.display_blob_urls = data.get("display_blob_urls", []) context.display_image_urls = data.get("display_image_urls", []) @@ -278,8 +266,7 @@ def deserialize(data_str: str) -> ChatContext: context.output_data = data.get("output_data", []) context.healthcare_agents = data.get("healthcare_agents", {}) - # Log migration info if schema_version < CURRENT_SCHEMA_VERSION: - logger.info(f"Migrated context from schema v{schema_version} to v{CURRENT_SCHEMA_VERSION}") + logger.info("Migrated context from schema v%s to v%s", schema_version, CURRENT_SCHEMA_VERSION) return context diff --git a/src/services/patient_context_service.py b/src/services/patient_context_service.py index 81164f4..6d468f2 100644 --- a/src/services/patient_context_service.py +++ b/src/services/patient_context_service.py @@ -2,7 +2,7 @@ import logging import re import time -from datetime import datetime, timezone # Add timezone here +from datetime import 
datetime, timezone from typing import Literal from semantic_kernel.contents.chat_message_content import ChatMessageContent @@ -23,27 +23,50 @@ class PatientContextService: """ - Simplified patient context manager: - 1. Use analyzer to detect explicit patient IDs - 2. Fall back to storage if analyzer returns NONE - 3. Simple file-based patient isolation - 4. Kernel reset on patient switches + Registry-based patient context manager: + 1. Patient registry is the single source of truth for patient metadata + 2. Use analyzer to detect explicit patient IDs + 3. Fall back to storage if analyzer returns NONE + 4. Simple file-based patient isolation for chat history + 5. Kernel reset on patient switches """ def __init__(self, analyzer: PatientContextAnalyzer, registry_accessor=None, context_accessor=None): self.analyzer = analyzer self.registry_accessor = registry_accessor self.context_accessor = context_accessor - logger.info(f"PatientContextService initialized with storage fallback: {registry_accessor is not None}") + logger.info("PatientContextService initialized with storage fallback: %s", registry_accessor is not None) + + async def _ensure_patient_contexts_from_registry(self, chat_ctx: ChatContext): + """Ensure patient_contexts is populated from registry (single source of truth).""" + if not self.registry_accessor: + return + + try: + patient_registry, _ = await self.registry_accessor.read_registry(chat_ctx.conversation_id) + if patient_registry: + # Clear and rebuild from registry + chat_ctx.patient_contexts.clear() + for patient_id, registry_entry in patient_registry.items(): + chat_ctx.patient_contexts[patient_id] = PatientContext( + patient_id=patient_id, + facts=registry_entry.get("facts", {}) + ) + logger.debug("Loaded %d patients from registry", len(patient_registry)) + except Exception as e: + logger.warning("Failed to load patient contexts from registry: %s", e) async def decide_and_apply(self, user_text: str, chat_ctx: ChatContext) -> tuple[Decision, TimingInfo]: service_start_time = time.time() + # FIRST: Ensure we have latest patient contexts from registry + await self._ensure_patient_contexts_from_registry(chat_ctx) + # Skip analyzer for very short messages that are likely agent handoffs if user_text and len(user_text.strip()) <= 15 and not any( word in user_text.lower() for word in ["patient", "clear", "switch"] ): - logger.info(f"Skipping analyzer for short handoff message: '{user_text}'") + logger.info("Skipping analyzer for short handoff message: '%s'", user_text) if not chat_ctx.patient_id: fallback_start = time.time() @@ -61,7 +84,7 @@ async def decide_and_apply(self, user_text: str, chat_ctx: ChatContext) -> tuple ) return decision, timing - logger.info(f"Patient context decision for '{user_text}' | Current patient: {chat_ctx.patient_id}") + logger.info("Patient context decision for '%s' | Current patient: %s", user_text, chat_ctx.patient_id) # STEP 1: Run the analyzer with structured output decision_model, analyzer_duration = await self.analyzer.analyze_with_timing( @@ -74,8 +97,8 @@ async def decide_and_apply(self, user_text: str, chat_ctx: ChatContext) -> tuple pid = decision_model.patient_id logger.info( - f"Analyzer decision: {action} | Patient ID: {pid} | " - f"Reasoning: {decision_model.reasoning}" + "Analyzer decision: %s | Patient ID: %s | Reasoning: %s", + action, pid, decision_model.reasoning ) # STEP 2: Handle analyzer results @@ -92,7 +115,7 @@ async def decide_and_apply(self, user_text: str, chat_ctx: ChatContext) -> tuple elif action in ("ACTIVATE_NEW", 
"SWITCH_EXISTING"): if not pid or not PATIENT_ID_PATTERN.match(pid): - logger.warning(f"Invalid patient ID from analyzer: {pid}") + logger.warning("Invalid patient ID from analyzer: %s", pid) decision = "NEEDS_PATIENT_ID" else: decision = await self._activate_patient_with_registry(pid, chat_ctx) @@ -124,18 +147,21 @@ async def decide_and_apply(self, user_text: str, chat_ctx: ChatContext) -> tuple return decision, timing async def set_explicit_patient_context(self, patient_id: str, chat_ctx: ChatContext) -> bool: + # Ensure we have latest patient contexts from registry + await self._ensure_patient_contexts_from_registry(chat_ctx) + if not patient_id or not PATIENT_ID_PATTERN.match(patient_id): - logger.warning(f"Invalid patient ID format: {patient_id}") + logger.warning("Invalid patient ID format: %s", patient_id) return False if chat_ctx.patient_id and patient_id != chat_ctx.patient_id: - logger.info(f"Resetting kernel for explicit patient switch: {chat_ctx.patient_id} -> {patient_id}") + logger.info("Resetting kernel for explicit patient switch: %s -> %s", chat_ctx.patient_id, patient_id) self.analyzer.reset_kernel() restored = await self._try_restore_specific_patient(patient_id, chat_ctx) if not restored: chat_ctx.patient_contexts[patient_id] = PatientContext(patient_id=patient_id) - logger.info(f"Created new patient context: {patient_id}") + logger.info("Created new patient context: %s", patient_id) chat_ctx.patient_id = patient_id timing = TimingInfo(analyzer=0.0, storage_fallback=0.0, service=0.0) @@ -145,7 +171,7 @@ async def set_explicit_patient_context(self, patient_id: str, chat_ctx: ChatCont try: await self._update_registry_storage(chat_ctx) except Exception as e: - logger.warning(f"Failed to update registry storage: {e}") + logger.warning("Failed to update registry storage: %s", e) return True @@ -156,16 +182,21 @@ async def _ensure_system_message(self, chat_ctx: ChatContext, timing: TimingInfo if not chat_ctx.patient_id: return - # Get all session patients from registry - all_patient_ids = list(chat_ctx.patient_contexts.keys()) + # Get all session patients from registry (single source of truth) + all_patient_ids = [] if self.registry_accessor: try: patient_registry, _ = await self.registry_accessor.read_registry(chat_ctx.conversation_id) if patient_registry: all_patient_ids = list(patient_registry.keys()) - logger.debug(f"Using patient registry for system message: {all_patient_ids}") + logger.debug("Using patient registry for system message: %s", all_patient_ids) except Exception as e: - logger.warning(f"Failed to read patient registry for system message: {e}") + logger.warning("Failed to read patient registry for system message: %s", e) + # Fallback to in-memory contexts + all_patient_ids = list(chat_ctx.patient_contexts.keys()) + else: + # Fallback to in-memory contexts + all_patient_ids = list(chat_ctx.patient_contexts.keys()) # Use structured model for system message payload = PatientContextSystemMessage( @@ -175,21 +206,23 @@ async def _ensure_system_message(self, chat_ctx: ChatContext, timing: TimingInfo timing_sec=timing, ) - # Fix: Remove separators parameter - Pydantic doesn't support it - line = f"{PATIENT_CONTEXT_PREFIX}: {payload.model_dump_json()}" + line = "%s: %s" % (PATIENT_CONTEXT_PREFIX, payload.model_dump_json()) system_message = ChatMessageContent( role=AuthorRole.SYSTEM, items=[TextContent(text=line)] ) chat_ctx.chat_history.messages.insert(0, system_message) logger.debug( - f"Added structured patient context system message for {chat_ctx.patient_id} " - 
f"with {len(all_patient_ids)} session patients" + "Added structured patient context system message for %s with %d session patients", + chat_ctx.patient_id, len(all_patient_ids) ) async def _try_restore_from_storage(self, chat_ctx: ChatContext) -> bool: """Try to restore patient context from storage files.""" - logger.info(f"Attempting storage fallback for conversation: {chat_ctx.conversation_id}") + logger.info("Attempting storage fallback for conversation: %s", chat_ctx.conversation_id) + + # Load latest patient contexts from registry + await self._ensure_patient_contexts_from_registry(chat_ctx) # Priority 1: Check patient registry file (session registry) if self.registry_accessor: @@ -197,15 +230,7 @@ async def _try_restore_from_storage(self, chat_ctx: ChatContext) -> bool: patient_registry, active_patient_id = await self.registry_accessor.read_registry(chat_ctx.conversation_id) if patient_registry and active_patient_id: - logger.info(f"Found {len(patient_registry)} patients. Active: {active_patient_id}") - - # Restore all patient metadata from registry - for patient_id, registry_entry in patient_registry.items(): - chat_ctx.patient_contexts[patient_id] = PatientContext( - patient_id=patient_id, - facts=registry_entry.get("facts", {}) - ) - logger.info(f"Restored patient {patient_id} metadata") + logger.info("Found %d patients. Active: %s", len(patient_registry), active_patient_id) # Set active patient and load their isolated chat history if active_patient_id in patient_registry: @@ -219,33 +244,31 @@ async def _try_restore_from_storage(self, chat_ctx: ChatContext) -> bool: # Clear current history and load patient-specific history chat_ctx.chat_history.messages.clear() chat_ctx.chat_history.messages.extend(restored_chat_ctx.chat_history.messages) - logger.info(f"Loaded isolated chat history for: {active_patient_id}") + logger.info("Loaded isolated chat history for: %s", active_patient_id) except Exception as e: - logger.warning(f"Failed to load patient-specific chat history: {e}") + logger.warning("Failed to load patient-specific chat history: %s", e) - logger.info(f"Restored active patient: {active_patient_id}") + logger.info("Restored active patient: %s", active_patient_id) return True except Exception as e: - logger.warning(f"Failed to read patient registry: {e}") + logger.warning("Failed to read patient registry: %s", e) - # Priority 2: Check session context + # Priority 2: Check session context (legacy fallback) if self.context_accessor: try: restored_ctx = await self.context_accessor.read(chat_ctx.conversation_id) if restored_ctx and restored_ctx.patient_id: chat_ctx.patient_id = restored_ctx.patient_id - chat_ctx.patient_contexts = restored_ctx.patient_contexts or {} + # Note: Don't restore patient_contexts from file - use registry only chat_ctx.chat_history = restored_ctx.chat_history or chat_ctx.chat_history - logger.info(f"Restored session context: {restored_ctx.patient_id}") + logger.info("Restored session context: %s", restored_ctx.patient_id) return True except Exception as e: - logger.warning(f"Failed to read session context: {e}") + logger.warning("Failed to read session context: %s", e) logger.info("No patient context found in storage") return False -# Replace the incomplete _archive_all_and_recreate method with this complete implementation: - async def _archive_all_and_recreate(self, chat_ctx: ChatContext) -> None: """Archive all files to blob storage and recreate fresh files.""" logger.info("Archiving all contexts to blob storage for conversation: %s", 
chat_ctx.conversation_id) @@ -257,20 +280,20 @@ async def _archive_all_and_recreate(self, chat_ctx: ChatContext) -> None: archive_failures = [] - # Get ALL patients from registry - all_patient_ids = list(chat_ctx.patient_contexts.keys()) - - # Try to get the complete list from the patient registry + # Get ALL patients from registry (single source of truth) + all_patient_ids = [] if self.registry_accessor: try: patient_registry, _ = await self.registry_accessor.read_registry(chat_ctx.conversation_id) if patient_registry: all_patient_ids = list(patient_registry.keys()) - logger.info("Found %s patients in registry to archive: %s", len(all_patient_ids), all_patient_ids) + logger.info("Found %d patients in registry to archive: %s", len(all_patient_ids), all_patient_ids) else: logger.warning("No patient registry found for archival") except Exception as e: logger.warning("Failed to read patient registry for archival: %s", e) + # Fallback to in-memory contexts + all_patient_ids = list(chat_ctx.patient_contexts.keys()) # Create timestamped archive folder timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H-%M-%S-%f") @@ -280,21 +303,22 @@ async def _archive_all_and_recreate(self, chat_ctx: ChatContext) -> None: logger.info("Starting archive to folder: %s", archive_folder) # Archive session context (main conversation) - try: - await self.context_accessor.archive_to_folder(chat_ctx.conversation_id, None, archive_folder) - logger.info("Archived session context to %s", archive_folder) - except Exception as e: - logger.warning("Failed to archive session context: %s", e) - archive_failures.append("session") - - # Archive ALL patient contexts from registry - for patient_id in all_patient_ids: + if self.context_accessor: try: - await self.context_accessor.archive_to_folder(chat_ctx.conversation_id, patient_id, archive_folder) - logger.info("Archived patient context for %s to %s", patient_id, archive_folder) + await self.context_accessor.archive_to_folder(chat_ctx.conversation_id, None, archive_folder) + logger.info("Archived session context to %s", archive_folder) except Exception as e: - logger.warning("Failed to archive patient context for %s: %s", patient_id, e) - archive_failures.append(patient_id) + logger.warning("Failed to archive session context: %s", e) + archive_failures.append("session") + + # Archive ALL patient contexts from registry + for patient_id in all_patient_ids: + try: + await self.context_accessor.archive_to_folder(chat_ctx.conversation_id, patient_id, archive_folder) + logger.info("Archived patient context for %s to %s", patient_id, archive_folder) + except Exception as e: + logger.warning("Failed to archive patient context for %s: %s", patient_id, e) + archive_failures.append(patient_id) # Archive patient registry if self.registry_accessor: @@ -333,36 +357,18 @@ async def _activate_patient_with_registry(self, patient_id: str, chat_ctx: ChatC # Kernel reset when switching patients if chat_ctx.patient_id and patient_id != chat_ctx.patient_id: - logger.info(f"Resetting kernel for patient switch: {chat_ctx.patient_id} -> {patient_id}") + logger.info("Resetting kernel for patient switch: %s -> %s", chat_ctx.patient_id, patient_id) self.analyzer.reset_kernel() - # Load registry metadata for all patients - if self.registry_accessor: - try: - patient_registry, _ = await self.registry_accessor.read_registry(chat_ctx.conversation_id) - if patient_registry: - # Load metadata for all patients from registry - for pid, registry_entry in patient_registry.items(): - if pid not in 
chat_ctx.patient_contexts: - chat_ctx.patient_contexts[pid] = PatientContext( - patient_id=pid, - facts=registry_entry.get("facts", {}) - ) - except Exception as e: - logger.warning(f"Failed to load patient registry: {e}") + # Ensure we have latest registry data + await self._ensure_patient_contexts_from_registry(chat_ctx) # Check if we have registry data for this patient if self.registry_accessor: try: patient_registry, _ = await self.registry_accessor.read_registry(chat_ctx.conversation_id) if patient_id in patient_registry: - registry_entry = patient_registry[patient_id] - if patient_id not in chat_ctx.patient_contexts: - chat_ctx.patient_contexts[patient_id] = PatientContext( - patient_id=patient_id, - facts=registry_entry.get("facts", {}) - ) - + # Patient exists in registry chat_ctx.patient_id = patient_id # Load isolated chat history for this patient @@ -373,22 +379,22 @@ async def _activate_patient_with_registry(self, patient_id: str, chat_ctx: ChatC # Clear current history and load patient-specific history chat_ctx.chat_history.messages.clear() chat_ctx.chat_history.messages.extend(restored_chat_ctx.chat_history.messages) - logger.info(f"Loaded isolated chat history for: {patient_id}") + logger.info("Loaded isolated chat history for: %s", patient_id) except Exception as e: - logger.warning(f"Failed to load patient-specific chat history: {e}") + logger.warning("Failed to load patient-specific chat history: %s", e) - logger.info(f"Switched to existing patient from registry: {patient_id}") + logger.info("Switched to existing patient from registry: %s", patient_id) # CRITICAL: Update registry to mark this patient as currently active await self._update_registry_storage(chat_ctx) return "SWITCH_EXISTING" except Exception as e: - logger.warning(f"Failed to check registry for {patient_id}: {e}") + logger.warning("Failed to check registry for %s: %s", patient_id, e) # Switch to existing in memory - PRESERVE CHAT HISTORY if patient_id in chat_ctx.patient_contexts: chat_ctx.patient_id = patient_id - logger.info(f"Switched to existing patient (preserving chat history): {patient_id}") + logger.info("Switched to existing patient (preserving chat history): %s", patient_id) # Update registry when switching to existing patient await self._update_registry_storage(chat_ctx) return "SWITCH_EXISTING" @@ -396,7 +402,7 @@ async def _activate_patient_with_registry(self, patient_id: str, chat_ctx: ChatC # New blank patient context - PRESERVE CHAT HISTORY chat_ctx.patient_contexts[patient_id] = PatientContext(patient_id=patient_id) chat_ctx.patient_id = patient_id - logger.info(f"Created new patient context (preserving chat history): {patient_id}") + logger.info("Created new patient context (preserving chat history): %s", patient_id) # CRITICAL: Update registry storage for new patient await self._update_registry_storage(chat_ctx) @@ -410,7 +416,7 @@ async def _update_registry_storage(self, chat_ctx: ChatContext): current_patient = chat_ctx.patient_contexts.get(chat_ctx.patient_id) if not current_patient: - logger.warning(f"No patient context found for {chat_ctx.patient_id}") + logger.warning("No patient context found for %s", chat_ctx.patient_id) return # Simple registry entry @@ -427,9 +433,9 @@ async def _update_registry_storage(self, chat_ctx: ChatContext): registry_entry, chat_ctx.patient_id # Set as active patient ) - logger.info(f"Updated registry storage for {chat_ctx.patient_id}") + logger.info("Updated registry storage for %s", chat_ctx.patient_id) except Exception as e: - logger.warning(f"Failed 
to update registry storage: {e}") + logger.warning("Failed to update registry storage: %s", e) def _remove_system_message(self, chat_ctx: ChatContext): """Remove patient context system messages.""" @@ -458,13 +464,13 @@ def _remove_system_message(self, chat_ctx: ChatContext): messages_to_keep.append(m) if removed_count > 0: - logger.debug(f"Removed {removed_count} system messages for {current_patient_id}") + logger.debug("Removed %d system messages for %s", removed_count, current_patient_id) chat_ctx.chat_history.messages = messages_to_keep async def _try_restore_specific_patient(self, patient_id: str, chat_ctx: ChatContext) -> bool: """Try to restore specific patient from storage.""" - # Try registry storage first + # Try registry storage first (single source of truth) if self.registry_accessor: try: patient_registry, _ = await self.registry_accessor.read_registry(chat_ctx.conversation_id) @@ -474,24 +480,24 @@ async def _try_restore_specific_patient(self, patient_id: str, chat_ctx: ChatCon patient_id=patient_id, facts=registry_entry.get("facts", {}) ) - logger.info(f"Restored {patient_id} from registry storage") + logger.info("Restored %s from registry storage", patient_id) return True except Exception as e: - logger.warning(f"Failed to restore {patient_id} from registry: {e}") + logger.warning("Failed to restore %s from registry: %s", patient_id, e) - # Try patient-specific context file + # Legacy fallback: Try patient-specific context file (deprecated) if self.context_accessor: try: stored_ctx = await self.context_accessor.read(chat_ctx.conversation_id, patient_id) - if stored_ctx and patient_id in stored_ctx.patient_contexts: + if stored_ctx and hasattr(stored_ctx, 'patient_contexts') and patient_id in stored_ctx.patient_contexts: stored_context = stored_ctx.patient_contexts[patient_id] chat_ctx.patient_contexts[patient_id] = PatientContext( patient_id=patient_id, facts=getattr(stored_context, 'facts', {}) ) - logger.info(f"Restored {patient_id} from patient-specific context") + logger.info("Restored %s from patient-specific context (legacy)", patient_id) return True except Exception as e: - logger.warning(f"Failed to restore {patient_id} from context: {e}") + logger.warning("Failed to restore %s from context: %s", patient_id, e) return False From 7fa10c66defa575804f73ee337bc2fd8522e51a7 Mon Sep 17 00:00:00 2001 From: vvenglaturmsft Date: Mon, 22 Sep 2025 20:38:16 +0000 Subject: [PATCH 16/20] updated documentation for patient context for review --- docs/patient_context.md | 883 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 883 insertions(+) create mode 100644 docs/patient_context.md diff --git a/docs/patient_context.md b/docs/patient_context.md new file mode 100644 index 0000000..a07e81d --- /dev/null +++ b/docs/patient_context.md @@ -0,0 +1,883 @@ +# Healthcare Agent Orchestrator: Patient Context Management System + +## Table of Contents +1. [System Overview](#system-overview) +2. [Architecture Principles](#architecture-principles) +3. [Core Data Models](#core-data-models) +4. [Storage Layer](#storage-layer) +5. [Service Layer](#service-layer) +6. [AI Analysis Engine](#ai-analysis-engine) +7. [API Integration](#api-integration) +8. [Agent Integration](#agent-integration) +9. [Data Flow](#data-flow) +10. [File Structure](#file-structure) +11. [Configuration](#configuration) +12. 
[Troubleshooting](#troubleshooting) + +--- + +## System Overview + +The Healthcare Agent Orchestrator implements a sophisticated **patient context management system** that enables patient-specific conversation isolation within a multi-agent AI healthcare platform. This system ensures that conversations about different patients remain completely separate while maintaining the ability to switch between patients seamlessly. + +### Key Features +- **Patient-Specific Isolation**: Each patient has their own isolated conversation history +- **Intelligent Context Switching**: AI-powered analysis determines when to switch patients or create new patient contexts +- **Registry-Based Metadata Management**: Centralized patient registry as single source of truth +- **Comprehensive Archival**: Full archival capabilities + + +--- + +## Architecture Principles + +### 1. Registry as Single Source of Truth +- **Patient Registry**: Centralized JSON file containing all patient metadata +- **No Duplication**: Patient metadata stored only in registry, not in individual chat files +- **Authoritative**: All patient information queries go through registry + +### 2. Patient-Isolated Chat History +- **Separate Files**: Each patient has their own chat history file +- **Complete Isolation**: No cross-patient information leakage +- **History Loading**: Only relevant patient history loaded per conversation + +### 3. Intelligent Context Switching +- **AI Analysis**: Azure OpenAI analyzes user input to determine context switches +- **Structured Output**: Reliable JSON schema-based decisions +- **Pattern Recognition**: Identifies patient IDs, clear commands, and context switches + +### 4. Kernel State Isolation +- **AI State Reset**: Semantic Kernel reset between patients to prevent contamination +- **Clean Context**: Each patient interaction starts with clean AI state +- **Memory Isolation**: No AI memory bleeding between patients + +### 5. Comprehensive Audit Trail +- **Timestamped Archives**: Complete archival with timestamps +- **Compliance Ready**: Full audit trail for healthcare compliance +- **Recovery Capability**: Ability to restore any previous state + +--- + +## Core Data Models + +### ChatContext (`/src/data_models/chat_context.py`) + +The central in-memory container for conversation state. + +```python +class ChatContext: + def __init__(self, conversation_id: str): + self.conversation_id = conversation_id + self.chat_history = ChatHistory() # Semantic Kernel chat history + + # Patient context fields + self.patient_id = None # Currently active patient + self.patient_contexts: Dict[str, PatientContext] = {} # In-memory patient cache + self.workflow_summary: Optional[str] = None + + # Legacy fields (preserved for compatibility) + self.patient_data = [] + self.display_blob_urls = [] + self.display_clinical_trials = [] + self.output_data = [] + self.healthcare_agents = {} +``` + +**Key Responsibilities**: +- Maintains current conversation state +- Tracks active patient (`patient_id`) +- Caches patient metadata from registry (`patient_contexts`) +- Contains conversation history (session or patient-specific) + +### PatientContext (`/src/data_models/chat_context.py`) + +Minimal per-patient metadata container. 
+ +```python +@dataclass +class PatientContext: + patient_id: str + facts: Dict[str, Any] = field(default_factory=dict) +``` + +**Purpose**: +- Stores patient-specific metadata +- Lightweight container for patient facts +- Used both in-memory and persisted to registry + +### PatientContextSystemMessage (`/src/data_models/patient_context_models.py`) + +Structured system message for agent context. + +```python +class PatientContextSystemMessage(KernelBaseModel): + conversation_id: str + patient_id: str + all_patient_ids: List[str] + timing_sec: TimingInfo +``` + +**Purpose**: +- Injects patient context into agent conversations +- Provides structured patient information to AI agents +- Includes performance timing data + +--- + +## Storage Layer + +### File Structure in Blob Storage + +``` +{conversation_id}/ +├── session_context.json # General conversation history +├── patient_{patient_id}_context.json # Patient-specific conversation history +├── patient_context_registry.json # Patient metadata registry (SOURCE OF TRUTH) +└── archive/ # Timestamped archives + └── {timestamp}/ + ├── {conversation_id}/ + │ ├── {timestamp}_session_archived.json + │ └── {timestamp}_patient_{id}_archived.json + └── {timestamp}_patient_context_registry_archived.json +``` + +### PatientContextRegistryAccessor (`/src/data_models/patient_context_accessor.py`) + +**Single Source of Truth for Patient Metadata** + +#### Registry File Format +```json +{ + "conversation_id": "uuid-here", + "active_patient_id": "patient_15", + "patient_registry": { + "patient_4": { + "patient_id": "patient_4", + "facts": {}, + "conversation_id": "uuid-here", + "last_updated": "2025-09-20T18:28:18.995167+00:00" + }, + "patient_15": { + "patient_id": "patient_15", + "facts": {}, + "conversation_id": "uuid-here", + "last_updated": "2025-09-20T18:31:26.859910+00:00" + } + }, + "last_updated": "2025-09-20T18:31:26.859935" +} +``` + +#### Key Methods + +##### `read_registry(conversation_id) -> (patient_registry, active_patient_id)` +```python +async def read_registry(self, conversation_id: str) -> Tuple[Dict[str, Dict], Optional[str]]: +``` +- **Purpose**: Loads patient registry from blob storage +- **Returns**: Tuple of (patient_registry_dict, active_patient_id) +- **Error Handling**: Returns empty registry if file not found + +##### `write_registry(conversation_id, patient_registry, active_patient_id)` +```python +async def write_registry(self, conversation_id: str, patient_registry: Dict[str, Dict], active_patient_id: str = None): +``` +- **Purpose**: Persists complete patient registry +- **Operation**: Overwrites entire registry file +- **Includes**: Conversation metadata, timestamps, patient data + +##### `update_patient_registry(conversation_id, patient_id, registry_entry, active_patient_id)` +```python +async def update_patient_registry(self, conversation_id: str, patient_id: str, registry_entry: Dict, active_patient_id: str = None): +``` +- **Purpose**: Updates specific patient entry +- **Flow**: Read → Update → Write +- **Atomic**: Ensures consistency during updates + +##### `archive_registry(conversation_id)` +```python +async def archive_registry(self, conversation_id: str) -> None: +``` +- **Purpose**: Archives registry before clearing +- **Creates**: Timestamped archive file +- **Cleanup**: Deletes original registry + +### ChatContextAccessor (`/src/data_models/chat_context_accessor.py`) + +**Manages Chat History Files** + +#### File Path Strategy +```python +def get_blob_path(self, conversation_id: str, patient_id: str = None) -> 
str: + if patient_id: + return f"{conversation_id}/patient_{patient_id}_context.json" + return f"{conversation_id}/session_context.json" +``` + +#### Key Methods + +##### `read(conversation_id, patient_id=None) -> ChatContext` +```python +async def read(self, conversation_id: str, patient_id: str = None) -> ChatContext: +``` +- **Purpose**: Loads chat context from appropriate file +- **Logic**: Patient-specific if patient_id provided, session otherwise +- **Registry Independence**: Does NOT restore patient metadata from files + +##### `write(chat_ctx: ChatContext)` +```python +async def write(self, chat_ctx: ChatContext) -> None: +``` +- **Purpose**: Saves chat context to appropriate file +- **File Selection**: Based on `chat_ctx.patient_id` +- **Content**: Chat history + metadata (NO patient_contexts) + +##### `serialize(chat_ctx: ChatContext) -> str` +```python +@staticmethod +def serialize(chat_ctx: ChatContext) -> str: +``` +- **Purpose**: Converts ChatContext to JSON +- **Key Change**: **REMOVED patient_contexts serialization** +- **Content**: + ```json + { + "schema_version": 2, + "conversation_id": "uuid", + "patient_id": "patient_15", + "workflow_summary": null, + "chat_history": [...], + "patient_data": [], + "display_blob_urls": [], + "output_data": [] + } + ``` + +##### `deserialize(data_str: str) -> ChatContext` +```python +@staticmethod +def deserialize(data_str: str) -> ChatContext: +``` +- **Purpose**: Loads ChatContext from JSON +- **Legacy Support**: Handles old files with patient_contexts +- **Migration**: Logs legacy format detection + +##### `archive_to_folder(conversation_id, patient_id, archive_folder)` +```python +async def archive_to_folder(self, conversation_id: str, patient_id: str, archive_folder: str) -> None: +``` +- **Purpose**: Archives context to specific folder +- **Used By**: Clear operations for compliance +- **Structure**: `{archive_folder}/{conversation_id}/{timestamp}_{type}_archived.json` + +--- + +## Service Layer + +### PatientContextService (`/src/services/patient_context_service.py`) + +**Central Orchestrator for Patient Context Management** + +#### Initialization +```python +def __init__(self, analyzer: PatientContextAnalyzer, registry_accessor=None, context_accessor=None): + self.analyzer = analyzer # AI decision engine + self.registry_accessor = registry_accessor # Patient metadata storage + self.context_accessor = context_accessor # Chat history storage +``` + +#### Core Methods + +##### `_ensure_patient_contexts_from_registry(chat_ctx: ChatContext)` +```python +async def _ensure_patient_contexts_from_registry(self, chat_ctx: ChatContext): +``` +**Purpose**: Synchronizes in-memory patient contexts with registry + +**Critical Operation**: +1. Reads patient registry from blob storage +2. **Clears all in-memory patient contexts** +3. Rebuilds `patient_contexts` from registry data +4. Ensures registry is authoritative source + +**When Called**: Beginning of all major operations + +##### `decide_and_apply(user_text: str, chat_ctx: ChatContext) -> (Decision, TimingInfo)` +```python +async def decide_and_apply(self, user_text: str, chat_ctx: ChatContext) -> tuple[Decision, TimingInfo]: +``` +**Purpose**: Main entry point for patient context processing + +**Complete Flow**: +1. **Registry Sync**: `await self._ensure_patient_contexts_from_registry(chat_ctx)` +2. **Short Message Bypass**: Skip AI analysis for agent handoffs (≤15 chars) +3. **AI Analysis**: Use PatientContextAnalyzer for structured decision +4. 
**Decision Routing**: Route to appropriate action method +5. **System Message**: Inject patient context for agents +6. **Timing**: Return detailed performance metrics + +**Possible Decisions**: +- `NONE`: No patient context needed +- `UNCHANGED`: Continue with current patient +- `NEW_BLANK`: Created new patient context +- `SWITCH_EXISTING`: Switched to existing patient +- `CLEAR`: Cleared all patient contexts +- `RESTORED_FROM_STORAGE`: Restored from storage +- `NEEDS_PATIENT_ID`: Invalid/missing patient ID + +##### `set_explicit_patient_context(patient_id: str, chat_ctx: ChatContext) -> bool` +```python +async def set_explicit_patient_context(self, patient_id: str, chat_ctx: ChatContext) -> bool: +``` +**Purpose**: Directly sets specific patient context (bypass AI analysis) + +**Flow**: +1. **Registry Sync**: Ensure latest registry data +2. **Validation**: Check patient ID format (`patient_[0-9]+`) +3. **Kernel Reset**: Reset AI kernel when switching patients +4. **Restoration**: Try to restore patient from storage +5. **Creation**: Create new patient if not found +6. **System Message**: Inject patient context +7. **Registry Update**: Persist to registry + +##### `_ensure_system_message(chat_ctx: ChatContext, timing: TimingInfo)` +```python +async def _ensure_system_message(self, chat_ctx: ChatContext, timing: TimingInfo): +``` +**Purpose**: Injects structured patient context into chat history + +**Operation**: +1. **Cleanup**: Remove existing patient context system messages +2. **Registry Query**: Get patient list from registry (authoritative) +3. **Structured Payload**: Create `PatientContextSystemMessage` +4. **Message Injection**: Insert at position 0 in chat history + +**System Message Format**: +``` +PATIENT_CONTEXT_JSON: {"conversation_id":"uuid","patient_id":"patient_15","all_patient_ids":["patient_4","patient_15"],"timing_sec":{...}} +``` + +##### `_try_restore_from_storage(chat_ctx: ChatContext) -> bool` +```python +async def _try_restore_from_storage(self, chat_ctx: ChatContext) -> bool: +``` +**Purpose**: Fallback restoration from storage files + +**Priority Order**: +1. **Patient Registry** (Primary): Authoritative patient metadata +2. **Session Context** (Fallback): Legacy conversation restoration + +**Registry Restoration**: +1. Load patient registry +2. Restore ALL patient metadata to memory +3. Set active patient from registry +4. Load patient-specific isolated chat history +5. Replace current history with patient history + +##### `_archive_all_and_recreate(chat_ctx: ChatContext)` +```python +async def _archive_all_and_recreate(self, chat_ctx: ChatContext) -> None: +``` +**Purpose**: Complete archival and clearing + +**Comprehensive Flow**: +1. **Kernel Reset**: Clear AI state +2. **Patient Discovery**: Get ALL patients from registry +3. **Archive Folder**: Create timestamped folder +4. **Session Archive**: Archive main conversation +5. **Patient Archives**: Archive each patient's context +6. **Registry Archive**: Archive patient registry +7. 
**Memory Clear**: Clear all in-memory state + + +**Archive Structure**: +``` +archive/2025-09-20T18-31-26-123456/ + {conversation_id}/ + 20250920T183126_session_archived.json + 20250920T183126_patient_patient_4_archived.json + 20250920T183126_patient_patient_15_archived.json + 20250920T183126_patient_context_registry_archived.json +``` + +##### `_activate_patient_with_registry(patient_id: str, chat_ctx: ChatContext) -> Decision` +```python +async def _activate_patient_with_registry(self, patient_id: str, chat_ctx: ChatContext) -> Decision: +``` +**Purpose**: Activates specific patient with full registry integration + +**Decision Logic**: +1. **Validation**: Check patient ID +2. **Same Patient**: Return `UNCHANGED` if already active +3. **Kernel Reset**: Reset AI when switching +4. **Registry Check**: Look for patient in registry +5. **History Loading**: Load patient-specific chat history +6. **Registry Update**: Mark as active patient + +**Return Values**: +- `SWITCH_EXISTING`: Patient found in registry/memory +- `NEW_BLANK`: New patient created +- `UNCHANGED`: Same patient already active + +##### `_update_registry_storage(chat_ctx: ChatContext)` +```python +async def _update_registry_storage(self, chat_ctx: ChatContext): +``` +**Purpose**: Persists current patient state to registry + +**Registry Entry**: +```json +{ + "patient_id": "patient_15", + "facts": {}, + "conversation_id": "uuid" +} +``` + +**Operation**: Calls registry accessor to update/add patient and set as active + +##### `_remove_system_message(chat_ctx: ChatContext)` +```python +def _remove_system_message(self, chat_ctx: ChatContext): +``` +**Purpose**: Removes old patient context system messages + +**Logic**: +1. Iterate through chat history +2. Identify system messages with `PATIENT_CONTEXT_JSON` prefix +3. Parse JSON to check patient_id +4. Remove messages for current patient +5. Preserve malformed messages (safe default) + +##### `_try_restore_specific_patient(patient_id: str, chat_ctx: ChatContext) -> bool` +```python +async def _try_restore_specific_patient(self, patient_id: str, chat_ctx: ChatContext) -> bool: +``` +**Purpose**: Restores specific patient metadata + +**Priority**: +1. **Registry Storage** (Primary): Authoritative source +2. **Legacy Context File** (Fallback): Migration support + +--- + +## AI Analysis Engine + +### PatientContextAnalyzer (`/src/services/patient_context_analyzer.py`) + +**Intelligent Decision Engine for Patient Context** + +#### Core Capabilities +- **Natural Language Understanding**: Analyzes user input for patient context +- **Structured Output**: Uses Semantic Kernel's JSON schema generation +- **Pattern Recognition**: Identifies patient IDs, clear commands, switches +- **State Isolation**: Kernel reset prevents AI contamination + +#### System Prompt Strategy +```python +system_prompt = f"""You are a patient context analyzer for healthcare conversations. + +TASK: Analyze user input and decide the appropriate patient context action. + +AVAILABLE ACTIONS: +- NONE: No patient context needed (general questions, greetings, system commands) +- CLEAR: User wants to clear/reset all patient context +- ACTIVATE_NEW: User mentions a new patient ID not in the known patient list +- SWITCH_EXISTING: User wants to switch to a different known patient +- UNCHANGED: Continue with current patient context + +CURRENT STATE: +- Active patient ID: {prior_patient_id or "None"} +- Known patient IDs: {known_patient_ids} + +ANALYSIS RULES: +1. 
Extract patient_id ONLY if action is ACTIVATE_NEW or SWITCH_EXISTING +2. Patient IDs typically follow "patient_X" format or are explicit medical record numbers +3. For CLEAR/NONE/UNCHANGED actions, set patient_id to null +4. Prioritize explicit patient mentions over implicit context +5. Keep reasoning brief and specific (max 50 words) +""" +``` + +#### Structured Output Model +```python +class PatientContextDecision(KernelBaseModel): + action: AnalyzerAction + patient_id: Optional[str] + reasoning: str +``` + +#### Example Decisions + +| User Input | Action | Patient ID | Reasoning | +|------------|--------|------------|-----------| +| `"Hello"` | `NONE` | `null` | General greeting, no patient context needed | +| `"clear"` | `CLEAR` | `null` | Explicit clear command detected | +| `"switch to patient_4"` | `SWITCH_EXISTING` or `ACTIVATE_NEW` | `"patient_4"` | Explicit patient switch request | +| `"tumor board for patient_15"` | `ACTIVATE_NEW` or `SWITCH_EXISTING` | `"patient_15"` | Healthcare task with patient mention | +| `"back to you Orchestrator"` | `UNCHANGED` | `null` | Agent handoff, maintain context | + +#### Key Methods + +##### `analyze_decision(user_text, prior_patient_id, known_patient_ids) -> PatientContextDecision` +```python +async def analyze_decision( + self, + user_text: str, + prior_patient_id: Optional[str] = None, + known_patient_ids: Optional[list[str]] = None, +) -> PatientContextDecision: +``` + +**Azure OpenAI Integration**: +```python +execution_settings = AzureChatPromptExecutionSettings( + service_id="patient_context_analyzer", + max_tokens=200, + temperature=0.1, + response_format=PatientContextDecision, # Automatic JSON schema +) +``` + +##### `reset_kernel()` +```python +def reset_kernel(self): +``` +**Purpose**: Prevents AI state contamination between patients +- Recreates entire Kernel instance +- Reinitializes AzureChatCompletion service +- Ensures clean AI state for each patient + +--- + +## API Integration + +### WebSocket Chat Route (`/src/routes/api/chats.py`) + +**Main User Interface for Patient Context System** + +#### Complete Flow Analysis + +##### `websocket_chat_endpoint(websocket: WebSocket, chat_id: str)` + +**Step-by-Step Patient Context Integration**: + +1. **Session Context Loading**: +```python +chat_context = await data_access.chat_context_accessor.read(chat_id, None) +``` +- Always starts with session context +- Establishes base conversation state + +2. **Clear Command Processing**: +```python +if await _handle_clear_command(content, chat_context): +``` +- **Priority**: Processed BEFORE patient context analysis +- **Comprehensive**: Archives all contexts and registry +- **Response**: Immediate clear confirmation + +3. **Patient Context Decision**: +```python +decision, timing = await patient_context_service.decide_and_apply(content, chat_context) +``` +- **AI-Powered**: Uses PatientContextAnalyzer +- **Registry Integration**: Syncs with patient registry +- **Performance**: Captures timing metrics + +4. **Error Handling**: +```python +if decision == "NEEDS_PATIENT_ID": + # Send helpful error message +``` +- **User Guidance**: Explains patient ID format +- **Format Example**: `'patient_X' (e.g., 'patient_4')` + +5. 
**Patient History Isolation**: +```python +if chat_context.patient_id: + isolated_ctx = await data_access.chat_context_accessor.read(chat_id, chat_context.patient_id) + chat_context.chat_history = isolated_ctx.chat_history +``` +- **Critical Isolation**: Loads ONLY active patient's history +- **Complete Replacement**: Overwrites session history +- **Fresh Start**: Empty history for new patients + +6. **Agent Orchestration**: +```python +chat, chat_context = group_chat.create_group_chat(app_context, chat_context) +``` +- **Multi-Agent**: Creates healthcare agent conversation +- **Context Injection**: System message contains patient context + +7. **Response Enhancement**: +```python +response_content_with_pc = _append_pc_ctx_display(response.content, chat_context) +``` +- **UI Enhancement**: Adds patient context display +- **User Visibility**: Shows active patient and session patients + +8. **Context Persistence**: +```python +await data_access.chat_context_accessor.write(chat_context) +``` +- **Automatic**: Saves to appropriate file (patient/session) +- **Registry Sync**: Patient metadata already updated + +#### Clear Command Handler + +##### `_handle_clear_command(content: str, chat_context) -> bool` + +**Comprehensive Clear Operation**: + +1. **Command Recognition**: + - `"clear"` + - `"clear patient"` + - `"clear context"` + - `"clear patient context"` + +2. **Archival Process**: +```python +# Create timestamped archive folder +timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H-%M-%S-%f") +archive_folder = f"archive/{timestamp}" + +# Archive session context +await data_access.chat_context_accessor.archive_to_folder(...) + +# Archive ALL patient contexts from registry +patient_registry, _ = await patient_context_service.registry_accessor.read_registry(...) +for patient_id in patient_registry.keys(): + await data_access.chat_context_accessor.archive_to_folder(...) + +# Archive patient registry +await patient_context_service.registry_accessor.archive_registry(...) +``` + +3. **Memory Clear**: +```python +chat_context.patient_context = None +chat_context.patient_contexts.clear() +chat_context.chat_history.messages.clear() +chat_context.patient_id = None +``` + + +#### Patient Context Display + +##### `_append_pc_ctx_display(base: str, chat_context) -> str` + +**UI Enhancement for Patient Visibility**: + +1. **System Message Extraction**: Gets patient context JSON +2. **JSON Parsing**: Parses structured payload +3. 
**Markdown Formatting**: Creates user-friendly display + +**Display Format**: +```markdown +--- +*PT_CTX:* +- **Patient ID:** `patient_15` +- **Conversation ID:** `uuid` +- **Session Patients:** `patient_4`, `patient_15 (active)` +``` + +--- + +## Agent Integration + +### How Healthcare Agents See Patient Context + +#### System Message Injection +Every agent conversation includes structured patient context: + +```json +{ + "role": "system", + "content": "PATIENT_CONTEXT_JSON: {\"conversation_id\":\"uuid\",\"patient_id\":\"patient_15\",\"all_patient_ids\":[\"patient_4\",\"patient_15\"],\"timing_sec\":{...}}" +} +``` + +#### Agent Awareness +- **Active Patient**: Agents know current patient +- **Session Patients**: Agents see all patients in session +- **Conversation Scope**: Agents understand context boundaries +- **Performance Data**: Timing metrics available + +#### Isolation Benefits +- **Clean History**: Only relevant patient conversation +- **Context Switching**: Clear boundaries between patients +- **Privacy Protection**: No cross-patient information leakage +- **Compliance**: Full audit trail per patient + +#### Group Chat Integration (`/src/group_chat.py`) + +**Patient Context in Multi-Agent Conversations**: + +1. **Context Injection**: +```python +inject_workflow_summary(chat_ctx) +``` + +2. **Agent Creation**: Each agent gets fresh kernel with patient context + +3. **Selection Strategy**: AI-powered agent selection with patient awareness + +4. **Termination Strategy**: Context-aware conversation termination + +--- + +## Data Flow + +### Complete Patient Context Lifecycle + +```mermaid +graph TD + A[User Message] --> B[PatientContextAnalyzer] + B --> C[Structured Decision] + C --> D[PatientContextService] + D --> E[Registry Update] + D --> F[History Loading] + E --> G[Agent Orchestration] + F --> G + G --> H[Response Generation] + H --> I[Context Persistence] + I --> J[User Response] +``` + +### Detailed Flow Steps + +1. **User Input**: Message received via WebSocket +2. **AI Analysis**: PatientContextAnalyzer determines action +3. **Service Processing**: PatientContextService executes decision +4. **Registry Sync**: Patient metadata updated in registry +5. **History Loading**: Appropriate chat history loaded +6. **Agent Orchestration**: Multi-agent conversation with context +7. **Response Generation**: AI agents generate responses +8. **Context Persistence**: State saved to appropriate files +9. 
**User Display**: Enhanced response with patient context + +### Storage Operations + +#### Read Operations +```python +# Registry (authoritative) +patient_registry, active_id = await registry_accessor.read_registry(conversation_id) + +# Chat History (patient-specific or session) +chat_ctx = await context_accessor.read(conversation_id, patient_id) +``` + +#### Write Operations +```python +# Registry Update +await registry_accessor.update_patient_registry(conversation_id, patient_id, entry, active_id) + +# Chat History Save +await context_accessor.write(chat_ctx) +``` + +#### Archive Operations +```python +# Complete Archival +await context_accessor.archive_to_folder(conversation_id, patient_id, archive_folder) +await registry_accessor.archive_registry(conversation_id) +``` + +--- + +## File Structure + +### Project Organization + +``` +src/ +├── services/ +│ ├── patient_context_service.py # Main orchestrator +│ └── patient_context_analyzer.py # AI decision engine +├── data_models/ +│ ├── chat_context.py # Core data models +│ ├── chat_context_accessor.py # Chat history storage +│ ├── patient_context_accessor.py # Registry storage +│ └── patient_context_models.py # Structured output models +├── routes/api/ +│ └── chats.py # WebSocket API integration +└── group_chat.py # Multi-agent orchestration +``` + +### Configuration Files + +#### Environment Variables +```env +AZURE_OPENAI_DEPLOYMENT_NAME=gpt-4o +AZURE_OPENAI_API_VERSION=2025-04-01-preview +PATIENT_CONTEXT_DECIDER_DEPLOYMENT_NAME=gpt-4o # Optional override +``` + +#### Agent Configuration +```json +{ + "agents": [ + { + "name": "Orchestrator", + "facilitator": true, + "instructions": "You are the healthcare orchestrator...", + "tools": [...] + } + ] +} +``` + +--- + +## Configuration + +### Service Initialization + +```python +# In routes/api/chats.py +analyzer = PatientContextAnalyzer(token_provider=app_context.cognitive_services_token_provider) +patient_context_service = PatientContextService( + analyzer=analyzer, + registry_accessor=app_context.data_access.patient_context_registry_accessor, + context_accessor=app_context.data_access.chat_context_accessor +) +``` + +### Azure OpenAI Setup + +```python +# In PatientContextAnalyzer +self._kernel.add_service( + AzureChatCompletion( + service_id="patient_context_analyzer", + deployment_name=self.deployment_name, + api_version=self.api_version, + ad_token_provider=token_provider, + ) +) +``` + +### Storage Configuration + +```python +# In DataAccess +patient_context_registry_accessor = PatientContextRegistryAccessor( + blob_service_client=blob_service_client, + container_name="chat-sessions" +) + +chat_context_accessor = ChatContextAccessor( + blob_service_client=blob_service_client, + container_name="chat-sessions", + cognitive_services_token_provider=token_provider +) +``` + +--- + +## Conclusion + +The Healthcare Agent Orchestrator's patient context management system provides a robust, compliant, and scalable solution for managing patient-specific AI conversations. By implementing registry-based metadata management, intelligent context switching, and comprehensive archival capabilities, the system ensures both user experience and regulatory compliance in healthcare AI applications. + +The architecture's separation of concerns, structured decision making, and comprehensive error handling make it suitable for production healthcare environments while maintaining the flexibility needed for complex multi-agent AI interactions. 
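
As a concrete illustration of the configuration and turn flow documented above, here is a minimal sketch of one turn wired end to end. It assumes only the constructor and method signatures already shown in this document (`PatientContextAnalyzer`, `PatientContextService.decide_and_apply`, and the `ChatContextAccessor.read`/`write` calls); the `handle_turn` helper, its return strings, and the step comments are illustrative, not part of the codebase.

```python
from services.patient_context_analyzer import PatientContextAnalyzer
from services.patient_context_service import PatientContextService


async def handle_turn(app_context, chat_id: str, content: str) -> str:
    """One turn, sketched: decide patient context, isolate history, persist."""
    data_access = app_context.data_access

    # Wire the service exactly as in the Service Initialization snippet above.
    analyzer = PatientContextAnalyzer(
        token_provider=app_context.cognitive_services_token_provider
    )
    service = PatientContextService(
        analyzer=analyzer,
        registry_accessor=data_access.patient_context_registry_accessor,
        context_accessor=data_access.chat_context_accessor,
    )

    # 1. Always start from the session context.
    chat_context = await data_access.chat_context_accessor.read(chat_id, None)

    # 2. Let the service sync with the registry and apply any transition.
    decision, timing = await service.decide_and_apply(content, chat_context)
    if decision == "NEEDS_PATIENT_ID":
        return "Please provide a patient ID in the format 'patient_X' (e.g., 'patient_4')."

    # 3. If a patient is now active, swap in that patient's isolated history.
    if chat_context.patient_id:
        isolated_ctx = await data_access.chat_context_accessor.read(
            chat_id, chat_context.patient_id
        )
        chat_context.chat_history = isolated_ctx.chat_history

    # 4. (Agent orchestration via group_chat.create_group_chat would run here.)

    # 5. Persist: the accessor picks the patient or session file from patient_id.
    await data_access.chat_context_accessor.write(chat_context)
    return f"decision={decision} timing={timing}"
```

The design point mirrored here is the ordering: the session context is always loaded first, and the patient-isolated history replaces it only after the context decision has been applied.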
\ No newline at end of file From 7df30362917f9fa0dd89559506ad663eae232654 Mon Sep 17 00:00:00 2001 From: vvenglaturmsft Date: Mon, 29 Sep 2025 01:36:49 +0000 Subject: [PATCH 17/20] updated fixes made patient context ephemeral --- docs/patient_context.md | 988 ++++++----------------- src/bots/assistant_bot.py | 367 +++------ src/data_models/chat_context.py | 5 +- src/data_models/chat_context_accessor.py | 172 ++-- src/group_chat.py | 421 ++++------ src/routes/api/chats.py | 279 +++---- src/scenarios/default/config/agents.yaml | 129 +-- src/services/patient_context_analyzer.py | 3 + src/services/patient_context_service.py | 461 ++--------- 9 files changed, 787 insertions(+), 2038 deletions(-) diff --git a/docs/patient_context.md b/docs/patient_context.md index a07e81d..4a2c866 100644 --- a/docs/patient_context.md +++ b/docs/patient_context.md @@ -1,883 +1,359 @@ -# Healthcare Agent Orchestrator: Patient Context Management System - -## Table of Contents -1. [System Overview](#system-overview) -2. [Architecture Principles](#architecture-principles) -3. [Core Data Models](#core-data-models) -4. [Storage Layer](#storage-layer) -5. [Service Layer](#service-layer) -6. [AI Analysis Engine](#ai-analysis-engine) -7. [API Integration](#api-integration) -8. [Agent Integration](#agent-integration) -9. [Data Flow](#data-flow) -10. [File Structure](#file-structure) -11. [Configuration](#configuration) -12. [Troubleshooting](#troubleshooting) - ---- - -## System Overview - -The Healthcare Agent Orchestrator implements a sophisticated **patient context management system** that enables patient-specific conversation isolation within a multi-agent AI healthcare platform. This system ensures that conversations about different patients remain completely separate while maintaining the ability to switch between patients seamlessly. - -### Key Features -- **Patient-Specific Isolation**: Each patient has their own isolated conversation history -- **Intelligent Context Switching**: AI-powered analysis determines when to switch patients or create new patient contexts -- **Registry-Based Metadata Management**: Centralized patient registry as single source of truth -- **Comprehensive Archival**: Full archival capabilities +# Patient Context Management (Current Architecture) +This document describes the current (ephemeral, registry‑based) patient context model. It replaces any legacy behavior that persisted system snapshot messages or embedded timing metadata in `PATIENT_CONTEXT_JSON`. --- -## Architecture Principles - -### 1. Registry as Single Source of Truth -- **Patient Registry**: Centralized JSON file containing all patient metadata -- **No Duplication**: Patient metadata stored only in registry, not in individual chat files -- **Authoritative**: All patient information queries go through registry - -### 2. Patient-Isolated Chat History -- **Separate Files**: Each patient has their own chat history file -- **Complete Isolation**: No cross-patient information leakage -- **History Loading**: Only relevant patient history loaded per conversation +## ✅ Core Goals -### 3. Intelligent Context Switching -- **AI Analysis**: Azure OpenAI analyzes user input to determine context switches -- **Structured Output**: Reliable JSON schema-based decisions -- **Pattern Recognition**: Identifies patient IDs, clear commands, and context switches - -### 4. 
Kernel State Isolation -- **AI State Reset**: Semantic Kernel reset between patients to prevent contamination -- **Clean Context**: Each patient interaction starts with clean AI state -- **Memory Isolation**: No AI memory bleeding between patients - -### 5. Comprehensive Audit Trail -- **Timestamped Archives**: Complete archival with timestamps -- **Compliance Ready**: Full audit trail for healthcare compliance -- **Recovery Capability**: Ability to restore any previous state +| Goal | Current Mechanism | +|------|-------------------| +| Patient isolation | Separate per‑patient history blobs: `patient_{id}_context.json` | +| Multi-patient roster | Central registry: `patient_context_registry.json` (authoritative) | +| Ephemeral grounding | Fresh `PATIENT_CONTEXT_JSON` system snapshot injected each turn (never persisted) | +| Low-noise storage | Only user + agent dialogue retained; snapshots stripped before write | +| Safe switching | Analyzer governs transitions; kernel reset only when changing active patient | +| Clear operation | Archives session + all patient histories + registry, then resets in-memory state | --- -## Core Data Models - -### ChatContext (`/src/data_models/chat_context.py`) +## 🔄 High‑Level Turn Flow + +1. Load the session `ChatContext` (no patient file loaded yet). +2. If a clear command was issued: archive everything, reset state, send “cleared” reply, stop. +3. Call `PatientContextService.decide_and_apply()`: + - Hydrate `chat_ctx.patient_contexts` from the registry (source of truth). + - Apply any transition: activate, switch, clear, restore, or no-op. +4. If a patient is now active, load that patient’s isolated chat history (replacing the session history in memory). +5. Remove any prior ephemeral `PATIENT_CONTEXT_JSON` system snapshot(s) from memory. +6. Construct and inject a fresh ephemeral snapshot system message (not persisted). +7. Append the raw user message. +8. Run multi-agent orchestration (Orchestrator + specialists). +9. (Teams only) Append a single guarded `PT_CTX` audit footer (never duplicates). +10. Persist: + - Write to the patient file if `chat_ctx.patient_id` is set; otherwise to the session file. + - The ephemeral snapshot is excluded (it was already filtered before persistence). +11. The registry already reflects any activation / switch / new patient from step 3. -The central in-memory container for conversation state. - -```python -class ChatContext: - def __init__(self, conversation_id: str): - self.conversation_id = conversation_id - self.chat_history = ChatHistory() # Semantic Kernel chat history - - # Patient context fields - self.patient_id = None # Currently active patient - self.patient_contexts: Dict[str, PatientContext] = {} # In-memory patient cache - self.workflow_summary: Optional[str] = None - - # Legacy fields (preserved for compatibility) - self.patient_data = [] - self.display_blob_urls = [] - self.display_clinical_trials = [] - self.output_data = [] - self.healthcare_agents = {} -``` - -**Key Responsibilities**: -- Maintains current conversation state -- Tracks active patient (`patient_id`) -- Caches patient metadata from registry (`patient_contexts`) -- Contains conversation history (session or patient-specific) - -### PatientContext (`/src/data_models/chat_context.py`) - -Minimal per-patient metadata container. 
- -```python -@dataclass -class PatientContext: - patient_id: str - facts: Dict[str, Any] = field(default_factory=dict) -``` - -**Purpose**: -- Stores patient-specific metadata -- Lightweight container for patient facts -- Used both in-memory and persisted to registry +--- -### PatientContextSystemMessage (`/src/data_models/patient_context_models.py`) +## 🧠 Decision Engine (`PatientContextAnalyzer`) -Structured system message for agent context. +Produces an action plus (optionally) a `patient_id`. -```python -class PatientContextSystemMessage(KernelBaseModel): - conversation_id: str - patient_id: str - all_patient_ids: List[str] - timing_sec: TimingInfo -``` +| Action | Meaning | +|--------|---------| +| `NONE` | No patient context required (general/meta turn) | +| `ACTIVATE_NEW` | Start a brand-new patient (ID extracted) | +| `SWITCH_EXISTING` | Switch to an existing (registry) patient | +| `UNCHANGED` | Keep the current active patient | +| `CLEAR` | User intends to clear all patient context | +| (Service-derived) `RESTORED_FROM_STORAGE` | Previous active patient resurrected (no active in-memory, registry had one) | +| (Service-derived) `NEEDS_PATIENT_ID` | User intent implies patient focus but no resolvable ID provided | -**Purpose**: -- Injects patient context into agent conversations -- Provides structured patient information to AI agents -- Includes performance timing data +Service-level post-processing can reclassify into operational decisions like `NEW_BLANK`. --- -## Storage Layer +## 🏛 Registry (Single Source of Truth) -### File Structure in Blob Storage +File: `patient_context_registry.json` -``` -{conversation_id}/ -├── session_context.json # General conversation history -├── patient_{patient_id}_context.json # Patient-specific conversation history -├── patient_context_registry.json # Patient metadata registry (SOURCE OF TRUTH) -└── archive/ # Timestamped archives - └── {timestamp}/ - ├── {conversation_id}/ - │ ├── {timestamp}_session_archived.json - │ └── {timestamp}_patient_{id}_archived.json - └── {timestamp}_patient_context_registry_archived.json -``` - -### PatientContextRegistryAccessor (`/src/data_models/patient_context_accessor.py`) - -**Single Source of Truth for Patient Metadata** - -#### Registry File Format ```json { - "conversation_id": "uuid-here", - "active_patient_id": "patient_15", + "conversation_id": "uuid", + "active_patient_id": "patient_16", "patient_registry": { "patient_4": { "patient_id": "patient_4", "facts": {}, - "conversation_id": "uuid-here", - "last_updated": "2025-09-20T18:28:18.995167+00:00" + "conversation_id": "uuid", + "last_updated": "2025-09-28T14:55:41.221939+00:00" }, - "patient_15": { - "patient_id": "patient_15", + "patient_16": { + "patient_id": "patient_16", "facts": {}, - "conversation_id": "uuid-here", - "last_updated": "2025-09-20T18:31:26.859910+00:00" + "conversation_id": "uuid", + "last_updated": "2025-09-28T15:04:10.119003+00:00" } }, - "last_updated": "2025-09-20T18:31:26.859935" + "last_updated": "2025-09-28T15:04:10.119020+00:00" } ``` -#### Key Methods - -##### `read_registry(conversation_id) -> (patient_registry, active_patient_id)` -```python -async def read_registry(self, conversation_id: str) -> Tuple[Dict[str, Dict], Optional[str]]: -``` -- **Purpose**: Loads patient registry from blob storage -- **Returns**: Tuple of (patient_registry_dict, active_patient_id) -- **Error Handling**: Returns empty registry if file not found - -##### `write_registry(conversation_id, patient_registry, active_patient_id)` -```python -async 
def write_registry(self, conversation_id: str, patient_registry: Dict[str, Dict], active_patient_id: str = None): -``` -- **Purpose**: Persists complete patient registry -- **Operation**: Overwrites entire registry file -- **Includes**: Conversation metadata, timestamps, patient data - -##### `update_patient_registry(conversation_id, patient_id, registry_entry, active_patient_id)` -```python -async def update_patient_registry(self, conversation_id: str, patient_id: str, registry_entry: Dict, active_patient_id: str = None): -``` -- **Purpose**: Updates specific patient entry -- **Flow**: Read → Update → Write -- **Atomic**: Ensures consistency during updates - -##### `archive_registry(conversation_id)` -```python -async def archive_registry(self, conversation_id: str) -> None: -``` -- **Purpose**: Archives registry before clearing -- **Creates**: Timestamped archive file -- **Cleanup**: Deletes original registry - -### ChatContextAccessor (`/src/data_models/chat_context_accessor.py`) - -**Manages Chat History Files** - -#### File Path Strategy -```python -def get_blob_path(self, conversation_id: str, patient_id: str = None) -> str: - if patient_id: - return f"{conversation_id}/patient_{patient_id}_context.json" - return f"{conversation_id}/session_context.json" -``` - -#### Key Methods +Characteristics: +- Contains only roster + active pointer. +- No embedded system message text. +- `facts` is a lightweight dict (reserved for future enrichment). -##### `read(conversation_id, patient_id=None) -> ChatContext` -```python -async def read(self, conversation_id: str, patient_id: str = None) -> ChatContext: -``` -- **Purpose**: Loads chat context from appropriate file -- **Logic**: Patient-specific if patient_id provided, session otherwise -- **Registry Independence**: Does NOT restore patient metadata from files +--- -##### `write(chat_ctx: ChatContext)` -```python -async def write(self, chat_ctx: ChatContext) -> None: -``` -- **Purpose**: Saves chat context to appropriate file -- **File Selection**: Based on `chat_ctx.patient_id` -- **Content**: Chat history + metadata (NO patient_contexts) +## 🗂 Storage Layout -##### `serialize(chat_ctx: ChatContext) -> str` -```python -@staticmethod -def serialize(chat_ctx: ChatContext) -> str: ``` -- **Purpose**: Converts ChatContext to JSON -- **Key Change**: **REMOVED patient_contexts serialization** -- **Content**: - ```json - { - "schema_version": 2, - "conversation_id": "uuid", - "patient_id": "patient_15", - "workflow_summary": null, - "chat_history": [...], - "patient_data": [], - "display_blob_urls": [], - "output_data": [] - } - ``` - -##### `deserialize(data_str: str) -> ChatContext` -```python -@staticmethod -def deserialize(data_str: str) -> ChatContext: +{conversation_id}/ +├── session_context.json +├── patient_{patient_id}_context.json +├── patient_context_registry.json +└── archive/ + └── {timestamp}/ + ├── {conversation_id}/ + │ ├── {timestamp}_session_archived.json + │ ├── {timestamp}_patient_patient_4_archived.json + │ └── {timestamp}_patient_patient_15_archived.json + └── {timestamp}_patient_context_registry_archived.json ``` -- **Purpose**: Loads ChatContext from JSON -- **Legacy Support**: Handles old files with patient_contexts -- **Migration**: Logs legacy format detection -##### `archive_to_folder(conversation_id, patient_id, archive_folder)` -```python -async def archive_to_folder(self, conversation_id: str, patient_id: str, archive_folder: str) -> None: -``` -- **Purpose**: Archives context to specific folder -- **Used By**: Clear 
operations for compliance -- **Structure**: `{archive_folder}/{conversation_id}/{timestamp}_{type}_archived.json` +Key behavior: +- `PATIENT_CONTEXT_JSON` messages never persist. +- Only dialogue + ancillary arrays (display/output) remain. --- -## Service Layer +## 💬 Ephemeral Snapshot Format -### PatientContextService (`/src/services/patient_context_service.py`) +Injected each turn at index 0 of `chat_ctx.chat_history.messages`: -**Central Orchestrator for Patient Context Management** - -#### Initialization -```python -def __init__(self, analyzer: PatientContextAnalyzer, registry_accessor=None, context_accessor=None): - self.analyzer = analyzer # AI decision engine - self.registry_accessor = registry_accessor # Patient metadata storage - self.context_accessor = context_accessor # Chat history storage +```text +PATIENT_CONTEXT_JSON: {"conversation_id":"uuid","patient_id":"patient_16","all_patient_ids":["patient_4","patient_15","patient_16"],"generated_at":"2025-09-28T15:07:44.012345Z"} ``` -#### Core Methods - -##### `_ensure_patient_contexts_from_registry(chat_ctx: ChatContext)` -```python -async def _ensure_patient_contexts_from_registry(self, chat_ctx: ChatContext): -``` -**Purpose**: Synchronizes in-memory patient contexts with registry +Differences vs legacy: -**Critical Operation**: -1. Reads patient registry from blob storage -2. **Clears all in-memory patient contexts** -3. Rebuilds `patient_contexts` from registry data -4. Ensures registry is authoritative source +| Aspect | Legacy | Current | +|--------|--------|---------| +| Timing field (`timing_sec`) | Present | Removed | +| Injection site | Inside service | Caller (route / bot) post-decision | +| Persistence | Stored & reloaded | Rebuilt every turn (never stored) | +| Cleanup | Service replaced old | Caller strips before reinjecting | +| Purpose | Grounding (stale risk) | Always-fresh grounding snapshot | -**When Called**: Beginning of all major operations +Rationale for removal of timing: operational concern, not reasoning signal. -##### `decide_and_apply(user_text: str, chat_ctx: ChatContext) -> (Decision, TimingInfo)` -```python -async def decide_and_apply(self, user_text: str, chat_ctx: ChatContext) -> tuple[Decision, TimingInfo]: -``` -**Purpose**: Main entry point for patient context processing - -**Complete Flow**: -1. **Registry Sync**: `await self._ensure_patient_contexts_from_registry(chat_ctx)` -2. **Short Message Bypass**: Skip AI analysis for agent handoffs (≤15 chars) -3. **AI Analysis**: Use PatientContextAnalyzer for structured decision -4. **Decision Routing**: Route to appropriate action method -5. **System Message**: Inject patient context for agents -6. **Timing**: Return detailed performance metrics - -**Possible Decisions**: -- `NONE`: No patient context needed -- `UNCHANGED`: Continue with current patient -- `NEW_BLANK`: Created new patient context -- `SWITCH_EXISTING`: Switched to existing patient -- `CLEAR`: Cleared all patient contexts -- `RESTORED_FROM_STORAGE`: Restored from storage -- `NEEDS_PATIENT_ID`: Invalid/missing patient ID - -##### `set_explicit_patient_context(patient_id: str, chat_ctx: ChatContext) -> bool` -```python -async def set_explicit_patient_context(self, patient_id: str, chat_ctx: ChatContext) -> bool: -``` -**Purpose**: Directly sets specific patient context (bypass AI analysis) - -**Flow**: -1. **Registry Sync**: Ensure latest registry data -2. **Validation**: Check patient ID format (`patient_[0-9]+`) -3. **Kernel Reset**: Reset AI kernel when switching patients -4. 
**Restoration**: Try to restore patient from storage -5. **Creation**: Create new patient if not found -6. **System Message**: Inject patient context -7. **Registry Update**: Persist to registry - -##### `_ensure_system_message(chat_ctx: ChatContext, timing: TimingInfo)` -```python -async def _ensure_system_message(self, chat_ctx: ChatContext, timing: TimingInfo): -``` -**Purpose**: Injects structured patient context into chat history - -**Operation**: -1. **Cleanup**: Remove existing patient context system messages -2. **Registry Query**: Get patient list from registry (authoritative) -3. **Structured Payload**: Create `PatientContextSystemMessage` -4. **Message Injection**: Insert at position 0 in chat history +--- -**System Message Format**: -``` -PATIENT_CONTEXT_JSON: {"conversation_id":"uuid","patient_id":"patient_15","all_patient_ids":["patient_4","patient_15"],"timing_sec":{...}} -``` +## 🧩 Runtime Data Model (Simplified) -##### `_try_restore_from_storage(chat_ctx: ChatContext) -> bool` ```python -async def _try_restore_from_storage(self, chat_ctx: ChatContext) -> bool: +ChatContext: + conversation_id: str + patient_id: Optional[str] + patient_contexts: Dict[str, PatientContext] # Hydrated from registry each turn + chat_history: Semantic Kernel chat history ``` -**Purpose**: Fallback restoration from storage files -**Priority Order**: -1. **Patient Registry** (Primary): Authoritative patient metadata -2. **Session Context** (Fallback): Legacy conversation restoration +Hydration snippet: -**Registry Restoration**: -1. Load patient registry -2. Restore ALL patient metadata to memory -3. Set active patient from registry -4. Load patient-specific isolated chat history -5. Replace current history with patient history - -##### `_archive_all_and_recreate(chat_ctx: ChatContext)` ```python -async def _archive_all_and_recreate(self, chat_ctx: ChatContext) -> None: +await patient_context_service._ensure_patient_contexts_from_registry(chat_ctx) +# chat_ctx.patient_contexts = { pid: PatientContext(...), ... } ``` -**Purpose**: Complete archival and clearing - -**Comprehensive Flow**: -1. **Kernel Reset**: Clear AI state -2. **Patient Discovery**: Get ALL patients from registry -3. **Archive Folder**: Create timestamped folder -4. **Session Archive**: Archive main conversation -5. **Patient Archives**: Archive each patient's context -6. **Registry Archive**: Archive patient registry -7. **Memory Clear**: Clear all in-memory state +Only `patient_id` determines which file receives writes. -**Archive Structure**: -``` -archive/2025-09-20T18-31-26-123456/ - {conversation_id}/ - 20250920T183126_session_archived.json - 20250920T183126_patient_patient_4_archived.json - 20250920T183126_patient_patient_15_archived.json - 20250920T183126_patient_context_registry_archived.json -``` +--- -##### `_activate_patient_with_registry(patient_id: str, chat_ctx: ChatContext) -> Decision` -```python -async def _activate_patient_with_registry(self, patient_id: str, chat_ctx: ChatContext) -> Decision: -``` -**Purpose**: Activates specific patient with full registry integration - -**Decision Logic**: -1. **Validation**: Check patient ID -2. **Same Patient**: Return `UNCHANGED` if already active -3. **Kernel Reset**: Reset AI when switching -4. **Registry Check**: Look for patient in registry -5. **History Loading**: Load patient-specific chat history -6. 
**Registry Update**: Mark as active patient - -**Return Values**: -- `SWITCH_EXISTING`: Patient found in registry/memory -- `NEW_BLANK`: New patient created -- `UNCHANGED`: Same patient already active - -##### `_update_registry_storage(chat_ctx: ChatContext)` -```python -async def _update_registry_storage(self, chat_ctx: ChatContext): -``` -**Purpose**: Persists current patient state to registry +## 🔐 Isolation Semantics -**Registry Entry**: -```json -{ - "patient_id": "patient_15", - "facts": {}, - "conversation_id": "uuid" -} -``` +| Operation | Effect | +|-----------|--------| +| Switch patient | Kernel reset + load that patient’s chat history into memory | +| New patient | Kernel reset + start empty history | +| Clear | Archive all (session, patients, registry) then wipe memory | +| General (no patient) | Session-only evolution; `patient_id` stays `None` | +| Restore (idle resume) | If no active but registry has a previous active → restore it | -**Operation**: Calls registry accessor to update/add patient and set as active +--- -##### `_remove_system_message(chat_ctx: ChatContext)` -```python -def _remove_system_message(self, chat_ctx: ChatContext): -``` -**Purpose**: Removes old patient context system messages +## 🧪 Short-Message Heuristic -**Logic**: -1. Iterate through chat history -2. Identify system messages with `PATIENT_CONTEXT_JSON` prefix -3. Parse JSON to check patient_id -4. Remove messages for current patient -5. Preserve malformed messages (safe default) +Skip analyzer if: +- Input length ≤ 15 chars AND +- Lacks substrings: `patient`, `clear`, `switch` -##### `_try_restore_specific_patient(patient_id: str, chat_ctx: ChatContext) -> bool` -```python -async def _try_restore_specific_patient(self, patient_id: str, chat_ctx: ChatContext) -> bool: -``` -**Purpose**: Restores specific patient metadata +Outcomes: +- Active patient exists → treat as `UNCHANGED` +- None active → attempt restore → `RESTORED_FROM_STORAGE` or `NONE` -**Priority**: -1. **Registry Storage** (Primary): Authoritative source -2. **Legacy Context File** (Fallback): Migration support +Purpose: Avoid unnecessary model calls on handoff fragments (e.g., “back to you”). --- -## AI Analysis Engine +## 🛠 `PatientContextService` Responsibilities -### PatientContextAnalyzer (`/src/services/patient_context_analyzer.py`) +Still does: +- Sync from registry each invocation. +- Run analyzer (unless heuristic skip). +- Perform transitions: new / switch / clear / restore. +- Reset kernel only on patient change. +- Update registry on activation/switch. -**Intelligent Decision Engine for Patient Context** +No longer does: +- Inject snapshot messages. +- Embed timing into snapshots. +- Persist patient metadata within chat histories. -#### Core Capabilities -- **Natural Language Understanding**: Analyzes user input for patient context -- **Structured Output**: Uses Semantic Kernel's JSON schema generation -- **Pattern Recognition**: Identifies patient IDs, clear commands, switches -- **State Isolation**: Kernel reset prevents AI contamination - -#### System Prompt Strategy -```python -system_prompt = f"""You are a patient context analyzer for healthcare conversations. - -TASK: Analyze user input and decide the appropriate patient context action. 
- -AVAILABLE ACTIONS: -- NONE: No patient context needed (general questions, greetings, system commands) -- CLEAR: User wants to clear/reset all patient context -- ACTIVATE_NEW: User mentions a new patient ID not in the known patient list -- SWITCH_EXISTING: User wants to switch to a different known patient -- UNCHANGED: Continue with current patient context - -CURRENT STATE: -- Active patient ID: {prior_patient_id or "None"} -- Known patient IDs: {known_patient_ids} - -ANALYSIS RULES: -1. Extract patient_id ONLY if action is ACTIVATE_NEW or SWITCH_EXISTING -2. Patient IDs typically follow "patient_X" format or are explicit medical record numbers -3. For CLEAR/NONE/UNCHANGED actions, set patient_id to null -4. Prioritize explicit patient mentions over implicit context -5. Keep reasoning brief and specific (max 50 words) -""" +Return signature (conceptually): ``` - -#### Structured Output Model -```python -class PatientContextDecision(KernelBaseModel): - action: AnalyzerAction - patient_id: Optional[str] - reasoning: str +(decision: Decision, timing: TimingInfo) ``` -#### Example Decisions - -| User Input | Action | Patient ID | Reasoning | -|------------|--------|------------|-----------| -| `"Hello"` | `NONE` | `null` | General greeting, no patient context needed | -| `"clear"` | `CLEAR` | `null` | Explicit clear command detected | -| `"switch to patient_4"` | `SWITCH_EXISTING` or `ACTIVATE_NEW` | `"patient_4"` | Explicit patient switch request | -| `"tumor board for patient_15"` | `ACTIVATE_NEW` or `SWITCH_EXISTING` | `"patient_15"` | Healthcare task with patient mention | -| `"back to you Orchestrator"` | `UNCHANGED` | `null` | Agent handoff, maintain context | - -#### Key Methods - -##### `analyze_decision(user_text, prior_patient_id, known_patient_ids) -> PatientContextDecision` -```python -async def analyze_decision( - self, - user_text: str, - prior_patient_id: Optional[str] = None, - known_patient_ids: Optional[list[str]] = None, -) -> PatientContextDecision: -``` - -**Azure OpenAI Integration**: -```python -execution_settings = AzureChatPromptExecutionSettings( - service_id="patient_context_analyzer", - max_tokens=200, - temperature=0.1, - response_format=PatientContextDecision, # Automatic JSON schema -) +Service-level decision literal union: ``` - -##### `reset_kernel()` -```python -def reset_kernel(self): +"NONE" | "UNCHANGED" | "NEW_BLANK" | "SWITCH_EXISTING" | +"CLEAR" | "RESTORED_FROM_STORAGE" | "NEEDS_PATIENT_ID" ``` -**Purpose**: Prevents AI state contamination between patients -- Recreates entire Kernel instance -- Reinitializes AzureChatCompletion service -- Ensures clean AI state for each patient --- -## API Integration +## 🧵 Web vs Teams Parity -### WebSocket Chat Route (`/src/routes/api/chats.py`) +Shared pipeline: +1. Strip old snapshot(s). +2. Inject new snapshot (fresh `generated_at`). +3. Run group chat orchestration. +4. Persist history (snapshot excluded). +5. Snapshot grounds roster/meta reasoning. -**Main User Interface for Patient Context System** - -#### Complete Flow Analysis - -##### `websocket_chat_endpoint(websocket: WebSocket, chat_id: str)` - -**Step-by-Step Patient Context Integration**: - -1. **Session Context Loading**: -```python -chat_context = await data_access.chat_context_accessor.read(chat_id, None) -``` -- Always starts with session context -- Establishes base conversation state - -2. 
**Clear Command Processing**: -```python -if await _handle_clear_command(content, chat_context): -``` -- **Priority**: Processed BEFORE patient context analysis -- **Comprehensive**: Archives all contexts and registry -- **Response**: Immediate clear confirmation - -3. **Patient Context Decision**: -```python -decision, timing = await patient_context_service.decide_and_apply(content, chat_context) -``` -- **AI-Powered**: Uses PatientContextAnalyzer -- **Registry Integration**: Syncs with patient registry -- **Performance**: Captures timing metrics +Teams additions: +- Human-readable `PT_CTX` footer (single insertion via guard). +- Footer includes `Session ID:`. -4. **Error Handling**: +Guard pattern: ```python -if decision == "NEEDS_PATIENT_ID": - # Send helpful error message +if all_pids and "PT_CTX:" not in response.content: + # append audit footer once ``` -- **User Guidance**: Explains patient ID format -- **Format Example**: `'patient_X' (e.g., 'patient_4')` -5. **Patient History Isolation**: -```python -if chat_context.patient_id: - isolated_ctx = await data_access.chat_context_accessor.read(chat_id, chat_context.patient_id) - chat_context.chat_history = isolated_ctx.chat_history -``` -- **Critical Isolation**: Loads ONLY active patient's history -- **Complete Replacement**: Overwrites session history -- **Fresh Start**: Empty history for new patients - -6. **Agent Orchestration**: -```python -chat, chat_context = group_chat.create_group_chat(app_context, chat_context) -``` -- **Multi-Agent**: Creates healthcare agent conversation -- **Context Injection**: System message contains patient context - -7. **Response Enhancement**: -```python -response_content_with_pc = _append_pc_ctx_display(response.content, chat_context) -``` -- **UI Enhancement**: Adds patient context display -- **User Visibility**: Shows active patient and session patients - -8. **Context Persistence**: -```python -await data_access.chat_context_accessor.write(chat_context) -``` -- **Automatic**: Saves to appropriate file (patient/session) -- **Registry Sync**: Patient metadata already updated - -#### Clear Command Handler - -##### `_handle_clear_command(content: str, chat_context) -> bool` - -**Comprehensive Clear Operation**: - -1. **Command Recognition**: - - `"clear"` - - `"clear patient"` - - `"clear context"` - - `"clear patient context"` - -2. **Archival Process**: -```python -# Create timestamped archive folder -timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H-%M-%S-%f") -archive_folder = f"archive/{timestamp}" - -# Archive session context -await data_access.chat_context_accessor.archive_to_folder(...) +--- -# Archive ALL patient contexts from registry -patient_registry, _ = await patient_context_service.registry_accessor.read_registry(...) -for patient_id in patient_registry.keys(): - await data_access.chat_context_accessor.archive_to_folder(...) +## 📎 Example Turn -# Archive patient registry -await patient_context_service.registry_accessor.archive_registry(...) +In-memory (transient): ``` - -3. **Memory Clear**: -```python -chat_context.patient_context = None -chat_context.patient_contexts.clear() -chat_context.chat_history.messages.clear() -chat_context.patient_id = None +[System] PATIENT_CONTEXT_JSON: {"conversation_id":"c123","patient_id":"patient_4","all_patient_ids":["patient_4"],"generated_at":"...Z"} +[User] Provide history +[Assistant:PatientHistory] Here is the complete patient data ... 
``` - -#### Patient Context Display - -##### `_append_pc_ctx_display(base: str, chat_context) -> str` - -**UI Enhancement for Patient Visibility**: - -1. **System Message Extraction**: Gets patient context JSON -2. **JSON Parsing**: Parses structured payload -3. **Markdown Formatting**: Creates user-friendly display - -**Display Format**: -```markdown ---- -*PT_CTX:* -- **Patient ID:** `patient_15` -- **Conversation ID:** `uuid` -- **Session Patients:** `patient_4`, `patient_15 (active)` -``` - ---- - -## Agent Integration - -### How Healthcare Agents See Patient Context - -#### System Message Injection -Every agent conversation includes structured patient context: - +Persisted (`patient_4_context.json`): ```json { - "role": "system", - "content": "PATIENT_CONTEXT_JSON: {\"conversation_id\":\"uuid\",\"patient_id\":\"patient_15\",\"all_patient_ids\":[\"patient_4\",\"patient_15\"],\"timing_sec\":{...}}" + "conversation_id": "c123", + "patient_id": "patient_4", + "chat_history": [ + {"role": "user", "content": "Provide history"}, + {"role": "assistant", "name": "PatientHistory", "content": "Here is the complete patient data ..."} + ], + "patient_data": [], + "display_blob_urls": [], + "output_data": [] } ``` -#### Agent Awareness -- **Active Patient**: Agents know current patient -- **Session Patients**: Agents see all patients in session -- **Conversation Scope**: Agents understand context boundaries -- **Performance Data**: Timing metrics available - -#### Isolation Benefits -- **Clean History**: Only relevant patient conversation -- **Context Switching**: Clear boundaries between patients -- **Privacy Protection**: No cross-patient information leakage -- **Compliance**: Full audit trail per patient - -#### Group Chat Integration (`/src/group_chat.py`) - -**Patient Context in Multi-Agent Conversations**: - -1. **Context Injection**: -```python -inject_workflow_summary(chat_ctx) -``` - -2. **Agent Creation**: Each agent gets fresh kernel with patient context - -3. **Selection Strategy**: AI-powered agent selection with patient awareness - -4. **Termination Strategy**: Context-aware conversation termination +Snapshot absent by design. --- -## Data Flow - -### Complete Patient Context Lifecycle - -```mermaid -graph TD - A[User Message] --> B[PatientContextAnalyzer] - B --> C[Structured Decision] - C --> D[PatientContextService] - D --> E[Registry Update] - D --> F[History Loading] - E --> G[Agent Orchestration] - F --> G - G --> H[Response Generation] - H --> I[Context Persistence] - I --> J[User Response] -``` - -### Detailed Flow Steps - -1. **User Input**: Message received via WebSocket -2. **AI Analysis**: PatientContextAnalyzer determines action -3. **Service Processing**: PatientContextService executes decision -4. **Registry Sync**: Patient metadata updated in registry -5. **History Loading**: Appropriate chat history loaded -6. **Agent Orchestration**: Multi-agent conversation with context -7. **Response Generation**: AI agents generate responses -8. **Context Persistence**: State saved to appropriate files -9. 
**User Display**: Enhanced response with patient context - -### Storage Operations - -#### Read Operations -```python -# Registry (authoritative) -patient_registry, active_id = await registry_accessor.read_registry(conversation_id) +## 🧽 Clear Operation -# Chat History (patient-specific or session) -chat_ctx = await context_accessor.read(conversation_id, patient_id) +Triggers on any of: ``` - -#### Write Operations -```python -# Registry Update -await registry_accessor.update_patient_registry(conversation_id, patient_id, entry, active_id) - -# Chat History Save -await context_accessor.write(chat_ctx) +"clear", "clear patient", "clear context", "clear patient context" ``` -#### Archive Operations -```python -# Complete Archival -await context_accessor.archive_to_folder(conversation_id, patient_id, archive_folder) -await registry_accessor.archive_registry(conversation_id) -``` +Steps: +1. Archive session file, all patient files (registry-sourced), registry file. +2. Reset: `patient_id = None`, `patient_contexts.clear()`, `chat_history.clear()`. +3. Persist fresh empty session context. +4. Reply with confirmation. --- -## File Structure +## 🧾 Roster & Meta Queries -### Project Organization +Handled through Orchestrator prompt rules using the latest snapshot: +- Use `all_patient_ids` + `patient_id`. +- Never hallucinate absent patients. +- Don’t “re-plan” when user repeats the already-active patient. -``` -src/ -├── services/ -│ ├── patient_context_service.py # Main orchestrator -│ └── patient_context_analyzer.py # AI decision engine -├── data_models/ -│ ├── chat_context.py # Core data models -│ ├── chat_context_accessor.py # Chat history storage -│ ├── patient_context_accessor.py # Registry storage -│ └── patient_context_models.py # Structured output models -├── routes/api/ -│ └── chats.py # WebSocket API integration -└── group_chat.py # Multi-agent orchestration -``` +Stability aids: +- Sort `all_patient_ids`. +- (Optional future) Add `patient_count` or `_hint` if reasoning degrades. -### Configuration Files +--- -#### Environment Variables -```env -AZURE_OPENAI_DEPLOYMENT_NAME=gpt-4o -AZURE_OPENAI_API_VERSION=2025-04-01-preview -PATIENT_CONTEXT_DECIDER_DEPLOYMENT_NAME=gpt-4o # Optional override -``` +## 🛡 Why Ephemeral? -#### Agent Configuration -```json -{ - "agents": [ - { - "name": "Orchestrator", - "facilitator": true, - "instructions": "You are the healthcare orchestrator...", - "tools": [...] - } - ] -} -``` +| Legacy Issue | Current Resolution | +|--------------|-------------------| +| Persisted stale roster | Snapshot rebuilt every turn from registry | +| Stacked duplicate system messages | Strip → reinject ensures exactly one | +| Timing noise in reasoning | Removed from snapshot | +| Confusion over authority | Registry authoritative; snapshot transient | +| Unnecessary analyzer calls | Heuristic bypass for trivial handoffs | --- -## Configuration +## 🧪 Validation Scenarios -### Service Initialization +| Scenario | Expected | +|----------|----------| +| First mention “start review for patient_4” | Decision = `NEW_BLANK`; snapshot shows only `patient_4` | +| Switch to existing other patient | Decision = `SWITCH_EXISTING`; kernel reset occurs | +| Redundant switch to same patient | Decision = `UNCHANGED`; no reset | +| Short handoff “back to you” | Analyzer skipped; `UNCHANGED` (if active) | +| Clear then new command | Clean slate → next patient command = new activation | +| Teams render | Single `PT_CTX` footer incl. 
Session ID | +| Persistence audit | No `PATIENT_CONTEXT_JSON` lines in stored files | -```python -# In routes/api/chats.py -analyzer = PatientContextAnalyzer(token_provider=app_context.cognitive_services_token_provider) -patient_context_service = PatientContextService( - analyzer=analyzer, - registry_accessor=app_context.data_access.patient_context_registry_accessor, - context_accessor=app_context.data_access.chat_context_accessor -) -``` +--- -### Azure OpenAI Setup +## 🛠 Code Reference (Filtering + Injection) ```python -# In PatientContextAnalyzer -self._kernel.add_service( - AzureChatCompletion( - service_id="patient_context_analyzer", - deployment_name=self.deployment_name, - api_version=self.api_version, - ad_token_provider=token_provider, +# Remove old snapshot(s) +chat_ctx.chat_history.messages = [ + m for m in chat_ctx.chat_history.messages + if not ( + m.role == AuthorRole.SYSTEM + and getattr(m, "items", None) + and m.items + and getattr(m.items[0], "text", "").startswith(PATIENT_CONTEXT_PREFIX) ) -) -``` +] + +snapshot = { + "conversation_id": chat_ctx.conversation_id, + "patient_id": chat_ctx.patient_id, + "all_patient_ids": sorted(chat_ctx.patient_contexts.keys()), + "generated_at": datetime.utcnow().isoformat() + "Z", +} -### Storage Configuration +line = f"{PATIENT_CONTEXT_PREFIX}: {json.dumps(snapshot, separators=(',', ':'))}" +sys_msg = ChatMessageContent(role=AuthorRole.SYSTEM, items=[TextContent(text=line)]) +chat_ctx.chat_history.messages.insert(0, sys_msg) +``` +Teams footer guard (conceptual): ```python -# In DataAccess -patient_context_registry_accessor = PatientContextRegistryAccessor( - blob_service_client=blob_service_client, - container_name="chat-sessions" -) - -chat_context_accessor = ChatContextAccessor( - blob_service_client=blob_service_client, - container_name="chat-sessions", - cognitive_services_token_provider=token_provider -) +if all_pids and "PT_CTX:" not in response.content: + # append audit footer once ``` --- -## Conclusion +## 🔮 Future Enhancements (Optional) + +| Idea | Rationale | +|------|-----------| +| Deterministic plan confirmation flag | Reduce reliance on prompt-only gating | +| Snapshot `patient_count` field | Faster meta answers (no length calc) | +| Registry `facts` enrichment | Richer grounding for specialized agents | +| Test harness for decision invariants | Prevent regression in edge transitions | +| LLM classification caching | Reduce analyzer calls for repeated short intents | -The Healthcare Agent Orchestrator's patient context management system provides a robust, compliant, and scalable solution for managing patient-specific AI conversations. By implementing registry-based metadata management, intelligent context switching, and comprehensive archival capabilities, the system ensures both user experience and regulatory compliance in healthcare AI applications. +--- -The architecture's separation of concerns, structured decision making, and comprehensive error handling make it suitable for production healthcare environments while maintaining the flexibility needed for complex multi-agent AI interactions. \ No newline at end of file +Last updated: 2025-09-28 +Status: Stable ephemeral model in production branch (`sekar/pc_poc`). diff --git a/src/bots/assistant_bot.py b/src/bots/assistant_bot.py index d203154..9620fb3 100644 --- a/src/bots/assistant_bot.py +++ b/src/bots/assistant_bot.py @@ -1,10 +1,10 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. 
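
The persistence-audit expectation above (no `PATIENT_CONTEXT_JSON` lines in stored files) can be exercised with a small standalone check. This is a sketch only: `strip_snapshots` and the inline `PATIENT_CONTEXT_PREFIX` constant are illustrative stand-ins for the filtering shown in the code reference, and only the `semantic_kernel.contents` types already used there are assumed.

```python
from semantic_kernel.contents import AuthorRole, ChatMessageContent, TextContent

# Illustrative stand-in for services.patient_context_service.PATIENT_CONTEXT_PREFIX.
PATIENT_CONTEXT_PREFIX = "PATIENT_CONTEXT_JSON"


def strip_snapshots(messages: list[ChatMessageContent]) -> list[ChatMessageContent]:
    """Drop ephemeral snapshot system messages before history is persisted."""
    return [
        m for m in messages
        if not (
            m.role == AuthorRole.SYSTEM
            and getattr(m, "items", None)
            and m.items
            and getattr(m.items[0], "text", "").startswith(PATIENT_CONTEXT_PREFIX)
        )
    ]


if __name__ == "__main__":
    history = [
        ChatMessageContent(
            role=AuthorRole.SYSTEM,
            items=[TextContent(text=f'{PATIENT_CONTEXT_PREFIX}: {{"patient_id":"patient_4"}}')],
        ),
        ChatMessageContent(role=AuthorRole.USER, items=[TextContent(text="Provide history")]),
    ]
    persisted = strip_snapshots(history)
    # The snapshot is gone; only the dialogue survives persistence.
    assert all(
        not getattr(m.items[0], "text", "").startswith(PATIENT_CONTEXT_PREFIX)
        for m in persisted
    )
    print(f"messages persisted: {len(persisted)}")
```

Because the guard keys only on the prefix, the check stays valid regardless of which fields the snapshot payload carries.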
+# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. import asyncio -import json import logging import os +import json from datetime import datetime, timezone from botbuilder.core import MessageFactory, TurnContext @@ -12,19 +12,15 @@ from botbuilder.integration.aiohttp import CloudAdapter from botbuilder.schema import Activity, ActivityTypes from semantic_kernel.agents import AgentGroupChat - -from semantic_kernel.contents import AuthorRole, ChatMessageContent, TextContent -from services.patient_context_service import PATIENT_CONTEXT_PREFIX +from semantic_kernel.contents import ChatMessageContent, TextContent, AuthorRole from data_models.app_context import AppContext from data_models.chat_context import ChatContext -from errors import NotAuthorizedError from group_chat import create_group_chat -from services.patient_context_service import PatientContextService +from services.patient_context_service import PatientContextService, PATIENT_CONTEXT_PREFIX from services.patient_context_analyzer import PatientContextAnalyzer - logger = logging.getLogger(__name__) @@ -45,7 +41,6 @@ def __init__( self.data_access = app_context.data_access self.root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - # Add patient context service analyzer = PatientContextAnalyzer(token_provider=app_context.cognitive_services_token_provider) self.patient_context_service = PatientContextService( analyzer=analyzer, @@ -53,39 +48,21 @@ def __init__( context_accessor=app_context.data_access.chat_context_accessor ) - async def get_bot_context( - self, conversation_id: str, bot_name: str, turn_context: TurnContext - ): + async def get_bot_context(self, conversation_id: str, bot_name: str, turn_context: TurnContext): if conversation_id not in self.turn_contexts: self.turn_contexts[conversation_id] = {} - if bot_name not in self.turn_contexts[conversation_id]: context = await self.create_turn_context(bot_name, turn_context) self.turn_contexts[conversation_id][bot_name] = context - return self.turn_contexts[conversation_id][bot_name] async def create_turn_context(self, bot_name, turn_context): - app_id = next( - agent["bot_id"] for agent in self.all_agents if agent["name"] == bot_name - ) - - # Lookup adapter for bot_name. bot_name maybe different from self.name. 
+ app_id = next(agent["bot_id"] for agent in self.all_agents if agent["name"] == bot_name) adapter = self.adapters[bot_name] claims_identity = adapter.create_claims_identity(app_id) - connector_factory = ( - adapter.bot_framework_authentication.create_connector_factory( - claims_identity - ) - ) - connector_client = await connector_factory.create( - turn_context.activity.service_url, "https://api.botframework.com" - ) - user_token_client = ( - await adapter.bot_framework_authentication.create_user_token_client( - claims_identity - ) - ) + connector_factory = adapter.bot_framework_authentication.create_connector_factory(claims_identity) + connector_client = await connector_factory.create(turn_context.activity.service_url, "https://api.botframework.com") + user_token_client = await adapter.bot_framework_authentication.create_user_token_client(claims_identity) async def logic(context: TurnContext): pass @@ -97,265 +74,185 @@ async def logic(context: TurnContext): context.turn_state[CloudAdapter.CONNECTOR_FACTORY_KEY] = connector_factory context.turn_state[CloudAdapter.BOT_OAUTH_SCOPE_KEY] = "https://api.botframework.com/.default" context.turn_state[CloudAdapter.BOT_CALLBACK_HANDLER_KEY] = logic - return context async def _handle_clear_command(self, content: str, chat_ctx: ChatContext, conversation_id: str) -> bool: - """Handle patient context clear commands - aligned with web interface.""" content_lower = content.lower().strip() if content_lower in ["clear", "clear patient", "clear context", "clear patient context"]: logger.info(f"Processing clear command for conversation: {conversation_id}") - - # Archive everything before clearing (same as web interface) timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H-%M-%S-%f") archive_folder = f"archive/{timestamp}" - try: - logger.info(f"Starting archive to folder: {archive_folder}") - - # Archive session context (this creates the archive folder structure) await self.data_access.chat_context_accessor.archive_to_folder(conversation_id, None, archive_folder) - logger.info(f"Archived session context to {archive_folder}") - - # Archive ALL patient contexts (not just from chat_ctx.patient_contexts) - # We need to get the list from the registry like the web interface does try: patient_registry, _ = await self.patient_context_service.registry_accessor.read_registry(conversation_id) if patient_registry: - for patient_id in patient_registry.keys(): - await self.data_access.chat_context_accessor.archive_to_folder(conversation_id, patient_id, archive_folder) - logger.info(f"Archived patient context for {patient_id} to {archive_folder}") - except Exception as registry_error: - logger.warning(f"Could not read registry for archiving patient contexts: {registry_error}") - # Fallback: use patient_contexts from chat_ctx if available - if hasattr(chat_ctx, 'patient_contexts') and chat_ctx.patient_contexts: - for patient_id in chat_ctx.patient_contexts.keys(): - await self.data_access.chat_context_accessor.archive_to_folder(conversation_id, patient_id, archive_folder) - logger.info(f"Archived patient context for {patient_id} to {archive_folder} (fallback)") - - # Archive patient registry (this renames it, doesn't create folder structure) + for pid in patient_registry.keys(): + await self.data_access.chat_context_accessor.archive_to_folder(conversation_id, pid, archive_folder) + except Exception: + if getattr(chat_ctx, "patient_contexts", None): + for pid in chat_ctx.patient_contexts.keys(): + await 
self.data_access.chat_context_accessor.archive_to_folder(conversation_id, pid, archive_folder) await self.patient_context_service.registry_accessor.archive_registry(conversation_id) - logger.info(f"Archived patient registry for {conversation_id}") - - # Clear chat context (same as web interface) - chat_ctx.patient_context = None - if hasattr(chat_ctx, 'patient_contexts'): - chat_ctx.patient_contexts.clear() - chat_ctx.chat_history.clear() - chat_ctx.patient_id = None - - # Save the cleared context - await self.data_access.chat_context_accessor.write(chat_ctx) - logger.info(f"Saved cleared context for {conversation_id}") - - logger.info(f"Successfully archived and cleared all contexts to {archive_folder}") - return True - except Exception as e: - logger.error(f"Failed to archive contexts during clear: {e}") - # Still clear the context even if archiving fails + logger.warning(f"Clear archival issues: {e}") + finally: chat_ctx.patient_context = None - if hasattr(chat_ctx, 'patient_contexts'): + if hasattr(chat_ctx, "patient_contexts"): chat_ctx.patient_contexts.clear() chat_ctx.chat_history.clear() chat_ctx.patient_id = None - - # Save the cleared context - try: - await self.data_access.chat_context_accessor.write(chat_ctx) - logger.info(f"Saved cleared context after archive failure") - except Exception as save_error: - logger.error(f"Failed to save cleared context: {save_error}") - - return True - + await self.data_access.chat_context_accessor.write(chat_ctx) + return True return False async def on_message_activity(self, turn_context: TurnContext) -> None: conversation_id = turn_context.activity.conversation.id chat_context_accessor = self.data_access.chat_context_accessor - chat_artifact_accessor = self.data_access.chat_artifact_accessor # Main branch addition + chat_artifact_accessor = self.data_access.chat_artifact_accessor - # Extract raw user text (without bot mention) once raw_user_text = turn_context.remove_recipient_mention(turn_context.activity).strip() - # STEP 1: Load session context first try: chat_ctx = await chat_context_accessor.read(conversation_id, None) - if not chat_ctx: - chat_ctx = ChatContext(conversation_id) - logger.info(f"Created new session context for: {conversation_id}") - else: - logger.info(f"Loaded existing session context for: {conversation_id}") - except Exception as e: - logger.error(f"Failed to load session context: {e}") + except Exception: chat_ctx = ChatContext(conversation_id) - # STEP 1.5: Handle clear commands (main branch logic enhanced with patient context) if await self._handle_clear_command(raw_user_text, chat_ctx, conversation_id): - # Also archive chat artifacts (main branch functionality) await chat_artifact_accessor.archive(conversation_id) await turn_context.send_activity("Conversation cleared!") return - # STEP 2: Patient context decision and application decision, timing = await self.patient_context_service.decide_and_apply(raw_user_text, chat_ctx) - - logger.info(f"Patient context decision: {decision} | Patient: {chat_ctx.patient_id} | Timing: {timing}") - - # STEP 3: Handle special decision outcomes - if decision == "CLEAR": - # This should now be handled by _handle_clear_command above, but keep as fallback - await chat_artifact_accessor.archive(conversation_id) - await turn_context.send_activity("All contexts have been archived and cleared. How can I assist you today?") - return - elif decision == "NEEDS_PATIENT_ID": + if decision == "NEEDS_PATIENT_ID": await turn_context.send_activity( - "I need a patient ID to proceed. 
Please provide the patient ID in the format 'patient_X' " - "(e.g., '@Orchestrator start tumor board review for patient_4')." + "I need a patient ID like 'patient_4' (e.g., '@Orchestrator start tumor board review for patient_4')." ) return - elif decision == "RESTORED_FROM_STORAGE": - logger.info(f"Restored patient context from storage: {chat_ctx.patient_id}") - # NEW: If active patient exists, load ONLY that patient's isolated context file if chat_ctx.patient_id: try: - # Load the patient-specific file (isolated history) - isolated_ctx = await chat_context_accessor.read(conversation_id, chat_ctx.patient_id) - if isolated_ctx and isolated_ctx.chat_history.messages: - # Replace with isolated chat history - chat_ctx.chat_history = isolated_ctx.chat_history - logger.info( - f"Loaded isolated history for {chat_ctx.patient_id} ({len(isolated_ctx.chat_history.messages)} messages)") - else: - logger.info(f"No existing history for {chat_ctx.patient_id}, starting fresh") - except Exception as e: - logger.debug(f"Could not load isolated context for {chat_ctx.patient_id}: {e}") + isolated = await chat_context_accessor.read(conversation_id, chat_ctx.patient_id) + if isolated and isolated.chat_history.messages: + chat_ctx.chat_history = isolated.chat_history + except Exception: + pass + + # Inject fresh ephemeral PATIENT_CONTEXT_JSON snapshot + filtered = [] + for m in chat_ctx.chat_history.messages: + if not (m.role == AuthorRole.SYSTEM and hasattr(m, "items") and m.items + and getattr(m.items[0], "text", "").startswith(PATIENT_CONTEXT_PREFIX)): + filtered.append(m) + chat_ctx.chat_history.messages = filtered + snapshot = { + "conversation_id": chat_ctx.conversation_id, + "patient_id": chat_ctx.patient_id, + "all_patient_ids": sorted(getattr(chat_ctx, "patient_contexts", {}).keys()), + "generated_at": datetime.utcnow().isoformat() + "Z" + } + line = f"{PATIENT_CONTEXT_PREFIX}: {json.dumps(snapshot, separators=(',', ':'))}" + sys_msg = ChatMessageContent(role=AuthorRole.SYSTEM, items=[TextContent(text=line)]) + chat_ctx.chat_history.messages.insert(0, sys_msg) - # STEP 4: Continue with normal group chat processing agents = self.all_agents - if len(chat_ctx.chat_history.messages) == 0: - # new conversation. Let's see which agents are available. - async def is_part_of_conversation(agent): - context = await self.get_bot_context(turn_context.activity.conversation.id, agent["name"], turn_context) - typing_activity = Activity( - type=ActivityTypes.typing, - relates_to=turn_context.activity.relates_to, - ) - typing_activity.apply_conversation_reference( - turn_context.activity.get_conversation_reference() - ) - context.activity = typing_activity + if len(chat_ctx.chat_history.messages) == 1: # only the snapshot present + async def is_part(agent): + context = await self.get_bot_context(conversation_id, agent["name"], turn_context) + typing = Activity(type=ActivityTypes.typing, relates_to=turn_context.activity.relates_to) + typing.apply_conversation_reference(turn_context.activity.get_conversation_reference()) + context.activity = typing try: - await context.send_activity(typing_activity) + await context.send_activity(typing) return True - except Exception as e: - logger.info(f"Failed to send typing activity to {agent['name']}: {e}") - # This happens if the agent is not part of the group chat. 
- # Remove the agent from the list of available agents + except Exception: return False - part_of_conversation = await asyncio.gather(*(is_part_of_conversation(agent) for agent in self.all_agents)) - agents = [agent for agent, should_include in zip(self.all_agents, part_of_conversation) if should_include] + flags = await asyncio.gather(*(is_part(a) for a in self.all_agents)) + agents = [a for a, ok in zip(self.all_agents, flags) if ok] (chat, chat_ctx) = create_group_chat(self.app_context, chat_ctx, participants=agents) - # Add user message with patient context - user_message_with_context = self._append_pc_ctx(f"{self.name}: {raw_user_text}", chat_ctx) - chat_ctx.chat_history.add_user_message(user_message_with_context) + # Add raw user message + chat_ctx.chat_history.add_user_message(raw_user_text) chat.is_complete = False await self.process_chat(chat, chat_ctx, turn_context) - # Save chat context try: await chat_context_accessor.write(chat_ctx) - logger.info(f"Saved context for conversation: {conversation_id} | Patient: {chat_ctx.patient_id}") - except Exception as e: + except Exception: logger.exception("Failed to save chat context.") async def on_error(self, context: TurnContext, error: Exception): - # This error is raised as Exception, so we can only use the message to handle the error. + from errors import NotAuthorizedError if str(error) == "Unable to proceed while another agent is active.": await context.send_activity("Please wait for the current agent to finish.") elif isinstance(error, NotAuthorizedError): - logger.warning(error) await context.send_activity("You are not authorized to access this agent.") else: - # default exception handling - logger.exception(f"Agent {self.name} encountered an error") - await context.send_activity(f"Orchestrator is working on solving your problems, please retype your request") + await context.send_activity("Orchestrator encountered an error. Please retry your request.") - async def process_chat( - self, chat: AgentGroupChat, chat_ctx: ChatContext, turn_context: TurnContext - ): - # If the mentioned agent is a facilitator, proceed with group chat. - # Otherwise, proceed with standalone chat using the mentioned agent. 
- agent_config = next(agent_config for agent_config in self.all_agents if agent_config["name"] == self.name) - mentioned_agent = None if agent_config.get("facilitator", False) \ - else next(agent for agent in chat.agents if agent.name == self.name) + async def process_chat(self, chat: AgentGroupChat, chat_ctx: ChatContext, turn_context: TurnContext): + agent_cfg = next(cfg for cfg in self.all_agents if cfg["name"] == self.name) + mentioned_agent = None if agent_cfg.get("facilitator", False) else next( + a for a in chat.agents if a.name == self.name) async for response in chat.invoke(agent=mentioned_agent): - context = await self.get_bot_context( - turn_context.activity.conversation.id, response.name, turn_context - ) - if response.content.strip() == "": + if not response.content.strip(): continue - # Add patient context to response - response_with_context = self._append_pc_ctx(response.content, chat_ctx) - - # Update response properly with ChatMessageContent v2 format - if hasattr(response, 'items') and response.items: - response.items[0].text = response_with_context + active_pid = chat_ctx.patient_id + all_pids = sorted(getattr(chat_ctx, "patient_contexts", {}).keys()) + final_content = response.content + + # Option 3 guard + added Session ID line + if all_pids and "PT_CTX:" not in response.content: + roster = ", ".join(f"`{p}`{' (active)' if p == active_pid else ''}" for p in all_pids) + pt_ctx_block = "\n\n---\n*PT_CTX:*\n" + pt_ctx_block += f"- **Session ID:** `{chat_ctx.conversation_id}`\n" + pt_ctx_block += f"- **Patient ID:** `{active_pid}`\n" if active_pid else "- *No active patient.*\n" + pt_ctx_block += f"- **Session Patients:** {roster}" + final_content = f"{response.content}{pt_ctx_block}" + + if hasattr(response, "items") and response.items: + response.items[0].text = final_content else: - # If no items structure, recreate with proper format response = ChatMessageContent( role=response.role, - items=[TextContent(text=response_with_context)], - name=getattr(response, 'name', None) + items=[TextContent(text=final_content)], + name=getattr(response, "name", None) ) msgText = self._append_links_to_msg(response.content, chat_ctx) msgText = await self.generate_sas_for_blob_urls(msgText, chat_ctx) + context = await self.get_bot_context(turn_context.activity.conversation.id, response.name, turn_context) activity = MessageFactory.text(msgText) - activity.apply_conversation_reference( - turn_context.activity.get_conversation_reference() - ) + activity.apply_conversation_reference(turn_context.activity.get_conversation_reference()) context.activity = activity - await context.send_activity(activity) if chat.is_complete: break def _append_links_to_msg(self, msgText: str, chat_ctx: ChatContext) -> str: - # Add patient data links to response try: - # Handle both main branch format (direct access) and patient context format (getattr) - image_urls = getattr(chat_ctx, 'display_image_urls', []) - clinical_trial_urls = chat_ctx.display_clinical_trials - - # Display loaded images - if image_urls: + imgs = getattr(chat_ctx, "display_image_urls", []) + trials = chat_ctx.display_clinical_trials + if imgs: msgText += "

<br/><br/>Patient Images<br/><br/>"
-            for url in image_urls:
-                filename = url.split("/")[-1]
-                msgText += f"{filename}"
-
-            # Display clinical trials
-            if clinical_trial_urls:
+            for url in imgs:
+                fname = url.split("/")[-1]
+                msgText += f"{fname}"
+            if trials:
+                msgText += "<br/><br/>Clinical trials<br/><br/>"
+            for url in trials:
                 trial = url.split("/")[-1]
                 msgText += f"<br/>  • {trial}
  • " - return msgText finally: - # Handle both formats for cleanup - if hasattr(chat_ctx, 'display_image_urls'): + if hasattr(chat_ctx, "display_image_urls"): chat_ctx.display_image_urls = [] chat_ctx.display_clinical_trials = [] @@ -364,80 +261,6 @@ async def generate_sas_for_blob_urls(self, msgText: str, chat_ctx: ChatContext) for blob_url in chat_ctx.display_blob_urls: blob_sas_url = await self.data_access.blob_sas_delegate.get_blob_sas_url(blob_url) msgText = msgText.replace(blob_url, blob_sas_url) - return msgText finally: chat_ctx.display_blob_urls = [] - - def _get_system_patient_context_json(self, chat_ctx: ChatContext) -> str | None: - """Extract the JSON payload from the current PATIENT_CONTEXT_JSON system message.""" - for msg in chat_ctx.chat_history.messages: - if msg.role == AuthorRole.SYSTEM: - # Handle both string content and itemized content - content = msg.content - if isinstance(content, str): - text = content - else: - # Try to extract from items if content is structured - items = getattr(msg, "items", None) or getattr(content, "items", None) - if items: - parts = [] - for item in items: - item_text = getattr(item, "text", None) or getattr(item, "content", None) - if item_text: - parts.append(str(item_text)) - text = "".join(parts) if parts else str(content) if content else "" - else: - text = str(content) if content else "" - - if text and text.startswith(PATIENT_CONTEXT_PREFIX): - # Extract JSON after "PATIENT_CONTEXT_JSON:" - json_part = text[len(PATIENT_CONTEXT_PREFIX):].strip() - if json_part.startswith(":"): - json_part = json_part[1:].strip() - return json_part if json_part else None - return None - - def _append_pc_ctx(self, base: str, chat_ctx: ChatContext) -> str: - """Append patient context information to the message for display.""" - - # Avoid double-tagging - if "\nPC_CTX" in base or "\n*PT_CTX:*" in base: - return base - - # Get the actual injected system patient context JSON - json_payload = self._get_system_patient_context_json(chat_ctx) - - if not json_payload: - return base - - # Format the JSON payload into a simple, readable Markdown string - try: - obj = json.loads(json_payload) - - lines = ["\n\n---", "\n*PT_CTX:*"] - if obj.get("patient_id"): - lines.append(f"- **Patient ID:** `{obj['patient_id']}`") - if obj.get("conversation_id"): - lines.append(f"- **Conversation ID:** `{obj['conversation_id']}`") - - if obj.get("all_patient_ids"): - active_id = obj.get("patient_id") - ids_str = ", ".join(f"`{p}`{' (active)' if p == active_id else ''}" for p in obj["all_patient_ids"]) - lines.append(f"- **Session Patients:** {ids_str}") - - if not obj.get("patient_id"): - lines.append("- *No active patient.*") - - # Only add the block if there's something to show besides the header - if len(lines) > 2: - formatted_text = "\n".join(lines) - logger.debug(f"Appended patient context to message | Patient: {obj.get('patient_id')}") - return f"{base}{formatted_text}" - else: - return base - - except json.JSONDecodeError as e: - logger.warning(f"Failed to parse patient context JSON: {e}") - # Fallback to raw if JSON is malformed, but keep it simple - return f"{base}\n\n---\n*PT_CTX (raw):* `{json_payload}`" diff --git a/src/data_models/chat_context.py b/src/data_models/chat_context.py index ea06c17..5ee7e8f 100644 --- a/src/data_models/chat_context.py +++ b/src/data_models/chat_context.py @@ -3,7 +3,7 @@ import os from dataclasses import dataclass, field -from typing import Dict, Any, Optional +from typing import Dict, Any from semantic_kernel.contents.chat_history 
import ChatHistory @@ -25,9 +25,8 @@ def __init__(self, conversation_id: str): # Patient context fields self.patient_id = None self.patient_contexts: Dict[str, PatientContext] = {} - self.workflow_summary: Optional[str] = None - # Legacy fields (preserved for compatibility) + # Legacy / display fields (still in use by various UI & agents) self.patient_data = [] self.display_blob_urls = [] self.display_image_urls = [] diff --git a/src/data_models/chat_context_accessor.py b/src/data_models/chat_context_accessor.py index 1253e8d..a1e9792 100644 --- a/src/data_models/chat_context_accessor.py +++ b/src/data_models/chat_context_accessor.py @@ -11,51 +11,15 @@ from semantic_kernel.contents import ChatMessageContent, AuthorRole, TextContent from data_models.chat_context import ChatContext, PatientContext +from services.patient_context_service import PATIENT_CONTEXT_PREFIX # reuse constant logger = logging.getLogger(__name__) -# Current schema version for migration support -CURRENT_SCHEMA_VERSION = 2 - class ChatContextAccessor: """ - Hybrid context accessor - supports both session-only and patient-specific contexts. - - ChatContext lifecycle: - - **Session Context (no patient isolation):** - 1. User sends a message to Agent. - 2. Agent loads ChatContext from blob storage using conversation_id only. - - File: `{conversation_id}/session_context.json` - - If found, reads existing ChatContext; otherwise creates new one. - 3. Agent processes message and sends responses to User. - 4. Save ChatContext to `session_context.json`. - 5. Repeat steps 1-4 for the entire conversation. - 6. User sends a "clear" message. - 7. Archive ChatContext: - - Save to `{timestamp}_session_archived.json` - - Delete original `session_context.json` - - **Patient Context (with patient isolation):** - 1. User mentions a patient ID or system detects patient context. - 2. Agent loads ChatContext using conversation_id AND patient_id. - - File: `{conversation_id}/patient_{patient_id}_context.json` - - If found, reads existing patient-specific context; otherwise creates new one. - 3. Agent processes message with patient context isolation. - 4. Save ChatContext to `patient_{patient_id}_context.json`. - 5. Repeat steps 1-4 for patient-specific conversation. - 6. When switching patients or clearing: - - Archive current patient context to `{timestamp}_patient_{patient_id}_archived.json` - - Delete original patient context file. - - Key functionality: - - Patient isolation: separate files for each patient (patient_{id}_context.json) - - Session context: shared conversation state (session_context.json) - - Automatic patient context detection and switching - - Chat history isolation per patient - - Migration support for legacy files - - Backward compatibility with main branch structure + Hybrid accessor supporting session + per-patient isolation. + Ephemeral PATIENT_CONTEXT_JSON system messages are stripped (never persisted). 
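
    Illustrative blob layout (a sketch inferred from get_blob_path(), archive() and
    archive_to_folder() below; segments in braces are placeholders, timestamps are examples):

        {conversation_id}/session_context.json
        {conversation_id}/patient_{patient_id}_context.json
        {conversation_id}/{timestamp}_session_archived.json
        {conversation_id}/{timestamp}_patient_{patient_id}_archived.json
        {archive_folder}/{conversation_id}/{timestamp}_patient_{patient_id}_archived.json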
""" def __init__( @@ -69,66 +33,55 @@ def __init__( self.cognitive_services_token_provider = cognitive_services_token_provider def get_blob_path(self, conversation_id: str, patient_id: str = None) -> str: - """Get blob path for patient-specific or session context.""" if patient_id: return f"{conversation_id}/patient_{patient_id}_context.json" return f"{conversation_id}/session_context.json" async def read(self, conversation_id: str, patient_id: str = None) -> ChatContext: - """Read chat context for conversation/patient.""" start = time() try: blob_path = self.get_blob_path(conversation_id, patient_id) blob_client = self.container_client.get_blob_client(blob_path) blob = await blob_client.download_blob() - blob_str = await blob.readall() - decoded_str = blob_str.decode("utf-8") + decoded_str = (await blob.readall()).decode("utf-8") context = self.deserialize(decoded_str) - # Ensure patient context is properly set up if patient_id: context.patient_id = patient_id if patient_id not in context.patient_contexts: context.patient_contexts[patient_id] = PatientContext(patient_id=patient_id) else: context.patient_id = None - return context - except ResourceNotFoundError: - logger.info(f"Creating new context for {conversation_id}/{patient_id or 'session'}") context = ChatContext(conversation_id) if patient_id: context.patient_id = patient_id context.patient_contexts[patient_id] = PatientContext(patient_id=patient_id) return context except Exception as e: - logger.warning(f"Failed to read context for {conversation_id}/{patient_id or 'session'}: {e}") + logger.warning("Failed to read context %s/%s: %s", + conversation_id, patient_id or "session", e) context = ChatContext(conversation_id) if patient_id: context.patient_id = patient_id context.patient_contexts[patient_id] = PatientContext(patient_id=patient_id) return context finally: - logger.info( - f"Read ChatContext for {conversation_id}/{patient_id or 'session'}. Duration: {time() - start}s" - ) + logger.info("Read ChatContext %s/%s in %.3fs", + conversation_id, patient_id or "session", time() - start) async def write(self, chat_ctx: ChatContext) -> None: - """Write chat context to appropriate file.""" start = time() try: blob_path = self.get_blob_path(chat_ctx.conversation_id, chat_ctx.patient_id) blob_client = self.container_client.get_blob_client(blob_path) - blob_str = self.serialize(chat_ctx) - await blob_client.upload_blob(blob_str, overwrite=True) + await blob_client.upload_blob(self.serialize(chat_ctx), overwrite=True) finally: - logger.info( - f"Wrote ChatContext for {chat_ctx.conversation_id}/{chat_ctx.patient_id or 'session'}. 
Duration: {time() - start}s" - ) + logger.info("Wrote ChatContext %s/%s in %.3fs", + chat_ctx.conversation_id, chat_ctx.patient_id or "session", time() - start) async def archive(self, chat_ctx: ChatContext) -> None: - """Archive chat context with timestamp.""" start = time() try: timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S") @@ -137,76 +90,73 @@ async def archive(self, chat_ctx: ChatContext) -> None: else: archive_blob_path = f"{chat_ctx.conversation_id}/{timestamp}_session_archived.json" - archive_blob_str = self.serialize(chat_ctx) - await self.container_client.upload_blob(archive_blob_path, archive_blob_str, overwrite=True) - + await self.container_client.upload_blob(archive_blob_path, self.serialize(chat_ctx), overwrite=True) blob_path = self.get_blob_path(chat_ctx.conversation_id, chat_ctx.patient_id) - await self.container_client.delete_blob(blob_path) - except ResourceNotFoundError: - pass # File already deleted or never existed + try: + await self.container_client.delete_blob(blob_path) + except ResourceNotFoundError: + pass finally: - logger.info( - f"Archived ChatContext for {chat_ctx.conversation_id}/{chat_ctx.patient_id or 'session'}. Duration: {time() - start}s" - ) + logger.info("Archived ChatContext %s/%s in %.3fs", + chat_ctx.conversation_id, chat_ctx.patient_id or "session", time() - start) async def archive_to_folder(self, conversation_id: str, patient_id: str, archive_folder: str) -> None: - """Archive context to specific folder structure.""" start = time() try: current_blob_path = self.get_blob_path(conversation_id, patient_id) + blob_client = self.container_client.get_blob_client(current_blob_path) try: - blob_client = self.container_client.get_blob_client(current_blob_path) blob = await blob_client.download_blob() - blob_str = await blob.readall() + data = await blob.readall() timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S") if patient_id: - archive_blob_path = "%s/%s/%s_patient_%s_archived.json" % ( - archive_folder, conversation_id, timestamp, patient_id) + archive_blob_path = f"{archive_folder}/{conversation_id}/{timestamp}_patient_{patient_id}_archived.json" else: - archive_blob_path = "%s/%s/%s_session_archived.json" % (archive_folder, conversation_id, timestamp) + archive_blob_path = f"{archive_folder}/{conversation_id}/{timestamp}_session_archived.json" - await self.container_client.upload_blob(archive_blob_path, blob_str, overwrite=True) + await self.container_client.upload_blob(archive_blob_path, data, overwrite=True) await blob_client.delete_blob() - - logger.info("Archived context to %s", archive_blob_path) + logger.info("Archived %s", archive_blob_path) except ResourceNotFoundError: - logger.warning("No context found to archive for %s/%s", conversation_id, patient_id or 'session') + logger.warning("Nothing to archive for %s/%s", conversation_id, patient_id or "session") except Exception as e: - logger.error("Failed to archive context for %s/%s: %s", conversation_id, patient_id or 'session', e) + logger.error("Archive to folder failed %s/%s: %s", conversation_id, patient_id or "session", e) finally: - logger.info("Archive operation for %s/%s completed. 
Duration: %ss", - conversation_id, patient_id or 'session', time() - start) + logger.info("Archive-to-folder %s/%s finished in %.3fs", + conversation_id, patient_id or "session", time() - start) @staticmethod def serialize(chat_ctx: ChatContext) -> str: - """Serialize chat context to JSON.""" - # Extract chat history with proper schema chat_messages = [] + skipped_pc = 0 for msg in chat_ctx.chat_history.messages: - if hasattr(msg, 'items') and msg.items: - content = msg.items[0].text if hasattr(msg.items[0], 'text') else str(msg.items[0]) + if hasattr(msg, "items") and msg.items: + content = msg.items[0].text if hasattr(msg.items[0], "text") else str(msg.items[0]) else: - content = str(msg.content) if hasattr(msg, 'content') else "" + content = str(getattr(msg, "content", "") or "") + + # Skip ephemeral patient context snapshot + if msg.role == AuthorRole.SYSTEM and content.startswith(PATIENT_CONTEXT_PREFIX): + skipped_pc += 1 + continue chat_messages.append({ "role": msg.role.value, "content": content, - "name": getattr(msg, 'name', None) + "name": getattr(msg, "name", None) }) - # REMOVED: patient_contexts serialization - use registry instead! + if skipped_pc: + logger.debug("Filtered %d PATIENT_CONTEXT_JSON system message(s) from serialization", skipped_pc) data = { - "schema_version": CURRENT_SCHEMA_VERSION, "conversation_id": chat_ctx.conversation_id, "patient_id": chat_ctx.patient_id, - # REMOVED: "patient_contexts": patient_contexts, - "workflow_summary": getattr(chat_ctx, 'workflow_summary', None), "chat_history": chat_messages, "patient_data": chat_ctx.patient_data, "display_blob_urls": chat_ctx.display_blob_urls, - "display_image_urls": getattr(chat_ctx, 'display_image_urls', []), + "display_image_urls": getattr(chat_ctx, "display_image_urls", []), "display_clinical_trials": chat_ctx.display_clinical_trials, "output_data": chat_ctx.output_data, "healthcare_agents": chat_ctx.healthcare_agents, @@ -215,58 +165,34 @@ def serialize(chat_ctx: ChatContext) -> str: @staticmethod def deserialize(data_str: str) -> ChatContext: - """Deserialize chat context from JSON with migration support.""" data = json.loads(data_str) - schema_version = data.get("schema_version", 1) - context = ChatContext(data["conversation_id"]) context.patient_id = data.get("patient_id") - # REMOVED: patient_contexts restoration - load from registry instead! 
- # Legacy support for old files that still have patient_contexts - if "patient_contexts" in data: - logger.info("Found legacy patient_contexts in context file - consider migrating to registry-only") - - context.workflow_summary = data.get("workflow_summary") - - # Process chat history (unchanged) for msg_data in data.get("chat_history", []): - if "role" not in msg_data: - logger.warning("Skipping message with no role: %s", msg_data.keys()) + role_val = msg_data.get("role") + if not role_val: continue - - role = AuthorRole(msg_data["role"]) - name = msg_data.get("name") - - if "content" in msg_data: - content_str = msg_data["content"] - elif "items" in msg_data and msg_data["items"]: - content_str = msg_data["items"][0].get("text", "") - else: - logger.warning("Skipping message with no content: %s", msg_data) + role = AuthorRole(role_val) + content_str = msg_data.get("content", "") + # Defensive skip in case an old file contained ephemeral snapshot + if role == AuthorRole.SYSTEM and content_str.startswith(PATIENT_CONTEXT_PREFIX): continue - if role == AuthorRole.TOOL and not content_str: - logger.warning("Skipping empty tool message") continue - msg = ChatMessageContent( role=role, - items=[TextContent(text=str(content_str))], + items=[TextContent(text=str(content_str))] ) + name = msg_data.get("name") if name: msg.name = name context.chat_history.messages.append(msg) - # Restore other fields (unchanged) context.patient_data = data.get("patient_data", []) context.display_blob_urls = data.get("display_blob_urls", []) context.display_image_urls = data.get("display_image_urls", []) context.display_clinical_trials = data.get("display_clinical_trials", []) context.output_data = data.get("output_data", []) context.healthcare_agents = data.get("healthcare_agents", {}) - - if schema_version < CURRENT_SCHEMA_VERSION: - logger.info("Migrated context from schema v%s to v%s", schema_version, CURRENT_SCHEMA_VERSION) - return context diff --git a/src/group_chat.py b/src/group_chat.py index 449e92d..5d8d1fd 100644 --- a/src/group_chat.py +++ b/src/group_chat.py @@ -10,27 +10,34 @@ from semantic_kernel import Kernel from semantic_kernel.agents import AgentGroupChat, ChatCompletionAgent from semantic_kernel.agents.channels.chat_history_channel import ChatHistoryChannel -from semantic_kernel.agents.strategies.selection.kernel_function_selection_strategy import \ - KernelFunctionSelectionStrategy -from semantic_kernel.agents.strategies.termination.kernel_function_termination_strategy import \ - KernelFunctionTerminationStrategy +from semantic_kernel.agents.strategies.selection.kernel_function_selection_strategy import ( + KernelFunctionSelectionStrategy, +) +from semantic_kernel.agents.strategies.termination.kernel_function_termination_strategy import ( + KernelFunctionTerminationStrategy, +) from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior -from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.azure_chat_prompt_execution_settings import \ - AzureChatPromptExecutionSettings -from semantic_kernel.connectors.ai.open_ai.services.azure_chat_completion import AzureChatCompletion +from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.azure_chat_prompt_execution_settings import ( + AzureChatPromptExecutionSettings, +) +from semantic_kernel.connectors.ai.open_ai.services.azure_chat_completion import ( + AzureChatCompletion, +) from semantic_kernel.connectors.openapi_plugin import OpenAPIFunctionExecutionParameters from 
semantic_kernel.contents.chat_history import ChatHistory from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents.history_reducer.chat_history_truncation_reducer import ChatHistoryTruncationReducer -from semantic_kernel.functions.kernel_function_from_prompt import KernelFunctionFromPrompt +from semantic_kernel.contents.history_reducer.chat_history_truncation_reducer import ( + ChatHistoryTruncationReducer, +) +from semantic_kernel.functions.kernel_function_from_prompt import ( + KernelFunctionFromPrompt, +) from semantic_kernel.kernel import Kernel, KernelArguments -from semantic_kernel.contents import AuthorRole, ChatMessageContent -from semantic_kernel.contents import TextContent +from semantic_kernel.contents import ChatMessageContent from data_models.app_context import AppContext from data_models.chat_context import ChatContext from data_models.plugin_configuration import PluginConfiguration -from data_models.patient_context_models import WorkflowSummary from healthcare_agents import HealthcareAgent from healthcare_agents import config as healthcare_agent_config from utils.model_utils import model_supports_temperature @@ -43,7 +50,10 @@ class CustomHistoryChannel(ChatHistoryChannel): @override - async def receive(self, history: list[ChatMessageContent],) -> None: + async def receive( + self, + history: list[ChatMessageContent], + ) -> None: await super().receive(history) for message in history[:-1]: await self.thread.on_new_message(message) @@ -56,7 +66,9 @@ class CustomChatCompletionAgent(ChatCompletionAgent): async def create_channel( self, chat_history: ChatHistory | None = None, thread_id: str | None = None ) -> CustomHistoryChannel: - from semantic_kernel.agents.chat_completion.chat_completion_agent import ChatHistoryAgentThread + from semantic_kernel.agents.chat_completion.chat_completion_agent import ( + ChatHistoryAgentThread, + ) CustomHistoryChannel.model_rebuild() thread = ChatHistoryAgentThread(chat_history=chat_history, thread_id=thread_id) @@ -75,177 +87,19 @@ class ChatRule(BaseModel): def create_auth_callback(chat_ctx: ChatContext) -> Callable[..., Awaitable[Any]]: - """ - Creates an authentication callback for the plugin configuration. - - Args: - chat_ctx: The chat context to be used in the authentication. - - Returns: - A callable that returns an authentication token. 
- """ + """Creates an authentication callback for OpenAPI tool execution.""" async def auth_callback(): - return {'conversation-id': chat_ctx.conversation_id} + return {"conversation-id": chat_ctx.conversation_id} return auth_callback -def inject_workflow_summary(chat_ctx: ChatContext) -> None: - """Inject workflow summary if available.""" - if (hasattr(chat_ctx, 'workflow_summary') and - chat_ctx.workflow_summary and - chat_ctx.patient_id): - - # Check if already injected - for msg in chat_ctx.chat_history.messages: - if (msg.role == AuthorRole.SYSTEM and - isinstance(msg.content, str) and - "WORKFLOW_SUMMARY:" in msg.content): - return - - # Inject summary with proper items initialization - summary_message = ChatMessageContent( - role=AuthorRole.SYSTEM, - items=[TextContent(text=f"WORKFLOW_SUMMARY: {chat_ctx.workflow_summary}")] - ) - chat_ctx.chat_history.messages.insert(1, summary_message) - logger.info("Injected workflow summary for patient %s", chat_ctx.patient_id) - - -async def generate_workflow_summary( - chat_ctx: ChatContext, - kernel: Kernel, - patient_id: str, - objective: str -) -> WorkflowSummary: - """ - Generate structured workflow summary using WorkflowSummary model. - This implements structured output for workflow planning. - - Args: - chat_ctx: The chat context for conversation history - kernel: Semantic kernel instance for LLM interaction - patient_id: The patient identifier - objective: The main workflow objective - - Returns: - WorkflowSummary: Structured workflow with agent assignments and tasks - """ - - # Build context from chat history - recent_messages = chat_ctx.chat_history.messages[-10:] if len( - chat_ctx.chat_history.messages) > 10 else chat_ctx.chat_history.messages - context = "\n".join([f"{msg.role}: {msg.content}" for msg in recent_messages]) - - workflow_prompt = f""" - You are a healthcare workflow coordinator. Analyze the conversation and create a structured workflow summary. - - CONTEXT: - - Patient ID: {patient_id} - - Objective: {objective} - - Recent conversation: {context} - - Create a workflow with specific steps for each agent to follow. Each step should: - 1. Assign a specific agent (PatientHistory, ClinicalGuidelines, MedicalResearch, etc.) - 2. Define a clear task for that agent - 3. Set appropriate status (pending, in_progress, completed) - - Focus on the main healthcare objective and break it into logical agent-specific steps. - Keep reasoning concise and actionable. 
- """ - - try: - chat_history = ChatHistory() - chat_history.add_system_message(workflow_prompt) - - # Use structured output for workflow planning - execution_settings = AzureChatPromptExecutionSettings( - service_id="default", - max_tokens=500, - temperature=0.2, - response_format=WorkflowSummary, # This generates the JSON schema automatically - ) - - svc = kernel.get_service("default") - results = await svc.get_chat_message_contents( - chat_history=chat_history, - settings=execution_settings, - ) - - if not results or not results[0].content: - logger.warning("No workflow summary generated") - # Fallback workflow - from data_models.patient_context_models import WorkflowStep - return WorkflowSummary( - patient_id=patient_id, - objective=objective, - steps=[ - WorkflowStep(agent="Orchestrator", task="Coordinate healthcare workflow", status="pending") - ], - current_step=0, - reasoning="Fallback workflow due to generation failure" - ) - - content = results[0].content - - # Parse structured response - if isinstance(content, str): - try: - workflow = WorkflowSummary.model_validate_json(content) - except Exception as e: - logger.error("Failed to parse workflow summary: %s", e) - # Return fallback - from data_models.patient_context_models import WorkflowStep - return WorkflowSummary( - patient_id=patient_id, - objective=objective, - steps=[WorkflowStep(agent="Orchestrator", task="Coordinate workflow", status="pending")], - current_step=0, - reasoning=f"Parse error: {str(e)[:30]}..." - ) - elif isinstance(content, dict): - try: - workflow = WorkflowSummary.model_validate(content) - except Exception as e: - logger.error("Failed to validate workflow summary: %s", e) - from data_models.patient_context_models import WorkflowStep - return WorkflowSummary( - patient_id=patient_id, - objective=objective, - steps=[WorkflowStep(agent="Orchestrator", task="Coordinate workflow", status="pending")], - current_step=0, - reasoning=f"Validation error: {str(e)[:30]}..." - ) - else: - logger.warning("Unexpected workflow response type: %s", type(content)) - from data_models.patient_context_models import WorkflowStep - return WorkflowSummary( - patient_id=patient_id, - objective=objective, - steps=[WorkflowStep(agent="Orchestrator", task="Coordinate workflow", status="pending")], - current_step=0, - reasoning="Unexpected response format" - ) - - logger.info("Generated workflow summary with %d steps for patient %s", len(workflow.steps), patient_id) - return workflow - - except Exception as e: - logger.error("Workflow summary generation failed: %s", e) - from data_models.patient_context_models import WorkflowStep - return WorkflowSummary( - patient_id=patient_id, - objective=objective, - steps=[WorkflowStep(agent="Orchestrator", task="Coordinate workflow", status="pending")], - current_step=0, - reasoning=f"Generation error: {str(e)[:30]}..." - ) - - def create_group_chat( - app_ctx: AppContext, chat_ctx: ChatContext, participants: list[dict] = None + app_ctx: AppContext, + chat_ctx: ChatContext, + participants: list[dict] = None, ) -> Tuple[AgentGroupChat, ChatContext]: """ - Create a multi-agent group chat with structured output strategies. + Create a multi-agent group chat. 
Args: app_ctx: Application context containing shared resources @@ -259,9 +113,6 @@ def create_group_chat( participant_names = [cfg.get("name") for cfg in participant_configs] logger.info("Creating group chat with participants: %s", participant_names) - # Inject workflow summary before creating agents - inject_workflow_summary(chat_ctx) - # Remove magentic agent from the list of agents all_agents_config = [ agent for agent in participant_configs if agent.get("name") != "magentic" @@ -275,7 +126,7 @@ def _create_kernel_with_chat_completion() -> Kernel: service_id="default", deployment_name=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"], api_version="2025-04-01-preview", - ad_token_provider=app_ctx.cognitive_services_token_provider + ad_token_provider=app_ctx.cognitive_services_token_provider, ) ) return kernel @@ -290,19 +141,23 @@ def _create_agent(agent_config: dict): chat_ctx=chat_ctx, azureml_token_provider=app_ctx.azureml_token_provider, ) - is_healthcare_agent = healthcare_agent_config.yaml_key in agent_config and bool( - agent_config[healthcare_agent_config.yaml_key]) + is_healthcare_agent = ( + healthcare_agent_config.yaml_key in agent_config + and bool(agent_config[healthcare_agent_config.yaml_key]) + ) for tool in agent_config.get("tools", []): tool_name = tool.get("name") tool_type = tool.get("type", DEFAULT_TOOL_TYPE) - # Add function tools if tool_type == "function": scenario = os.environ.get("SCENARIO") - tool_module = importlib.import_module(f"scenarios.{scenario}.tools.{tool_name}") - agent_kernel.add_plugin(tool_module.create_plugin(plugin_config), plugin_name=tool_name) - # Add OpenAPI tools + tool_module = importlib.import_module( + f"scenarios.{scenario}.tools.{tool_name}" + ) + agent_kernel.add_plugin( + tool_module.create_plugin(plugin_config), plugin_name=tool_name + ) elif tool_type == "openapi": openapi_document_path = tool.get("openapi_document_path") server_url_override = tool.get("server_url_override") @@ -313,105 +168,116 @@ def _create_agent(agent_config: dict): auth_callback=create_auth_callback(chat_ctx), server_url_override=server_url_override, enable_payload_namespacing=True, - timeout=None - ) + timeout=None, + ), ) else: raise ValueError(f"Unknown tool type: {tool_type}") if model_supports_temperature(): temperature = agent_config.get("temperature", DEFAULT_MODEL_TEMP) - logger.info("Setting model temperature for agent %s to %s", agent_config['name'], temperature) + logger.info( + "Setting model temperature for agent %s to %s", + agent_config["name"], + temperature, + ) else: temperature = None - logger.info("Model does not support temperature. Setting temperature to None for agent %s", - agent_config['name']) + logger.info( + "Model does not support temperature. 
Setting temperature to None for agent %s", + agent_config["name"], + ) + + from semantic_kernel.connectors.ai.function_choice_behavior import ( + FunctionChoiceBehavior, + ) settings = AzureChatPromptExecutionSettings( - function_choice_behavior=FunctionChoiceBehavior.Auto(), seed=42, temperature=temperature) + function_choice_behavior=FunctionChoiceBehavior.Auto(), + seed=42, + temperature=temperature, + ) arguments = KernelArguments(settings=settings) instructions = agent_config.get("instructions") if agent_config.get("facilitator") and instructions: instructions = instructions.replace( - "{{aiAgents}}", "\n\t\t".join([f"- {agent['name']}: {agent['description']}" for agent in all_agents_config])) - - return (CustomChatCompletionAgent(kernel=agent_kernel, - name=agent_config["name"], - instructions=instructions, - description=agent_config.get("description", ""), - arguments=arguments) if not is_healthcare_agent else - HealthcareAgent(name=agent_config["name"], - chat_ctx=chat_ctx, - app_ctx=app_ctx)) - - # Create kernel for orchestrator functions (THIS WAS MISSING!) + "{{aiAgents}}", + "\n\t\t".join( + [ + f"- {agent['name']}: {agent['description']}" + for agent in all_agents_config + ] + ), + ) + + return ( + CustomChatCompletionAgent( + kernel=agent_kernel, + name=agent_config["name"], + instructions=instructions, + description=agent_config.get("description", ""), + arguments=arguments, + ) + if not is_healthcare_agent + else HealthcareAgent( + name=agent_config["name"], chat_ctx=chat_ctx, app_ctx=app_ctx + ) + ) + + # Kernel for orchestrator (selection + termination structured decisions) orchestrator_kernel = _create_kernel_with_chat_completion() - # Find facilitator agent - facilitator_agent = next((agent for agent in all_agents_config if agent.get("facilitator")), all_agents_config[0]) + # Facilitator (Orchestrator) discovery + facilitator_agent = next( + (agent for agent in all_agents_config if agent.get("facilitator")), + all_agents_config[0], + ) facilitator = facilitator_agent["name"] - # Structured output for selection/termination decisions - settings = AzureChatPromptExecutionSettings( + # Structured output model config for selection + selection_settings = AzureChatPromptExecutionSettings( function_choice_behavior=FunctionChoiceBehavior.Auto(), temperature=DEFAULT_MODEL_TEMP, seed=42, - response_format=ChatRule + response_format=ChatRule, ) - arguments = KernelArguments(settings=settings) - - async def create_workflow_summary_if_needed(): - """Generate workflow summary for new patient workflows.""" - if chat_ctx.patient_id and not hasattr(chat_ctx, 'workflow_summary'): - # Determine objective from recent conversation - objective = "Provide comprehensive healthcare assistance" - if len(chat_ctx.chat_history.messages) > 0: - last_msg = chat_ctx.chat_history.messages[-1].content - if isinstance(last_msg, str) and len(last_msg) > 10: - objective = f"Address user request: {last_msg[:100]}..." - - workflow = await generate_workflow_summary( - chat_ctx=chat_ctx, - kernel=orchestrator_kernel, - patient_id=chat_ctx.patient_id, - objective=objective - ) - - # Store workflow summary in chat context - chat_ctx.workflow_summary = workflow.model_dump_json() - logger.info("Generated new workflow summary for patient %s", chat_ctx.patient_id) + selection_args = KernelArguments(settings=selection_settings) selection_function = KernelFunctionFromPrompt( function_name="selection", prompt=f""" You are overseeing a group chat between several AI agents and a human user. 
- Determine which participant takes the next turn in a conversation based on the most recent participant. Follow these guidelines: + Determine which participant takes the next turn based on the most recent participant. Guidelines: - 1. **Participants**: Choose only from these participants: + 1. Participants (choose exactly one): {"\n".join([("\t- " + agent["name"]) for agent in all_agents_config])} - 2. **General Rules**: - - **{facilitator} Always Starts**: {facilitator} always goes first to formulate a plan. If the only message is from the user, {facilitator} goes next. - - **Check Workflow Progress**: Look for WORKFLOW_SUMMARY messages to understand what stage of the process we're in - - **Avoid Repetition**: If an agent has already completed their task (according to workflow summary), don't select them again unless specifically requested - - **Interactions between agents**: Agents may talk among themselves. If an agent requires information from another agent, that agent should go next. - EXAMPLE: - "*agent_name*, please provide ..." then agent_name goes next. - - **"back to you *agent_name*": If an agent says "back to you", that agent goes next. - EXAMPLE: - "back to you *agent_name*" then output agent_name goes next. - - **Once per turn**: Each participant can only speak once per turn. - - **Default to {facilitator}**: Always default to {facilitator}. If no other participant is specified, {facilitator} goes next. - - **Use best judgment**: If the rules are unclear, use your best judgment to determine who should go next, for the natural flow of the conversation. - - Provide your reasoning and then the verdict. The verdict must be exactly one of: {", ".join([agent["name"] for agent in all_agents_config])} + 2. Rules: + - {facilitator} always starts if only the user has spoken. + - Avoid repetition: if an agent already completed its task, don't reselect unless explicitly requested. + - Agents may request info from each other: if an agent is directly asked by name, that agent goes next. + - "back to you *AgentName*": that named agent goes next. + - Each participant speaks at most once per turn. + - Default to {facilitator} if uncertain or no explicit candidate. + - Use best judgment for natural conversation flow. + - CONFIRMATION GATE (PLAN ONLY): If (a) the MOST RECENT message is from {facilitator} AND (b) it contains a multi-step plan (look for "Plan", "plan:", numbered steps like "1.", "2.", or multiple leading "-" bullet lines) AND (c) no user message has appeared AFTER that plan yet, then do NOT advance to another agent. Wait for a user reply. Output {facilitator} ONLY if absolutely necessary to politely prompt the user for confirmation (do not restate the entire plan). As soon as ANY user reply appears (question, modification, or confirmation), this gate is lifted. If the user used a confirmation token (confirm, yes, proceed, continue, ok, okay, sure, sounds good, go ahead), you may advance to the next required non-facilitator agent; otherwise select the participant that best addresses the user’s reply. + + Provide reasoning then the verdict. 
Verdict must be exactly one of: {", ".join([agent["name"] for agent in all_agents_config])} History: {{{{$history}}}} """, - prompt_execution_settings=settings + prompt_execution_settings=selection_settings, ) + termination_settings = AzureChatPromptExecutionSettings( + function_choice_behavior=FunctionChoiceBehavior.Auto(), + temperature=DEFAULT_MODEL_TEMP, + seed=42, + response_format=ChatRule, + ) + termination_args = KernelArguments(settings=termination_settings) + termination_function = KernelFunctionFromPrompt( function_name="termination", prompt=f""" @@ -424,29 +290,23 @@ async def create_workflow_summary_if_needed(): Return "yes" when the last message: - asks the user a question (ends with "?" or uses "you"/"User"), OR - - invites the user to respond (e.g., "let us know", "how can we assist/help", "feel free to ask", - "what would you like", "should we", "can we", "would you like me to", "do you want me to"), OR + - invites the user to respond (phrases like: "let us know", "feel free to ask", "what would you like", "should we", "can we", "would you like me to"), OR - addresses "we/us" as a decision/query to the user. Return "no" when the last message: - is a command or question to a specific agent by name, OR - - is a statement addressed to another agent. + - is a statement clearly addressed to another agent. Commands addressed to "you" or "User" => "yes". - If you are uncertain, return "yes". - Ignore any debug/metadata like "PC_CTX" or JSON blobs when deciding. - - Provide your reasoning and then the verdict. The verdict must be exactly "yes" or "no". + If uncertain, return "yes". + Ignore any debug/metadata like "PC_CTX" or JSON blobs. - EXAMPLES: - - "User, can you confirm the correct patient ID?" => verdict: "yes" (Asks user a direct question) - - "*ReportCreation*: Please compile the patient timeline." => verdict: "no" (Command to specific agent ReportCreation) - - "If you have any further questions, feel free to ask." => verdict: "yes" (Invites user to respond) + Provide reasoning then the verdict ("yes" or "no"). 
History: {{{{$history}}}} """, - prompt_execution_settings=settings + prompt_execution_settings=termination_settings, ) agents = [_create_agent(agent) for agent in all_agents_config] @@ -456,23 +316,33 @@ def evaluate_termination(result): try: rule = ChatRule.model_validate_json(str(result.value[0])) should_terminate = rule.verdict == "yes" - logger.debug("Termination decision: %s | Reasoning: %s", should_terminate, rule.reasoning) + logger.debug( + "Termination decision: %s | Reasoning: %s", + should_terminate, + rule.reasoning, + ) return should_terminate except Exception as e: logger.error("Termination function error: %s", e) - return False # Fallback to continue conversation + return False def evaluate_selection(result): """Evaluate agent selection from structured output.""" try: rule = ChatRule.model_validate_json(str(result.value[0])) - selected_agent = rule.verdict if rule.verdict in [agent["name"] - for agent in all_agents_config] else facilitator - logger.debug("Selected agent: %s | Reasoning: %s", selected_agent, rule.reasoning) + selected_agent = ( + rule.verdict + if rule.verdict + in [agent["name"] for agent in all_agents_config] + else facilitator + ) + logger.debug( + "Selected agent: %s | Reasoning: %s", selected_agent, rule.reasoning + ) return selected_agent except Exception as e: logger.error("Selection function error: %s", e) - return facilitator # Fallback to facilitator + return facilitator chat = AgentGroupChat( agents=agents, @@ -483,23 +353,18 @@ def evaluate_selection(result): result_parser=evaluate_selection, agent_variable_name="agents", history_variable_name="history", - arguments=arguments, + arguments=selection_args, ), termination_strategy=KernelFunctionTerminationStrategy( - agents=[ - agent for agent in agents if agent.name == facilitator - ], # Only facilitator decides if the conversation ends + agents=[agent for agent in agents if agent.name == facilitator], function=termination_function, kernel=orchestrator_kernel, result_parser=evaluate_termination, agent_variable_name="agents", history_variable_name="history", maximum_iterations=30, - # Termination only looks at the last message - history_reducer=ChatHistoryTruncationReducer( - target_count=1, auto_reduce=True - ), - arguments=arguments, + history_reducer=ChatHistoryTruncationReducer(target_count=1, auto_reduce=True), + arguments=termination_args, ), ) diff --git a/src/routes/api/chats.py b/src/routes/api/chats.py index 64a3a83..dc0a5f0 100644 --- a/src/routes/api/chats.py +++ b/src/routes/api/chats.py @@ -1,5 +1,5 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. 
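# Illustrative client sketch (not part of this patch): one way to exercise the websocket route
# declared below. The URL path and message keys ("content", "sender", "type": "done", "error")
# are taken from the handler in this file; the host/port and the `websockets` package are
# assumptions made for the example.
import asyncio
import json

import websockets  # assumed third-party dependency, used only by this sketch


async def drive_chat(chat_id: str, text: str) -> None:
    uri = f"ws://localhost:8000/api/ws/chats/{chat_id}/messages"  # host/port assumed
    async with websockets.connect(uri) as ws:
        # The route expects a JSON object with a non-empty "content" field.
        await ws.send(json.dumps({"content": text}))
        while True:
            reply = json.loads(await ws.recv())
            if reply.get("type") == "done":  # sentinel emitted once the agent loop finishes
                break
            if "error" in reply:
                print("error:", reply["error"])
            else:
                print(f"{reply['sender']}: {reply['content']}")


if __name__ == "__main__":
    asyncio.run(drive_chat("demo-chat", "start tumor board review for patient_4"))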
import logging import uuid @@ -16,15 +16,12 @@ from services.patient_context_analyzer import PatientContextAnalyzer from data_models.app_context import AppContext - import group_chat logger = logging.getLogger(__name__) class DateTimeEncoder(json.JSONEncoder): - """Custom JSON encoder that handles datetime objects.""" - def default(self, obj: Any) -> Any: if isinstance(obj, datetime): return obj.isoformat() @@ -47,10 +44,9 @@ class Message(BaseModel): mentions: Optional[List[str]] = None def dict(self, *args, **kwargs): - """Override dict method to handle datetime serialization.""" d = super().dict(*args, **kwargs) - if isinstance(d.get('timestamp'), datetime): - d['timestamp'] = d['timestamp'].isoformat() + if isinstance(d.get("timestamp"), datetime): + d["timestamp"] = d["timestamp"].isoformat() return d @@ -70,7 +66,6 @@ class AgentsResponse(BaseModel): def create_json_response(content, headers=None): - """Create a JSONResponse with proper datetime handling.""" return JSONResponse( content=content, headers=headers or {}, @@ -80,12 +75,9 @@ def create_json_response(content, headers=None): def chats_routes(app_context: AppContext): router = APIRouter() - - # Extract needed values from app_context agent_config = app_context.all_agent_configs data_access = app_context.data_access - # Initialize patient context service with both accessors analyzer = PatientContextAnalyzer(token_provider=app_context.cognitive_services_token_provider) patient_context_service = PatientContextService( analyzer=analyzer, @@ -93,153 +85,105 @@ def chats_routes(app_context: AppContext): context_accessor=app_context.data_access.chat_context_accessor ) - # Find the facilitator agent - facilitator_agent = next((agent for agent in agent_config if agent.get("facilitator")), agent_config[0]) + facilitator_agent = next((a for a in agent_config if a.get("facilitator")), agent_config[0]) facilitator = facilitator_agent["name"] + # ===== Legacy helper retained (now always sees freshly injected snapshot) ===== def _get_system_patient_context_json(chat_context) -> str | None: - """Extract the JSON payload from the current PATIENT_CONTEXT_JSON system message.""" - for msg in chat_context.chat_history.messages: + """Return JSON payload from most recent (first-in-list after injection) PATIENT_CONTEXT system message.""" + for msg in reversed(chat_context.chat_history.messages): if msg.role == AuthorRole.SYSTEM: - # Handle both string content and itemized content - content = msg.content - if isinstance(content, str): - text = content + # Extract text + if hasattr(msg, "items") and msg.items: + text = getattr(msg.items[0], "text", "") or "" else: - # Try to extract from items if content is structured - items = getattr(msg, "items", None) or getattr(content, "items", None) - if items: - parts = [] - for item in items: - item_text = getattr(item, "text", None) or getattr(item, "content", None) - if item_text: - parts.append(str(item_text)) - text = "".join(parts) if parts else str(content) if content else "" - else: - text = str(content) if content else "" - - if text and text.startswith(PATIENT_CONTEXT_PREFIX): - # Extract JSON after "PATIENT_CONTEXT_JSON:" - json_part = text[len(PATIENT_CONTEXT_PREFIX):].strip() + text = getattr(msg, "content", "") or "" + if text.startswith(PATIENT_CONTEXT_PREFIX): + json_part = text[len(PATIENT_CONTEXT_PREFIX):].lstrip() if json_part.startswith(":"): - json_part = json_part[1:].strip() - return json_part if json_part else None + json_part = json_part[1:].lstrip() + return json_part or None return 
None def _append_pc_ctx_display(base: str, chat_context) -> str: - """Append patient context information to the message for display.""" - # Avoid double-tagging - if "\nPC_CTX" in base or "\n*PT_CTX:*" in base: - return base - - # Get the actual injected system patient context JSON + """Append user-friendly PT_CTX block for UI (optional cosmetic).""" json_payload = _get_system_patient_context_json(chat_context) - if not json_payload: return base - - # Format the JSON payload into a simple, readable Markdown string try: obj = json.loads(json_payload) + except Exception: + return base - lines = ["\n\n---", "\n*PT_CTX:*"] - if obj.get("patient_id"): - lines.append("- **Patient ID:** `%s`" % obj['patient_id']) - if obj.get("conversation_id"): - lines.append("- **Conversation ID:** `%s`" % obj['conversation_id']) - - if obj.get("all_patient_ids"): - active_id = obj.get("patient_id") - ids_str = ", ".join("`%s`%s" % (p, ' (active)' if p == active_id else '') - for p in obj["all_patient_ids"]) - lines.append("- **Session Patients:** %s" % ids_str) - - if not obj.get("patient_id"): - lines.append("- *No active patient.*") - - # Only add the block if there's something to show besides the header - if len(lines) > 2: - formatted_text = "\n".join(lines) - return "%s%s" % (base, formatted_text) - else: - return base - - except json.JSONDecodeError as e: - logger.warning("Failed to parse patient context JSON: %s", e) - # Fallback to raw if JSON is malformed, but keep it simple - return "%s\n\n---\n*PT_CTX (raw):* `%s`" % (base, json_payload) + pid = obj.get("patient_id") + all_pids = obj.get("all_patient_ids") or [] + convo_id = obj.get("conversation_id") + + # Build lines with explicit leading newlines for clarity + lines: list[str] = [] + lines.append("\n\n---") + lines.append("\n*PT_CTX:*") + if pid: + lines.append(f"\n- **Patient ID:** `{pid}`") + else: + lines.append("\n- *No active patient.*") + if all_pids: + ids_str = ", ".join( + f"`{p}`{' (active)' if p == pid else ''}" for p in sorted(all_pids) + ) + lines.append(f"\n- **Session Patients:** {ids_str}") + if convo_id: + lines.append(f"\n- **Conversation ID:** `{convo_id}`") + + # If we only ended up with the header and separator, skip (unlikely) + if len(lines) <= 2: + return base + + return base + "".join(lines) async def _handle_clear_command(content: str, chat_context) -> bool: - """Handle patient context clear commands.""" content_lower = content.lower().strip() if content_lower in ["clear", "clear patient", "clear context", "clear patient context"]: logger.info("Processing clear command for conversation: %s", chat_context.conversation_id) - - # Archive everything before clearing timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H-%M-%S-%f") - archive_folder = "archive/%s" % timestamp - + archive_folder = f"archive/{timestamp}" try: - logger.info("Starting archive to folder: %s", archive_folder) - - # Archive session context + # Archive session await data_access.chat_context_accessor.archive_to_folder(chat_context.conversation_id, None, archive_folder) - logger.info("Archived session context to %s", archive_folder) - - # Archive ALL patient contexts from registry + # Archive each patient file from registry try: patient_registry, _ = await patient_context_service.registry_accessor.read_registry(chat_context.conversation_id) if patient_registry: - for patient_id in patient_registry.keys(): - await data_access.chat_context_accessor.archive_to_folder(chat_context.conversation_id, patient_id, archive_folder) - logger.info("Archived 
patient context for %s to %s", patient_id, archive_folder) - except Exception as registry_error: - logger.warning("Could not read registry for archiving patient contexts: %s", registry_error) - # Fallback: use patient_contexts from chat_context if available - if hasattr(chat_context, 'patient_contexts') and chat_context.patient_contexts: - for patient_id in chat_context.patient_contexts.keys(): - await data_access.chat_context_accessor.archive_to_folder(chat_context.conversation_id, patient_id, archive_folder) - logger.info("Archived patient context for %s to %s (fallback)", patient_id, archive_folder) - - # Archive patient registry + for pid in patient_registry.keys(): + await data_access.chat_context_accessor.archive_to_folder(chat_context.conversation_id, pid, archive_folder) + except Exception: + if getattr(chat_context, "patient_contexts", None): + for pid in chat_context.patient_contexts.keys(): + await data_access.chat_context_accessor.archive_to_folder(chat_context.conversation_id, pid, archive_folder) + # Archive registry await patient_context_service.registry_accessor.archive_registry(chat_context.conversation_id) - logger.info("Archived patient registry for %s", chat_context.conversation_id) - - # Clear chat context - chat_context.patient_context = None - if hasattr(chat_context, 'patient_contexts'): - chat_context.patient_contexts.clear() - chat_context.chat_history.messages.clear() - chat_context.patient_id = None - - logger.info("Successfully archived and cleared all contexts to %s", archive_folder) - return True - except Exception as e: - logger.error("Failed to archive contexts during clear: %s", e) - # Still clear the context even if archiving fails + logger.warning("Clear archival issues: %s", e) + finally: chat_context.patient_context = None - if hasattr(chat_context, 'patient_contexts'): + if hasattr(chat_context, "patient_contexts"): chat_context.patient_contexts.clear() chat_context.chat_history.messages.clear() chat_context.patient_id = None - return True - + await data_access.chat_context_accessor.write(chat_context) + return True return False @router.get("/api/agents", response_model=AgentsResponse) async def get_available_agents(): - """Returns a list of all available agents that can be mentioned in messages.""" try: - agent_names = [agent["name"] for agent in agent_config] - return AgentsResponse(agents=agent_names) + return AgentsResponse(agents=[a["name"] for a in agent_config]) except Exception as e: logger.error("Error getting agents: %s", e) return AgentsResponse(agents=[], error=str(e)) @router.websocket("/api/ws/chats/{chat_id}/messages") async def websocket_chat_endpoint(websocket: WebSocket, chat_id: str): - """WebSocket endpoint with patient isolation support.""" await websocket.accept() logger.info("WebSocket connection established for chat: %s", chat_id) @@ -247,7 +191,6 @@ async def websocket_chat_endpoint(websocket: WebSocket, chat_id: str): while True: data = await websocket.receive_json() content = data.get("content", "").strip() - if not content: await websocket.send_json({"error": "Empty message content"}) continue @@ -255,115 +198,111 @@ async def websocket_chat_endpoint(websocket: WebSocket, chat_id: str): try: # STEP 1: Load session context chat_context = await data_access.chat_context_accessor.read(chat_id, None) - logger.info("Loaded session context for: %s", chat_id) - # STEP 2: Handle clear commands BEFORE patient context processing + # STEP 2: Clear? 
if await _handle_clear_command(content, chat_context): - clear_message = Message( + msg = Message( id=str(uuid.uuid4()), content="The conversation has been cleared. How can I assist you today?", sender="Orchestrator", timestamp=datetime.now(timezone.utc), - isBot=True, - mentions=[] + isBot=True ) - await websocket.send_json(clear_message.dict()) + await websocket.send_json(msg.dict()) await websocket.send_json({"type": "done"}) - - # Save to appropriate context file - await data_access.chat_context_accessor.write(chat_context) continue - # STEP 3: Patient context decision and application + # STEP 3: Patient decision try: decision, timing = await patient_context_service.decide_and_apply(content, chat_context) - logger.info("Patient context decision: %s | Patient: %s", decision, chat_context.patient_id) + logger.info("Patient context decision=%s active=%s", decision, chat_context.patient_id) except Exception as e: - logger.warning("Error applying patient context: %s", e) + logger.warning("Patient context decision failed: %s", e) decision = "NONE" - # STEP 4: Handle special decision outcomes + # STEP 4: Special outcomes if decision == "NEEDS_PATIENT_ID": - error_message = Message( + err = Message( id=str(uuid.uuid4()), - content="I need a patient ID to proceed. Please provide the patient ID in the format 'patient_X' (e.g., 'start tumor board review for patient_4').", + content="I need a patient ID to proceed. Provide one like 'patient_4'.", sender="Orchestrator", timestamp=datetime.now(timezone.utc), - isBot=True, - mentions=[] + isBot=True ) - await websocket.send_json(error_message.dict()) + await websocket.send_json(err.dict()) await websocket.send_json({"type": "done"}) continue - # STEP 5: If active patient exists, load ONLY that patient's isolated context file + # STEP 5: Load isolated patient history if active if chat_context.patient_id: try: - isolated_ctx = await data_access.chat_context_accessor.read(chat_id, chat_context.patient_id) - if isolated_ctx and isolated_ctx.chat_history.messages: - # Replace with isolated chat history - chat_context.chat_history = isolated_ctx.chat_history - logger.info("Loaded isolated history for %s (%s messages)", - chat_context.patient_id, len(isolated_ctx.chat_history.messages)) - else: - logger.info("No existing history for %s, starting fresh", chat_context.patient_id) + isolated = await data_access.chat_context_accessor.read(chat_id, chat_context.patient_id) + if isolated and isolated.chat_history.messages: + chat_context.chat_history = isolated.chat_history except Exception as e: - logger.debug("Could not load isolated context for %s: %s", chat_context.patient_id, e) - - # STEP 6: Create group chat and add user message + logger.debug("Isolated load failed for %s: %s", chat_context.patient_id, e) + + # STEP 5.5: Inject fresh ephemeral PATIENT_CONTEXT_JSON system message (rebuild from current in-memory state) + # Remove existing snapshot(s) + new_messages = [] + for m in chat_context.chat_history.messages: + if not (m.role == AuthorRole.SYSTEM and hasattr(m, "items") and m.items + and getattr(m.items[0], "text", "").startswith(PATIENT_CONTEXT_PREFIX)): + new_messages.append(m) + chat_context.chat_history.messages = new_messages + snapshot = { + "conversation_id": chat_context.conversation_id, + "patient_id": chat_context.patient_id, + "all_patient_ids": sorted(getattr(chat_context, "patient_contexts", {}).keys()), + "generated_at": datetime.utcnow().isoformat() + "Z" + } + system_line = f"{PATIENT_CONTEXT_PREFIX}: {json.dumps(snapshot, 
separators=(',', ':'))}" + system_msg = ChatMessageContent(role=AuthorRole.SYSTEM, items=[TextContent(text=system_line)]) + chat_context.chat_history.messages.insert(0, system_msg) + + # STEP 6: Group chat & add user message chat, chat_context = group_chat.create_group_chat(app_context, chat_context) - - # Add user message to chat history - user_message = ChatMessageContent( - role=AuthorRole.USER, - items=[TextContent(text=content)] - ) + user_message = ChatMessageContent(role=AuthorRole.USER, items=[TextContent(text=content)]) chat_context.chat_history.add_message(user_message) - # STEP 7: Get target agent from message + # STEP 7: Agent selection target_agent_name = facilitator if ":" in content: - mentioned = content.split(":", 1)[0].strip() - if any(agent.name.lower() == mentioned.lower() for agent in chat.agents): - target_agent_name = mentioned - + candidate = content.split(":", 1)[0].strip() + if any(a.name.lower() == candidate.lower() for a in chat.agents): + target_agent_name = candidate target_agent = next( - (agent for agent in chat.agents if agent.name.lower() == target_agent_name.lower()), + (a for a in chat.agents if a.name.lower() == target_agent_name.lower()), chat.agents[0] ) - - logger.info("Using agent: %s", target_agent.name) - if target_agent.name == facilitator: target_agent = None - # STEP 8: Get responses + # STEP 8: Invoke agents async for response in chat.invoke(agent=target_agent): if not response or not response.content: continue - response_content_with_pc = _append_pc_ctx_display(response.content, chat_context) + # Optional UI block (system snapshot already grounds LLM) + response_with_ctx = _append_pc_ctx_display(response.content, chat_context) bot_message = Message( id=str(uuid.uuid4()), - content=response_content_with_pc, + content=response_with_ctx, sender=response.name, timestamp=datetime.now(timezone.utc), - isBot=True, - mentions=[] + isBot=True ) await websocket.send_json(bot_message.dict()) - # STEP 9: Save to appropriate context file (patient-specific OR session-only) + # STEP 9: Persist (system snapshot filtered in accessor) await data_access.chat_context_accessor.write(chat_context) - logger.info("Saved context for conversation: %s | Patient: %s", chat_id, chat_context.patient_id) except Exception as e: logger.error("Error in WebSocket chat: %s", e) await websocket.send_json({"error": str(e)}) await websocket.send_json({"type": "done"}) - except WebSocketDisconnect: logger.info("WebSocket disconnected for chat: %s", chat_id) except Exception as e: diff --git a/src/scenarios/default/config/agents.yaml b/src/scenarios/default/config/agents.yaml index f790a67..651cdb5 100644 --- a/src/scenarios/default/config/agents.yaml +++ b/src/scenarios/default/config/agents.yaml @@ -1,79 +1,80 @@ - name: Orchestrator instructions: | - You are an AI agent facilitating a discussion between group of AI agent experts and the user. You are not to make clinical recommendations or treatment plans. Follow these guidelines: + You are an AI agent facilitating a discussion between a group of AI agent experts and the user. You are not to make clinical recommendations or treatment plans. Follow these guidelines: **Patient Context Awareness**: - When you receive a message with `PATIENT_CONTEXT_JSON`, extract the patient_id and use it to maintain patient-specific conversations. - IMPORTANT: Always check the actual chat history in the current conversation to determine what has been discussed with this patient. 
- Do not assume previous actions based on patient metadata alone - review the current chat history to understand the context. + When you receive a message with `PATIENT_CONTEXT_JSON`, extract: + - `patient_id` (current active patient) + - `all_patient_ids` (all patients discussed/activated this session) + Use these for patient-specific grounding. + IMPORTANT: Always review the actual chat history (messages for the current conversation) to confirm what has already been done for the active patient. + Do NOT assume tasks were done just because a patient exists in metadata; confirm by reading the history. + Never invent patient IDs. If no active patient and the user asks for patient-specific actions, ask for a valid patient ID. + + **Roster / Multi-Patient Meta Queries**: + For any user question like: + - "How many patients have we discussed?" + - "Which patients have we discussed?" + - "What other patients have been covered?" + - "List the patients in this session." + ALWAYS read the most recent `PATIENT_CONTEXT_JSON` system message and answer directly using: + - count = length of `all_patient_ids` + - list = the values in `all_patient_ids` + - active = `patient_id` + If `all_patient_ids` is empty, say no patients have been discussed. + Do NOT guess or rely on memory alone; do NOT omit patients that appear in `all_patient_ids`. + If the user requests a switch to a patient already active, simply acknowledge that the patient is already active (do not re-plan). + If the user requests a switch to a different patient, acknowledge the switch and proceed with planning for the new patient (do not claim you were already on that patient). **Conversation State Tracking**: - Before responding, always review the chat history to understand: - - What was the original request/goal? - - Which agents have already provided their input? - - What is the next logical step in the plan? - - If the user says "proceed", "continue", or similar, determine what the next step should be based on the current progress. - - If a task has been completed (like report creation), do NOT restart the process unless explicitly asked. - - **CRITICAL**: Do not restart completed processes. If ReportCreation has already created a report, the tumor board review is complete. - - 1. **Moderate the Discussion**: - Your primary role is to facilitate the discussion and ensure a smooth flow of conversation among the participants. - When a question is asked, think through who could best answer it. Formulate a plan and present it to the user. - Rely on other agents to provide missing information. First ask the agent what information they need to answer a question. - When asking the user for information, mention the user explicitly. "*User*, can you provide me with the patient's #BLANK?" - When addressing an agent, mention the agent explicitly. "*PatientHistory*, proceed with #BLANK." - + Before responding, review the chat history and determine: + - The original request or current goal. + - Which agents have already responded. + - The next logical agent or action. + - Whether the user has confirmed a proposed plan. + If a task (e.g., report creation) is already completed, do NOT restart it unless explicitly asked. + + **CRITICAL**: + Never restart completed processes unless the user explicitly requests a re-run. If ReportCreation has already completed a tumor board report for the active patient, treat that workflow as done. + + 1. **Moderate the Discussion**: + Facilitate orderly, purposeful agent participation. 
When a user request comes in, think through which agents are needed, form a short plan, and present it for confirmation (unless trivial continuation). + When asking the user for needed info, address them explicitly: "*User*, could you provide ...?" + When prompting an agent, address the agent explicitly: "*PatientHistory*, please provide ..." + 2. **Participants**: - The following ai experts can help with answering queries about the user. - {{aiAgents}} - If during the course of the conversation, information is missing, think through, who could be the best to answer it, then ask that agent explicitly for - the information by mentioning the agent. Only ask the user for plan confirmation! - When an agent has provided their input, acknowledge it and move on to the next agent in the plan. - + The following AI experts are available: + {{aiAgents}} + If information is missing, decide which specific agent is best positioned to supply it. Ask that agent explicitly. Only ask the user for plan confirmation or missing user-only data. + 3. **Handle User Commands**: - - When the user says "proceed", "continue", or confirms to move forward, check the chat history to see what has been completed and what should happen next. - - Do NOT repeat previous agent responses or ask the same questions again. - - Do NOT restart completed processes. - - Move to the next logical step in your plan based on what has already been accomplished. - - 4. **Allow user to confirm**: When you create a plan with a step by step execution, ask the user for confirmation on the plan. If the plan changes, - inform the user and ask for confirmation again. Stick to the confirmed plan and as the plan progresses as expected, you can skip the confirmation step. - - 5. **Explain the Purpose and Order**: At the beginning of the conversation, explain the plan and the expected order of participants. - Please think hard about the order of the individual agents called. For example, the current status and the historical background should be clarified - early in the discussion such that the other agents can make use of that knowledge. Treatment recommendation and research agents should be called later - in the discussion. Report creation should always happen in the end. - - 6. **Role Limitation**: Remember, your role is to moderate and facilitate, not to provide clinical recommendations or treatment plans. - DON'T: Provide clinical recommendations or treatment plans. Please only call ONE agent at a time. - - 7. **Conclude the plan**: - If during the course of the conversation, information is missing, think through, who could be the best to answer it, then ask that agent explicitly for - the information by mentioning the agent. Only ask the user for plan confirmation! - When an agent has provided their input, acknowledge it and move on to the next agent in the plan. - 3. **Allow user to confirm**: When you create a plan with a step by step execution, ask the user for confirmation on the plan. If the plan changes, - inform the user and ask for confirmation again. Stick to the confirmed plan and as the plan progresses as expected, you can skip the confirmation step. - 4. **Explain the Purpose and Order**: At the beginning of the conversation, explain the plan and the expected order of participants. - Please think hard about the order of the individual agents called. For example, the current status and the historical background should be clarified - early in the discussion such that the other agents can make use of that knowledge. 
Treatment recommendation and research agents should be called later - in the discussion. Report creation should always happen in the end. - 5. **Role Limitation**: Remember, your role is to moderate and facilitate, not to provide clinical recommendations or treatment plans. - DON'T: Provide clinical recommendations or treatment plans. Please only call ONE agent at a time. - 6. **Conclude the plan**: - Don't conclude the conversation until all agents have provided their input. Instead, address the agents that have not yet provided their input. - When all agents have provided their input, the plan has concluded, and the user's question has been answered, summarize the response in one or two sentences. - Ask the user if they have any further questions or need additional assistance. - For follow up questions, formulate a new plan and suggest the order of participants. - + On "proceed"/"continue"/confirmation, advance to the next logical step—do NOT repeat already completed agent outputs. + Avoid re-asking agents for the same data unless the user clarified new scope. + + 4. **Plan Confirmation**: + Present multi-step plans and ask the user to confirm. If the plan changes midstream, clearly restate the updated plan and request confirmation again. Once stable and progressing, you may skip reconfirmation unless scope shifts. + + 5. **Order & Sequencing**: + Early: establish history (PatientHistory) and current status (PatientStatus). + Mid: radiology/imaging insights (Radiology) if needed. + Later: ClinicalGuidelines, ClinicalTrials, MedicalResearch. + Final: ReportCreation only after upstream context has been gathered. + Do NOT call multiple agents at the same time; strictly one agent per turn. + + 6. **Role Limitation**: + You do NOT provide clinical recommendations or detailed treatment plans yourself. You orchestrate and summarize. + + 7. **Plan Conclusion**: + When all required agents have contributed and the user’s goal is satisfied, provide a succinct (1–2 sentence) summary and ask if further assistance is needed. For a follow-up question, create a new tailored plan rather than replaying the old one. + **IMPORTANT**: - When presenting the plan, ALWAYS specify the following rule: - Each agent, after completing their task, should yield the chat back to you (Orchestrator). Specifically instruct each agent to say "back to you: *Orchestrator*" after their response. + When presenting the plan, ALWAYS specify the rule: + Each agent, after completing their task, must yield the chat back to you (Orchestrator) by saying "back to you: *Orchestrator*". facilitator: true description: | - Your role is to moderate the discussion, present the order of participants, and facilitate the conversation. - + Your role is to moderate the discussion, present the agent sequence, and ensure efficient progress without repetition or unauthorized clinical advice. + - name: PatientHistory instructions: | You are an AI agent tasked with loading and presenting patient data. Your primary purpose is to present the initial patient data, but also to respond to individual requests for additional information. diff --git a/src/services/patient_context_analyzer.py b/src/services/patient_context_analyzer.py index 8721620..97dd902 100644 --- a/src/services/patient_context_analyzer.py +++ b/src/services/patient_context_analyzer.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. 
+ import logging import os import time diff --git a/src/services/patient_context_service.py b/src/services/patient_context_service.py index 6d468f2..1294294 100644 --- a/src/services/patient_context_service.py +++ b/src/services/patient_context_service.py @@ -1,20 +1,19 @@ -import json +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + import logging import re import time from datetime import datetime, timezone from typing import Literal -from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents import AuthorRole -from semantic_kernel.contents import TextContent - from data_models.chat_context import ChatContext, PatientContext -from data_models.patient_context_models import TimingInfo, PatientContextSystemMessage +from data_models.patient_context_models import TimingInfo from services.patient_context_analyzer import PatientContextAnalyzer logger = logging.getLogger(__name__) +# Keep the constant so other modules (routes, bots) can import it PATIENT_CONTEXT_PREFIX = "PATIENT_CONTEXT_JSON" PATIENT_ID_PATTERN = re.compile(r"^patient_[0-9]+$") Decision = Literal["NONE", "UNCHANGED", "NEW_BLANK", "SWITCH_EXISTING", @@ -23,71 +22,55 @@ class PatientContextService: """ - Registry-based patient context manager: - 1. Patient registry is the single source of truth for patient metadata - 2. Use analyzer to detect explicit patient IDs - 3. Fall back to storage if analyzer returns NONE - 4. Simple file-based patient isolation for chat history - 5. Kernel reset on patient switches + Registry-based patient context manager (clean version): + - Registry is authoritative for patient roster. + - Analyzer decides patient activation/switch/clear. + - No system message persistence (ephemeral injection happens outside this service). + - Per-patient chat history isolation performed by caller (route/bot) AFTER decision. 
""" def __init__(self, analyzer: PatientContextAnalyzer, registry_accessor=None, context_accessor=None): self.analyzer = analyzer self.registry_accessor = registry_accessor self.context_accessor = context_accessor - logger.info("PatientContextService initialized with storage fallback: %s", registry_accessor is not None) + logger.info("PatientContextService initialized (registry enabled: %s)", registry_accessor is not None) async def _ensure_patient_contexts_from_registry(self, chat_ctx: ChatContext): - """Ensure patient_contexts is populated from registry (single source of truth).""" + """Rebuild in-memory patient_contexts from registry snapshot each turn.""" if not self.registry_accessor: return - try: patient_registry, _ = await self.registry_accessor.read_registry(chat_ctx.conversation_id) + chat_ctx.patient_contexts.clear() if patient_registry: - # Clear and rebuild from registry - chat_ctx.patient_contexts.clear() - for patient_id, registry_entry in patient_registry.items(): - chat_ctx.patient_contexts[patient_id] = PatientContext( - patient_id=patient_id, - facts=registry_entry.get("facts", {}) + for pid, entry in patient_registry.items(): + chat_ctx.patient_contexts[pid] = PatientContext( + patient_id=pid, + facts=entry.get("facts", {}) ) - logger.debug("Loaded %d patients from registry", len(patient_registry)) except Exception as e: logger.warning("Failed to load patient contexts from registry: %s", e) async def decide_and_apply(self, user_text: str, chat_ctx: ChatContext) -> tuple[Decision, TimingInfo]: - service_start_time = time.time() + service_start = time.time() - # FIRST: Ensure we have latest patient contexts from registry + # Always refresh from registry first await self._ensure_patient_contexts_from_registry(chat_ctx) - # Skip analyzer for very short messages that are likely agent handoffs + # Short heuristic skip if user_text and len(user_text.strip()) <= 15 and not any( - word in user_text.lower() for word in ["patient", "clear", "switch"] + k in user_text.lower() for k in ["patient", "clear", "switch"] ): - logger.info("Skipping analyzer for short handoff message: '%s'", user_text) - if not chat_ctx.patient_id: - fallback_start = time.time() + fb_start = time.time() restored = await self._try_restore_from_storage(chat_ctx) - fallback_duration = time.time() - fallback_start - decision = "RESTORED_FROM_STORAGE" if restored else "NONE" - else: - fallback_duration = 0.0 - decision = "UNCHANGED" - - timing = TimingInfo( - analyzer=0.0, - storage_fallback=fallback_duration, - service=time.time() - service_start_time, - ) - return decision, timing - - logger.info("Patient context decision for '%s' | Current patient: %s", user_text, chat_ctx.patient_id) + fb_dur = time.time() - fb_start + timing = TimingInfo(analyzer=0.0, storage_fallback=fb_dur, service=time.time() - service_start) + return ("RESTORED_FROM_STORAGE" if restored else "NONE", timing) + timing = TimingInfo(analyzer=0.0, storage_fallback=0.0, service=time.time() - service_start) + return "UNCHANGED", timing - # STEP 1: Run the analyzer with structured output - decision_model, analyzer_duration = await self.analyzer.analyze_with_timing( + decision_model, analyzer_dur = await self.analyzer.analyze_with_timing( user_text=user_text, prior_patient_id=chat_ctx.patient_id, known_patient_ids=list(chat_ctx.patient_contexts.keys()), @@ -95,409 +78,143 @@ async def decide_and_apply(self, user_text: str, chat_ctx: ChatContext) -> tuple action = decision_model.action pid = decision_model.patient_id - - logger.info( - "Analyzer 
decision: %s | Patient ID: %s | Reasoning: %s", - action, pid, decision_model.reasoning - ) - - # STEP 2: Handle analyzer results - fallback_duration = 0.0 + fallback_dur = 0.0 if action == "CLEAR": await self._archive_all_and_recreate(chat_ctx) - timing = TimingInfo( - analyzer=analyzer_duration, - storage_fallback=0.0, - service=time.time() - service_start_time, - ) + timing = TimingInfo(analyzer=analyzer_dur, storage_fallback=0.0, service=time.time() - service_start) return "CLEAR", timing - elif action in ("ACTIVATE_NEW", "SWITCH_EXISTING"): + if action in ("ACTIVATE_NEW", "SWITCH_EXISTING"): if not pid or not PATIENT_ID_PATTERN.match(pid): - logger.warning("Invalid patient ID from analyzer: %s", pid) decision = "NEEDS_PATIENT_ID" else: decision = await self._activate_patient_with_registry(pid, chat_ctx) - elif action == "NONE": - fb_start = time.time() if not chat_ctx.patient_id: + fb_start = time.time() restored = await self._try_restore_from_storage(chat_ctx) + fallback_dur = time.time() - fb_start decision = "RESTORED_FROM_STORAGE" if restored else "NONE" else: decision = "UNCHANGED" - fallback_duration = time.time() - fb_start - elif action == "UNCHANGED": decision = "UNCHANGED" else: decision = "NONE" - service_duration = time.time() - service_start_time timing = TimingInfo( - analyzer=analyzer_duration, - storage_fallback=fallback_duration, - service=service_duration, + analyzer=analyzer_dur, + storage_fallback=fallback_dur, + service=time.time() - service_start ) - - if chat_ctx.patient_id: - await self._ensure_system_message(chat_ctx, timing) - + # NOTE: No system message injection here (handled by caller). return decision, timing async def set_explicit_patient_context(self, patient_id: str, chat_ctx: ChatContext) -> bool: - # Ensure we have latest patient contexts from registry - await self._ensure_patient_contexts_from_registry(chat_ctx) - if not patient_id or not PATIENT_ID_PATTERN.match(patient_id): - logger.warning("Invalid patient ID format: %s", patient_id) return False if chat_ctx.patient_id and patient_id != chat_ctx.patient_id: - logger.info("Resetting kernel for explicit patient switch: %s -> %s", chat_ctx.patient_id, patient_id) self.analyzer.reset_kernel() - restored = await self._try_restore_specific_patient(patient_id, chat_ctx) - if not restored: + await self._ensure_patient_contexts_from_registry(chat_ctx) + + if patient_id not in chat_ctx.patient_contexts: chat_ctx.patient_contexts[patient_id] = PatientContext(patient_id=patient_id) - logger.info("Created new patient context: %s", patient_id) chat_ctx.patient_id = patient_id - timing = TimingInfo(analyzer=0.0, storage_fallback=0.0, service=0.0) - await self._ensure_system_message(chat_ctx, timing) if self.registry_accessor: - try: - await self._update_registry_storage(chat_ctx) - except Exception as e: - logger.warning("Failed to update registry storage: %s", e) - + await self._update_registry_storage(chat_ctx) return True - async def _ensure_system_message(self, chat_ctx: ChatContext, timing: TimingInfo): - """Ensure system message with patient context data using structured model.""" - self._remove_system_message(chat_ctx) - - if not chat_ctx.patient_id: - return - - # Get all session patients from registry (single source of truth) - all_patient_ids = [] - if self.registry_accessor: - try: - patient_registry, _ = await self.registry_accessor.read_registry(chat_ctx.conversation_id) - if patient_registry: - all_patient_ids = list(patient_registry.keys()) - logger.debug("Using patient registry for system 
message: %s", all_patient_ids) - except Exception as e: - logger.warning("Failed to read patient registry for system message: %s", e) - # Fallback to in-memory contexts - all_patient_ids = list(chat_ctx.patient_contexts.keys()) - else: - # Fallback to in-memory contexts - all_patient_ids = list(chat_ctx.patient_contexts.keys()) - - # Use structured model for system message - payload = PatientContextSystemMessage( - conversation_id=chat_ctx.conversation_id, - patient_id=chat_ctx.patient_id, - all_patient_ids=all_patient_ids, - timing_sec=timing, - ) - - line = "%s: %s" % (PATIENT_CONTEXT_PREFIX, payload.model_dump_json()) - system_message = ChatMessageContent( - role=AuthorRole.SYSTEM, - items=[TextContent(text=line)] - ) - chat_ctx.chat_history.messages.insert(0, system_message) - logger.debug( - "Added structured patient context system message for %s with %d session patients", - chat_ctx.patient_id, len(all_patient_ids) - ) - - async def _try_restore_from_storage(self, chat_ctx: ChatContext) -> bool: - """Try to restore patient context from storage files.""" - logger.info("Attempting storage fallback for conversation: %s", chat_ctx.conversation_id) - - # Load latest patient contexts from registry - await self._ensure_patient_contexts_from_registry(chat_ctx) - - # Priority 1: Check patient registry file (session registry) - if self.registry_accessor: - try: - patient_registry, active_patient_id = await self.registry_accessor.read_registry(chat_ctx.conversation_id) - - if patient_registry and active_patient_id: - logger.info("Found %d patients. Active: %s", len(patient_registry), active_patient_id) - - # Set active patient and load their isolated chat history - if active_patient_id in patient_registry: - chat_ctx.patient_id = active_patient_id - - # Load isolated chat history for active patient - if self.context_accessor: - try: - restored_chat_ctx = await self.context_accessor.read(chat_ctx.conversation_id, active_patient_id) - if restored_chat_ctx and hasattr(restored_chat_ctx, 'chat_history'): - # Clear current history and load patient-specific history - chat_ctx.chat_history.messages.clear() - chat_ctx.chat_history.messages.extend(restored_chat_ctx.chat_history.messages) - logger.info("Loaded isolated chat history for: %s", active_patient_id) - except Exception as e: - logger.warning("Failed to load patient-specific chat history: %s", e) - - logger.info("Restored active patient: %s", active_patient_id) - return True - except Exception as e: - logger.warning("Failed to read patient registry: %s", e) - - # Priority 2: Check session context (legacy fallback) - if self.context_accessor: - try: - restored_ctx = await self.context_accessor.read(chat_ctx.conversation_id) - if restored_ctx and restored_ctx.patient_id: - chat_ctx.patient_id = restored_ctx.patient_id - # Note: Don't restore patient_contexts from file - use registry only - chat_ctx.chat_history = restored_ctx.chat_history or chat_ctx.chat_history - logger.info("Restored session context: %s", restored_ctx.patient_id) - return True - except Exception as e: - logger.warning("Failed to read session context: %s", e) - - logger.info("No patient context found in storage") - return False - async def _archive_all_and_recreate(self, chat_ctx: ChatContext) -> None: - """Archive all files to blob storage and recreate fresh files.""" - logger.info("Archiving all contexts to blob storage for conversation: %s", chat_ctx.conversation_id) - - # Kernel reset for complete context clear + """Archive all session + patient files + registry then clear 
memory.""" if chat_ctx.patient_id: - logger.info("Resetting kernel for complete context clear") self.analyzer.reset_kernel() - archive_failures = [] - - # Get ALL patients from registry (single source of truth) all_patient_ids = [] if self.registry_accessor: try: patient_registry, _ = await self.registry_accessor.read_registry(chat_ctx.conversation_id) - if patient_registry: - all_patient_ids = list(patient_registry.keys()) - logger.info("Found %d patients in registry to archive: %s", len(all_patient_ids), all_patient_ids) - else: - logger.warning("No patient registry found for archival") - except Exception as e: - logger.warning("Failed to read patient registry for archival: %s", e) - # Fallback to in-memory contexts + all_patient_ids = list(patient_registry.keys()) if patient_registry else [] + except Exception: all_patient_ids = list(chat_ctx.patient_contexts.keys()) - # Create timestamped archive folder timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H-%M-%S-%f") - archive_folder = "archive/%s" % timestamp + folder = f"archive/{timestamp}" - try: - logger.info("Starting archive to folder: %s", archive_folder) - - # Archive session context (main conversation) - if self.context_accessor: - try: - await self.context_accessor.archive_to_folder(chat_ctx.conversation_id, None, archive_folder) - logger.info("Archived session context to %s", archive_folder) - except Exception as e: - logger.warning("Failed to archive session context: %s", e) - archive_failures.append("session") - - # Archive ALL patient contexts from registry - for patient_id in all_patient_ids: - try: - await self.context_accessor.archive_to_folder(chat_ctx.conversation_id, patient_id, archive_folder) - logger.info("Archived patient context for %s to %s", patient_id, archive_folder) - except Exception as e: - logger.warning("Failed to archive patient context for %s: %s", patient_id, e) - archive_failures.append(patient_id) - - # Archive patient registry - if self.registry_accessor: + if self.context_accessor: + try: + await self.context_accessor.archive_to_folder(chat_ctx.conversation_id, None, folder) + except Exception as e: + logger.warning("Archive session failed: %s", e) + for pid in all_patient_ids: try: - await self.registry_accessor.archive_registry(chat_ctx.conversation_id) - logger.info("Archived patient registry for %s", chat_ctx.conversation_id) + await self.context_accessor.archive_to_folder(chat_ctx.conversation_id, pid, folder) except Exception as e: - logger.warning("Failed to archive patient registry: %s", e) - archive_failures.append("registry") + logger.warning("Archive patient %s failed: %s", pid, e) - # Report archive status - if archive_failures: - logger.warning("Some archives failed: %s", archive_failures) - else: - logger.info("Successfully archived all contexts to %s", archive_folder) - - except Exception as e: - logger.error("Critical failure during archive process: %s", e) + if self.registry_accessor: + try: + await self.registry_accessor.archive_registry(chat_ctx.conversation_id) + except Exception as e: + logger.warning("Archive registry failed: %s", e) - # Clear memory only after archival attempt (even if some failed) chat_ctx.patient_id = None chat_ctx.patient_contexts.clear() chat_ctx.chat_history.messages.clear() - self._remove_system_message(chat_ctx) - logger.info("Archival complete - memory cleared for fresh start") + async def _update_registry_storage(self, chat_ctx: ChatContext): + if not (self.registry_accessor and chat_ctx.patient_id): + return + current = 
chat_ctx.patient_contexts.get(chat_ctx.patient_id) + if not current: + return + entry = { + "patient_id": chat_ctx.patient_id, + "facts": current.facts, + "conversation_id": chat_ctx.conversation_id + } + try: + await self.registry_accessor.update_patient_registry( + chat_ctx.conversation_id, + chat_ctx.patient_id, + entry, + chat_ctx.patient_id + ) + except Exception as e: + logger.warning("Failed registry update: %s", e) async def _activate_patient_with_registry(self, patient_id: str, chat_ctx: ChatContext) -> Decision: - """Activate patient and load from registry if available.""" - if not patient_id: - return "NEEDS_PATIENT_ID" - - # Same patient - if patient_id == chat_ctx.patient_id: + if chat_ctx.patient_id == patient_id: return "UNCHANGED" - - # Kernel reset when switching patients if chat_ctx.patient_id and patient_id != chat_ctx.patient_id: - logger.info("Resetting kernel for patient switch: %s -> %s", chat_ctx.patient_id, patient_id) self.analyzer.reset_kernel() - # Ensure we have latest registry data await self._ensure_patient_contexts_from_registry(chat_ctx) - # Check if we have registry data for this patient - if self.registry_accessor: - try: - patient_registry, _ = await self.registry_accessor.read_registry(chat_ctx.conversation_id) - if patient_id in patient_registry: - # Patient exists in registry - chat_ctx.patient_id = patient_id - - # Load isolated chat history for this patient - if self.context_accessor: - try: - restored_chat_ctx = await self.context_accessor.read(chat_ctx.conversation_id, patient_id) - if restored_chat_ctx and hasattr(restored_chat_ctx, 'chat_history'): - # Clear current history and load patient-specific history - chat_ctx.chat_history.messages.clear() - chat_ctx.chat_history.messages.extend(restored_chat_ctx.chat_history.messages) - logger.info("Loaded isolated chat history for: %s", patient_id) - except Exception as e: - logger.warning("Failed to load patient-specific chat history: %s", e) - - logger.info("Switched to existing patient from registry: %s", patient_id) - # CRITICAL: Update registry to mark this patient as currently active - await self._update_registry_storage(chat_ctx) - - return "SWITCH_EXISTING" - except Exception as e: - logger.warning("Failed to check registry for %s: %s", patient_id, e) - - # Switch to existing in memory - PRESERVE CHAT HISTORY if patient_id in chat_ctx.patient_contexts: chat_ctx.patient_id = patient_id - logger.info("Switched to existing patient (preserving chat history): %s", patient_id) - # Update registry when switching to existing patient await self._update_registry_storage(chat_ctx) return "SWITCH_EXISTING" - # New blank patient context - PRESERVE CHAT HISTORY chat_ctx.patient_contexts[patient_id] = PatientContext(patient_id=patient_id) chat_ctx.patient_id = patient_id - logger.info("Created new patient context (preserving chat history): %s", patient_id) - - # CRITICAL: Update registry storage for new patient await self._update_registry_storage(chat_ctx) - return "NEW_BLANK" - async def _update_registry_storage(self, chat_ctx: ChatContext): - """Update registry storage for current patient.""" - if not self.registry_accessor or not chat_ctx.patient_id: - return - - current_patient = chat_ctx.patient_contexts.get(chat_ctx.patient_id) - if not current_patient: - logger.warning("No patient context found for %s", chat_ctx.patient_id) - return - - # Simple registry entry - registry_entry = { - "patient_id": chat_ctx.patient_id, - "facts": current_patient.facts, - "conversation_id": chat_ctx.conversation_id - } - 
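As a usage aid: the registry accessor that `PatientContextService` depends on is exercised here only through `read_registry`, `update_patient_registry`, and `archive_registry`. The sketch below is a hypothetical in-memory stand-in for local testing, inferred from those call sites alone; the real implementation is the blob-backed `PatientContextRegistryAccessor`, and the exact signatures are assumptions.

from typing import Optional


class InMemoryRegistryAccessor:
    """Hypothetical in-memory stand-in matching the calls made by PatientContextService."""

    def __init__(self):
        # conversation_id -> (registry dict keyed by patient_id, active patient_id)
        self._store: dict[str, tuple[dict, Optional[str]]] = {}
        self._archived: list[tuple[str, dict, Optional[str]]] = []

    async def read_registry(self, conversation_id: str) -> tuple[dict, Optional[str]]:
        registry, active = self._store.get(conversation_id, ({}, None))
        return dict(registry), active

    async def update_patient_registry(self, conversation_id: str, patient_id: str,
                                      entry: dict, active_patient_id: Optional[str]) -> None:
        registry, _ = self._store.get(conversation_id, ({}, None))
        registry[patient_id] = entry
        self._store[conversation_id] = (registry, active_patient_id)

    async def archive_registry(self, conversation_id: str) -> None:
        registry, active = self._store.pop(conversation_id, ({}, None))
        self._archived.append((conversation_id, registry, active))

Wiring this into the service for a quick local check would mirror the constructor shown above, e.g. PatientContextService(analyzer=analyzer, registry_accessor=InMemoryRegistryAccessor(), context_accessor=None).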
+ async def _try_restore_from_storage(self, chat_ctx: ChatContext) -> bool: + """Restore active patient from registry (no legacy file scanning).""" + if not self.registry_accessor: + return False try: - await self.registry_accessor.update_patient_registry( - chat_ctx.conversation_id, - chat_ctx.patient_id, - registry_entry, - chat_ctx.patient_id # Set as active patient - ) - logger.info("Updated registry storage for %s", chat_ctx.patient_id) + patient_registry, active = await self.registry_accessor.read_registry(chat_ctx.conversation_id) + if patient_registry and active and active in patient_registry: + await self._ensure_patient_contexts_from_registry(chat_ctx) + chat_ctx.patient_id = active + return True except Exception as e: - logger.warning("Failed to update registry storage: %s", e) - - def _remove_system_message(self, chat_ctx: ChatContext): - """Remove patient context system messages.""" - if not chat_ctx.patient_id: - return - - current_patient_id = chat_ctx.patient_id - messages_to_keep = [] - removed_count = 0 - - for m in chat_ctx.chat_history.messages: - if (m.role == AuthorRole.SYSTEM and m.items and len(m.items) > 0): - content_str = m.items[0].text if hasattr(m.items[0], 'text') else str(m.items[0]) - if content_str.startswith(PATIENT_CONTEXT_PREFIX): - try: - json_content = content_str[len(PATIENT_CONTEXT_PREFIX):].strip() - if json_content.startswith(":"): - json_content = json_content[1:].strip() - payload = json.loads(json_content) - if payload.get("patient_id") == current_patient_id: - removed_count += 1 - continue # Skip this message (remove it) - except Exception: - pass # Keep malformed messages - - messages_to_keep.append(m) - - if removed_count > 0: - logger.debug("Removed %d system messages for %s", removed_count, current_patient_id) - - chat_ctx.chat_history.messages = messages_to_keep - - async def _try_restore_specific_patient(self, patient_id: str, chat_ctx: ChatContext) -> bool: - """Try to restore specific patient from storage.""" - # Try registry storage first (single source of truth) - if self.registry_accessor: - try: - patient_registry, _ = await self.registry_accessor.read_registry(chat_ctx.conversation_id) - if patient_id in patient_registry: - registry_entry = patient_registry[patient_id] - chat_ctx.patient_contexts[patient_id] = PatientContext( - patient_id=patient_id, - facts=registry_entry.get("facts", {}) - ) - logger.info("Restored %s from registry storage", patient_id) - return True - except Exception as e: - logger.warning("Failed to restore %s from registry: %s", patient_id, e) - - # Legacy fallback: Try patient-specific context file (deprecated) - if self.context_accessor: - try: - stored_ctx = await self.context_accessor.read(chat_ctx.conversation_id, patient_id) - if stored_ctx and hasattr(stored_ctx, 'patient_contexts') and patient_id in stored_ctx.patient_contexts: - stored_context = stored_ctx.patient_contexts[patient_id] - chat_ctx.patient_contexts[patient_id] = PatientContext( - patient_id=patient_id, - facts=getattr(stored_context, 'facts', {}) - ) - logger.info("Restored %s from patient-specific context (legacy)", patient_id) - return True - except Exception as e: - logger.warning("Failed to restore %s from context: %s", patient_id, e) - + logger.warning("Restore from registry failed: %s", e) return False From ce7a237c49f7c6b1f4ba77ea72d983fd2e799206 Mon Sep 17 00:00:00 2001 From: vvenglaturmsft Date: Tue, 30 Sep 2025 01:35:01 +0000 Subject: [PATCH 18/20] further clean up of code --- src/app.py | 30 +- src/data_models/data_access.py 
| 7 +- src/data_models/patient_context_models.py | 157 ++------- src/group_chat.py | 386 ++++++++++------------ src/routes/api/chats.py | 18 +- src/services/patient_context_analyzer.py | 104 +++--- src/services/patient_context_service.py | 119 ++++--- 7 files changed, 360 insertions(+), 461 deletions(-) diff --git a/src/app.py b/src/app.py index ad936cd..9fc0c6b 100644 --- a/src/app.py +++ b/src/app.py @@ -31,13 +31,14 @@ load_dotenv(".env") -# Setup default logging and minimum log level severity +# Setup default logging and minimum log level severity for your environment that you want to consume log_level = logging.INFO setup_logging(log_level=log_level) def create_app_context(): - """Create the application context for commonly used objects in the application.""" + '''Create the application context for commonly used object used in application.''' + # Load agent configuration scenario = os.getenv("SCENARIO") agent_config = load_agent_config(scenario) @@ -66,10 +67,7 @@ def create_app( bots: dict, app_context: AppContext, ) -> FastAPI: - """Create the FastAPI application with all routes and middleware.""" app = FastAPI() - - # Add API routes app.include_router(messages_routes(adapters, bots)) app.include_router(chats_routes(app_context)) app.include_router(user_routes()) @@ -81,17 +79,17 @@ def create_app( # Serve static files from the React build directory static_files_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static") if os.path.exists(static_files_path): - app.mount("/static", StaticFiles(directory=static_files_path), name="static") + app.mount("/static", StaticFiles(directory=os.path.join(static_files_path, "static")), name="static") - # Mount assets directory for Vite-generated assets - assets_path = os.path.join(static_files_path, "assets") + # Mount assets directory for Vite-generated assets like /assets/index-abc123.js + assets_path = os.path.join(static_files_path, "static", "assets") if os.path.exists(assets_path): app.mount("/assets", StaticFiles(directory=assets_path), name="assets") # Add a route for the root URL to serve index.html @app.get("/") async def serve_root(): - index_path = os.path.join(static_files_path, "index.html") + index_path = os.path.join(static_files_path, "static", "index.html") if os.path.exists(index_path): return FileResponse(index_path) return {"detail": "React app not built yet"} @@ -99,7 +97,7 @@ async def serve_root(): # Add a catch-all route to serve index.html for client-side routing @app.get("/{full_path:path}") async def serve_react_app(full_path: str): - index_path = os.path.join(static_files_path, "index.html") + index_path = os.path.join(static_files_path, "static", "index.html") if os.path.exists(index_path): return FileResponse(index_path) return {"detail": "React app not built yet"} @@ -107,11 +105,11 @@ async def serve_react_app(full_path: str): return app -# Initialize application context app_context = create_app_context() # Setup Application Insights logging -setup_app_insights_logging(credential=app_context.credential, log_level=log_level) +setup_app_insights_logging(credential=app_context.credential, + log_level=log_level) # Create Teams specific objects adapters = { @@ -119,28 +117,22 @@ async def serve_react_app(full_path: str): DefaultConfig(botId=agent["bot_id"]))).use(ShowTypingMiddleware()).use(AccessControlMiddleware()) for agent in app_context.all_agent_configs } - bot_config = { "adapters": adapters, "app_context": app_context, "turn_contexts": {} } - bots = { agent["name"]: AssistantBot(agent, 
**bot_config) if agent["name"] != "magentic" else MagenticBot(agent, **bot_config) for agent in app_context.all_agent_configs } -# Create applications teams_app = create_app(bots, app_context) fast_mcp_app, lifespan = create_fast_mcp_app(app_context) -# Main application with routing app = Starlette( routes=[ Mount('/mcp', app=fast_mcp_app), Mount('/', teams_app), - ], - lifespan=lifespan -) + ], lifespan=lifespan) diff --git a/src/data_models/data_access.py b/src/data_models/data_access.py index e5bfa14..b9d2ad3 100644 --- a/src/data_models/data_access.py +++ b/src/data_models/data_access.py @@ -17,7 +17,6 @@ from data_models.fhir.fhir_clinical_note_accessor import FhirClinicalNoteAccessor from data_models.image_accessor import ImageAccessor from data_models.patient_context_accessor import PatientContextRegistryAccessor -from azure.identity.aio import get_bearer_token_provider logger = logging.getLogger(__name__) @@ -117,11 +116,7 @@ def create_data_access( return DataAccess( blob_sas_delegate=BlobSasDelegate(blob_service_client), chat_artifact_accessor=ChatArtifactAccessor(blob_service_client), - chat_context_accessor=ChatContextAccessor( - blob_service_client, - cognitive_services_token_provider=get_bearer_token_provider( - credential, "https://cognitiveservices.azure.com/.default") - ), + chat_context_accessor=ChatContextAccessor(blob_service_client), clinical_note_accessor=clinical_note_accessor, image_accessor=ImageAccessor(blob_service_client), patient_context_registry_accessor=PatientContextRegistryAccessor(blob_service_client), diff --git a/src/data_models/patient_context_models.py b/src/data_models/patient_context_models.py index 36631cf..44a4374 100644 --- a/src/data_models/patient_context_models.py +++ b/src/data_models/patient_context_models.py @@ -1,137 +1,40 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. +from __future__ import annotations -from typing import Optional, List, Literal -from semantic_kernel.kernel_pydantic import KernelBaseModel -from pydantic import BaseModel, Field +from dataclasses import dataclass +from typing import Optional, Literal +from pydantic import BaseModel -# Update this to match your current analyzer actions -AnalyzerAction = Literal["NONE", "CLEAR", "ACTIVATE_NEW", "SWITCH_EXISTING", "UNCHANGED"] +# Canonical analyzer action literal (shared across analyzer + service typing) +AnalyzerAction = Literal[ + "NONE", + "CLEAR", + "ACTIVATE_NEW", + "SWITCH_EXISTING", + "UNCHANGED", +] -class TimingInfo(BaseModel): - """Timing information for patient context operations.""" - analyzer: float = Field(0.0, description="Time spent in analyzer (seconds)") - storage_fallback: float = Field(0.0, description="Time spent in storage fallback (seconds)") - service: float = Field(0.0, description="Total service processing time (seconds)") - - -class PatientContextDecision(KernelBaseModel): - """ - Structured output for patient context analysis decisions. - Uses KernelBaseModel for proper JSON schema generation with Semantic Kernel. 
- """ - action: AnalyzerAction = Field( - ..., - description="The action to take: NONE, CLEAR, ACTIVATE_NEW, SWITCH_EXISTING, or UNCHANGED" - ) - patient_id: Optional[str] = Field( - None, - description="The patient ID if the action involves a specific patient (format: patient_X)" - ) - reasoning: str = Field( - ..., - description="Brief explanation of why this decision was made (max 50 words)" - ) - - -class WorkflowStep(BaseModel): - """A single step in the workflow plan.""" - agent: str = Field(..., description="Name of the agent responsible for this step") - task: str = Field(..., description="Description of the task to be performed") - status: str = Field("pending", description="Status: pending, in_progress, completed") - - -class WorkflowSummary(KernelBaseModel): - """ - Structured workflow summary for tracking conversation progress. - Uses KernelBaseModel for proper JSON schema generation with Semantic Kernel. - """ - patient_id: Optional[str] = Field(None, description="The patient ID this workflow relates to (if applicable)") - objective: str = Field(..., description="The main objective of this workflow") - steps: List[WorkflowStep] = Field(..., description="List of workflow steps") - current_step: int = Field(0, description="Index of the current step being executed") - reasoning: str = Field(..., description="Summary of the workflow reasoning") - - -class PatientContextSystemMessage(KernelBaseModel): - """System message payload for patient context.""" - conversation_id: str = Field(..., description="Conversation identifier") - patient_id: str = Field(..., description="Active patient identifier") - all_patient_ids: List[str] = Field(..., description="All known patient IDs in this session") - timing_sec: TimingInfo = Field(..., description="Timing information for this operation") - - -class PatientRegistryEntry(BaseModel): - """Registry entry for patient tracking.""" - patient_id: str = Field(..., description="Patient identifier") - created_at: str = Field(..., description="ISO timestamp when patient was created") - last_accessed: str = Field(..., description="ISO timestamp when patient was last accessed") - message_count: int = Field(0, description="Number of messages for this patient") -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. - - -# Update this to match your current analyzer actions -AnalyzerAction = Literal["NONE", "CLEAR", "ACTIVATE_NEW", "SWITCH_EXISTING", "UNCHANGED"] - - -class TimingInfo(BaseModel): - """Timing information for patient context operations.""" - analyzer: float = Field(0.0, description="Time spent in analyzer (seconds)") - storage_fallback: float = Field(0.0, description="Time spent in storage fallback (seconds)") - service: float = Field(0.0, description="Total service processing time (seconds)") - - -class PatientContextDecision(KernelBaseModel): +class PatientContextDecision(BaseModel): """ - Structured output for patient context analysis decisions. - Uses KernelBaseModel for proper JSON schema generation with Semantic Kernel. + Structured output returned by the patient context analyzer. 
+ - action: one of AnalyzerAction + - patient_id: only populated when action implies activation/switch; otherwise None + - reasoning: brief natural language explanation (kept short by prompt) """ - action: AnalyzerAction = Field( - ..., - description="The action to take: NONE, CLEAR, ACTIVATE_NEW, SWITCH_EXISTING, or UNCHANGED" - ) - patient_id: Optional[str] = Field( - None, - description="The patient ID if the action involves a specific patient (format: patient_X)" - ) - reasoning: str = Field( - ..., - description="Brief explanation of why this decision was made (max 50 words)" - ) + action: AnalyzerAction + patient_id: Optional[str] + reasoning: str -class WorkflowStep(BaseModel): - """A single step in the workflow plan.""" - agent: str = Field(..., description="Name of the agent responsible for this step") - task: str = Field(..., description="Description of the task to be performed") - status: str = Field("pending", description="Status: pending, in_progress, completed") - - -class WorkflowSummary(KernelBaseModel): +@dataclass +class TimingInfo: """ - Structured workflow summary for tracking conversation progress. - Uses KernelBaseModel for proper JSON schema generation with Semantic Kernel. + Timing breakdown for a single patient context decision cycle. + All values are seconds (float) measured with time.time() deltas. + - analyzer: model inference / structured output duration + - storage_fallback: time spent attempting restore-from-storage fallback (if any) + - service: total wall-clock for decide_and_apply orchestration """ - patient_id: Optional[str] = Field(None, description="The patient ID this workflow relates to (if applicable)") - objective: str = Field(..., description="The main objective of this workflow") - steps: List[WorkflowStep] = Field(..., description="List of workflow steps") - current_step: int = Field(0, description="Index of the current step being executed") - reasoning: str = Field(..., description="Summary of the workflow reasoning") - - -class PatientContextSystemMessage(KernelBaseModel): - """System message payload for patient context.""" - conversation_id: str = Field(..., description="Conversation identifier") - patient_id: str = Field(..., description="Active patient identifier") - all_patient_ids: List[str] = Field(..., description="All known patient IDs in this session") - timing_sec: TimingInfo = Field(..., description="Timing information for this operation") - - -class PatientRegistryEntry(BaseModel): - """Registry entry for patient tracking.""" - patient_id: str = Field(..., description="Patient identifier") - created_at: str = Field(..., description="ISO timestamp when patient was created") - last_accessed: str = Field(..., description="ISO timestamp when patient was last accessed") - message_count: int = Field(0, description="Number of messages for this patient") + analyzer: float + storage_fallback: float + service: float diff --git a/src/group_chat.py b/src/group_chat.py index 5d8d1fd..7a266cb 100644 --- a/src/group_chat.py +++ b/src/group_chat.py @@ -10,30 +10,20 @@ from semantic_kernel import Kernel from semantic_kernel.agents import AgentGroupChat, ChatCompletionAgent from semantic_kernel.agents.channels.chat_history_channel import ChatHistoryChannel -from semantic_kernel.agents.strategies.selection.kernel_function_selection_strategy import ( - KernelFunctionSelectionStrategy, -) -from semantic_kernel.agents.strategies.termination.kernel_function_termination_strategy import ( - KernelFunctionTerminationStrategy, -) +from 
semantic_kernel.agents.strategies.selection.kernel_function_selection_strategy import \ + KernelFunctionSelectionStrategy +from semantic_kernel.agents.strategies.termination.kernel_function_termination_strategy import \ + KernelFunctionTerminationStrategy from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior -from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.azure_chat_prompt_execution_settings import ( - AzureChatPromptExecutionSettings, -) -from semantic_kernel.connectors.ai.open_ai.services.azure_chat_completion import ( - AzureChatCompletion, -) +from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.azure_chat_prompt_execution_settings import \ + AzureChatPromptExecutionSettings +from semantic_kernel.connectors.ai.open_ai.services.azure_chat_completion import AzureChatCompletion from semantic_kernel.connectors.openapi_plugin import OpenAPIFunctionExecutionParameters from semantic_kernel.contents.chat_history import ChatHistory from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents.history_reducer.chat_history_truncation_reducer import ( - ChatHistoryTruncationReducer, -) -from semantic_kernel.functions.kernel_function_from_prompt import ( - KernelFunctionFromPrompt, -) +from semantic_kernel.contents.history_reducer.chat_history_truncation_reducer import ChatHistoryTruncationReducer +from semantic_kernel.functions.kernel_function_from_prompt import KernelFunctionFromPrompt from semantic_kernel.kernel import Kernel, KernelArguments -from semantic_kernel.contents import ChatMessageContent from data_models.app_context import AppContext from data_models.chat_context import ChatContext @@ -48,91 +38,97 @@ logger = logging.getLogger(__name__) +class ChatRule(BaseModel): + verdict: str + reasoning: str + + +def create_auth_callback(chat_ctx: ChatContext) -> Callable[..., Awaitable[Any]]: + """ + Creates an authentication callback for the plugin configuration. + + :param chat_ctx: The chat context to be used in the authentication. + :return: A callable that returns an authentication token. + """ + # TODO - get key or secret from Azure Key Vault for OpenAPI services. + # Send the conversation ID as a header to the OpenAPI service. + async def auth_callback(): + return {'conversation-id': chat_ctx.conversation_id} + return auth_callback + +# Need to introduce a CustomChatCompletionAgent and a CustomHistoryChannel because of issue https://github.com/microsoft/semantic-kernel/issues/12095 + + class CustomHistoryChannel(ChatHistoryChannel): @override - async def receive( - self, - history: list[ChatMessageContent], - ) -> None: + async def receive(self, history: list[ChatMessageContent],) -> None: await super().receive(history) + for message in history[:-1]: await self.thread.on_new_message(message) -class CustomChatCompletionAgent(ChatCompletionAgent): - """Custom ChatCompletionAgent to override the create_channel method.""" +async def create_channel( + self, chat_history: ChatHistory | None = None, thread_id: str | None = None +) -> CustomHistoryChannel: + """Create a ChatHistoryChannel. - @override - async def create_channel( - self, chat_history: ChatHistory | None = None, thread_id: str | None = None - ) -> CustomHistoryChannel: - from semantic_kernel.agents.chat_completion.chat_completion_agent import ( - ChatHistoryAgentThread, - ) + Args: + chat_history: The chat history for the channel. If None, a new ChatHistory instance will be created. 
+ thread_id: The ID of the thread. If None, a new thread will be created. - CustomHistoryChannel.model_rebuild() - thread = ChatHistoryAgentThread(chat_history=chat_history, thread_id=thread_id) + Returns: + An instance of AgentChannel. + """ + from semantic_kernel.agents.chat_completion.chat_completion_agent import ChatHistoryAgentThread - if thread.id is None: - await thread.create() + CustomHistoryChannel.model_rebuild() - messages = [message async for message in thread.get_messages()] - return CustomHistoryChannel(messages=messages, thread=thread) + thread = ChatHistoryAgentThread(chat_history=chat_history, thread_id=thread_id) + if thread.id is None: + await thread.create() -class ChatRule(BaseModel): - """Structured output model for group chat selection and termination decisions.""" - verdict: str - reasoning: str + messages = [message async for message in thread.get_messages()] + return CustomHistoryChannel(messages=messages, thread=thread) -def create_auth_callback(chat_ctx: ChatContext) -> Callable[..., Awaitable[Any]]: - """Creates an authentication callback for OpenAPI tool execution.""" - async def auth_callback(): - return {"conversation-id": chat_ctx.conversation_id} - return auth_callback +class CustomChatCompletionAgent(ChatCompletionAgent): + """Custom ChatCompletionAgent to override the create_channel method.""" -def create_group_chat( - app_ctx: AppContext, - chat_ctx: ChatContext, - participants: list[dict] = None, -) -> Tuple[AgentGroupChat, ChatContext]: - """ - Create a multi-agent group chat. + @override + async def create_channel( + self, chat_history: ChatHistory | None = None, thread_id: str | None = None + ) -> CustomHistoryChannel: + return await create_channel(self, chat_history, thread_id) - Args: - app_ctx: Application context containing shared resources - chat_ctx: Chat context for conversation state - participants: Optional list of participant configurations - Returns: - Tuple of AgentGroupChat instance and updated ChatContext - """ +def create_group_chat( + app_ctx: AppContext, chat_ctx: ChatContext, participants: list[dict] = None +) -> Tuple[AgentGroupChat, ChatContext]: participant_configs = participants or app_ctx.all_agent_configs participant_names = [cfg.get("name") for cfg in participant_configs] - logger.info("Creating group chat with participants: %s", participant_names) + logger.info(f"Creating group chat with participants: {participant_names}") - # Remove magentic agent from the list of agents + # Remove magentic agent from the list of agents. In the future, we could add agent type to deal with agents that should not be included in the Semantic Kernel group chat. 
all_agents_config = [ agent for agent in participant_configs if agent.get("name") != "magentic" ] def _create_kernel_with_chat_completion() -> Kernel: - """Create a kernel instance with Azure OpenAI chat completion service.""" kernel = Kernel() kernel.add_service( AzureChatCompletion( service_id="default", deployment_name=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"], api_version="2025-04-01-preview", - ad_token_provider=app_ctx.cognitive_services_token_provider, + ad_token_provider=app_ctx.cognitive_services_token_provider ) ) return kernel def _create_agent(agent_config: dict): - """Create an agent instance based on configuration.""" agent_kernel = _create_kernel_with_chat_completion() plugin_config = PluginConfiguration( kernel=agent_kernel, @@ -141,23 +137,20 @@ def _create_agent(agent_config: dict): chat_ctx=chat_ctx, azureml_token_provider=app_ctx.azureml_token_provider, ) - is_healthcare_agent = ( - healthcare_agent_config.yaml_key in agent_config - and bool(agent_config[healthcare_agent_config.yaml_key]) - ) + is_healthcare_agent = healthcare_agent_config.yaml_key in agent_config and bool( + agent_config[healthcare_agent_config.yaml_key]) for tool in agent_config.get("tools", []): tool_name = tool.get("name") tool_type = tool.get("type", DEFAULT_TOOL_TYPE) + # Add function tools if tool_type == "function": scenario = os.environ.get("SCENARIO") - tool_module = importlib.import_module( - f"scenarios.{scenario}.tools.{tool_name}" - ) - agent_kernel.add_plugin( - tool_module.create_plugin(plugin_config), plugin_name=tool_name - ) + tool_module = importlib.import_module(f"scenarios.{scenario}.tools.{tool_name}") + agent_kernel.add_plugin(tool_module.create_plugin(plugin_config), plugin_name=tool_name) + # Add OpenAPI tools + # See https://github.com/Azure-Samples/healthcare-agent-orchestrator/blob/main/docs/agent_development.md#agent-with-a-openapi-plugin-example elif tool_type == "openapi": openapi_document_path = tool.get("openapi_document_path") server_url_override = tool.get("server_url_override") @@ -168,205 +161,172 @@ def _create_agent(agent_config: dict): auth_callback=create_auth_callback(chat_ctx), server_url_override=server_url_override, enable_payload_namespacing=True, - timeout=None, - ), + timeout=None + ) ) else: raise ValueError(f"Unknown tool type: {tool_type}") if model_supports_temperature(): temperature = agent_config.get("temperature", DEFAULT_MODEL_TEMP) - logger.info( - "Setting model temperature for agent %s to %s", - agent_config["name"], - temperature, - ) + logger.info(f"Setting model temperature for agent {agent_config['name']} to {temperature}") else: temperature = None logger.info( - "Model does not support temperature. Setting temperature to None for agent %s", - agent_config["name"], - ) - - from semantic_kernel.connectors.ai.function_choice_behavior import ( - FunctionChoiceBehavior, - ) - + f"Model does not support temperature. 
Setting temperature to None for agent {agent_config['name']}") settings = AzureChatPromptExecutionSettings( - function_choice_behavior=FunctionChoiceBehavior.Auto(), - seed=42, - temperature=temperature, - ) + function_choice_behavior=FunctionChoiceBehavior.Auto(), seed=42, temperature=temperature) arguments = KernelArguments(settings=settings) instructions = agent_config.get("instructions") if agent_config.get("facilitator") and instructions: instructions = instructions.replace( - "{{aiAgents}}", - "\n\t\t".join( - [ - f"- {agent['name']}: {agent['description']}" - for agent in all_agents_config - ] - ), - ) - - return ( - CustomChatCompletionAgent( - kernel=agent_kernel, - name=agent_config["name"], - instructions=instructions, - description=agent_config.get("description", ""), - arguments=arguments, - ) - if not is_healthcare_agent - else HealthcareAgent( - name=agent_config["name"], chat_ctx=chat_ctx, app_ctx=app_ctx - ) - ) - - # Kernel for orchestrator (selection + termination structured decisions) - orchestrator_kernel = _create_kernel_with_chat_completion() + # >>> (unchanged logic, escaped quotes) + "{{aiAgents}}", "\n\t\t".join([f"- {agent['name']}: {agent['description']}" for agent in all_agents_config])) + + return (CustomChatCompletionAgent(kernel=agent_kernel, + name=agent_config["name"], + instructions=instructions, + description=agent_config["description"], + arguments=arguments) if not is_healthcare_agent else + HealthcareAgent(name=agent_config["name"], + chat_ctx=chat_ctx, + app_ctx=app_ctx)) + + if model_supports_temperature(): + settings = AzureChatPromptExecutionSettings( + function_choice_behavior=FunctionChoiceBehavior.Auto(), seed=42, temperature=0, response_format=ChatRule) + else: + settings = AzureChatPromptExecutionSettings( + function_choice_behavior=FunctionChoiceBehavior.Auto(), seed=42, response_format=ChatRule) + arguments = KernelArguments(settings=settings) - # Facilitator (Orchestrator) discovery - facilitator_agent = next( - (agent for agent in all_agents_config if agent.get("facilitator")), - all_agents_config[0], - ) + facilitator_agent = next((agent for agent in all_agents_config if agent.get("facilitator")), all_agents_config[0]) facilitator = facilitator_agent["name"] - - # Structured output model config for selection - selection_settings = AzureChatPromptExecutionSettings( - function_choice_behavior=FunctionChoiceBehavior.Auto(), - temperature=DEFAULT_MODEL_TEMP, - seed=42, - response_format=ChatRule, - ) - selection_args = KernelArguments(settings=selection_settings) - selection_function = KernelFunctionFromPrompt( function_name="selection", prompt=f""" You are overseeing a group chat between several AI agents and a human user. - Determine which participant takes the next turn based on the most recent participant. Guidelines: + Determine which participant takes the next turn in a conversation based on the most recent participant. Follow these guidelines: - 1. Participants (choose exactly one): + 1. **Participants**: Choose only from these participants: {"\n".join([("\t- " + agent["name"]) for agent in all_agents_config])} - 2. Rules: - - {facilitator} always starts if only the user has spoken. - - Avoid repetition: if an agent already completed its task, don't reselect unless explicitly requested. - - Agents may request info from each other: if an agent is directly asked by name, that agent goes next. - - "back to you *AgentName*": that named agent goes next. - - Each participant speaks at most once per turn. 
- - Default to {facilitator} if uncertain or no explicit candidate. - - Use best judgment for natural conversation flow. - - CONFIRMATION GATE (PLAN ONLY): If (a) the MOST RECENT message is from {facilitator} AND (b) it contains a multi-step plan (look for "Plan", "plan:", numbered steps like "1.", "2.", or multiple leading "-" bullet lines) AND (c) no user message has appeared AFTER that plan yet, then do NOT advance to another agent. Wait for a user reply. Output {facilitator} ONLY if absolutely necessary to politely prompt the user for confirmation (do not restate the entire plan). As soon as ANY user reply appears (question, modification, or confirmation), this gate is lifted. If the user used a confirmation token (confirm, yes, proceed, continue, ok, okay, sure, sounds good, go ahead), you may advance to the next required non-facilitator agent; otherwise select the participant that best addresses the user’s reply. - - Provide reasoning then the verdict. Verdict must be exactly one of: {", ".join([agent["name"] for agent in all_agents_config])} + 2. **General Rules**: + - **{facilitator} Always Starts**: {facilitator} always goes first to formulate a plan. If the only message is from the user, {facilitator} goes next. + - **Interactions between agents**: Agents may talk among themselves. If an agent requires information from another agent, that agent should go next. + EXAMPLE: + "*agent_name*, please provide ..." then agent_name goes next. + - **"back to you *agent_name*": If an agent says "back to you", that agent goes next. + EXAMPLE: + "back to you *agent_name*" then output agent_name goes next. + - **Once per turn**: Each participant can only speak once per turn. + - **Default to {facilitator}**: Always default to {facilitator}. If no other participant is specified, {facilitator} goes next. + - **Use best judgment**: If the rules are unclear, use your best judgment to determine who should go next, for the natural flow of the conversation. + - **CONFIRMATION GATE (PLAN ONLY)**: If (a) the MOST RECENT message is from {facilitator} AND (b) it contains a multi-step plan (look for "Plan", "plan:", numbered steps like "1.", "2.", or multiple leading "-" bullet lines) AND (c) no user message has appeared AFTER that plan yet, then do NOT advance to another agent. Wait for a user reply. Output {facilitator} ONLY if absolutely necessary to politely prompt the user for confirmation (do not restate the entire plan). As soon as ANY user reply appears (question, modification, or confirmation), this gate is lifted. If the user used a confirmation token (confirm, yes, proceed, continue, ok, okay, sure, sounds good, go ahead), you may advance to the next required non-facilitator agent; otherwise select the participant that best addresses the user’s reply. # >>> added confirmation gate + + **Output**: Give the full reasoning for your choice and the verdict. The reasoning should include careful evaluation of each rule with an explanation. The verdict should be the name of the participant who should go next. 
History: {{{{$history}}}} """, - prompt_execution_settings=selection_settings, - ) - - termination_settings = AzureChatPromptExecutionSettings( - function_choice_behavior=FunctionChoiceBehavior.Auto(), - temperature=DEFAULT_MODEL_TEMP, - seed=42, - response_format=ChatRule, + prompt_execution_settings=settings ) - termination_args = KernelArguments(settings=termination_settings) termination_function = KernelFunctionFromPrompt( function_name="termination", prompt=f""" - Determine if the conversation should end based on the most recent message only. - IMPORTANT: In the History, any leading "*AgentName*:" indicates the SPEAKER of the message, not the addressee. + Determine if the conversation should end based on the most recent message. + You only have access to the last message in the conversation. - You are part of a group chat with several AI agents and a user. - The agent names are: - {",".join([f"{agent['name']}" for agent in all_agents_config])} - - Return "yes" when the last message: - - asks the user a question (ends with "?" or uses "you"/"User"), OR - - invites the user to respond (phrases like: "let us know", "feel free to ask", "what would you like", "should we", "can we", "would you like me to"), OR - - addresses "we/us" as a decision/query to the user. - - Return "no" when the last message: - - is a command or question to a specific agent by name, OR - - is a statement clearly addressed to another agent. + Reply by giving your full reasoning, and the verdict. The verdict should be either "yes" or "no". - Commands addressed to "you" or "User" => "yes". - If uncertain, return "yes". - Ignore any debug/metadata like "PC_CTX" or JSON blobs. - - Provide reasoning then the verdict ("yes" or "no"). + You are part of a group chat with several AI agents and a user. + The agents are names are: + {",".join([f"{agent['name']}" for agent in all_agents_config])} + If the most recent message is a question addressed to the user, return "yes". + If the question is addressed to "we" or "us", return "yes". For example, if the question is "Should we proceed?", return "yes". + If the question is addressed to another agent, return "no". + If it is a statement addressed to another agent, return "no". + Commands addressed to a specific agent should result in 'no' if there is clear identification of the agent. + Commands addressed to "you" or "User" should result in 'yes'. + If you are not certain, return "yes". + Ignore any system metadata or patient context snapshots such as lines starting with "PATIENT_CONTEXT_JSON". # >>> added ignore rule + Treat internal handoff phrases like "back to you " as NOT terminating (answer is still being routed). # >>> added handoff rule + + EXAMPLES: + - "User, can you confirm the correct patient ID?" => "yes" + - "*ReportCreation*: Please compile the patient timeline. Let's proceed with *ReportCreation*." => "no" (ReportCreation is an agent) + - "*ReportCreation*, please proceed ..." => "no" (ReportCreation is an agent) + - "If you have any further questions or need assistance, feel free to ask." => "yes" + - "Let's proceed with Radiology." => "no" (Radiology is an agent) + - "*PatientStatus*, please use ..." 
=> "no" (PatientStatus is an agent) History: {{{{$history}}}} """, - prompt_execution_settings=termination_settings, + prompt_execution_settings=settings ) - agents = [_create_agent(agent) for agent in all_agents_config] def evaluate_termination(result): - """Evaluate termination decision from structured output.""" + logger.info(f"Termination function result: {result}") + # >>> added deterministic pre-checks to avoid premature termination on patient context or handoff try: - rule = ChatRule.model_validate_json(str(result.value[0])) - should_terminate = rule.verdict == "yes" - logger.debug( - "Termination decision: %s | Reasoning: %s", - should_terminate, - rule.reasoning, - ) - return should_terminate - except Exception as e: - logger.error("Termination function error: %s", e) - return False + if chat_ctx.chat_history.messages: + last = chat_ctx.chat_history.messages[-1] + # Robust flatten + if hasattr(last, "content"): + c = last.content + if isinstance(c, list): + last_text = " ".join(getattr(p, "text", str(p)) for p in c) + else: + last_text = str(c) + else: + last_text = str(last) + lt = last_text.lower() + if lt.startswith("patient_context_json"): + logger.debug("Termination override: patient context system message -> continue") + return False + if "back to you" in lt: + logger.debug("Termination override: internal handoff phrase -> continue") + return False + except Exception: + pass + rule = ChatRule.model_validate_json(str(result.value[0])) + return rule.verdict == "yes" def evaluate_selection(result): - """Evaluate agent selection from structured output.""" - try: - rule = ChatRule.model_validate_json(str(result.value[0])) - selected_agent = ( - rule.verdict - if rule.verdict - in [agent["name"] for agent in all_agents_config] - else facilitator - ) - logger.debug( - "Selected agent: %s | Reasoning: %s", selected_agent, rule.reasoning - ) - return selected_agent - except Exception as e: - logger.error("Selection function error: %s", e) - return facilitator + logger.info(f"Selection function result: {result}") + rule = ChatRule.model_validate_json(str(result.value[0])) + return rule.verdict if rule.verdict in [agent["name"] for agent in all_agents_config] else facilitator chat = AgentGroupChat( agents=agents, chat_history=chat_ctx.chat_history, selection_strategy=KernelFunctionSelectionStrategy( function=selection_function, - kernel=orchestrator_kernel, + kernel=_create_kernel_with_chat_completion(), result_parser=evaluate_selection, agent_variable_name="agents", history_variable_name="history", - arguments=selection_args, + arguments=arguments, ), termination_strategy=KernelFunctionTerminationStrategy( - agents=[agent for agent in agents if agent.name == facilitator], + agents=[ + agent for agent in agents if agent.name == facilitator + ], # Only facilitator decides if the conversation ends function=termination_function, - kernel=orchestrator_kernel, + kernel=_create_kernel_with_chat_completion(), result_parser=evaluate_termination, agent_variable_name="agents", history_variable_name="history", maximum_iterations=30, - history_reducer=ChatHistoryTruncationReducer(target_count=1, auto_reduce=True), - arguments=termination_args, + # Termination only looks at the last message + history_reducer=ChatHistoryTruncationReducer( + target_count=1, auto_reduce=True + ), + arguments=arguments, ), ) - logger.info("Group chat created successfully with %d agents", len(agents)) return (chat, chat_ctx) diff --git a/src/routes/api/chats.py b/src/routes/api/chats.py index dc0a5f0..0677475 100644 --- 
a/src/routes/api/chats.py +++ b/src/routes/api/chats.py @@ -176,11 +176,23 @@ async def _handle_clear_command(content: str, chat_context) -> bool: @router.get("/api/agents", response_model=AgentsResponse) async def get_available_agents(): + """ + Returns a list of all available agents that can be mentioned in messages. + """ try: - return AgentsResponse(agents=[a["name"] for a in agent_config]) + # Extract agent names from the agent_config + agent_names = [agent["name"] for agent in agent_config if "name" in agent] + + # Return the list of agent names + return JSONResponse( + content={"agents": agent_names, "error": None} + ) except Exception as e: - logger.error("Error getting agents: %s", e) - return AgentsResponse(agents=[], error=str(e)) + logger.exception(f"Error getting available agents: {e}") + return JSONResponse( + content={"agents": [], "error": str(e)}, + status_code=500 + ) @router.websocket("/api/ws/chats/{chat_id}/messages") async def websocket_chat_endpoint(websocket: WebSocket, chat_id: str): diff --git a/src/services/patient_context_analyzer.py b/src/services/patient_context_analyzer.py index 97dd902..515de99 100644 --- a/src/services/patient_context_analyzer.py +++ b/src/services/patient_context_analyzer.py @@ -4,7 +4,7 @@ import logging import os import time -from typing import Optional, Literal, Tuple +from typing import Optional, Tuple from semantic_kernel import Kernel from semantic_kernel.connectors.ai.open_ai.services.azure_chat_completion import AzureChatCompletion @@ -14,17 +14,18 @@ from semantic_kernel.contents import ChatHistory from semantic_kernel.functions import kernel_function -from data_models.patient_context_models import PatientContextDecision +from data_models.patient_context_models import ( + PatientContextDecision, + AnalyzerAction, # For legacy wrapper typing +) logger = logging.getLogger(__name__) -AnalyzerAction = Literal["NONE", "CLEAR", "ACTIVATE_NEW", "SWITCH_EXISTING", "UNCHANGED"] - class PatientContextAnalyzer: """ Patient context analyzer using Semantic Kernel structured output with JSON schema. - Follows Microsoft best practices for structured LLM outputs. + Produces a PatientContextDecision (action + optional patient_id + reasoning). 
""" def __init__( @@ -33,6 +34,7 @@ def __init__( token_provider=None, api_version: Optional[str] = None, ): + # Resolve deployment (priority: explicit arg > dedicated env var > general deployment var) self.deployment_name = ( deployment_name or os.getenv("PATIENT_CONTEXT_DECIDER_DEPLOYMENT_NAME") @@ -40,11 +42,19 @@ def __init__( ) if not self.deployment_name: raise ValueError("No deployment name for patient context analyzer.") + + # Resolve API version (explicit > env > stable default) self.api_version = api_version or os.getenv("AZURE_OPENAI_API_VERSION") or "2024-10-21" + self._token_provider = token_provider - logger.info(f"PatientContextAnalyzer initialized with deployment: {self.deployment_name}") + logger.info( + "PatientContextAnalyzer initialized with deployment=%s api_version=%s", + self.deployment_name, + self.api_version, + ) + # Initialize kernel + service once (lightweight) self._kernel = Kernel() self._kernel.add_service( AzureChatCompletion( @@ -74,7 +84,7 @@ async def analyze_decision( known_patient_ids: List of known patient IDs in this session Returns: - PatientContextDecision: Structured decision with action, patient_id, and reasoning + PatientContextDecision """ if known_patient_ids is None: known_patient_ids = [] @@ -83,7 +93,7 @@ async def analyze_decision( return PatientContextDecision( action="NONE", patient_id=None, - reasoning="Empty or whitespace user input; no action needed." + reasoning="Empty or whitespace user input; no action needed.", ) system_prompt = f"""You are a patient context analyzer for healthcare conversations. @@ -115,16 +125,14 @@ async def analyze_decision( chat_history.add_system_message(system_prompt) chat_history.add_user_message(f"User input: {user_text}") - # Use AzureChatPromptExecutionSettings with response_format for structured output execution_settings = AzureChatPromptExecutionSettings( service_id="patient_context_analyzer", max_tokens=200, temperature=0.1, - response_format=PatientContextDecision, # This generates the JSON schema automatically + response_format=PatientContextDecision, # Structured JSON schema enforced ) svc = self._kernel.get_service("patient_context_analyzer") - results = await svc.get_chat_message_contents( chat_history=chat_history, settings=execution_settings, @@ -135,58 +143,56 @@ async def analyze_decision( return PatientContextDecision( action="NONE", patient_id=None, - reasoning="No response from analyzer; defaulting to NONE." + reasoning="No response from analyzer; defaulting to NONE.", ) - # Parse the structured response content = results[0].content - # Handle both string and already-parsed responses + # Parse string JSON or direct dict (SK provider may return either) if isinstance(content, str): try: decision = PatientContextDecision.model_validate_json(content) except Exception as e: - logger.error(f"Failed to parse structured response: {e}") + logger.error("Failed to parse structured response: %s", e) return PatientContextDecision( action="NONE", patient_id=None, - reasoning=f"Parse error: {str(e)[:30]}..." + reasoning=f"Parse error: {str(e)[:30]}...", ) elif isinstance(content, dict): try: decision = PatientContextDecision.model_validate(content) except Exception as e: - logger.error(f"Failed to validate structured response: {e}") + logger.error("Failed to validate structured response: %s", e) return PatientContextDecision( action="NONE", patient_id=None, - reasoning=f"Validation error: {str(e)[:30]}..." 
+ reasoning=f"Validation error: {str(e)[:30]}...", ) else: - logger.warning(f"Unexpected response type: {type(content)}") + logger.warning("Unexpected response type: %s", type(content)) return PatientContextDecision( action="NONE", patient_id=None, - reasoning="Unexpected response format; defaulting to NONE." + reasoning="Unexpected response format; defaulting to NONE.", ) logger.info( - f"Patient context decision: {decision.action} | " - f"Patient: {decision.patient_id} | " - f"Reasoning: {decision.reasoning}" + "Patient context decision: action=%s patient_id=%s reasoning=%s", + decision.action, + decision.patient_id, + decision.reasoning, ) - return decision except Exception as e: - logger.error(f"Patient context analysis failed: {e}") + logger.error("Patient context analysis failed: %s", e) return PatientContextDecision( action="NONE", patient_id=None, - reasoning=f"Analysis error: {str(e)[:30]}..." + reasoning=f"Analysis error: {str(e)[:30]}...", ) - # Wrapper for backward compatibility - returns timing info async def analyze_with_timing( self, user_text: str, @@ -194,50 +200,50 @@ async def analyze_with_timing( known_patient_ids: list[str], ) -> Tuple[PatientContextDecision, float]: """ - Analyze with timing information for backward compatibility. + Analyze with timing information (backward-compat convenience wrapper). """ start_time = time.time() - decision = await self.analyze_decision( user_text=user_text, prior_patient_id=prior_patient_id, known_patient_ids=known_patient_ids, ) - duration = time.time() - start_time return decision, duration - # Legacy wrapper (for existing callers) async def analyze( self, user_text: str, prior_patient_id: Optional[str], known_patient_ids: list[str], ) -> tuple[AnalyzerAction, Optional[str], float]: - """Legacy wrapper - use analyze_decision() for new code.""" + """ + Legacy wrapper returning (action, patient_id, duration). + Prefer analyze_decision() in new code. + """ decision, duration = await self.analyze_with_timing( user_text, prior_patient_id, known_patient_ids ) return decision.action, decision.patient_id, duration def reset_kernel(self): - """Reset the kernel and service instance to prevent LLM state contamination between patients.""" + """ + Reset the kernel/service (useful when switching patients to avoid cross-contamination). 
+ """ try: - if hasattr(self, "_kernel") and self._kernel: - current_deployment = self.deployment_name - current_api_version = self.api_version - token_provider = self._token_provider - - self._kernel = Kernel() - self._kernel.add_service( - AzureChatCompletion( - service_id="patient_context_analyzer", - deployment_name=current_deployment, - api_version=current_api_version, - ad_token_provider=token_provider, - ) + current_deployment = self.deployment_name + current_api_version = self.api_version + token_provider = self._token_provider + + self._kernel = Kernel() + self._kernel.add_service( + AzureChatCompletion( + service_id="patient_context_analyzer", + deployment_name=current_deployment, + api_version=current_api_version, + ad_token_provider=token_provider, ) - - logger.info("Kernel reset completed for patient context isolation") + ) + logger.info("Kernel reset completed for patient context isolation") except Exception as e: - logger.warning(f"Error during kernel reset: {e}") + logger.warning("Error during kernel reset: %s", e) diff --git a/src/services/patient_context_service.py b/src/services/patient_context_service.py index 1294294..1367d51 100644 --- a/src/services/patient_context_service.py +++ b/src/services/patient_context_service.py @@ -13,30 +13,43 @@ logger = logging.getLogger(__name__) -# Keep the constant so other modules (routes, bots) can import it +# Exported constants / types PATIENT_CONTEXT_PREFIX = "PATIENT_CONTEXT_JSON" PATIENT_ID_PATTERN = re.compile(r"^patient_[0-9]+$") -Decision = Literal["NONE", "UNCHANGED", "NEW_BLANK", "SWITCH_EXISTING", - "CLEAR", "RESTORED_FROM_STORAGE", "NEEDS_PATIENT_ID"] +Decision = Literal[ + "NONE", + "UNCHANGED", + "NEW_BLANK", + "SWITCH_EXISTING", + "CLEAR", + "RESTORED_FROM_STORAGE", + "NEEDS_PATIENT_ID", +] class PatientContextService: """ - Registry-based patient context manager (clean version): - - Registry is authoritative for patient roster. - - Analyzer decides patient activation/switch/clear. - - No system message persistence (ephemeral injection happens outside this service). - - Per-patient chat history isolation performed by caller (route/bot) AFTER decision. + Registry-based patient context manager: + - Registry is authoritative patient roster + active pointer. + - Analyzer determines activation/switch/clear intent. + - Ephemeral system snapshot injection is performed by caller (routes/bots). + - Per‑patient isolation for chat history handled by caller after decision. """ def __init__(self, analyzer: PatientContextAnalyzer, registry_accessor=None, context_accessor=None): self.analyzer = analyzer self.registry_accessor = registry_accessor self.context_accessor = context_accessor - logger.info("PatientContextService initialized (registry enabled: %s)", registry_accessor is not None) + logger.info( + "PatientContextService initialized (registry enabled: %s)", + registry_accessor is not None, + ) async def _ensure_patient_contexts_from_registry(self, chat_ctx: ChatContext): - """Rebuild in-memory patient_contexts from registry snapshot each turn.""" + """ + Rebuild in-memory patient_contexts from the authoritative registry each turn. + Safe to call every turn (clears and repopulates). 
+ """ if not self.registry_accessor: return try: @@ -46,72 +59,74 @@ async def _ensure_patient_contexts_from_registry(self, chat_ctx: ChatContext): for pid, entry in patient_registry.items(): chat_ctx.patient_contexts[pid] = PatientContext( patient_id=pid, - facts=entry.get("facts", {}) + facts=entry.get("facts", {}), ) except Exception as e: logger.warning("Failed to load patient contexts from registry: %s", e) async def decide_and_apply(self, user_text: str, chat_ctx: ChatContext) -> tuple[Decision, TimingInfo]: + """ + Analyze user input, decide patient context transition, and apply. + Flow: + 1. Hydrate registry → in-memory contexts. + 2. If no active patient, attempt silent restore (record timing if used). + 3. Always run analyzer (enables first-turn activation). + 4. Interpret analyzer action into service Decision. + 5. Perform activation / switch / clear side-effects. + 6. Return (Decision, TimingInfo). + """ service_start = time.time() - - # Always refresh from registry first await self._ensure_patient_contexts_from_registry(chat_ctx) - # Short heuristic skip - if user_text and len(user_text.strip()) <= 15 and not any( - k in user_text.lower() for k in ["patient", "clear", "switch"] - ): - if not chat_ctx.patient_id: - fb_start = time.time() - restored = await self._try_restore_from_storage(chat_ctx) - fb_dur = time.time() - fb_start - timing = TimingInfo(analyzer=0.0, storage_fallback=fb_dur, service=time.time() - service_start) - return ("RESTORED_FROM_STORAGE" if restored else "NONE", timing) - timing = TimingInfo(analyzer=0.0, storage_fallback=0.0, service=time.time() - service_start) - return "UNCHANGED", timing + restored = False + fallback_dur = 0.0 + if not chat_ctx.patient_id: + fb_start = time.time() + if await self._try_restore_from_storage(chat_ctx): + restored = True + fallback_dur = time.time() - fb_start decision_model, analyzer_dur = await self.analyzer.analyze_with_timing( user_text=user_text, prior_patient_id=chat_ctx.patient_id, known_patient_ids=list(chat_ctx.patient_contexts.keys()), ) - action = decision_model.action pid = decision_model.patient_id - fallback_dur = 0.0 if action == "CLEAR": await self._archive_all_and_recreate(chat_ctx) - timing = TimingInfo(analyzer=analyzer_dur, storage_fallback=0.0, service=time.time() - service_start) + timing = TimingInfo( + analyzer=analyzer_dur, + storage_fallback=fallback_dur, + service=time.time() - service_start, + ) return "CLEAR", timing if action in ("ACTIVATE_NEW", "SWITCH_EXISTING"): if not pid or not PATIENT_ID_PATTERN.match(pid): - decision = "NEEDS_PATIENT_ID" + decision: Decision = "NEEDS_PATIENT_ID" else: decision = await self._activate_patient_with_registry(pid, chat_ctx) - elif action == "NONE": - if not chat_ctx.patient_id: - fb_start = time.time() - restored = await self._try_restore_from_storage(chat_ctx) - fallback_dur = time.time() - fb_start - decision = "RESTORED_FROM_STORAGE" if restored else "NONE" - else: - decision = "UNCHANGED" elif action == "UNCHANGED": decision = "UNCHANGED" + elif action == "NONE": + decision = "RESTORED_FROM_STORAGE" if restored and chat_ctx.patient_id else "NONE" else: decision = "NONE" timing = TimingInfo( analyzer=analyzer_dur, storage_fallback=fallback_dur, - service=time.time() - service_start + service=time.time() - service_start, ) - # NOTE: No system message injection here (handled by caller). 
return decision, timing async def set_explicit_patient_context(self, patient_id: str, chat_ctx: ChatContext) -> bool: + """ + Explicitly set active patient (external caller / override path). + Returns True if set; False if invalid patient_id. + """ if not patient_id or not PATIENT_ID_PATTERN.match(patient_id): return False @@ -130,7 +145,9 @@ async def set_explicit_patient_context(self, patient_id: str, chat_ctx: ChatCont return True async def _archive_all_and_recreate(self, chat_ctx: ChatContext) -> None: - """Archive all session + patient files + registry then clear memory.""" + """ + Archive session + all patient histories + registry, then clear in-memory state. + """ if chat_ctx.patient_id: self.analyzer.reset_kernel() @@ -146,10 +163,12 @@ async def _archive_all_and_recreate(self, chat_ctx: ChatContext) -> None: folder = f"archive/{timestamp}" if self.context_accessor: + # Session try: await self.context_accessor.archive_to_folder(chat_ctx.conversation_id, None, folder) except Exception as e: logger.warning("Archive session failed: %s", e) + # Each patient for pid in all_patient_ids: try: await self.context_accessor.archive_to_folder(chat_ctx.conversation_id, pid, folder) @@ -164,9 +183,11 @@ async def _archive_all_and_recreate(self, chat_ctx: ChatContext) -> None: chat_ctx.patient_id = None chat_ctx.patient_contexts.clear() - chat_ctx.chat_history.messages.clear() async def _update_registry_storage(self, chat_ctx: ChatContext): + """ + Write/merge current active patient entry into registry (active pointer updated). + """ if not (self.registry_accessor and chat_ctx.patient_id): return current = chat_ctx.patient_contexts.get(chat_ctx.patient_id) @@ -175,19 +196,25 @@ async def _update_registry_storage(self, chat_ctx: ChatContext): entry = { "patient_id": chat_ctx.patient_id, "facts": current.facts, - "conversation_id": chat_ctx.conversation_id + "conversation_id": chat_ctx.conversation_id, } try: await self.registry_accessor.update_patient_registry( chat_ctx.conversation_id, chat_ctx.patient_id, entry, - chat_ctx.patient_id + chat_ctx.patient_id, # update active pointer ) except Exception as e: logger.warning("Failed registry update: %s", e) async def _activate_patient_with_registry(self, patient_id: str, chat_ctx: ChatContext) -> Decision: + """ + Activate or switch patient. Returns: + - UNCHANGED if already active + - SWITCH_EXISTING if switching to existing + - NEW_BLANK if creating a new patient context + """ if chat_ctx.patient_id == patient_id: return "UNCHANGED" if chat_ctx.patient_id and patient_id != chat_ctx.patient_id: @@ -200,13 +227,17 @@ async def _activate_patient_with_registry(self, patient_id: str, chat_ctx: ChatC await self._update_registry_storage(chat_ctx) return "SWITCH_EXISTING" + # New patient chat_ctx.patient_contexts[patient_id] = PatientContext(patient_id=patient_id) chat_ctx.patient_id = patient_id await self._update_registry_storage(chat_ctx) return "NEW_BLANK" async def _try_restore_from_storage(self, chat_ctx: ChatContext) -> bool: - """Restore active patient from registry (no legacy file scanning).""" + """ + If there is no active patient in-memory, attempt to restore last active from registry. + Returns True if restored. 
+ """ if not self.registry_accessor: return False try: From 6e29923e950d4d5f6cc62ddc1b6ecd34d4be1a1f Mon Sep 17 00:00:00 2001 From: vvenglaturmsft Date: Tue, 30 Sep 2025 15:03:11 +0000 Subject: [PATCH 19/20] clean up further --- docs/patient_context.md | 397 ++++------ docs/patient_context_comprehensive.md | 938 +++++++++++++++++++++++ src/data_models/chat_context_accessor.py | 28 +- src/group_chat.py | 8 +- src/healthcare_agents/agent.py | 26 +- src/services/patient_context_service.py | 3 +- 6 files changed, 1130 insertions(+), 270 deletions(-) create mode 100644 docs/patient_context_comprehensive.md diff --git a/docs/patient_context.md b/docs/patient_context.md index 4a2c866..10ffaa3 100644 --- a/docs/patient_context.md +++ b/docs/patient_context.md @@ -1,94 +1,120 @@ -# Patient Context Management (Current Architecture) +# Patient Context Management -This document describes the current (ephemeral, registry‑based) patient context model. It replaces any legacy behavior that persisted system snapshot messages or embedded timing metadata in `PATIENT_CONTEXT_JSON`. +The Healthcare Agent Orchestrator uses an ephemeral, registry‑backed model to maintain isolated conversational state per patient inside a single conversation. This document explains the current implementation, how patient IDs are detected and validated, and how the system persists, restores, and clears patient context safely. ---- +> [!IMPORTANT] +> `PATIENT_CONTEXT_JSON` system snapshot messages are ephemeral. They are injected each turn and never persisted. The registry is the single source of truth for the active patient and roster. -## ✅ Core Goals +## Core Objectives -| Goal | Current Mechanism | -|------|-------------------| -| Patient isolation | Separate per‑patient history blobs: `patient_{id}_context.json` | -| Multi-patient roster | Central registry: `patient_context_registry.json` (authoritative) | -| Ephemeral grounding | Fresh `PATIENT_CONTEXT_JSON` system snapshot injected each turn (never persisted) | -| Low-noise storage | Only user + agent dialogue retained; snapshots stripped before write | -| Safe switching | Analyzer governs transitions; kernel reset only when changing active patient | -| Clear operation | Archives session + all patient histories + registry, then resets in-memory state | +| Objective | Mechanism | +|-----------|-----------| +| Patient isolation | Separate per‑patient history files (`patient_{id}_context.json`) | +| Multi‑patient roster | Central registry file (`patient_context_registry.json`) | +| Ephemeral grounding | Fresh `PATIENT_CONTEXT_JSON` snapshot every turn (index 0) | +| Low‑noise storage | Snapshots stripped before persistence | +| Safe switching & activation | LLM analyzer + service validation + kernel reset on change | +| Complete clear/reset | Archives session, all patient histories, and registry in timestamped folder | ---- +## High‑Level Turn Flow -## 🔄 High‑Level Turn Flow +1. Load session `ChatContext` (no patient file yet). +2. Check for clear command (archive + reset if present). +3. Run `PatientContextService.decide_and_apply()`: + - Hydrate registry into `chat_ctx.patient_contexts`. + - Attempt silent restore if no active patient. + - Invoke analyzer (unless short-message heuristic skip). + - Apply decision (activate / switch / clear / none). +4. If patient active: load that patient’s stored history into memory. +5. Strip any previous `PATIENT_CONTEXT_JSON` system snapshot(s). +6. Inject a new snapshot (ephemeral). +7. Append user message. +8. 
Run multi-agent orchestration (selection + termination). +9. (Teams) Append single guarded `PT_CTX` audit footer. +10. Persist updated history (patient-specific if active else session). +11. Registry already reflects new active pointer (if changed). -1. Load the session `ChatContext` (no patient file loaded yet). -2. If a clear command was issued: archive everything, reset state, send “cleared” reply, stop. -3. Call `PatientContextService.decide_and_apply()`: - - Hydrate `chat_ctx.patient_contexts` from the registry (source of truth). - - Apply any transition: activate, switch, clear, restore, or no-op. -4. If a patient is now active, load that patient’s isolated chat history (replacing the session history in memory). -5. Remove any prior ephemeral `PATIENT_CONTEXT_JSON` system snapshot(s) from memory. -6. Construct and inject a fresh ephemeral snapshot system message (not persisted). -7. Append the raw user message. -8. Run multi-agent orchestration (Orchestrator + specialists). -9. (Teams only) Append a single guarded `PT_CTX` audit footer (never duplicates). -10. Persist: - - Write to the patient file if `chat_ctx.patient_id` is set; otherwise to the session file. - - The ephemeral snapshot is excluded (it was already filtered before persistence). -11. The registry already reflects any activation / switch / new patient from step 3. +> [!NOTE] +> Only one snapshot should exist in memory at any time. The system enforces this by stripping before reinjection. ---- +## Decision Engine (PatientContextAnalyzer) -## 🧠 Decision Engine (`PatientContextAnalyzer`) - -Produces an action plus (optionally) a `patient_id`. +Structured LLM classifier producing `PatientContextDecision (action, patient_id?, reasoning)`. | Action | Meaning | |--------|---------| -| `NONE` | No patient context required (general/meta turn) | -| `ACTIVATE_NEW` | Start a brand-new patient (ID extracted) | -| `SWITCH_EXISTING` | Switch to an existing (registry) patient | -| `UNCHANGED` | Keep the current active patient | -| `CLEAR` | User intends to clear all patient context | -| (Service-derived) `RESTORED_FROM_STORAGE` | Previous active patient resurrected (no active in-memory, registry had one) | -| (Service-derived) `NEEDS_PATIENT_ID` | User intent implies patient focus but no resolvable ID provided | +| `NONE` | General / meta turn (no context change) | +| `ACTIVATE_NEW` | Activate a new patient (ID extracted) | +| `SWITCH_EXISTING` | Switch to known patient | +| `UNCHANGED` | Keep current active patient | +| `CLEAR` | User intent to wipe contexts | +| (Service) `RESTORED_FROM_STORAGE` | Silent revival of last active from registry | +| (Service) `NEEDS_PATIENT_ID` | User intended change but no valid ID provided | -Service-level post-processing can reclassify into operational decisions like `NEW_BLANK`. +Service may reinterpret `ACTIVATE_NEW` as `NEW_BLANK` (new record). 
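+The mapping from analyzer action to service decision can be summarized as a small function. The sketch below mirrors the logic in `PatientContextService.decide_and_apply()` and `_activate_patient_with_registry()` (simplified: registry updates, kernel resets, and timing are omitted):
+
+```python
+import re
+
+PATIENT_ID_PATTERN = re.compile(r"^patient_[0-9]+$")
+
+def map_action(action: str, pid: str | None,
+               active_id: str | None, known_ids: set[str]) -> str:
+    """Translate an analyzer action into a service-level decision."""
+    if action == "CLEAR":
+        return "CLEAR"                       # archive everything, wipe in-memory state
+    if action in ("ACTIVATE_NEW", "SWITCH_EXISTING"):
+        if not pid or not PATIENT_ID_PATTERN.match(pid):
+            return "NEEDS_PATIENT_ID"        # change intent, but no usable ID
+        if pid == active_id:
+            return "UNCHANGED"               # redundant mention of the active patient
+        return "SWITCH_EXISTING" if pid in known_ids else "NEW_BLANK"
+    if action == "UNCHANGED":
+        return "UNCHANGED"
+    return "NONE"                            # NONE; silent restore is handled separately
+```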
---- +### Patient ID Detection -## 🏛 Registry (Single Source of Truth) +| Stage | Logic | +|-------|-------| +| Heuristic Skip | Short (≤ 15 chars) and no `patient|clear|switch` → bypass analyzer | +| LLM Extraction | Analyzer only returns `patient_id` for `ACTIVATE_NEW` / `SWITCH_EXISTING` | +| Regex Validation | Must match `^patient_[0-9]+$` (`PATIENT_ID_PATTERN`) | +| New vs Existing | In registry → switch; not in registry → new blank context | +| Invalid / Missing | Activation intent without valid pattern → `NEEDS_PATIENT_ID` | +| Silent Restore | Action `NONE` + no active + registry has prior active → restore | +| Isolation Reset | Patient change triggers `analyzer.reset_kernel()` | -File: `patient_context_registry.json` +**Examples** -```json -{ - "conversation_id": "uuid", - "active_patient_id": "patient_16", - "patient_registry": { - "patient_4": { - "patient_id": "patient_4", - "facts": {}, - "conversation_id": "uuid", - "last_updated": "2025-09-28T14:55:41.221939+00:00" - }, - "patient_16": { - "patient_id": "patient_16", - "facts": {}, - "conversation_id": "uuid", - "last_updated": "2025-09-28T15:04:10.119003+00:00" - } - }, - "last_updated": "2025-09-28T15:04:10.119020+00:00" -} +| User Input | Analyzer Action | Service Decision | Notes | +|------------|-----------------|------------------|-------| +| `start review for patient_4` | `ACTIVATE_NEW` | `NEW_BLANK` | New patient | +| `switch to patient_4` | `SWITCH_EXISTING` | `SWITCH_EXISTING` | Already known | +| `patient_4` | `SWITCH_EXISTING` | `SWITCH_EXISTING` | Minimal intent | +| `switch patient please` | `ACTIVATE_NEW` | `NEEDS_PATIENT_ID` | Missing ID | +| `clear patient` | `CLEAR` | `CLEAR` | Full reset | +| `ok` | (Skipped) | `UNCHANGED` or restore | Too short for analysis | + +> [!TIP] +> To support additional formats (e.g., MRN), update `PATIENT_ID_PATTERN` and adjust the analyzer prompt description. + +### Customizing Patient ID Format + +The system validates patient IDs using a configurable regex pattern. + +**Default Pattern:** `^patient_[0-9]+$` (e.g., `patient_4`, `patient_123`) + +**To Use a Different Format:** + +Set the `PATIENT_ID_PATTERN` environment variable before starting the application: + +```bash +# Example: Accept MRN format +export PATIENT_ID_PATTERN="^mrn-[A-Z0-9]{6}$" + +# Example: Accept multiple formats (either patient_N or mrn-XXXXXX) +export PATIENT_ID_PATTERN="^(patient_[0-9]+|mrn-[A-Z0-9]{6})$" + +# Then start the app +python src/app.py ``` -Characteristics: -- Contains only roster + active pointer. -- No embedded system message text. -- `facts` is a lightweight dict (reserved for future enrichment). +**Important:** When changing the pattern, ensure the analyzer prompt in `patient_context_analyzer.py` reflects the new format so the LLM extracts IDs correctly. ---- +## Registry (Source of Truth) -## 🗂 Storage Layout +`patient_context_registry.json` stores: +- `active_patient_id` +- `patient_registry` map of patient entries: + - `patient_id` + - `facts` (lightweight dict, extensible) + - `conversation_id` + - timestamps + +No system snapshots or timing entries are stored here. + +## Storage Layout ``` {conversation_id}/ @@ -104,215 +130,123 @@ Characteristics: └── {timestamp}_patient_context_registry_archived.json ``` -Key behavior: -- `PATIENT_CONTEXT_JSON` messages never persist. -- Only dialogue + ancillary arrays (display/output) remain. +> [!NOTE] +> Only dialogue and display/output arrays are persisted—never ephemeral snapshots. 
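+Because only the active `patient_id` determines which file receives a turn's history, persistence routing reduces to a simple path choice. A minimal sketch (file names follow the layout above; the helper name is illustrative, not an accessor API):
+
+```python
+def history_blob_name(conversation_id: str, patient_id: str | None) -> str:
+    """Choose the file that receives this turn's persisted history."""
+    if patient_id:
+        # Active patient -> isolated per-patient history file
+        return f"{conversation_id}/{patient_id}_context.json"
+    # No active patient -> general session history
+    return f"{conversation_id}/session_context.json"
+```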
---- +## Ephemeral Snapshot -## 💬 Ephemeral Snapshot Format - -Injected each turn at index 0 of `chat_ctx.chat_history.messages`: +Format (in memory only, first message): ```text -PATIENT_CONTEXT_JSON: {"conversation_id":"uuid","patient_id":"patient_16","all_patient_ids":["patient_4","patient_15","patient_16"],"generated_at":"2025-09-28T15:07:44.012345Z"} +PATIENT_CONTEXT_JSON: {"conversation_id":"uuid","patient_id":"patient_16","all_patient_ids":["patient_4","patient_15","patient_16"],"generated_at":"2025-09-30T16:32:11.019Z"} ``` -Differences vs legacy: - -| Aspect | Legacy | Current | -|--------|--------|---------| -| Timing field (`timing_sec`) | Present | Removed | -| Injection site | Inside service | Caller (route / bot) post-decision | -| Persistence | Stored & reloaded | Rebuilt every turn (never stored) | -| Cleanup | Service replaced old | Caller strips before reinjecting | -| Purpose | Grounding (stale risk) | Always-fresh grounding snapshot | - -Rationale for removal of timing: operational concern, not reasoning signal. - ---- - -## 🧩 Runtime Data Model (Simplified) +## Runtime Data Model ```python ChatContext: conversation_id: str patient_id: Optional[str] - patient_contexts: Dict[str, PatientContext] # Hydrated from registry each turn - chat_history: Semantic Kernel chat history + patient_contexts: Dict[str, PatientContext] + chat_history: ChatHistory ``` -Hydration snippet: +Hydration each turn: ```python await patient_context_service._ensure_patient_contexts_from_registry(chat_ctx) -# chat_ctx.patient_contexts = { pid: PatientContext(...), ... } ``` -Only `patient_id` determines which file receives writes. +## Isolation & Transitions ---- - -## 🔐 Isolation Semantics - -| Operation | Effect | +| Operation | Result | |-----------|--------| -| Switch patient | Kernel reset + load that patient’s chat history into memory | -| New patient | Kernel reset + start empty history | -| Clear | Archive all (session, patients, registry) then wipe memory | -| General (no patient) | Session-only evolution; `patient_id` stays `None` | -| Restore (idle resume) | If no active but registry has a previous active → restore it | - ---- +| New patient | Kernel reset + new context file | +| Switch patient | Kernel reset + load patient history | +| Clear | Archive all + wipe memory state | +| Restore | Silent reactivation from registry pointer | +| General turn | Session-only if no active patient | -## 🧪 Short-Message Heuristic +## Short-Message Heuristic -Skip analyzer if: -- Input length ≤ 15 chars AND -- Lacks substrings: `patient`, `clear`, `switch` +Skip analyzer when: +- Length ≤ 15 +- No key substrings (`patient`, `clear`, `switch`) Outcomes: -- Active patient exists → treat as `UNCHANGED` -- None active → attempt restore → `RESTORED_FROM_STORAGE` or `NONE` - -Purpose: Avoid unnecessary model calls on handoff fragments (e.g., “back to you”). - ---- - -## 🛠 `PatientContextService` Responsibilities +- Active patient → `UNCHANGED` +- None → attempt restore → `RESTORED_FROM_STORAGE` or `NONE` -Still does: -- Sync from registry each invocation. -- Run analyzer (unless heuristic skip). -- Perform transitions: new / switch / clear / restore. -- Reset kernel only on patient change. -- Update registry on activation/switch. +## PatientContextService Responsibilities -No longer does: -- Inject snapshot messages. -- Embed timing into snapshots. -- Persist patient metadata within chat histories. +- Hydrate registry → memory each invocation. +- Attempt restoration if no active. 
+- Run analyzer (unless skipped). +- Apply decision + side effects: + - Activation / switch → registry update, optional kernel reset + - Clear → archive + wipe +- Return `(decision, TimingInfo)`. +- Never inject snapshot (caller handles ephemeral injection). -Return signature (conceptually): -``` -(decision: Decision, timing: TimingInfo) -``` - -Service-level decision literal union: +Decision union: ``` "NONE" | "UNCHANGED" | "NEW_BLANK" | "SWITCH_EXISTING" | "CLEAR" | "RESTORED_FROM_STORAGE" | "NEEDS_PATIENT_ID" ``` ---- - -## 🧵 Web vs Teams Parity -Shared pipeline: -1. Strip old snapshot(s). -2. Inject new snapshot (fresh `generated_at`). -3. Run group chat orchestration. -4. Persist history (snapshot excluded). -5. Snapshot grounds roster/meta reasoning. +## Example Turn (Persisted vs In-Memory) -Teams additions: -- Human-readable `PT_CTX` footer (single insertion via guard). -- Footer includes `Session ID:`. +In memory: -Guard pattern: -```python -if all_pids and "PT_CTX:" not in response.content: - # append audit footer once ``` - ---- - -## 📎 Example Turn - -In-memory (transient): -``` -[System] PATIENT_CONTEXT_JSON: {"conversation_id":"c123","patient_id":"patient_4","all_patient_ids":["patient_4"],"generated_at":"...Z"} -[User] Provide history -[Assistant:PatientHistory] Here is the complete patient data ... +[System] PATIENT_CONTEXT_JSON: {...} +[User] Start review for patient_4 +[Assistant:Orchestrator] Plan... ``` Persisted (`patient_4_context.json`): + ```json { "conversation_id": "c123", "patient_id": "patient_4", "chat_history": [ - {"role": "user", "content": "Provide history"}, - {"role": "assistant", "name": "PatientHistory", "content": "Here is the complete patient data ..."} - ], - "patient_data": [], - "display_blob_urls": [], - "output_data": [] + {"role": "user", "content": "Start review for patient_4"}, + {"role": "assistant", "name": "Orchestrator", "content": "Plan..."} + ] } ``` -Snapshot absent by design. +Snapshot intentionally absent. ---- +## Clear Operation -## 🧽 Clear Operation - -Triggers on any of: +Triggers on: ``` -"clear", "clear patient", "clear context", "clear patient context" +clear | clear patient | clear context | clear patient context ``` -Steps: -1. Archive session file, all patient files (registry-sourced), registry file. -2. Reset: `patient_id = None`, `patient_contexts.clear()`, `chat_history.clear()`. -3. Persist fresh empty session context. -4. Reply with confirmation. - ---- - -## 🧾 Roster & Meta Queries +Procedure: +1. Archive (session, each patient file, registry). +2. Reset in-memory context + histories. +3. Persist empty session context. +4. Respond with confirmation. -Handled through Orchestrator prompt rules using the latest snapshot: -- Use `all_patient_ids` + `patient_id`. -- Never hallucinate absent patients. -- Don’t “re-plan” when user repeats the already-active patient. +## Roster & Meta Queries -Stability aids: -- Sort `all_patient_ids`. -- (Optional future) Add `patient_count` or `_hint` if reasoning degrades. +Agents derive: +- Active patient → `patient_id` +- Roster → `all_patient_ids` (sorted) ---- +Rules: +- No hallucinated IDs. +- Avoid redundant re-planning for same active patient mention. -## 🛡 Why Ephemeral? 
- -| Legacy Issue | Current Resolution | -|--------------|-------------------| -| Persisted stale roster | Snapshot rebuilt every turn from registry | -| Stacked duplicate system messages | Strip → reinject ensures exactly one | -| Timing noise in reasoning | Removed from snapshot | -| Confusion over authority | Registry authoritative; snapshot transient | -| Unnecessary analyzer calls | Heuristic bypass for trivial handoffs | - ---- - -## 🧪 Validation Scenarios - -| Scenario | Expected | -|----------|----------| -| First mention “start review for patient_4” | Decision = `NEW_BLANK`; snapshot shows only `patient_4` | -| Switch to existing other patient | Decision = `SWITCH_EXISTING`; kernel reset occurs | -| Redundant switch to same patient | Decision = `UNCHANGED`; no reset | -| Short handoff “back to you” | Analyzer skipped; `UNCHANGED` (if active) | -| Clear then new command | Clean slate → next patient command = new activation | -| Teams render | Single `PT_CTX` footer incl. Session ID | -| Persistence audit | No `PATIENT_CONTEXT_JSON` lines in stored files | - ---- - -## 🛠 Code Reference (Filtering + Injection) +## Code Reference (Filtering & Injection) ```python -# Remove old snapshot(s) +# Strip prior snapshot(s) chat_ctx.chat_history.messages = [ m for m in chat_ctx.chat_history.messages if not ( @@ -335,25 +269,4 @@ sys_msg = ChatMessageContent(role=AuthorRole.SYSTEM, items=[TextContent(text=lin chat_ctx.chat_history.messages.insert(0, sys_msg) ``` -Teams footer guard (conceptual): -```python -if all_pids and "PT_CTX:" not in response.content: - # append audit footer once -``` - ---- - -## 🔮 Future Enhancements (Optional) - -| Idea | Rationale | -|------|-----------| -| Deterministic plan confirmation flag | Reduce reliance on prompt-only gating | -| Snapshot `patient_count` field | Faster meta answers (no length calc) | -| Registry `facts` enrichment | Richer grounding for specialized agents | -| Test harness for decision invariants | Prevent regression in edge transitions | -| LLM classification caching | Reduce analyzer calls for repeated short intents | - ---- -Last updated: 2025-09-28 -Status: Stable ephemeral model in production branch (`sekar/pc_poc`). diff --git a/docs/patient_context_comprehensive.md b/docs/patient_context_comprehensive.md new file mode 100644 index 0000000..d214346 --- /dev/null +++ b/docs/patient_context_comprehensive.md @@ -0,0 +1,938 @@ +# Patient Context Management - Comprehensive Guide + +This document provides a complete analysis of the **Patient Context Management System** migration from a single-conversation model to a multi-patient, registry-backed architecture with ephemeral snapshot grounding. + +> [!IMPORTANT] +> This is a technical deep-dive document. For quick reference, see [`patient_context.md`](patient_context.md). + +--- + +## Table of Contents + +- [Executive Summary](#executive-summary) +- [Architecture Overview](#architecture-overview) +- [New Components](#new-components) +- [Modified Components](#modified-components) +- [Complete Turn Flow](#complete-turn-flow) +- [Migration Benefits](#migration-benefits) + +--- + +## Executive Summary + +The Healthcare Agent Orchestrator has been enhanced with a **registry-backed, ephemeral snapshot architecture** to enable multi-patient conversational state management within a single conversation. 
+ +### Key Achievements + +| Capability | Before | After | +|------------|--------|-------| +| **Patient Isolation** | Single conversation = single context | Multiple patients with isolated histories | +| **Patient Switching** | Not supported | Seamless switching with kernel reset | +| **Storage Model** | Single `chat_context.json` | Per-patient files + session + registry | +| **Agent Grounding** | No patient awareness | Ephemeral snapshot each turn | +| **Clear Operation** | Simple archive | Bulk archive (session + all patients + registry) | +| **Patient Detection** | Manual/hardcoded | Automatic LLM-based classifier | +| **Orchestration** | Facilitator loops, false terminations | Confirmation gate + termination overrides | + +--- + +## Architecture Overview + +### Old Architecture (Single Context Model) + +``` +┌─────────────────────────────────────────────────────────┐ +│ User Interface │ +│ (Teams Bot / WebSocket API) │ +└──────────────────────┬──────────────────────────────────┘ + │ + ↓ + ┌───────────────────────┐ + │ ChatContext │ + │ - conversation_id │ + │ - chat_history │ ← Single history + │ - patient_id (unused)│ + └───────────┬───────────┘ + │ + ↓ + ┌───────────────────────┐ + │ Storage (Blob) │ + │ {conv_id}/ │ + │ chat_context.json │ ← One file + └───────────────────────┘ +``` + +**Problems:** +- ❌ No patient isolation (all messages in one history) +- ❌ No patient switching capability +- ❌ No patient awareness in agents +- ❌ Facilitator loops (no confirmation gate) +- ❌ False terminations (snapshot messages confused LLM) + +### New Architecture (Registry-Backed Ephemeral Model) + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ User Interface │ +│ (Teams Bot / WebSocket API) │ +└───────────────────────────┬─────────────────────────────────────────┘ + │ + ↓ + ┌───────────────────────────────────────────┐ + │ PatientContextService │ + │ - decide_and_apply() │ + │ - Registry hydration │ + │ - Silent restore │ + │ - Validation │ + └──────────┬────────────────────────────────┘ + │ + ┌────────────┴─────────────┐ + │ │ + ↓ ↓ +┌─────────────────┐ ┌──────────────────────┐ +│ PatientContext │ │ Registry Accessor │ +│ Analyzer │ │ (Source of Truth) │ +│ - LLM Classifier│ │ │ +│ - Structured │ │ registry.json: │ +│ Output │ │ - active_patient_id │ +│ - Intent │ │ - patient_registry │ +│ Detection │ │ map │ +└─────────────────┘ └──────────┬───────────┘ + │ + ┌─────────────┴──────────────────────┐ + │ │ + ↓ ↓ + ┌──────────────────┐ ┌───────────────────────┐ + │ ChatContext │ │ Storage (Blob) │ + │ - conversation_id│ │ {conv_id}/ │ + │ - patient_id │ │ session_context.json│ + │ - patient_contexts│ │ patient_4_context │ + │ - chat_history │◄────────────┤ patient_15_context │ + └────────┬─────────┘ │ registry.json │ + │ └───────────────────────┘ + │ + ↓ + ┌──────────────────────────────────┐ + │ Ephemeral Snapshot Injection │ + │ [0] SYSTEM: PATIENT_CONTEXT_JSON│ ← Generated each turn + │ [1] USER: message │ + │ [2] ASSISTANT: response │ + └──────────┬───────────────────────┘ + │ + ↓ + ┌──────────────────────────┐ + │ Group Chat │ + │ - Selection (w/ gate) │ + │ - Termination (w/ │ + │ overrides) │ + │ - Agents (see snapshot) │ + └──────────────────────────┘ +``` + +**Benefits:** +- ✅ **Per-patient isolation** - Separate history files +- ✅ **Multi-patient roster** - Registry tracks all patients in session +- ✅ **Ephemeral grounding** - Fresh snapshot each turn (never persisted) +- ✅ **Automatic detection** - LLM analyzer classifies intent +- ✅ **Safe 
switching** - Kernel reset on patient change +- ✅ **Robust clear** - Bulk archive with timestamp folders +- ✅ **Stable orchestration** - Confirmation gate + deterministic overrides + +--- + +## New Components + +### 1. PatientContextAnalyzer + +**File:** `src/services/patient_context_analyzer.py` + +**Purpose:** LLM-based structured output classifier that determines patient context intent from user messages. + +**Key Features:** + +```python +class PatientContextAnalyzer: + """ + Analyzes user messages to determine patient context actions. + Uses Azure OpenAI with structured output for reliable classification. + """ + + async def analyze_patient_context( + self, + user_text: str, + prior_patient_id: str | None, + known_patient_ids: list[str] + ) -> PatientContextDecision: + """ + Returns structured decision: + - action: NONE | ACTIVATE_NEW | SWITCH_EXISTING | UNCHANGED | CLEAR + - patient_id: Extracted ID (for ACTIVATE_NEW/SWITCH_EXISTING only) + - reasoning: Brief explanation + """ +``` + +**Decision Examples:** + +| User Input | Context | Action | patient_id | Reasoning | +|------------|---------|--------|------------|-----------| +| `"review patient_4"` | No active | `ACTIVATE_NEW` | `"patient_4"` | User explicitly requests patient_4 | +| `"switch to patient_15"` | patient_4 active | `SWITCH_EXISTING` | `"patient_15"` | Explicit switch requested | +| `"what's the diagnosis?"` | patient_4 active | `UNCHANGED` | `null` | Follow-up question for active patient | +| `"clear patient"` | patient_4 active | `CLEAR` | `null` | User requests context reset | + +**Heuristic Skip:** +- Messages ≤ 15 characters without keywords (`patient`, `clear`, `switch`) bypass the analyzer for efficiency +- Returns `UNCHANGED` if patient active, `NONE` otherwise + +**Why This Component:** +- **Automatic** - No manual parsing/regex +- **Contextual** - Considers prior state and known patients +- **Reliable** - Structured output ensures consistent format +- **Explainable** - Reasoning field aids debugging +- **Efficient** - Heuristic skip for short messages + +--- + +### 2. PatientContextService + +**File:** `src/services/patient_context_service.py` + +**Purpose:** Orchestrates the complete patient context lifecycle - hydration, analysis, validation, and application. + +**Key Methods:** + +```python +class PatientContextService: + """ + Manages patient context lifecycle: + - Registry hydration + - Silent restoration + - Analyzer invocation + - Decision validation & application + - Side effects (kernel reset, archival) + """ + + async def decide_and_apply( + self, + user_text: str, + chat_ctx: ChatContext + ) -> tuple[Decision, TimingInfo]: + """ + Main orchestration method. Returns: + - Decision: Final service decision + - TimingInfo: Performance metrics + """ +``` + +**Decision Pipeline:** + +``` +User Text + ↓ +1. Hydrate Registry → chat_ctx.patient_contexts + ↓ +2. Silent Restore Attempt (if no active patient) + ↓ +3. Heuristic Check (skip analyzer if short message) + ↓ +4. Analyzer Invocation (if not skipped) + ↓ +5. Validation & Transformation: + - ACTIVATE_NEW + new ID → NEW_BLANK + - ACTIVATE_NEW + exists → SWITCH_EXISTING + - ACTIVATE_NEW + invalid → NEEDS_PATIENT_ID + - SWITCH_EXISTING + invalid → NEEDS_PATIENT_ID + - CLEAR → archive + reset + ↓ +6. Apply Side Effects: + - Kernel reset (if patient change) + - Registry update + - Archive (if clear) + ↓ +7. 
Return (Decision, TimingInfo) +``` + +**Service Decisions:** + +``` +"NONE" - No patient context change +"UNCHANGED" - Keep current patient +"NEW_BLANK" - Activate new patient (reinterpreted ACTIVATE_NEW) +"SWITCH_EXISTING" - Switch to known patient +"CLEAR" - Archive all and reset +"RESTORED_FROM_STORAGE" - Silent reactivation from registry +"NEEDS_PATIENT_ID" - User intent unclear, need valid ID +``` + +**Why This Component:** +- **Centralized orchestration** - Single responsibility for patient lifecycle +- **Consistent validation** - Regex pattern enforced (`PATIENT_ID_PATTERN`) +- **Registry authority** - Always syncs with source of truth +- **Performance tracking** - TimingInfo for monitoring +- **Separation of concerns** - Service doesn't inject snapshots (caller responsibility) + +--- + +### 3. PatientContextRegistry Accessor + +**File:** `src/data_models/patient_context_registry_accessor.py` + +**Purpose:** Manages persistence of the patient context registry (source of truth). + +**Registry Structure:** + +```json +{ + "active_patient_id": "patient_4", + "patient_registry": { + "patient_4": { + "patient_id": "patient_4", + "facts": {}, + "conversation_id": "19:abc-123-def@thread.tacv2", + "created_at": "2025-09-30T16:30:00.000Z", + "updated_at": "2025-09-30T16:45:00.000Z" + }, + "patient_15": { + "patient_id": "patient_15", + "facts": {}, + "conversation_id": "19:abc-123-def@thread.tacv2", + "created_at": "2025-09-30T16:32:00.000Z", + "updated_at": "2025-09-30T16:40:00.000Z" + } + } +} +``` + +**Key Methods:** + +```python +class PatientContextRegistryAccessor: + async def read_registry( + self, + conversation_id: str + ) -> tuple[dict, str | None]: + """Returns (patient_registry, active_patient_id)""" + + async def write_registry( + self, + conversation_id: str, + patient_registry: dict, + active_patient_id: str | None + ) -> None: + """Persists registry to patient_context_registry.json""" + + async def archive_registry( + self, + conversation_id: str + ) -> None: + """Archives registry during clear operation""" +``` + +**Why This Component:** +- **Source of truth** - Registry is authoritative for active patient +- **Roster management** - Tracks all patients in session +- **Extensible facts** - Can store patient-specific metadata +- **Audit trail** - Timestamps for compliance +- **Archival support** - Clean clear operations + +--- + +### 4. PatientContext Data Models + +**File:** `src/data_models/patient_context_models.py` + +**Purpose:** Type-safe models for patient context operations. 
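+
+For concreteness, here is a minimal sketch of pipeline step 5 above — mapping the analyzer's `action`/`patient_id` onto a service decision. The function name, and the choice to treat a valid-but-unknown ID on `SWITCH_EXISTING` as `NEW_BLANK`, are assumptions for illustration; the regex shown is the documented default for `PATIENT_ID_PATTERN`:
+
+```python
+import re
+
+# Documented default; configurable via the PATIENT_ID_PATTERN env var.
+PATIENT_ID_PATTERN = re.compile(r"^patient_[0-9]+$")
+
+def reinterpret(action: str, patient_id: str | None, known_ids: set[str]) -> str:
+    """Illustrative mapping of an analyzer action onto a service decision (step 5)."""
+    if action in ("ACTIVATE_NEW", "SWITCH_EXISTING"):
+        if not patient_id or not PATIENT_ID_PATTERN.match(patient_id):
+            return "NEEDS_PATIENT_ID"            # invalid or missing ID
+        return "SWITCH_EXISTING" if patient_id in known_ids else "NEW_BLANK"
+    return action                                # NONE | UNCHANGED | CLEAR pass through
+
+assert reinterpret("ACTIVATE_NEW", "patient_4", set()) == "NEW_BLANK"
+assert reinterpret("ACTIVATE_NEW", "patient_4", {"patient_4"}) == "SWITCH_EXISTING"
+assert reinterpret("SWITCH_EXISTING", "pt-4", {"patient_4"}) == "NEEDS_PATIENT_ID"
+```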
+ +**Key Models:** + +```python +class PatientContext: + """Represents a patient's context within a conversation.""" + patient_id: str + facts: dict = field(default_factory=dict) + conversation_id: Optional[str] = None + created_at: Optional[datetime] = None + updated_at: Optional[datetime] = None + + +class PatientContextDecision: + """Structured output from PatientContextAnalyzer.""" + action: Literal["NONE", "ACTIVATE_NEW", "SWITCH_EXISTING", "UNCHANGED", "CLEAR"] + patient_id: Optional[str] = None + reasoning: str + + +class TimingInfo: + """Performance metrics for patient context operations.""" + analyzer_ms: Optional[float] = None + storage_fallback_ms: Optional[float] = None + service_total_ms: Optional[float] = None +``` + +**Why These Models:** +- **Type safety** - Catches errors at development time +- **Documentation** - Clear contracts for each component +- **Extensibility** - Easy to add new fields (e.g., `facts`) +- **Structured output** - Enforces LLM output format +- **Observability** - Timing metrics for monitoring + +--- + +## Modified Components + +### 1. ChatContext Data Model + +**File:** `src/data_models/chat_context.py` + +#### Before: + +```python +class ChatContext: + def __init__(self, conversation_id: str): + self.conversation_id = conversation_id + self.chat_history = ChatHistory() + self.patient_id = None # ← Unused field + # ... other fields +``` + +#### After: + +```python +class ChatContext: + def __init__(self, conversation_id: str): + self.conversation_id = conversation_id + self.chat_history = ChatHistory() + self.patient_id = None # ← NOW USED: Active patient pointer + self.patient_contexts: Dict[str, PatientContext] = {} # ✅ NEW: Multi-patient roster + # ... other fields +``` + +**Key Changes:** + +| Field | Before | After | +|-------|--------|-------| +| `patient_id` | Unused | **Active patient pointer** (set by service) | +| `patient_contexts` | ❌ N/A | ✅ **Dict[str, PatientContext]** - roster of all patients | +| `chat_history` | Single history | **Swapped per-patient** (loaded from isolated files) | + +**Lifecycle Example:** + +```python +# Turn 1: User mentions patient_4 +chat_ctx.patient_id = None +chat_ctx.patient_contexts = {} + ↓ decide_and_apply() +chat_ctx.patient_id = "patient_4" +chat_ctx.patient_contexts = { + "patient_4": PatientContext(patient_id="patient_4", ...) +} + +# Turn 2: User switches to patient_15 + ↓ decide_and_apply() +chat_ctx.patient_id = "patient_15" +chat_ctx.patient_contexts = { + "patient_4": ..., + "patient_15": PatientContext(patient_id="patient_15", ...) +} +``` + +--- + +### 2. ChatContextAccessor (Storage Layer) + +**File:** `src/data_models/chat_context_accessor.py` + +This is one of the **most critical** changes - the accessor now handles per-patient file routing and ephemeral snapshot filtering. + +#### A. 
`get_blob_path()` - File Routing + +##### Before: +```python +def get_blob_path(self, conversation_id: str) -> str: + return f"{conversation_id}/chat_context.json" # Single file +``` + +##### After: +```python +def get_blob_path(self, conversation_id: str, patient_id: str = None) -> str: + if patient_id: + return f"{conversation_id}/patient_{patient_id}_context.json" + return f"{conversation_id}/session_context.json" +``` + +**Storage Structure:** + +``` +BEFORE: +conversation_123/ + └── chat_context.json ← All messages + +AFTER: +conversation_123/ + ├── session_context.json ← Session-level (no patient) + ├── patient_patient_4_context.json ← patient_4's history + ├── patient_patient_15_context.json ← patient_15's history + └── patient_context_registry.json ← Source of truth +``` + +#### B. `serialize()` - Ephemeral Snapshot Filtering + +This is **CRITICAL** - ensures snapshots never get persisted. + +##### Before: +```python +@staticmethod +def serialize(chat_ctx: ChatContext) -> str: + return json.dumps({ + "chat_history": chat_ctx.chat_history.serialize(), # Direct + }) +``` + +##### After: +```python +@staticmethod +def serialize(chat_ctx: ChatContext) -> str: + chat_messages = [] + skipped_pc = 0 + + for msg in chat_ctx.chat_history.messages: + # Extract content + content = extract_content(msg) + + # ✅ FILTER: Skip ephemeral patient context snapshot + if msg.role == AuthorRole.SYSTEM and content.startswith(PATIENT_CONTEXT_PREFIX): + skipped_pc += 1 + continue # ← CRITICAL: Don't persist snapshot + + chat_messages.append({...}) + + return json.dumps({"chat_history": chat_messages, ...}) +``` + +**Filtering in Action:** + +```python +# In-memory (what agents see): +[ + [0] SYSTEM: "PATIENT_CONTEXT_JSON: {...}", ← Ephemeral + [1] USER: "review patient_4", + [2] ASSISTANT: "Plan: ..." +] + +# Persisted (what gets saved): +[ + [0] USER: "review patient_4", ← Snapshot filtered out + [1] ASSISTANT: "Plan: ..." +] +``` + +**Why This Is Critical:** +- **Ephemeral only** - Snapshot never pollutes storage +- **Registry as truth** - Active patient always from registry, not stale snapshot +- **Fresh every turn** - Rebuilt from registry each time +- **No staleness** - Can't have outdated patient context + +--- + +### 3. Healthcare Agents + +**File:** `src/healthcare_agents/agent.py` + +#### Message Structure Change + +##### Before: +```python +response_message = ChatMessageContent( + role=AuthorRole.ASSISTANT, + name=agent.name, + content=response_dict.get("text", "") # ❌ Direct content +) +``` + +##### After: +```python +response_message = ChatMessageContent( + role=AuthorRole.ASSISTANT, + name=agent.name, + items=[TextContent(text=response_dict.get("text", ""))] # ✅ Structured +) +``` + +**Why This Change:** +- **Consistent structure** - Aligns with Semantic Kernel message format +- **Enables filtering** - Accessor can reliably detect snapshot messages +- **Required for serialization** - Accessor expects `items` structure + +--- + +### 4. Group Chat Orchestration + +**File:** `src/group_chat.py` + +#### A. CONFIRMATION GATE (Selection Prompt) + +Added to selection function prompt: + +```python +""" +- **CONFIRMATION GATE (PLAN ONLY)**: + If (a) the MOST RECENT message is from {facilitator} AND + (b) it contains a multi-step plan (look for "Plan", "plan:", + numbered steps like "1.", "2.", or bullet lines) AND + (c) no user message has appeared AFTER that plan yet, + then do NOT advance to another agent. Wait for a user reply. 
+""" +``` + +**Before:** +``` +User: "review patient_4" +Orchestrator: "Plan: 1. PatientHistory, 2. Radiology..." +PatientHistory: [starts immediately] ❌ +``` + +**After:** +``` +User: "review patient_4" +Orchestrator: "Plan: 1. PatientHistory, 2. Radiology..." +Selection: 🛑 GATE TRIGGERED - wait for user +Orchestrator: "Does this plan work for you?" +User: "yes proceed" +Selection: Gate lifted → PatientHistory ✅ +``` + +#### B. Termination Overrides (Deterministic) + +Added to `evaluate_termination()`: + +```python +def evaluate_termination(result): + # NEW: Pre-check before LLM evaluation + try: + last_text = extract_last_message_text(chat_ctx) + + # Override 1: Ignore patient context snapshots + if last_text.lower().startswith("patient_context_json"): + return False # Continue + + # Override 2: Ignore internal handoffs + if "back to you" in last_text.lower(): + return False # Continue + except Exception: + pass + + # Fall back to LLM verdict + rule = ChatRule.model_validate_json(str(result.value[0])) + return rule.verdict == "yes" +``` + +**Why These Changes:** +- **Prevents facilitator loops** - Waits for user confirmation before executing plan +- **Prevents false termination** - System messages don't end conversation +- **Allows agent handoffs** - "back to you X" continues orchestration +- **Deterministic** - Python logic for unambiguous cases (faster, more reliable) + +--- + +### 5. Entry Points (Assistant Bot & API Routes) + +**Files:** `src/bots/assistant_bot.py`, `src/routes/api/chats.py` + +Both entry points follow the **identical pattern**: + +#### Complete Turn Flow + +```python +async def on_message_activity(self, turn_context: TurnContext): + conversation_id = turn_context.activity.conversation.id + raw_user_text = extract_user_text(turn_context) + + # STEP 1: Load session context + chat_ctx = await chat_context_accessor.read(conversation_id, None) + + # STEP 2: Check clear command + if await self._handle_clear_command(raw_user_text, chat_ctx, conversation_id): + await send_cleared_message() + return + + # STEP 3: Patient context decision + decision, timing = await self.patient_context_service.decide_and_apply( + raw_user_text, chat_ctx + ) + + # STEP 4: Handle NEEDS_PATIENT_ID + if decision == "NEEDS_PATIENT_ID": + await send_error_message("I need a patient ID like 'patient_4'") + return + + # STEP 5: Load isolated patient history + if chat_ctx.patient_id: + isolated = await chat_context_accessor.read(conversation_id, chat_ctx.patient_id) + if isolated and isolated.chat_history.messages: + chat_ctx.chat_history = isolated.chat_history + + # STEP 5.5: Inject fresh ephemeral snapshot + chat_ctx.chat_history.messages = strip_old_snapshots(chat_ctx.chat_history.messages) + + snapshot = { + "conversation_id": chat_ctx.conversation_id, + "patient_id": chat_ctx.patient_id, + "all_patient_ids": sorted(chat_ctx.patient_contexts.keys()), + "generated_at": datetime.utcnow().isoformat() + "Z" + } + snapshot_msg = create_system_message(snapshot) + chat_ctx.chat_history.messages.insert(0, snapshot_msg) + + # STEP 6: Group chat + (chat, chat_ctx) = create_group_chat(app_context, chat_ctx) + chat_ctx.chat_history.add_user_message(raw_user_text) + + # STEP 7: Process chat + await self.process_chat(chat, chat_ctx, turn_context) + + # STEP 8: Save (snapshot auto-filtered) + await chat_context_accessor.write(chat_ctx) +``` + +**Key Additions:** +1. **Patient context service** - Initialized in `__init__()` +2. **Enhanced clear** - `_handle_clear_command()` bulk archives +3. 
**Patient decision** - `decide_and_apply()` orchestration +4. **Isolated load** - Swaps in patient-specific history +5. **Ephemeral snapshot** - Fresh injection every turn +6. **PT_CTX footer** - Appended to agent responses (UI only) + +--- + +## Complete Turn Flow + +### Example: Multi-Patient Session + +``` +═══════════════════════════════════════════════════════════════ +TURN 1: User mentions patient_4 +═══════════════════════════════════════════════════════════════ + +User (Teams): "@Orchestrator start tumor board for patient_4" + +[Entry Point: assistant_bot.on_message_activity] + ↓ +STEP 1: Load session_context.json + Result: Empty ChatContext(conversation_id="abc123") + ↓ +STEP 2: Check clear → NOT a clear command + ↓ +STEP 3: Patient context decision + ├─ Hydrate registry: {} (no registry yet) + ├─ Analyzer input: "start tumor board for patient_4" + ├─ Analyzer output: ACTIVATE_NEW (patient_id="patient_4") + ├─ Validation: ✅ Valid pattern, not in registry + ├─ Decision: NEW_BLANK + ├─ Action: Create PatientContext, update registry + └─ Result: patient_id="patient_4", registry written + ↓ +STEP 5: Load patient_patient_4_context.json + → File doesn't exist (first time) → Empty history + ↓ +STEP 5.5: Inject ephemeral snapshot + [0] SYSTEM: PATIENT_CONTEXT_JSON: { + "patient_id": "patient_4", + "all_patient_ids": ["patient_4"], + ... + } + ↓ +STEP 6: Add user message + [1] USER: "start tumor board for patient_4" + ↓ +STEP 7: Orchestrator responds + "Plan: + 1. *PatientHistory*: Load clinical timeline + 2. *Radiology*: Review imaging + 3. I'll compile recommendations + + Does this plan look good?" + ↓ +STEP 8: Save to patient_patient_4_context.json + (Snapshot filtered out) + +[Storage After Turn 1] +conversation_abc123/ +├── patient_patient_4_context.json ← Created +└── patient_context_registry.json ← Created + { + "active_patient_id": "patient_4", + "patient_registry": { + "patient_4": {...} + } + } + +═══════════════════════════════════════════════════════════════ +TURN 2: User confirms plan +═══════════════════════════════════════════════════════════════ + +User: "yes proceed" + +STEP 3: Patient context decision + ├─ Hydrate registry: {"patient_4": ...} + ├─ Analyzer: Short message heuristic → UNCHANGED + └─ Result: Keep patient_4 active + ↓ +STEP 5: Load patient_patient_4_context.json + Contains: Previous plan message + ↓ +STEP 5.5: Inject fresh snapshot + [0] SYSTEM: PATIENT_CONTEXT_JSON: {...} ← Fresh + [1] USER: "start tumor board..." + [2] ASSISTANT: "Plan: ..." + [3] USER: "yes proceed" ← New + ↓ +STEP 7: Orchestration + ├─ Selection: User confirmed → PatientHistory + ├─ PatientHistory: "Timeline for patient_4: ..." + │ "Back to you Orchestrator." + ├─ Termination: "back to you" detected → CONTINUE ✅ + ├─ Selection: Orchestrator + └─ Orchestrator: "Moving to step 2. *Radiology*..." + +═══════════════════════════════════════════════════════════════ +TURN 3: User switches to different patient +═══════════════════════════════════════════════════════════════ + +User: "switch to patient_15" + +STEP 3: Patient context decision + ├─ Analyzer: ACTIVATE_NEW (patient_id="patient_15") + ├─ Validation: Not in registry → NEW_BLANK + ├─ Action: Create patient_15, kernel reset + └─ Result: patient_id="patient_15" + ↓ +STEP 5: Load patient_patient_15_context.json + → File doesn't exist → Empty history + ↓ +STEP 5.5: Inject snapshot + [0] SYSTEM: PATIENT_CONTEXT_JSON: { + "patient_id": "patient_15", ← NEW ACTIVE + "all_patient_ids": ["patient_4", "patient_15"], + ... 
+ } + ↓ +STEP 7: Orchestrator + "Switched to patient_15. What would you like to review?" + +[Storage After Turn 3] +conversation_abc123/ +├── patient_patient_4_context.json ← Unchanged (isolated) +├── patient_patient_15_context.json ← Created +└── patient_context_registry.json ← Updated + { + "active_patient_id": "patient_15", ← Changed + "patient_registry": { + "patient_4": {...}, + "patient_15": {...} ← Added + } + } + +═══════════════════════════════════════════════════════════════ +TURN 4: Clear all patient contexts +═══════════════════════════════════════════════════════════════ + +User: "clear patient context" + +STEP 2: Clear command detected ✅ + ↓ +_handle_clear_command(): + ├─ Archive session_context.json + ├─ Archive patient_patient_4_context.json + ├─ Archive patient_patient_15_context.json + ├─ Archive patient_context_registry.json + ├─ All archived to: archive/20250930T164500/ + ├─ Delete all original files + ├─ Reset in-memory state + └─ Write empty session_context.json + +[Storage After Clear] +conversation_abc123/ +└── archive/ + └── 20250930T164500/ + └── conversation_abc123/ + ├── 20250930T164500_session_archived.json + ├── 20250930T164500_patient_patient_4_archived.json + ├── 20250930T164500_patient_patient_15_archived.json + └── 20250930T164500_patient_context_registry_archived.json +``` + +--- + +## Migration Benefits + +### Safety Improvements + +| Before | After | Benefit | +|--------|-------|---------| +| ❌ All patients in same history | ✅ Separate files per patient | **No cross-contamination** | +| ❌ Agent sees all patient data | ✅ Agent sees only active patient | **Data isolation** | +| ❌ Switching loses context | ✅ Switching preserves history | **Context continuity** | +| ❌ No audit trail | ✅ Registry + archives | **Compliance & debugging** | + +### User Experience Improvements + +| Before | After | Benefit | +|--------|-------|---------| +| ❌ Manual patient tracking | ✅ Automatic detection | **Reduced friction** | +| ❌ No active patient visibility | ✅ PT_CTX footer every response | **Transparency** | +| ❌ Can't work on multiple patients | ✅ Multi-patient sessions | **Workflow flexibility** | +| ❌ Facilitator loops endlessly | ✅ Confirmation gate | **Plan validation** | + +### Technical Improvements + +| Before | After | Benefit | +|--------|-------|---------| +| ❌ Single storage file | ✅ Per-patient + session + registry | **Scalability** | +| ❌ No patient awareness | ✅ Ephemeral snapshots | **Agent grounding** | +| ❌ Simple clear | ✅ Bulk archive with timestamps | **Organized history** | +| ❌ False terminations | ✅ Deterministic overrides | **Stable orchestration** | +| ❌ Hardcoded patterns | ✅ Configurable via env var | **Flexibility** | + +--- + +## Configuration + +### Customizing Patient ID Format + +Set the `PATIENT_ID_PATTERN` environment variable: + +```bash +# Default: patient_ +export PATIENT_ID_PATTERN="^patient_[0-9]+$" + +# MRN format +export PATIENT_ID_PATTERN="^mrn-[A-Z0-9]{6}$" + +# Multiple formats +export PATIENT_ID_PATTERN="^(patient_[0-9]+|mrn-[A-Z0-9]{6})$" +``` + +> [!IMPORTANT] +> When changing the pattern, update the analyzer prompt in `patient_context_analyzer.py` to match. + +--- + +## Summary + +The patient context system enables: + +1. **Multi-patient conversations** - Work on multiple patients in one session +2. **Complete isolation** - Each patient's history stored separately +3. **Automatic detection** - LLM-based intent classification +4. **Safe switching** - Kernel reset prevents cross-contamination +5. 
**Ephemeral grounding** - Fresh snapshots never persisted +6. **Registry-backed** - Single source of truth for active patient +7. **Stable orchestration** - Confirmation gates + termination overrides +8. **Organized archival** - Timestamped bulk archives for compliance + +For quick reference, see [`patient_context.md`](patient_context.md). + +--- + +**Last Updated:** September 30, 2025 +**Status:** Stable in production (`sekar/pc_poc` branch) diff --git a/src/data_models/chat_context_accessor.py b/src/data_models/chat_context_accessor.py index a1e9792..b2e5659 100644 --- a/src/data_models/chat_context_accessor.py +++ b/src/data_models/chat_context_accessor.py @@ -18,19 +18,29 @@ class ChatContextAccessor: """ - Hybrid accessor supporting session + per-patient isolation. - Ephemeral PATIENT_CONTEXT_JSON system messages are stripped (never persisted). + Accessor for reading and writing chat context to Azure Blob Storage. + + ChatContext lifecycle: + + 1. User sends a message to Agent. + 2. Agent load ChatContext from blob storage using conversation_id. + - If found, it reads the existing ChatContext from blob storage. + - Otherwise, it creates a new ChatContext with the given conversation_id. + 2. Agent sends responses to User. + 3. Save ChatContext to blob storage as `chat_context.json`. + 4. Repeat steps 1-3 for the entire conversation. + 5. User sends a "clear" message. + 6. Archive ChatHistory to the blob storage. + - Append the "clear" message to chat history. + - Save ChatContext to `{datetime}_chat_context.json`. + - Delete `chat_context.json` + 7. Hybrid accessor supporting session + per-patient isolation. + 8. Ephemeral PATIENT_CONTEXT_JSON system messages are stripped (never persisted). """ - def __init__( - self, - blob_service_client: BlobServiceClient, - container_name: str = "chat-sessions", - cognitive_services_token_provider=None, - ): + def __init__(self, blob_service_client: BlobServiceClient, container_name: str = "chat-sessions",): self.blob_service_client = blob_service_client self.container_client = blob_service_client.get_container_client(container_name) - self.cognitive_services_token_provider = cognitive_services_token_provider def get_blob_path(self, conversation_id: str, patient_id: str = None) -> str: if patient_id: diff --git a/src/group_chat.py b/src/group_chat.py index 7a266cb..e31fb2c 100644 --- a/src/group_chat.py +++ b/src/group_chat.py @@ -180,7 +180,6 @@ def _create_agent(agent_config: dict): instructions = agent_config.get("instructions") if agent_config.get("facilitator") and instructions: instructions = instructions.replace( - # >>> (unchanged logic, escaped quotes) "{{aiAgents}}", "\n\t\t".join([f"- {agent['name']}: {agent['description']}" for agent in all_agents_config])) return (CustomChatCompletionAgent(kernel=agent_kernel, @@ -222,7 +221,7 @@ def _create_agent(agent_config: dict): - **Once per turn**: Each participant can only speak once per turn. - **Default to {facilitator}**: Always default to {facilitator}. If no other participant is specified, {facilitator} goes next. - **Use best judgment**: If the rules are unclear, use your best judgment to determine who should go next, for the natural flow of the conversation. - - **CONFIRMATION GATE (PLAN ONLY)**: If (a) the MOST RECENT message is from {facilitator} AND (b) it contains a multi-step plan (look for "Plan", "plan:", numbered steps like "1.", "2.", or multiple leading "-" bullet lines) AND (c) no user message has appeared AFTER that plan yet, then do NOT advance to another agent. 
Wait for a user reply. Output {facilitator} ONLY if absolutely necessary to politely prompt the user for confirmation (do not restate the entire plan). As soon as ANY user reply appears (question, modification, or confirmation), this gate is lifted. If the user used a confirmation token (confirm, yes, proceed, continue, ok, okay, sure, sounds good, go ahead), you may advance to the next required non-facilitator agent; otherwise select the participant that best addresses the user’s reply. # >>> added confirmation gate + - **CONFIRMATION GATE (PLAN ONLY)**: If (a) the MOST RECENT message is from {facilitator} AND (b) it contains a multi-step plan (look for "Plan", "plan:", numbered steps like "1.", "2.", or multiple leading "-" bullet lines) AND (c) no user message has appeared AFTER that plan yet, then do NOT advance to another agent. Wait for a user reply. Output {facilitator} ONLY if absolutely necessary to politely prompt the user for confirmation (do not restate the entire plan). As soon as ANY user reply appears (question, modification, or confirmation), this gate is lifted. If the user used a confirmation token (confirm, yes, proceed, continue, ok, okay, sure, sounds good, go ahead), you may advance to the next required non-facilitator agent; otherwise select the participant that best addresses the user’s reply. **Output**: Give the full reasoning for your choice and the verdict. The reasoning should include careful evaluation of each rule with an explanation. The verdict should be the name of the participant who should go next. @@ -251,8 +250,8 @@ def _create_agent(agent_config: dict): Commands addressed to a specific agent should result in 'no' if there is clear identification of the agent. Commands addressed to "you" or "User" should result in 'yes'. If you are not certain, return "yes". - Ignore any system metadata or patient context snapshots such as lines starting with "PATIENT_CONTEXT_JSON". # >>> added ignore rule - Treat internal handoff phrases like "back to you " as NOT terminating (answer is still being routed). # >>> added handoff rule + Ignore any system metadata or patient context snapshots such as lines starting with "PATIENT_CONTEXT_JSON". + Treat internal handoff phrases like "back to you " as NOT terminating (answer is still being routed). EXAMPLES: - "User, can you confirm the correct patient ID?" 
=> "yes" @@ -270,7 +269,6 @@ def _create_agent(agent_config: dict): def evaluate_termination(result): logger.info(f"Termination function result: {result}") - # >>> added deterministic pre-checks to avoid premature termination on patient context or handoff try: if chat_ctx.chat_history.messages: last = chat_ctx.chat_history.messages[-1] diff --git a/src/healthcare_agents/agent.py b/src/healthcare_agents/agent.py index b3b1dba..2e52e81 100644 --- a/src/healthcare_agents/agent.py +++ b/src/healthcare_agents/agent.py @@ -26,12 +26,12 @@ class HealthcareAgentChannel(AgentChannel): def __init__(self): super().__init__() self.history: list[ChatMessageContent] = [] - logger.debug("HealthcareAgentChannel initialized") + logger.debug("HealthcareAgentChannel initialized.") @override async def receive(self, history: list[ChatMessageContent]) -> None: for message in history: - logger.debug("Received message: %s", message.content) + logger.debug("[history] Received message: %s", message.content) if message.content.strip() != "": self.history.append(message) @@ -60,17 +60,17 @@ async def invoke(self, agent: "HealthcareAgent") -> AsyncIterable[tuple[bool, Ch @override async def invoke_stream(self, agent: "HealthcareAgent", history: "list[ChatMessageContent]"): - raise NotImplementedError("invoke_stream is not implemented yet") + raise NotImplementedError("invoke_stream is not implemented yet.") @override async def get_history(self) -> AsyncIterable[ChatMessageContent]: - logger.debug("Getting history from HealthcareAgentChannel") + logger.debug("Getting history from HealthcareAgentChannel.") for message in reversed(self.history): yield message @override async def reset(self) -> None: - logger.debug("Resetting HealthcareAgentChannel") + logger.debug("Resetting HealthcareAgentChannel.") self.history.clear() @@ -91,14 +91,14 @@ def __init__(self, self._client: HealthcareAgentServiceClient = None if not name: - raise ValueError("Agent name is required") + raise ValueError("Agent name is required.") if not chat_ctx: - raise ValueError("Chat context is required") + raise ValueError("Chat context is required.") if not app_ctx: - raise ValueError("Application context is required") + raise ValueError("Application context is required.") # Initialize the HealthcareAgentServiceClient - logger.debug("Initializing HealthcareAgentServiceClient") + logger.debug("Initializing HealthcareAgentServiceClient.") self._client: HealthcareAgentServiceClient = HealthcareAgentServiceClient( agent_name=name, chat_ctx=chat_ctx, @@ -117,14 +117,14 @@ def __init__(self, if name in self._chat_ctx.healthcare_agents: self._client.set_conversation_id( self._chat_ctx.healthcare_agents[name].get("conversation_id", None)) - logger.debug("HealthcareAgent initialized: %s", name) + logger.debug(f"HealthcareAgent initialized: {name}") @property def client(self): return self._client async def create_channel(self) -> AgentChannel: - logger.debug("Creating HealthcareAgentChannel") + logger.debug("Creating HealthcareAgentChannel.") return HealthcareAgentChannel() @override @@ -144,14 +144,14 @@ async def invoke(self, *args, **kwargs) -> AsyncIterable[ChatMessageContent]: message = kwargs.get("message") logger.debug("Invoking HealthcareAgent with message: %s", message) if not message: - raise AgentInvokeException("Message is required to invoke the agent") + raise AgentInvokeException("Message is required to invoke the agent.") response = await self.get_response(message) yield response @override async def invoke_stream(self, *args, **kwargs) -> 
AsyncIterable[ChatMessageContent]: """Invoke the agent as a stream.""" - raise NotImplementedError("invoke_stream is not implemented") + raise NotImplementedError("invoke_stream is not implemented.") async def get_attachments(self) -> list[dict]: """Get the attachments from the conversation history.""" diff --git a/src/services/patient_context_service.py b/src/services/patient_context_service.py index 1367d51..08609d2 100644 --- a/src/services/patient_context_service.py +++ b/src/services/patient_context_service.py @@ -6,6 +6,7 @@ import time from datetime import datetime, timezone from typing import Literal +import os from data_models.chat_context import ChatContext, PatientContext from data_models.patient_context_models import TimingInfo @@ -15,7 +16,7 @@ # Exported constants / types PATIENT_CONTEXT_PREFIX = "PATIENT_CONTEXT_JSON" -PATIENT_ID_PATTERN = re.compile(r"^patient_[0-9]+$") +PATIENT_ID_PATTERN = re.compile(os.getenv("PATIENT_ID_PATTERN", r"^patient_[0-9]+$")) Decision = Literal[ "NONE", "UNCHANGED", From 1d7bbbd0cd92ed85ae472fac0577d08ad5c97b74 Mon Sep 17 00:00:00 2001 From: vvenglaturmsft Date: Wed, 1 Oct 2025 16:23:23 +0000 Subject: [PATCH 20/20] updated comprehensive doc --- docs/patient_context_comprehensive.md | 1283 +++++++++++++------------ 1 file changed, 658 insertions(+), 625 deletions(-) diff --git a/docs/patient_context_comprehensive.md b/docs/patient_context_comprehensive.md index d214346..94a41d6 100644 --- a/docs/patient_context_comprehensive.md +++ b/docs/patient_context_comprehensive.md @@ -1,142 +1,139 @@ -# Patient Context Management - Comprehensive Guide +# Patient Context Management - Technical Guide -This document provides a complete analysis of the **Patient Context Management System** migration from a single-conversation model to a multi-patient, registry-backed architecture with ephemeral snapshot grounding. - -> [!IMPORTANT] -> This is a technical deep-dive document. For quick reference, see [`patient_context.md`](patient_context.md). +This document explains how the Healthcare Agent Orchestrator now handles **multiple patients in a single conversation** using a registry-backed architecture with ephemeral snapshots. --- ## Table of Contents -- [Executive Summary](#executive-summary) -- [Architecture Overview](#architecture-overview) +- [What Changed and Why](#what-changed-and-why) +- [How It Works Now](#how-it-works-now) - [New Components](#new-components) - [Modified Components](#modified-components) -- [Complete Turn Flow](#complete-turn-flow) -- [Migration Benefits](#migration-benefits) +- [Step-by-Step Turn Flow](#step-by-step-turn-flow) +- [Configuration](#configuration) --- -## Executive Summary +## What Changed and Why -The Healthcare Agent Orchestrator has been enhanced with a **registry-backed, ephemeral snapshot architecture** to enable multi-patient conversational state management within a single conversation. 
+### The Problem -### Key Achievements +**Before**, the system could only handle **one context per conversation**: -| Capability | Before | After | -|------------|--------|-------| -| **Patient Isolation** | Single conversation = single context | Multiple patients with isolated histories | -| **Patient Switching** | Not supported | Seamless switching with kernel reset | -| **Storage Model** | Single `chat_context.json` | Per-patient files + session + registry | -| **Agent Grounding** | No patient awareness | Ephemeral snapshot each turn | -| **Clear Operation** | Simple archive | Bulk archive (session + all patients + registry) | -| **Patient Detection** | Manual/hardcoded | Automatic LLM-based classifier | -| **Orchestration** | Facilitator loops, false terminations | Confirmation gate + termination overrides | +``` +❌ All messages in one file (chat_context.json) +❌ No way to switch between patients +❌ Agents had no idea which patient they were discussing +❌ "Clear" just archived one file +``` ---- +**Example of the problem:** +``` +User: "Review patient_4's labs" +[Agent responds about patient_4] +User: "Now check patient_15's imaging" +[Agent gets confused - both patients' messages mixed together] +``` -## Architecture Overview - -### Old Architecture (Single Context Model) - -``` -┌─────────────────────────────────────────────────────────┐ -│ User Interface │ -│ (Teams Bot / WebSocket API) │ -└──────────────────────┬──────────────────────────────────┘ - │ - ↓ - ┌───────────────────────┐ - │ ChatContext │ - │ - conversation_id │ - │ - chat_history │ ← Single history - │ - patient_id (unused)│ - └───────────┬───────────┘ - │ - ↓ - ┌───────────────────────┐ - │ Storage (Blob) │ - │ {conv_id}/ │ - │ chat_context.json │ ← One file - └───────────────────────┘ -``` - -**Problems:** -- ❌ No patient isolation (all messages in one history) -- ❌ No patient switching capability -- ❌ No patient awareness in agents -- ❌ Facilitator loops (no confirmation gate) -- ❌ False terminations (snapshot messages confused LLM) - -### New Architecture (Registry-Backed Ephemeral Model) - -``` -┌─────────────────────────────────────────────────────────────────────┐ -│ User Interface │ -│ (Teams Bot / WebSocket API) │ -└───────────────────────────┬─────────────────────────────────────────┘ - │ - ↓ - ┌───────────────────────────────────────────┐ - │ PatientContextService │ - │ - decide_and_apply() │ - │ - Registry hydration │ - │ - Silent restore │ - │ - Validation │ - └──────────┬────────────────────────────────┘ - │ - ┌────────────┴─────────────┐ - │ │ - ↓ ↓ -┌─────────────────┐ ┌──────────────────────┐ -│ PatientContext │ │ Registry Accessor │ -│ Analyzer │ │ (Source of Truth) │ -│ - LLM Classifier│ │ │ -│ - Structured │ │ registry.json: │ -│ Output │ │ - active_patient_id │ -│ - Intent │ │ - patient_registry │ -│ Detection │ │ map │ -└─────────────────┘ └──────────┬───────────┘ - │ - ┌─────────────┴──────────────────────┐ - │ │ - ↓ ↓ - ┌──────────────────┐ ┌───────────────────────┐ - │ ChatContext │ │ Storage (Blob) │ - │ - conversation_id│ │ {conv_id}/ │ - │ - patient_id │ │ session_context.json│ - │ - patient_contexts│ │ patient_4_context │ - │ - chat_history │◄────────────┤ patient_15_context │ - └────────┬─────────┘ │ registry.json │ - │ └───────────────────────┘ - │ - ↓ - ┌──────────────────────────────────┐ - │ Ephemeral Snapshot Injection │ - │ [0] SYSTEM: PATIENT_CONTEXT_JSON│ ← Generated each turn - │ [1] USER: message │ - │ [2] ASSISTANT: response │ - └──────────┬───────────────────────┘ - │ - ↓ - 
┌──────────────────────────┐ - │ Group Chat │ - │ - Selection (w/ gate) │ - │ - Termination (w/ │ - │ overrides) │ - │ - Agents (see snapshot) │ - └──────────────────────────┘ +### The Solution + +**Now**, the system supports **multiple patients with isolated histories**: + +``` +✅ Each patient gets their own history file +✅ Registry tracks which patient is currently active +✅ Agents see a "snapshot" showing current patient context +✅ Switch between patients seamlessly +✅ Clear archives everything properly ``` -**Benefits:** -- ✅ **Per-patient isolation** - Separate history files -- ✅ **Multi-patient roster** - Registry tracks all patients in session -- ✅ **Ephemeral grounding** - Fresh snapshot each turn (never persisted) -- ✅ **Automatic detection** - LLM analyzer classifies intent -- ✅ **Safe switching** - Kernel reset on patient change -- ✅ **Robust clear** - Bulk archive with timestamp folders -- ✅ **Stable orchestration** - Confirmation gate + deterministic overrides +**How it works now:** +``` +User: "Review patient_4's labs" +[System activates patient_4, creates patient_4_context.json] +[Agent sees snapshot: "You're working on patient_4"] + +User: "Now check patient_15's imaging" +[System switches to patient_15, creates patient_15_context.json] +[Agent sees new snapshot: "You're now working on patient_15"] +[patient_4's history is safely stored and separate] +``` + +### Quick Comparison + +| Feature | Before | After | +|---------|--------|-------| +| **Storage** | 1 file for everything | Separate file per patient + registry | +| **Patient Switching** | Not supported | Automatic detection and switching | +| **Agent Awareness** | No idea about patient context | Fresh snapshot each turn | +| **Clear Command** | Archives 1 file | Archives all patient files + registry | +| **Patient Detection** | Manual/hardcoded | LLM automatically detects intent | + +--- + +## How It Works Now + +### Architecture Overview + +``` +User Message + ↓ +┌───────────────────────────────────────┐ +│ 1. Load Registry │ ← "Which patient is active?" +│ (patient_context_registry.json) │ +└─────────────┬─────────────────────────┘ + ↓ +┌───────────────────────────────────────┐ +│ 2. Analyze User Intent │ ← LLM determines what user wants +│ (PatientContextAnalyzer) │ "Review patient_4" = NEW +│ │ "Switch to patient_15" = SWITCH +│ │ "What's the diagnosis?" = UNCHANGED +└─────────────┬─────────────────────────┘ + ↓ +┌───────────────────────────────────────┐ +│ 3. Apply Decision │ ← Update registry, load history +│ (PatientContextService) │ +└─────────────┬─────────────────────────┘ + ↓ +┌───────────────────────────────────────┐ +│ 4. Load Patient-Specific History │ ← Get isolated history +│ (patient_4_context.json) │ +└─────────────┬─────────────────────────┘ + ↓ +┌───────────────────────────────────────┐ +│ 5. Inject Fresh Snapshot │ ← Add system message +│ "PATIENT_CONTEXT_JSON: {...}" │ (agents see this) +│ (EPHEMERAL - never saved) │ +└─────────────┬─────────────────────────┘ + ↓ +┌───────────────────────────────────────┐ +│ 6. Group Chat Orchestration │ ← Agents process with context +│ (Agents see snapshot + history) │ +└─────────────┬─────────────────────────┘ + ↓ +┌───────────────────────────────────────┐ +│ 7. 
Save History │ ← Snapshot is filtered out +│ (only real messages saved) │ +└───────────────────────────────────────┘ +``` + +### Storage Structure + +``` +conversation_abc123/ +├── session_context.json ← Messages before any patient mentioned +├── patient_patient_4_context.json ← patient_4's isolated history +├── patient_patient_15_context.json ← patient_15's isolated history +└── patient_context_registry.json ← SOURCE OF TRUTH + { + "active_patient_id": "patient_4", + "patient_registry": { + "patient_4": { "created_at": "...", "updated_at": "..." }, + "patient_15": { "created_at": "...", "updated_at": "..." } + } + } +``` --- @@ -146,50 +143,44 @@ The Healthcare Agent Orchestrator has been enhanced with a **registry-backed, ep **File:** `src/services/patient_context_analyzer.py` -**Purpose:** LLM-based structured output classifier that determines patient context intent from user messages. +**What it does:** Uses an LLM to automatically detect what the user wants to do with patient context. -**Key Features:** +**Before:** +```python +# No automatic detection - had to manually parse or hardcode +if "patient" in message: + # Do something... but what? +``` +**After:** ```python -class PatientContextAnalyzer: - """ - Analyzes user messages to determine patient context actions. - Uses Azure OpenAI with structured output for reliable classification. - """ - - async def analyze_patient_context( - self, - user_text: str, - prior_patient_id: str | None, - known_patient_ids: list[str] - ) -> PatientContextDecision: - """ - Returns structured decision: - - action: NONE | ACTIVATE_NEW | SWITCH_EXISTING | UNCHANGED | CLEAR - - patient_id: Extracted ID (for ACTIVATE_NEW/SWITCH_EXISTING only) - - reasoning: Brief explanation - """ -``` - -**Decision Examples:** - -| User Input | Context | Action | patient_id | Reasoning | -|------------|---------|--------|------------|-----------| -| `"review patient_4"` | No active | `ACTIVATE_NEW` | `"patient_4"` | User explicitly requests patient_4 | -| `"switch to patient_15"` | patient_4 active | `SWITCH_EXISTING` | `"patient_15"` | Explicit switch requested | -| `"what's the diagnosis?"` | patient_4 active | `UNCHANGED` | `null` | Follow-up question for active patient | -| `"clear patient"` | patient_4 active | `CLEAR` | `null` | User requests context reset | - -**Heuristic Skip:** -- Messages ≤ 15 characters without keywords (`patient`, `clear`, `switch`) bypass the analyzer for efficiency -- Returns `UNCHANGED` if patient active, `NONE` otherwise - -**Why This Component:** -- **Automatic** - No manual parsing/regex -- **Contextual** - Considers prior state and known patients -- **Reliable** - Structured output ensures consistent format -- **Explainable** - Reasoning field aids debugging -- **Efficient** - Heuristic skip for short messages +# LLM analyzes the message and returns structured decision +decision = await analyzer.analyze_patient_context( + user_text="review patient_4", + prior_patient_id=None, + known_patient_ids=[] +) +# Returns: PatientContextDecision( +# action="ACTIVATE_NEW", +# patient_id="patient_4", +# reasoning="User explicitly requests patient_4" +# ) +``` + +**Examples:** + +| User Input | Current Patient | Decision | Explanation | +|------------|----------------|----------|-------------| +| `"review patient_4"` | None | `ACTIVATE_NEW` | Start working on patient_4 | +| `"switch to patient_15"` | patient_4 | `SWITCH_EXISTING` | Change to patient_15 | +| `"what's the diagnosis?"` | patient_4 | `UNCHANGED` | Continue with patient_4 | +| `"clear 
patient"` | patient_4 | `CLEAR` | Reset everything | + +**Key Features:** +- ✅ Automatic detection (no manual parsing) +- ✅ Considers current state +- ✅ Structured output (reliable format) +- ✅ Efficiency: skips LLM for short messages like "ok" or "yes" --- @@ -197,79 +188,57 @@ class PatientContextAnalyzer: **File:** `src/services/patient_context_service.py` -**Purpose:** Orchestrates the complete patient context lifecycle - hydration, analysis, validation, and application. +**What it does:** Orchestrates the entire patient context lifecycle - deciding what to do and making it happen. -**Key Methods:** +**Before:** +```python +# Logic was scattered across multiple files +# No central place handling patient context +``` +**After:** ```python -class PatientContextService: - """ - Manages patient context lifecycle: - - Registry hydration - - Silent restoration - - Analyzer invocation - - Decision validation & application - - Side effects (kernel reset, archival) - """ - - async def decide_and_apply( - self, - user_text: str, - chat_ctx: ChatContext - ) -> tuple[Decision, TimingInfo]: - """ - Main orchestration method. Returns: - - Decision: Final service decision - - TimingInfo: Performance metrics - """ -``` - -**Decision Pipeline:** - -``` -User Text - ↓ -1. Hydrate Registry → chat_ctx.patient_contexts - ↓ -2. Silent Restore Attempt (if no active patient) - ↓ -3. Heuristic Check (skip analyzer if short message) - ↓ -4. Analyzer Invocation (if not skipped) - ↓ -5. Validation & Transformation: - - ACTIVATE_NEW + new ID → NEW_BLANK - - ACTIVATE_NEW + exists → SWITCH_EXISTING - - ACTIVATE_NEW + invalid → NEEDS_PATIENT_ID - - SWITCH_EXISTING + invalid → NEEDS_PATIENT_ID - - CLEAR → archive + reset - ↓ -6. Apply Side Effects: - - Kernel reset (if patient change) - - Registry update - - Archive (if clear) - ↓ -7. Return (Decision, TimingInfo) +# One method handles everything +decision, timing = await service.decide_and_apply( + user_text="switch to patient_15", + chat_ctx=chat_context +) +# Service handles: +# - Loading registry +# - Calling analyzer +# - Validating decision +# - Updating registry +# - Resetting kernel (if switching) ``` -**Service Decisions:** +**Decision Flow:** ``` -"NONE" - No patient context change -"UNCHANGED" - Keep current patient -"NEW_BLANK" - Activate new patient (reinterpreted ACTIVATE_NEW) -"SWITCH_EXISTING" - Switch to known patient -"CLEAR" - Archive all and reset -"RESTORED_FROM_STORAGE" - Silent reactivation from registry -"NEEDS_PATIENT_ID" - User intent unclear, need valid ID +User says: "switch to patient_15" + ↓ +1. Load registry → "patient_4 is active" + ↓ +2. Ask analyzer → "SWITCH_EXISTING to patient_15" + ↓ +3. Validate → "patient_15 matches pattern, exists in registry" + ↓ +4. 
Apply: + - Update registry: active = patient_15 + - Reset kernel (prevents cross-contamination) + - Return decision: "SWITCH_EXISTING" ``` -**Why This Component:** -- **Centralized orchestration** - Single responsibility for patient lifecycle -- **Consistent validation** - Regex pattern enforced (`PATIENT_ID_PATTERN`) -- **Registry authority** - Always syncs with source of truth -- **Performance tracking** - TimingInfo for monitoring -- **Separation of concerns** - Service doesn't inject snapshots (caller responsibility) +**Service Decisions:** + +| Decision | Meaning | +|----------|---------| +| `NONE` | No patient context needed | +| `UNCHANGED` | Keep current patient active | +| `NEW_BLANK` | Activate a new patient (first time) | +| `SWITCH_EXISTING` | Switch to a known patient | +| `CLEAR` | Archive everything and reset | +| `RESTORED_FROM_STORAGE` | Silently reactivated from registry | +| `NEEDS_PATIENT_ID` | User intent unclear, need valid ID | --- @@ -277,9 +246,9 @@ User Text **File:** `src/data_models/patient_context_registry_accessor.py` -**Purpose:** Manages persistence of the patient context registry (source of truth). +**What it does:** Manages the **source of truth** file that tracks which patient is active and which patients exist. -**Registry Structure:** +**Registry File Structure:** ```json { @@ -288,14 +257,12 @@ User Text "patient_4": { "patient_id": "patient_4", "facts": {}, - "conversation_id": "19:abc-123-def@thread.tacv2", "created_at": "2025-09-30T16:30:00.000Z", "updated_at": "2025-09-30T16:45:00.000Z" }, "patient_15": { "patient_id": "patient_15", "facts": {}, - "conversation_id": "19:abc-123-def@thread.tacv2", "created_at": "2025-09-30T16:32:00.000Z", "updated_at": "2025-09-30T16:40:00.000Z" } @@ -303,136 +270,83 @@ User Text } ``` -**Key Methods:** - -```python -class PatientContextRegistryAccessor: - async def read_registry( - self, - conversation_id: str - ) -> tuple[dict, str | None]: - """Returns (patient_registry, active_patient_id)""" - - async def write_registry( - self, - conversation_id: str, - patient_registry: dict, - active_patient_id: str | None - ) -> None: - """Persists registry to patient_context_registry.json""" - - async def archive_registry( - self, - conversation_id: str - ) -> None: - """Archives registry during clear operation""" -``` - -**Why This Component:** -- **Source of truth** - Registry is authoritative for active patient -- **Roster management** - Tracks all patients in session -- **Extensible facts** - Can store patient-specific metadata -- **Audit trail** - Timestamps for compliance -- **Archival support** - Clean clear operations +**Why this exists:** +- ✅ Single source of truth for "which patient is active" +- ✅ Tracks all patients in the session (roster) +- ✅ Supports future features (facts, metadata) +- ✅ Clean archival during clear operations --- -### 4. PatientContext Data Models +### 4. Data Models **File:** `src/data_models/patient_context_models.py` -**Purpose:** Type-safe models for patient context operations. +**What it does:** Type-safe models for all patient context operations. 
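+
+As a bridge between the registry file above and the models defined next, here is a minimal sketch of what registry hydration could look like — the `hydrate` helper and the simplified `PatientContext` constructor are assumptions for illustration, not the actual accessor API:
+
+```python
+class PatientContext:  # simplified stand-in for the model defined below
+    def __init__(self, patient_id, facts=None, created_at=None, updated_at=None):
+        self.patient_id = patient_id
+        self.facts = facts or {}
+        self.created_at = created_at
+        self.updated_at = updated_at
+
+def hydrate(registry: dict):
+    """Turn the persisted registry JSON into in-memory patient contexts."""
+    roster = {
+        pid: PatientContext(
+            patient_id=pid,
+            facts=entry.get("facts", {}),
+            created_at=entry.get("created_at"),
+            updated_at=entry.get("updated_at"),
+        )
+        for pid, entry in registry.get("patient_registry", {}).items()
+    }
+    return roster, registry.get("active_patient_id")
+
+registry_json = {
+    "active_patient_id": "patient_4",
+    "patient_registry": {
+        "patient_4": {"patient_id": "patient_4", "facts": {}},
+        "patient_15": {"patient_id": "patient_15", "facts": {}},
+    },
+}
+patient_contexts, active = hydrate(registry_json)
+# chat_ctx.patient_contexts = patient_contexts; chat_ctx.patient_id = active
+assert active == "patient_4" and set(patient_contexts) == {"patient_4", "patient_15"}
+```
+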
**Key Models:** ```python +# Represents a patient's context class PatientContext: - """Represents a patient's context within a conversation.""" patient_id: str - facts: dict = field(default_factory=dict) - conversation_id: Optional[str] = None - created_at: Optional[datetime] = None - updated_at: Optional[datetime] = None - + facts: dict = {} + created_at: datetime + updated_at: datetime +# LLM's structured decision class PatientContextDecision: - """Structured output from PatientContextAnalyzer.""" - action: Literal["NONE", "ACTIVATE_NEW", "SWITCH_EXISTING", "UNCHANGED", "CLEAR"] - patient_id: Optional[str] = None + action: str # "NONE" | "ACTIVATE_NEW" | "SWITCH_EXISTING" | ... + patient_id: Optional[str] reasoning: str - +# Performance tracking class TimingInfo: - """Performance metrics for patient context operations.""" - analyzer_ms: Optional[float] = None - storage_fallback_ms: Optional[float] = None - service_total_ms: Optional[float] = None + analyzer_ms: float + service_total_ms: float ``` -**Why These Models:** -- **Type safety** - Catches errors at development time -- **Documentation** - Clear contracts for each component -- **Extensibility** - Easy to add new fields (e.g., `facts`) -- **Structured output** - Enforces LLM output format -- **Observability** - Timing metrics for monitoring - --- ## Modified Components -### 1. ChatContext Data Model +### 1. ChatContext (Data Model) **File:** `src/data_models/chat_context.py` -#### Before: +**What changed:** Added fields to track active patient and multi-patient roster. +**Before:** ```python class ChatContext: - def __init__(self, conversation_id: str): - self.conversation_id = conversation_id - self.chat_history = ChatHistory() - self.patient_id = None # ← Unused field - # ... other fields + conversation_id: str + chat_history: ChatHistory + patient_id: str = None # ❌ Existed but never used ``` -#### After: - +**After:** ```python class ChatContext: - def __init__(self, conversation_id: str): - self.conversation_id = conversation_id - self.chat_history = ChatHistory() - self.patient_id = None # ← NOW USED: Active patient pointer - self.patient_contexts: Dict[str, PatientContext] = {} # ✅ NEW: Multi-patient roster - # ... other fields + conversation_id: str + chat_history: ChatHistory + patient_id: str = None # ✅ NOW USED: Points to active patient + patient_contexts: Dict[str, PatientContext] = {} # ✅ NEW: Roster of all patients ``` -**Key Changes:** - -| Field | Before | After | -|-------|--------|-------| -| `patient_id` | Unused | **Active patient pointer** (set by service) | -| `patient_contexts` | ❌ N/A | ✅ **Dict[str, PatientContext]** - roster of all patients | -| `chat_history` | Single history | **Swapped per-patient** (loaded from isolated files) | - -**Lifecycle Example:** +**Example:** ```python -# Turn 1: User mentions patient_4 -chat_ctx.patient_id = None -chat_ctx.patient_contexts = {} - ↓ decide_and_apply() +# Turn 1: Mention patient_4 chat_ctx.patient_id = "patient_4" chat_ctx.patient_contexts = { - "patient_4": PatientContext(patient_id="patient_4", ...) + "patient_4": PatientContext(...) } -# Turn 2: User switches to patient_15 - ↓ decide_and_apply() +# Turn 5: Switch to patient_15 chat_ctx.patient_id = "patient_15" chat_ctx.patient_contexts = { - "patient_4": ..., - "patient_15": PatientContext(patient_id="patient_15", ...) + "patient_4": PatientContext(...), + "patient_15": PatientContext(...) 
} ``` @@ -442,95 +356,92 @@ chat_ctx.patient_contexts = { **File:** `src/data_models/chat_context_accessor.py` -This is one of the **most critical** changes - the accessor now handles per-patient file routing and ephemeral snapshot filtering. +**What changed:** +1. Routes to different files based on patient +2. Filters out ephemeral snapshots when saving -#### A. `get_blob_path()` - File Routing +#### Change 1: File Routing -##### Before: +**Before:** ```python def get_blob_path(self, conversation_id: str) -> str: - return f"{conversation_id}/chat_context.json" # Single file + # ❌ Always the same file + return f"{conversation_id}/chat_context.json" ``` -##### After: +**After:** ```python def get_blob_path(self, conversation_id: str, patient_id: str = None) -> str: + # ✅ Different file per patient if patient_id: return f"{conversation_id}/patient_{patient_id}_context.json" return f"{conversation_id}/session_context.json" ``` -**Storage Structure:** +**Result:** ``` BEFORE: conversation_123/ - └── chat_context.json ← All messages + └── chat_context.json ← Everything mixed together AFTER: conversation_123/ - ├── session_context.json ← Session-level (no patient) - ├── patient_patient_4_context.json ← patient_4's history - ├── patient_patient_15_context.json ← patient_15's history - └── patient_context_registry.json ← Source of truth + ├── session_context.json ← Pre-patient messages + ├── patient_patient_4_context.json ← Isolated history + └── patient_patient_15_context.json← Isolated history ``` -#### B. `serialize()` - Ephemeral Snapshot Filtering - -This is **CRITICAL** - ensures snapshots never get persisted. +#### Change 2: Snapshot Filtering (CRITICAL) -##### Before: +**Before:** ```python -@staticmethod def serialize(chat_ctx: ChatContext) -> str: + # ❌ Saves everything including snapshots return json.dumps({ - "chat_history": chat_ctx.chat_history.serialize(), # Direct + "chat_history": chat_ctx.chat_history.serialize() }) ``` -##### After: +**After:** ```python -@staticmethod def serialize(chat_ctx: ChatContext) -> str: chat_messages = [] - skipped_pc = 0 for msg in chat_ctx.chat_history.messages: - # Extract content content = extract_content(msg) - # ✅ FILTER: Skip ephemeral patient context snapshot - if msg.role == AuthorRole.SYSTEM and content.startswith(PATIENT_CONTEXT_PREFIX): - skipped_pc += 1 - continue # ← CRITICAL: Don't persist snapshot + # ✅ CRITICAL: Filter out ephemeral snapshots + if msg.role == AuthorRole.SYSTEM and content.startswith("PATIENT_CONTEXT_JSON"): + continue # Don't save this - it's ephemeral chat_messages.append({...}) - return json.dumps({"chat_history": chat_messages, ...}) + return json.dumps({"chat_history": chat_messages}) ``` -**Filtering in Action:** +**Why this is critical:** ```python -# In-memory (what agents see): +# What agents see in memory: [ - [0] SYSTEM: "PATIENT_CONTEXT_JSON: {...}", ← Ephemeral - [1] USER: "review patient_4", - [2] ASSISTANT: "Plan: ..." + SYSTEM: "PATIENT_CONTEXT_JSON: {...}", ← Ephemeral snapshot + USER: "review patient_4", + ASSISTANT: "Here's the plan..." ] -# Persisted (what gets saved): +# What gets saved to disk: [ - [0] USER: "review patient_4", ← Snapshot filtered out - [1] ASSISTANT: "Plan: ..." + USER: "review patient_4", ← Snapshot filtered out! + ASSISTANT: "Here's the plan..." 
] ``` -**Why This Is Critical:** -- **Ephemeral only** - Snapshot never pollutes storage -- **Registry as truth** - Active patient always from registry, not stale snapshot -- **Fresh every turn** - Rebuilt from registry each time -- **No staleness** - Can't have outdated patient context +**Benefits:** +- ✅ Snapshot is **never** persisted +- ✅ Registry is always the source of truth +- ✅ Fresh snapshot generated every turn +- ✅ No stale data --- @@ -538,30 +449,29 @@ def serialize(chat_ctx: ChatContext) -> str: **File:** `src/healthcare_agents/agent.py` -#### Message Structure Change +**What changed:** Message structure to enable consistent filtering. -##### Before: +**Before:** ```python +# ❌ Content was just a string response_message = ChatMessageContent( role=AuthorRole.ASSISTANT, name=agent.name, - content=response_dict.get("text", "") # ❌ Direct content + content=response_text ) ``` -##### After: +**After:** ```python +# ✅ Content is structured with items response_message = ChatMessageContent( role=AuthorRole.ASSISTANT, name=agent.name, - items=[TextContent(text=response_dict.get("text", ""))] # ✅ Structured + items=[TextContent(text=response_text)] ) ``` -**Why This Change:** -- **Consistent structure** - Aligns with Semantic Kernel message format -- **Enables filtering** - Accessor can reliably detect snapshot messages -- **Required for serialization** - Accessor expects `items` structure +**Why:** Accessor needs consistent structure to reliably filter snapshots. --- @@ -569,370 +479,493 @@ response_message = ChatMessageContent( **File:** `src/group_chat.py` -#### A. CONFIRMATION GATE (Selection Prompt) +**What changed:** Added confirmation gate and termination overrides for stability. -Added to selection function prompt: +#### Change 1: Confirmation Gate +**The Problem:** +``` +User: "review patient_4" +Orchestrator: "Plan: 1. PatientHistory, 2. Radiology..." +PatientHistory: [immediately starts executing] ❌ No user confirmation! +``` + +**The Solution:** ```python +# Added to selection prompt: """ -- **CONFIRMATION GATE (PLAN ONLY)**: - If (a) the MOST RECENT message is from {facilitator} AND - (b) it contains a multi-step plan (look for "Plan", "plan:", - numbered steps like "1.", "2.", or bullet lines) AND - (c) no user message has appeared AFTER that plan yet, - then do NOT advance to another agent. Wait for a user reply. +CONFIRMATION GATE: If the most recent message is from Orchestrator +and contains a multi-step plan, WAIT for user confirmation. +Do not proceed to other agents yet. """ ``` -**Before:** +**Now it works:** ``` User: "review patient_4" -Orchestrator: "Plan: 1. PatientHistory, 2. Radiology..." -PatientHistory: [starts immediately] ❌ +Orchestrator: "Plan: 1. PatientHistory, 2. Radiology... Good?" +[🛑 GATE: Wait for user] +User: "yes" +PatientHistory: [now executes] ✅ ``` -**After:** +#### Change 2: Termination Overrides + +**The Problem:** ``` -User: "review patient_4" -Orchestrator: "Plan: 1. PatientHistory, 2. Radiology..." -Selection: 🛑 GATE TRIGGERED - wait for user -Orchestrator: "Does this plan work for you?" -User: "yes proceed" -Selection: Gate lifted → PatientHistory ✅ +Orchestrator: "PATIENT_CONTEXT_JSON: {...}" +LLM: "This looks like a conclusion" ❌ False termination! ``` -#### B. 
Termination Overrides (Deterministic) - -Added to `evaluate_termination()`: - +**The Solution:** ```python def evaluate_termination(result): - # NEW: Pre-check before LLM evaluation - try: - last_text = extract_last_message_text(chat_ctx) - - # Override 1: Ignore patient context snapshots - if last_text.lower().startswith("patient_context_json"): - return False # Continue - - # Override 2: Ignore internal handoffs - if "back to you" in last_text.lower(): - return False # Continue - except Exception: - pass + last_text = extract_last_message_text(chat_ctx) + + # ✅ Override 1: Ignore snapshots + if last_text.lower().startswith("patient_context_json"): + return False # Don't terminate - # Fall back to LLM verdict - rule = ChatRule.model_validate_json(str(result.value[0])) - return rule.verdict == "yes" + # ✅ Override 2: Ignore handoffs + if "back to you" in last_text.lower(): + return False # Don't terminate + + # Fall back to LLM evaluation + return llm_verdict() ``` -**Why These Changes:** -- **Prevents facilitator loops** - Waits for user confirmation before executing plan -- **Prevents false termination** - System messages don't end conversation -- **Allows agent handoffs** - "back to you X" continues orchestration -- **Deterministic** - Python logic for unambiguous cases (faster, more reliable) +**Benefits:** +- ✅ System messages don't end conversation +- ✅ Agent handoffs continue smoothly +- ✅ More reliable orchestration --- -### 5. Entry Points (Assistant Bot & API Routes) +### 5. Entry Points (Bot & API) **Files:** `src/bots/assistant_bot.py`, `src/routes/api/chats.py` -Both entry points follow the **identical pattern**: +**What changed:** Both entry points now follow the same pattern for patient context. -#### Complete Turn Flow +**Before:** +```python +async def on_message_activity(turn_context): + # ❌ Simple, no patient awareness + chat_ctx = await accessor.read(conversation_id) + chat_ctx.chat_history.add_user_message(user_text) + await process_chat(chat, chat_ctx) + await accessor.write(chat_ctx) +``` +**After:** ```python -async def on_message_activity(self, turn_context: TurnContext): - conversation_id = turn_context.activity.conversation.id - raw_user_text = extract_user_text(turn_context) - +async def on_message_activity(turn_context): # STEP 1: Load session context - chat_ctx = await chat_context_accessor.read(conversation_id, None) + chat_ctx = await accessor.read(conversation_id, None) - # STEP 2: Check clear command - if await self._handle_clear_command(raw_user_text, chat_ctx, conversation_id): - await send_cleared_message() - return + # STEP 2: Check for clear command + if await handle_clear_command(user_text, chat_ctx): + return # Archives everything - # STEP 3: Patient context decision - decision, timing = await self.patient_context_service.decide_and_apply( - raw_user_text, chat_ctx + # STEP 3: ✅ NEW: Patient context decision + decision, timing = await patient_service.decide_and_apply( + user_text, chat_ctx ) - # STEP 4: Handle NEEDS_PATIENT_ID + # STEP 4: ✅ NEW: Handle error cases if decision == "NEEDS_PATIENT_ID": - await send_error_message("I need a patient ID like 'patient_4'") + await send_error("I need a valid patient ID") return - # STEP 5: Load isolated patient history + # STEP 5: ✅ NEW: Load patient-specific history if chat_ctx.patient_id: - isolated = await chat_context_accessor.read(conversation_id, chat_ctx.patient_id) - if isolated and isolated.chat_history.messages: + isolated = await accessor.read(conversation_id, chat_ctx.patient_id) + if isolated: 
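            # (Simplified sketch: assumes accessor.read returns nothing for a patient
            #  whose file does not exist yet, so on a first turn the empty history is
            #  kept and no swap happens.)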
chat_ctx.chat_history = isolated.chat_history - # STEP 5.5: Inject fresh ephemeral snapshot - chat_ctx.chat_history.messages = strip_old_snapshots(chat_ctx.chat_history.messages) - - snapshot = { - "conversation_id": chat_ctx.conversation_id, - "patient_id": chat_ctx.patient_id, - "all_patient_ids": sorted(chat_ctx.patient_contexts.keys()), - "generated_at": datetime.utcnow().isoformat() + "Z" - } - snapshot_msg = create_system_message(snapshot) - chat_ctx.chat_history.messages.insert(0, snapshot_msg) - - # STEP 6: Group chat - (chat, chat_ctx) = create_group_chat(app_context, chat_ctx) - chat_ctx.chat_history.add_user_message(raw_user_text) + # STEP 6: ✅ NEW: Inject fresh ephemeral snapshot + snapshot = create_snapshot(chat_ctx) + chat_ctx.chat_history.messages.insert(0, snapshot) - # STEP 7: Process chat - await self.process_chat(chat, chat_ctx, turn_context) + # STEP 7: Add user message and process + chat_ctx.chat_history.add_user_message(user_text) + await process_chat(chat, chat_ctx) - # STEP 8: Save (snapshot auto-filtered) - await chat_context_accessor.write(chat_ctx) + # STEP 8: Save (snapshot auto-filtered by accessor) + await accessor.write(chat_ctx) ``` **Key Additions:** -1. **Patient context service** - Initialized in `__init__()` -2. **Enhanced clear** - `_handle_clear_command()` bulk archives -3. **Patient decision** - `decide_and_apply()` orchestration -4. **Isolated load** - Swaps in patient-specific history -5. **Ephemeral snapshot** - Fresh injection every turn -6. **PT_CTX footer** - Appended to agent responses (UI only) +1. ✅ Patient context service integration +2. ✅ Enhanced clear command (bulk archive) +3. ✅ Isolated history loading +4. ✅ Ephemeral snapshot injection +5. ✅ Error handling for invalid IDs --- -## Complete Turn Flow - -### Example: Multi-Patient Session - -``` -═══════════════════════════════════════════════════════════════ -TURN 1: User mentions patient_4 -═══════════════════════════════════════════════════════════════ - -User (Teams): "@Orchestrator start tumor board for patient_4" - -[Entry Point: assistant_bot.on_message_activity] - ↓ -STEP 1: Load session_context.json - Result: Empty ChatContext(conversation_id="abc123") - ↓ -STEP 2: Check clear → NOT a clear command - ↓ -STEP 3: Patient context decision - ├─ Hydrate registry: {} (no registry yet) - ├─ Analyzer input: "start tumor board for patient_4" - ├─ Analyzer output: ACTIVATE_NEW (patient_id="patient_4") - ├─ Validation: ✅ Valid pattern, not in registry - ├─ Decision: NEW_BLANK - ├─ Action: Create PatientContext, update registry - └─ Result: patient_id="patient_4", registry written - ↓ -STEP 5: Load patient_patient_4_context.json - → File doesn't exist (first time) → Empty history - ↓ -STEP 5.5: Inject ephemeral snapshot - [0] SYSTEM: PATIENT_CONTEXT_JSON: { - "patient_id": "patient_4", - "all_patient_ids": ["patient_4"], - ... - } - ↓ -STEP 6: Add user message - [1] USER: "start tumor board for patient_4" - ↓ -STEP 7: Orchestrator responds - "Plan: - 1. *PatientHistory*: Load clinical timeline - 2. *Radiology*: Review imaging - 3. I'll compile recommendations - - Does this plan look good?" - ↓ -STEP 8: Save to patient_patient_4_context.json - (Snapshot filtered out) +## Step-by-Step Turn Flow + +### Scenario: User Discusses Two Patients + +This example shows a complete conversation where the user works with two different patients. -[Storage After Turn 1] +--- + +#### **Turn 1: First time mentioning patient_4** + +**User types:** `"review patient_4 labs"` + +**What happens:** + +``` +1. 
Load session file + Result: Empty (brand new conversation) + +2. Check if user said "clear" + Result: No + +3. Patient context decision + • Load registry → No registry file exists yet + • Ask analyzer: What does user want? + Analyzer says: "ACTIVATE_NEW patient_4" + • Validate: "patient_4" matches our pattern ✅ + • Decision: NEW_BLANK (create new patient) + • Action: + - Create a PatientContext for patient_4 + - Write registry file with patient_4 as active + +4. Load patient_4's history file + Result: Doesn't exist yet (first time) → Use empty history + +5. Create and inject snapshot + Add to position [0]: SYSTEM: "PATIENT_CONTEXT_JSON: {patient_id: 'patient_4'}" + This tells agents: "You're working on patient_4" + +6. Add user's message + [1] USER: "review patient_4 labs" + +7. Agents respond + [2] ASSISTANT: "Plan: 1. PatientHistory will load labs..." + +8. Save everything + • Snapshot is automatically filtered out + • Only save: [USER message, ASSISTANT message] + • File: patient_patient_4_context.json +``` + +**Storage after Turn 1:** +``` conversation_abc123/ -├── patient_patient_4_context.json ← Created -└── patient_context_registry.json ← Created +├── patient_patient_4_context.json ← Created with 2 messages +└── patient_context_registry.json ← Created { "active_patient_id": "patient_4", "patient_registry": { "patient_4": {...} } } +``` -═══════════════════════════════════════════════════════════════ -TURN 2: User confirms plan -═══════════════════════════════════════════════════════════════ - -User: "yes proceed" - -STEP 3: Patient context decision - ├─ Hydrate registry: {"patient_4": ...} - ├─ Analyzer: Short message heuristic → UNCHANGED - └─ Result: Keep patient_4 active - ↓ -STEP 5: Load patient_patient_4_context.json - Contains: Previous plan message - ↓ -STEP 5.5: Inject fresh snapshot - [0] SYSTEM: PATIENT_CONTEXT_JSON: {...} ← Fresh - [1] USER: "start tumor board..." - [2] ASSISTANT: "Plan: ..." - [3] USER: "yes proceed" ← New - ↓ -STEP 7: Orchestration - ├─ Selection: User confirmed → PatientHistory - ├─ PatientHistory: "Timeline for patient_4: ..." - │ "Back to you Orchestrator." - ├─ Termination: "back to you" detected → CONTINUE ✅ - ├─ Selection: Orchestrator - └─ Orchestrator: "Moving to step 2. *Radiology*..." - -═══════════════════════════════════════════════════════════════ -TURN 3: User switches to different patient -═══════════════════════════════════════════════════════════════ +--- -User: "switch to patient_15" +#### **Turn 2: Continuing with patient_4** -STEP 3: Patient context decision - ├─ Analyzer: ACTIVATE_NEW (patient_id="patient_15") - ├─ Validation: Not in registry → NEW_BLANK - ├─ Action: Create patient_15, kernel reset - └─ Result: patient_id="patient_15" - ↓ -STEP 5: Load patient_patient_15_context.json - → File doesn't exist → Empty history - ↓ -STEP 5.5: Inject snapshot - [0] SYSTEM: PATIENT_CONTEXT_JSON: { - "patient_id": "patient_15", ← NEW ACTIVE - "all_patient_ids": ["patient_4", "patient_15"], - ... - } - ↓ -STEP 7: Orchestrator - "Switched to patient_15. What would you like to review?" +**User types:** `"yes proceed"` + +**What happens:** + +``` +1. Load session file + Result: Empty (still no session-level messages) + +2. Check if user said "clear" + Result: No + +3. Patient context decision + • Load registry → patient_4 is currently active + • Ask analyzer: What does user want? + Message is short ("yes proceed") + Heuristic: Skip analyzer, assume UNCHANGED + • Decision: UNCHANGED (keep patient_4) + +4. 
Load patient_4's history file + Result: Contains 2 messages from Turn 1: + [0] USER: "review patient_4 labs" + [1] ASSISTANT: "Plan: ..." -[Storage After Turn 3] +5. Create and inject fresh snapshot + Add to position [0]: SYSTEM: "PATIENT_CONTEXT_JSON: {patient_id: 'patient_4'}" + Now history looks like: + [0] SYSTEM: snapshot + [1] USER: "review patient_4 labs" + [2] ASSISTANT: "Plan: ..." + +6. Add user's new message + [3] USER: "yes proceed" + +7. Agents respond + PatientHistory: "Here are patient_4's labs... Back to you Orchestrator." + [Termination check: Sees "back to you" → Continue, don't stop] + Orchestrator: "Labs received. Moving to next step..." + +8. Save everything + • Snapshot is automatically filtered out + • Save: [4 messages total now] +``` + +--- + +#### **Turn 3: Switching to a different patient** + +**User types:** `"switch to patient_15"` + +**What happens:** + +``` +1. Load session file + Result: Empty + +2. Check if user said "clear" + Result: No + +3. Patient context decision + • Load registry → patient_4 is currently active + • Ask analyzer: What does user want? + Analyzer says: "ACTIVATE_NEW patient_15" + • Validate: "patient_15" matches pattern, NOT in registry yet + • Decision: NEW_BLANK (create new patient) + • Action: + - Create PatientContext for patient_15 + - Update registry: active = patient_15 + - ⚠️ RESET KERNEL (clear analyzer's memory to prevent patient_4 data leaking) + - Write updated registry + +4. Load patient_15's history file + Result: Doesn't exist → Use empty history + (patient_4's history remains untouched in its own file) + +5. Create and inject fresh snapshot + Add to position [0]: SYSTEM: "PATIENT_CONTEXT_JSON: { + patient_id: 'patient_15', + all_patient_ids: ['patient_4', 'patient_15'] + }" + Shows agents: "Now working on patient_15, patient_4 still exists" + +6. Add user's message + [1] USER: "switch to patient_15" + +7. Agents respond + [2] ASSISTANT: "Switched to patient_15. What would you like to review?" + +8. Save everything + • Snapshot filtered out + • Save to: patient_patient_15_context.json (NEW FILE) +``` + +**Storage after Turn 3:** +``` conversation_abc123/ -├── patient_patient_4_context.json ← Unchanged (isolated) -├── patient_patient_15_context.json ← Created -└── patient_context_registry.json ← Updated +├── patient_patient_4_context.json ← Still has patient_4's history (4 messages) +├── patient_patient_15_context.json ← NEW: patient_15's history (2 messages) +└── patient_context_registry.json ← Updated { - "active_patient_id": "patient_15", ← Changed + "active_patient_id": "patient_15", ← Changed from patient_4 "patient_registry": { "patient_4": {...}, "patient_15": {...} ← Added } } +``` + +**Key Point:** patient_4's history is completely isolated and unchanged! + +--- -═══════════════════════════════════════════════════════════════ -TURN 4: Clear all patient contexts -═══════════════════════════════════════════════════════════════ - -User: "clear patient context" - -STEP 2: Clear command detected ✅ - ↓ -_handle_clear_command(): - ├─ Archive session_context.json - ├─ Archive patient_patient_4_context.json - ├─ Archive patient_patient_15_context.json - ├─ Archive patient_context_registry.json - ├─ All archived to: archive/20250930T164500/ - ├─ Delete all original files - ├─ Reset in-memory state - └─ Write empty session_context.json - -[Storage After Clear] +#### **Turn 4: Clearing everything** + +**User types:** `"clear patient context"` + +**What happens:** + +``` +1. Load session file + Result: Empty + +2. 
Check if user said "clear" + Result: YES ✅ + +3. Clear command handler runs + • Find all files: + - patient_patient_4_context.json + - patient_patient_15_context.json + - patient_context_registry.json + + • Create archive folder: archive/20250930T164500/ + + • Copy each file to archive with timestamp: + archive/20250930T164500/conversation_abc123/ + ├── 20250930T164500_patient_patient_4_archived.json + ├── 20250930T164500_patient_patient_15_archived.json + └── 20250930T164500_patient_context_registry_archived.json + + • Delete original files + + • Reset in-memory state: + - chat_ctx.patient_id = None + - chat_ctx.patient_contexts = {} + + • Send message: "Patient context cleared" + +4-8. Skipped (already returned after clear) +``` + +**Storage after Turn 4:** +``` conversation_abc123/ └── archive/ └── 20250930T164500/ └── conversation_abc123/ - ├── 20250930T164500_session_archived.json ├── 20250930T164500_patient_patient_4_archived.json ├── 20250930T164500_patient_patient_15_archived.json └── 20250930T164500_patient_context_registry_archived.json ``` ---- - -## Migration Benefits +**Result:** Clean slate! Ready for new patients. -### Safety Improvements +--- -| Before | After | Benefit | -|--------|-------|---------| -| ❌ All patients in same history | ✅ Separate files per patient | **No cross-contamination** | -| ❌ Agent sees all patient data | ✅ Agent sees only active patient | **Data isolation** | -| ❌ Switching loses context | ✅ Switching preserves history | **Context continuity** | -| ❌ No audit trail | ✅ Registry + archives | **Compliance & debugging** | +### Visual Summary -### User Experience Improvements +``` +Turn 1: "review patient_4" + → Create patient_4 ✅ + → patient_4_context.json created + → Registry: active = patient_4 -| Before | After | Benefit | -|--------|-------|---------| -| ❌ Manual patient tracking | ✅ Automatic detection | **Reduced friction** | -| ❌ No active patient visibility | ✅ PT_CTX footer every response | **Transparency** | -| ❌ Can't work on multiple patients | ✅ Multi-patient sessions | **Workflow flexibility** | -| ❌ Facilitator loops endlessly | ✅ Confirmation gate | **Plan validation** | +Turn 2: "yes proceed" + → Continue with patient_4 ✅ + → patient_4_context.json updated (more messages) + → Registry: active = patient_4 (unchanged) -### Technical Improvements +Turn 3: "switch to patient_15" + → Create patient_15 ✅ + → patient_15_context.json created + → patient_4_context.json untouched + → Registry: active = patient_15 -| Before | After | Benefit | -|--------|-------|---------| -| ❌ Single storage file | ✅ Per-patient + session + registry | **Scalability** | -| ❌ No patient awareness | ✅ Ephemeral snapshots | **Agent grounding** | -| ❌ Simple clear | ✅ Bulk archive with timestamps | **Organized history** | -| ❌ False terminations | ✅ Deterministic overrides | **Stable orchestration** | -| ❌ Hardcoded patterns | ✅ Configurable via env var | **Flexibility** | +Turn 4: "clear patient context" + → Archive all files ✅ + → Delete originals + → Ready for fresh start +``` --- ## Configuration -### Customizing Patient ID Format +### Customizing Patient ID Pattern + +By default, the system accepts IDs like `patient_4`, `patient_15`, etc. 
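Under the hood, candidate IDs are validated against a configurable regular expression. As a rough illustration only (the helper below is hypothetical, not the project's actual function), the check might look like this:

```python
import os
import re

DEFAULT_PATIENT_ID_PATTERN = "^patient_[0-9]+$"

def is_valid_patient_id(candidate: str) -> bool:
    """Return True when the candidate matches PATIENT_ID_PATTERN (or the default)."""
    pattern = os.environ.get("PATIENT_ID_PATTERN", DEFAULT_PATIENT_ID_PATTERN)
    return re.fullmatch(pattern, candidate) is not None

# With the default pattern:
#   is_valid_patient_id("patient_4")   -> True
#   is_valid_patient_id("patient_xyz") -> False
```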
-Set the `PATIENT_ID_PATTERN` environment variable: +You can customize this via environment variable: ```bash -# Default: patient_ +# Default pattern export PATIENT_ID_PATTERN="^patient_[0-9]+$" -# MRN format -export PATIENT_ID_PATTERN="^mrn-[A-Z0-9]{6}$" +# Medical Record Number format +export PATIENT_ID_PATTERN="^MRN[0-9]{7}$" +# Accepts: MRN1234567 # Multiple formats -export PATIENT_ID_PATTERN="^(patient_[0-9]+|mrn-[A-Z0-9]{6})$" +export PATIENT_ID_PATTERN="^(patient_[0-9]+|MRN[0-9]{7})$" +# Accepts: patient_4 OR MRN1234567 ``` > [!IMPORTANT] -> When changing the pattern, update the analyzer prompt in `patient_context_analyzer.py` to match. +> If you change the pattern, update the analyzer prompt in `patient_context_analyzer.py` to match. + +--- + +## Key Concepts Explained + +### What is "Ephemeral Snapshot"? + +**Simple explanation:** A temporary system message that tells agents about the current patient. It's generated fresh every turn and **never saved**. + +```python +# Generated every turn: +snapshot = { + "patient_id": "patient_4", + "all_patient_ids": ["patient_4", "patient_15"], + "generated_at": "2025-09-30T16:45:00Z" +} + +# Injected as message: +SYSTEM: "PATIENT_CONTEXT_JSON: {snapshot}" + +# Agents see this and know: "I'm working on patient_4" + +# When saving: This message is filtered out (never persisted) +``` + +### What is "Kernel Reset"? + +**Simple explanation:** When switching patients, the analyzer's AI is reset to prevent mixing patient data. + +```python +# Without reset: +User: "review patient_4" +Analyzer: [builds understanding of patient_4] +User: "switch to patient_15" +Analyzer: [still has patient_4 context in memory] ❌ + +# With reset: +User: "review patient_4" +Analyzer: [builds understanding of patient_4] +User: "switch to patient_15" +Service: kernel.reset() # Clears analyzer memory +Analyzer: [fresh start for patient_15] ✅ +``` + +### What is "Registry as Source of Truth"? + +**Simple explanation:** The registry file always has the correct answer for "which patient is active". + +```python +# Registry file: +{ "active_patient_id": "patient_4" } + +# When loading: +1. Read registry → "patient_4 is active" +2. Load patient_4_context.json +3. Generate fresh snapshot from registry +4. Inject snapshot + +# Benefits: +- No stale snapshots +- Always accurate +- Single source of truth +``` --- ## Summary -The patient context system enables: +**What you need to remember:** -1. **Multi-patient conversations** - Work on multiple patients in one session -2. **Complete isolation** - Each patient's history stored separately -3. **Automatic detection** - LLM-based intent classification -4. **Safe switching** - Kernel reset prevents cross-contamination -5. **Ephemeral grounding** - Fresh snapshots never persisted -6. **Registry-backed** - Single source of truth for active patient -7. **Stable orchestration** - Confirmation gates + termination overrides -8. **Organized archival** - Timestamped bulk archives for compliance +1. **Each patient gets their own history file** - Complete isolation +2. **Registry tracks which patient is active** - Single source of truth +3. **LLM automatically detects patient intent** - No manual parsing +4. **Fresh snapshot injected every turn** - Never persisted +5. **Agents see current patient context** - Grounded responses +6. **Safe switching with kernel reset** - No cross-contamination +7. **Bulk archival on clear** - Organized and complete -For quick reference, see [`patient_context.md`](patient_context.md). 
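For readers who prefer code to prose, here is a minimal sketch tying points 2, 4 and 5 together - building a fresh snapshot from the registry and stripping it again before saving. The helper names are hypothetical; the real logic lives in the accessor and the entry points described above.

```python
import json
from datetime import datetime, timezone

# Prefix used to tag the ephemeral system message (as shown in the examples above).
PATIENT_CONTEXT_PREFIX = "PATIENT_CONTEXT_JSON: "

def build_snapshot(conversation_id: str, active_patient_id: str, all_patient_ids: list[str]) -> str:
    """Fresh snapshot text, rebuilt from the registry every turn and never persisted."""
    payload = {
        "conversation_id": conversation_id,
        "patient_id": active_patient_id,
        "all_patient_ids": sorted(all_patient_ids),
        "generated_at": datetime.now(timezone.utc).isoformat(),
    }
    return PATIENT_CONTEXT_PREFIX + json.dumps(payload)

def drop_snapshots(messages: list[dict]) -> list[dict]:
    """Filter applied on save so the snapshot never reaches storage."""
    return [
        m for m in messages
        if not (m.get("role") == "system" and str(m.get("content", "")).startswith(PATIENT_CONTEXT_PREFIX))
    ]
```

In the real system the filter runs inside the accessor's `serialize()` and the snapshot is injected at position [0] of the in-memory history, as described in the sections above.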
+**The result:** You can work on multiple patients in one conversation, switch between them seamlessly, and the system keeps everything organized and isolated. --- -**Last Updated:** September 30, 2025 -**Status:** Stable in production (`sekar/pc_poc` branch) +**Last Updated:** October 1, 2025 +**Status:** Production-ready (`sekar/pc_poc` branch) \ No newline at end of file