diff --git a/pyproject.toml b/pyproject.toml
index e6c780fa8..570825a9d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,7 +39,8 @@ dependencies = [
     "keyring>=24.3.1",
     "python-frontmatter>=1.1.0",
     "agent-client-protocol>=0.7.0",
-    "tiktoken>=0.12.0"
+    "tiktoken>=0.12.0",
+    "uvloop>=0.22.1; sys_platform != 'win32'",  # uvloop does not support Windows
 ]
 
 [project.optional-dependencies]
diff --git a/src/fast_agent/acp/slash_commands.py b/src/fast_agent/acp/slash_commands.py
index 9bf1250d4..34c7ea7d2 100644
--- a/src/fast_agent/acp/slash_commands.py
+++ b/src/fast_agent/acp/slash_commands.py
@@ -558,7 +558,15 @@ async def _handle_status_system(self) -> str:
         if error:
             return error
 
-        system_prompt = agent.instruction if isinstance(agent, InstructionAwareAgent) else None
+        agent_name = (
+            agent.name if isinstance(agent, InstructionAwareAgent) else self.current_agent_name
+        )
+
+        system_prompt = None
+        if agent_name in self._session_instructions:
+            system_prompt = self._session_instructions[agent_name]
+        elif isinstance(agent, InstructionAwareAgent):
+            system_prompt = agent.instruction
         if not system_prompt:
             return "\n".join(
                 [
@@ -569,9 +577,6 @@ async def _handle_status_system(self) -> str:
             )
 
         # Format the response
-        agent_name = (
-            agent.name if isinstance(agent, InstructionAwareAgent) else self.current_agent_name
-        )
         lines = [
             heading,
             "",
diff --git a/src/fast_agent/cli/__main__.py b/src/fast_agent/cli/__main__.py
index 43e6dd4d5..91c05f1d7 100644
--- a/src/fast_agent/cli/__main__.py
+++ b/src/fast_agent/cli/__main__.py
@@ -4,14 +4,21 @@
 
 from fast_agent.cli.constants import GO_SPECIFIC_OPTIONS, KNOWN_SUBCOMMANDS
 from fast_agent.cli.main import app
+from fast_agent.utils.async_utils import configure_uvloop, ensure_event_loop
 
 # if the arguments would work with "go" we'll just route to it
 
 
 def main():
     """Main entry point that handles auto-routing to 'go' command."""
+    requested_uvloop, enabled_uvloop = configure_uvloop()
+    if requested_uvloop and not enabled_uvloop:
+        print(
+            "FAST_AGENT_UVLOOP is set but uvloop is unavailable; falling back to asyncio.",
+            file=sys.stderr,
+        )
     try:
-        loop = asyncio.get_event_loop()
+        loop = ensure_event_loop()
 
         def _log_asyncio_exception(loop: asyncio.AbstractEventLoop, context: dict) -> None:
             import logging
diff --git a/src/fast_agent/cli/commands/README.md b/src/fast_agent/cli/commands/README.md
index d6de8bbaa..112d34691 100644
--- a/src/fast_agent/cli/commands/README.md
+++ b/src/fast_agent/cli/commands/README.md
@@ -118,3 +118,7 @@ fast-agent serve --description "Interact with the {agent} workflow via MCP"
 # Use per-connection instances to isolate history between clients
 fast-agent serve --instance-scope=connection --transport=http
 ```
+
+### Environment toggles
+
+- uvloop is used by default when it is installed (it is not available on Windows); set `FAST_AGENT_DISABLE_UV_LOOP=1` to opt out.
diff --git a/src/fast_agent/cli/commands/auth.py b/src/fast_agent/cli/commands/auth.py
index 266b3fcba..493c5e39f 100644
--- a/src/fast_agent/cli/commands/auth.py
+++ b/src/fast_agent/cli/commands/auth.py
@@ -16,6 +16,7 @@
     list_keyring_tokens,
 )
 from fast_agent.ui.console import console
+from fast_agent.utils.async_utils import run_sync
 
 app = typer.Typer(help="Manage OAuth authentication state for MCP servers")
 
@@ -395,9 +396,7 @@ async def _run_login():
             typer.echo(f"Login failed: {e}")
             return False
 
-    import asyncio
-
-    ok = asyncio.run(_run_login())
+    ok = bool(run_sync(_run_login))
     if ok:
         from fast_agent.mcp.oauth_client import compute_server_identity
 
diff --git a/src/fast_agent/cli/commands/go.py b/src/fast_agent/cli/commands/go.py
index 39c1c6a18..6e4c2c889 100644
--- a/src/fast_agent/cli/commands/go.py
+++ b/src/fast_agent/cli/commands/go.py
@@ -9,12 +9,10 @@
 
 import typer
 
-from fast_agent import FastAgent
-from fast_agent.agents.llm_agent import LlmAgent
 from fast_agent.cli.commands.server_helpers import add_servers_to_config, generate_server_name
 from fast_agent.cli.commands.url_parser import generate_server_configs, parse_server_urls
 from fast_agent.constants import DEFAULT_AGENT_INSTRUCTION
-from fast_agent.ui.console_display import ConsoleDisplay
+from fast_agent.utils.async_utils import configure_uvloop, create_event_loop, ensure_event_loop
 
 app = typer.Typer(
     help="Run an interactive agent directly from the command line without creating an agent.py file",
@@ -126,7 +124,10 @@ async def _run_agent(
     permissions_enabled: bool = True,
 ) -> None:
     """Async implementation to run an interactive agent."""
+    from fast_agent import FastAgent
+    from fast_agent.agents.llm_agent import LlmAgent
     from fast_agent.mcp.prompts.prompt_load import load_prompt
+    from fast_agent.ui.console_display import ConsoleDisplay
 
     # Create the FastAgent instance
 
@@ -273,6 +274,7 @@ def run_async_agent(
     permissions_enabled: bool = True,
 ):
     """Run the async agent function with proper loop handling."""
+    configure_uvloop()
     server_list = servers.split(",") if servers else None
 
     # Parse URLs and generate server configurations if provided
@@ -346,19 +348,12 @@ def run_async_agent(
                 continue
 
     # Check if we're already in an event loop
-    try:
-        loop = asyncio.get_event_loop()
-        if loop.is_running():
-            # We're inside a running event loop, so we can't use asyncio.run
-            # Instead, create a new loop
-            loop = asyncio.new_event_loop()
-            asyncio.set_event_loop(loop)
-        _set_asyncio_exception_handler(loop)
-    except RuntimeError:
-        # No event loop exists, so we'll create one
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
-        _set_asyncio_exception_handler(loop)
+    loop = ensure_event_loop()
+    if loop.is_running():
+        # We're inside a running event loop, so we can't use asyncio.run
+        # Instead, create a new loop
+        loop = create_event_loop()
+    _set_asyncio_exception_handler(loop)
 
     try:
         loop.run_until_complete(
diff --git a/src/fast_agent/cli/main.py b/src/fast_agent/cli/main.py
index fa8343667..f5e7498bc 100644
--- a/src/fast_agent/cli/main.py
+++ b/src/fast_agent/cli/main.py
@@ -1,26 +1,62 @@
 """Main CLI entry point for MCP Agent."""
 
+import importlib
+
+import click
 import typer
-from rich.table import Table
+import typer.main
+from typer.core import TyperGroup
 
-from fast_agent.cli.commands import acp, auth, check_config, go, quickstart, serve, setup
 from fast_agent.cli.terminal import Application
 from fast_agent.ui.console import console as shared_console
 
+LAZY_SUBCOMMANDS: dict[str, str] = {
+    "go": "fast_agent.cli.commands.go:app",
+    "serve": "fast_agent.cli.commands.serve:app",
+    "acp": "fast_agent.cli.commands.acp:app",
+    "setup": "fast_agent.cli.commands.setup:app",
+    "check": "fast_agent.cli.commands.check_config:app",
+    "auth": "fast_agent.cli.commands.auth:app",
+    "quickstart": "fast_agent.cli.commands.quickstart:app",
+    "bootstrap": "fast_agent.cli.commands.quickstart:app",
+}
+
+
+class LazyGroup(TyperGroup):
+    """Typer group that imports a subcommand's module only when it is requested.
+
+    ``lazy_subcommands`` maps a command name to a ``"module.path:attribute"``
+    target naming the Typer app that implements it.
+    """
+
+    lazy_subcommands: dict[str, str] = {}
+
+    def list_commands(self, ctx: click.Context) -> list[str]:
+        """Return the sorted names of directly registered and lazy subcommands."""
+        # Include commands registered directly on the group so they are not hidden.
+        return sorted({*super().list_commands(ctx), *self.lazy_subcommands})
+
+    def get_command(self, ctx: click.Context, cmd_name: str) -> click.Command | None:
+        """Resolve ``cmd_name``, importing its module on demand if it is lazy."""
+        target = self.lazy_subcommands.get(cmd_name)
+        if not target:
+            # Not a lazy entry: defer to commands registered directly on the group.
+            return super().get_command(ctx, cmd_name)
+        module_path, app_name = target.split(":", 1)
+        module = importlib.import_module(module_path)
+        typer_app = getattr(module, app_name)
+        command = typer.main.get_command(typer_app)
+        # Aliases can share one app, so label the command with the requested name.
+        command.name = cmd_name
+        return command
+
+
 app = typer.Typer(
+    cls=LazyGroup,
     help="Use `fast-agent go --help` for interactive shell arguments and options.",
     add_completion=False,  # We'll add this later when we have more commands
 )
-
-# Subcommands
-app.add_typer(go.app, name="go", help="Run an interactive agent directly from the command line")
-app.add_typer(serve.app, name="serve", help="Run FastAgent as an MCP server")
-app.add_typer(acp.app, name="acp", help="Run FastAgent as an ACP stdio server")
-app.add_typer(setup.app, name="setup", help="Set up a new agent project")
-app.add_typer(check_config.app, name="check", help="Show or diagnose fast-agent configuration")
-app.add_typer(auth.app, name="auth", help="Manage OAuth authentication for MCP servers")
-app.add_typer(quickstart.app, name="bootstrap", help="Create example applications")
-app.add_typer(quickstart.app, name="quickstart", help="Create example applications")
+LazyGroup.lazy_subcommands = LAZY_SUBCOMMANDS
 
 # Shared application context
 application = Application()
@@ -32,6 +57,7 @@ def show_welcome() -> None:
     """Show a welcome message with available commands, using new styling."""
     from importlib.metadata import version
 
+    from rich.table import Table
     from rich.text import Text
 
     try:
diff --git a/src/fast_agent/context.py b/src/fast_agent/context.py
index b5968f4c4..a3c37c86c 100644
--- a/src/fast_agent/context.py
+++ b/src/fast_agent/context.py
@@ -1,7 +1,5 @@
 from __future__ import annotations
 
-import asyncio
-import concurrent.futures
 import logging
 import uuid
 from os import PathLike
@@ -27,6 +25,7 @@
 from fast_agent.core.logging.transport import create_transport
 from fast_agent.mcp_server_registry import ServerRegistry
 from fast_agent.skills import SkillRegistry
+from fast_agent.utils.async_utils import run_sync
 
 if TYPE_CHECKING:
     from fast_agent.acp.acp_context import ACPContext
@@ -253,22 +252,10 @@ def get_current_context() -> Context:
     """
     global _global_context
     if _global_context is None:
-        try:
-            # Try to get the current event loop
-            loop = asyncio.get_event_loop()
-            if loop.is_running():
-                # Create a new loop in a separate thread
-                def run_async():
-                    new_loop = asyncio.new_event_loop()
-                    asyncio.set_event_loop(new_loop)
-                    return new_loop.run_until_complete(initialize_context())
-
-                with concurrent.futures.ThreadPoolExecutor() as pool:
-                    _global_context = pool.submit(run_async).result()
-            else:
-                _global_context = loop.run_until_complete(initialize_context())
-        except RuntimeError:
-            _global_context = asyncio.run(initialize_context())
+        result = run_sync(initialize_context)
+        if result is None:
+            raise RuntimeError("Failed to initialize global context")
+        _global_context = result
     return _global_context
 
 
diff --git a/src/fast_agent/core/executor/workflow_signal.py b/src/fast_agent/core/executor/workflow_signal.py
index 52ef3ddfe..da4d34b94 100644
--- a/src/fast_agent/core/executor/workflow_signal.py
+++ b/src/fast_agent/core/executor/workflow_signal.py
@@ -143,8 +143,8 @@ async def wait_for_signal(self, signal, timeout_seconds=None):
         if timeout_seconds:
             print(f"(Timeout in {timeout_seconds} seconds)")
 
-        # Use asyncio.get_event_loop().run_in_executor to make input non-blocking
-        loop = asyncio.get_event_loop()
+        # Use asyncio.get_running_loop().run_in_executor to make input non-blocking
+        loop = asyncio.get_running_loop()
         if timeout_seconds is not None:
             try:
                 value = await asyncio.wait_for(
diff --git a/src/fast_agent/core/logging/logger.py b/src/fast_agent/core/logging/logger.py
index 73be23300..311e24cfa 100644
--- a/src/fast_agent/core/logging/logger.py
+++ b/src/fast_agent/core/logging/logger.py
@@ -21,6 +21,7 @@
     ProgressListener,
 )
 from fast_agent.core.logging.transport import AsyncEventBus, EventTransport
+from fast_agent.utils.async_utils import ensure_event_loop
 
 
 class Logger:
@@ -34,19 +35,9 @@ def __init__(self, namespace: str) -> None:
         self.namespace = namespace
         self.event_bus = AsyncEventBus.get()
 
-    def _ensure_event_loop(self):
-        """Ensure we have an event loop we can use."""
-        try:
-            return asyncio.get_running_loop()
-        except RuntimeError:
-            # If no loop is running, create a new one
-            loop = asyncio.new_event_loop()
-            asyncio.set_event_loop(loop)
-            return loop
-
     def _emit_event(self, event: Event) -> None:
         """Emit an event by running it in the event loop."""
-        loop = self._ensure_event_loop()
+        loop = ensure_event_loop()
         if loop.is_running():
             # If we're in a thread with a running loop, schedule the coroutine
             asyncio.create_task(self.event_bus.emit(event))
diff --git a/src/fast_agent/core/logging/transport.py b/src/fast_agent/core/logging/transport.py
index 871354265..c14a1fe8f 100644
--- a/src/fast_agent/core/logging/transport.py
+++ b/src/fast_agent/core/logging/transport.py
@@ -22,7 +22,7 @@
 from fast_agent.core.logging.json_serializer import JSONSerializer
 from fast_agent.core.logging.listeners import EventListener, LifecycleAwareListener
 from fast_agent.ui.console import console
-from fast_agent.utils.async_utils import gather_with_cancel
+from fast_agent.utils.async_utils import ensure_event_loop, gather_with_cancel
 
 
 class EventTransport(Protocol):
@@ -304,11 +304,7 @@ async def start(self) -> None:
         if self._running:
             return
 
-        try:
-            asyncio.get_running_loop()
-        except RuntimeError:
-            loop = asyncio.new_event_loop()
-            asyncio.set_event_loop(loop)
+        ensure_event_loop()
 
         self._queue = asyncio.Queue()
 
diff --git a/src/fast_agent/human_input/simple_form.py b/src/fast_agent/human_input/simple_form.py
index d0612a3f6..a6f141392 100644
--- a/src/fast_agent/human_input/simple_form.py
+++ b/src/fast_agent/human_input/simple_form.py
@@ -1,11 +1,11 @@
 """Simple form API for elicitation schemas without MCP wrappers."""
 
-import asyncio
 from typing import Any, Union
 
 from mcp.types import ElicitRequestedSchema
 
 from fast_agent.human_input.form_fields import FormSchema
+from fast_agent.utils.async_utils import run_sync
 
 
 async def form(
@@ -76,7 +76,7 @@ def form_sync(
     Returns:
         Dict with form data if accepted, None if cancelled/declined
     """
-    return asyncio.run(form(schema, message, title))
+    return run_sync(form, schema, message, title)
 
 
 # Convenience function with a shorter name
diff --git a/src/fast_agent/llm/hf_inference_lookup.py b/src/fast_agent/llm/hf_inference_lookup.py
index 0aecfa0f2..321b56e4a 100644
--- a/src/fast_agent/llm/hf_inference_lookup.py
+++ b/src/fast_agent/llm/hf_inference_lookup.py
@@ -6,7 +6,6 @@
 
 from __future__ import annotations
 
-import asyncio
 import random
 from enum import Enum
 from typing import TYPE_CHECKING
@@ -14,6 +13,8 @@
 import httpx
 from pydantic import BaseModel, Field, computed_field
 
+from fast_agent.utils.async_utils import run_sync
+
 if TYPE_CHECKING:
     from collections.abc import Awaitable, Callable
     from typing import Any
@@ -230,7 +231,10 @@ def lookup_inference_providers_sync(
     Returns:
         InferenceProviderLookupResult with provider information
     """
-    return asyncio.run(lookup_inference_providers(model_id, timeout))
+    result = run_sync(lookup_inference_providers, model_id, timeout)
+    if result is None:
+        raise RuntimeError("Inference provider lookup returned no result")
+    return result
 
 
 def format_inference_lookup_message(result: InferenceProviderLookupResult) -> str:
diff --git a/src/fast_agent/mcp/prompts/prompt_server.py b/src/fast_agent/mcp/prompts/prompt_server.py
index 2829639e8..fa56a09c8 100644
--- a/src/fast_agent/mcp/prompts/prompt_server.py
+++ b/src/fast_agent/mcp/prompts/prompt_server.py
@@ -6,7 +6,6 @@
 """
 
 import argparse
-import asyncio
 import base64
 import logging
 import sys
@@ -39,6 +38,7 @@
     PromptTemplateLoader,
 )
 from fast_agent.types import PromptMessageExtended
+from fast_agent.utils.async_utils import run_sync
 
 # Configure logging
 logging.basicConfig(level=logging.ERROR)
@@ -534,7 +534,8 @@ async def async_main() -> int:
 def main() -> int:
     """Run the FastMCP server"""
     try:
-        return asyncio.run(async_main())
+        result = run_sync(async_main)
+        return result if result is not None else 1
     except KeyboardInterrupt:
         logger.info("\nServer stopped by user")
     except Exception as e:
diff --git a/src/fast_agent/mcp/server/agent_server.py b/src/fast_agent/mcp/server/agent_server.py
index 841da3dbb..8ba6b8977 100644
--- a/src/fast_agent/mcp/server/agent_server.py
+++ b/src/fast_agent/mcp/server/agent_server.py
@@ -17,6 +17,7 @@
 import fast_agent.core.prompt
 from fast_agent.core.fastagent import AgentInstance
 from fast_agent.core.logging.logger import get_logger
+from fast_agent.utils.async_utils import run_sync
 
 logger = get_logger(__name__)
 
@@ -332,7 +333,7 @@ def run(
             finally:
                 # Run an async cleanup in a new event loop
                 try:
-                    asyncio.run(self.shutdown())
+                    run_sync(self.shutdown)
                 except (SystemExit, KeyboardInterrupt):
                     # These are expected during shutdown
                     pass
@@ -343,7 +344,7 @@ def run(
                 print("\nServer stopped by user (CTRL+C)")
             finally:
                 # Minimal cleanup for stdio
-                asyncio.run(self._cleanup_stdio())
+                run_sync(self._cleanup_stdio)
 
     async def run_async(
         self, transport: TransportMode = "http", host: str = "0.0.0.0", port: int = 8000
diff --git a/src/fast_agent/ui/console_display.py b/src/fast_agent/ui/console_display.py
index 4cde21ff4..0874a9c36 100644
--- a/src/fast_agent/ui/console_display.py
+++ b/src/fast_agent/ui/console_display.py
@@ -3,6 +3,7 @@
 from typing import TYPE_CHECKING, Any, Iterator, Mapping, Union
 
 from mcp.types import CallToolResult
+from rich.console import Group
 from rich.markdown import Markdown
 from rich.panel import Panel
 from rich.text import Text
@@ -166,7 +167,7 @@ def display_message(
         is_error: bool = False,
         truncate_content: bool = True,
         additional_message: Text | None = None,
-        pre_content: Text | None = None,
+        pre_content: Text | Group | None = None,
     ) -> None:
         """
         Unified method to display formatted messages to the console.
@@ -204,8 +205,12 @@ def display_message(
         self._create_combined_separator_status(left, right_info)
 
         # Display the content
-        if pre_content and pre_content.plain:
-            console.console.print(pre_content, markup=self._markup)
+        if pre_content:
+            if isinstance(pre_content, Text):
+                if pre_content.plain:
+                    console.console.print(pre_content, markup=self._markup)
+            else:
+                console.console.print(pre_content, markup=self._markup)
         self._display_content(
             content, truncate_content, is_error, message_type, check_markdown_markers=False
         )
@@ -304,8 +309,12 @@ def _display_content(
                             console.console.print(content, markup=self._markup)
                 else:
                     # Check if content has substantial XML (mixed content)
-                    # If so, skip markdown rendering as it turns XML into an unreadable blob
-                    has_substantial_xml = content.count("<") > 5 and content.count(">") > 5
+                    # If so, skip markdown rendering as it turns XML into an unreadable blob.
+                    # Ignore markdown autolinks like <https://...>.
+                    xml_probe = re.sub(r"<(?:https?://|mailto:)[^>]+>", "", content)
+                    has_substantial_xml = (
+                        xml_probe.count("<") > 5 and xml_probe.count(">") > 5
+                    )
 
                     # Check if it looks like markdown
                     if self._looks_like_markdown(content) and not has_substantial_xml:
@@ -642,7 +651,9 @@ def show_skybridge_summary(
     ) -> None:
         self._tool_display.show_skybridge_summary(agent_name, configs)
 
-    def _extract_reasoning_content(self, message: "PromptMessageExtended") -> Text | None:
+    def _extract_reasoning_content(
+        self, message: "PromptMessageExtended"
+    ) -> Text | Group | None:
         """Extract reasoning channel content as dim text."""
         channels = message.channels or {}
         reasoning_blocks = channels.get(REASONING) or []
@@ -665,6 +676,21 @@ def _extract_reasoning_content(self, message: "PromptMessageExtended") -> Text |
             return None
 
         # Render reasoning in dim italic and leave a blank line before main content
+        if self._looks_like_markdown(joined):
+            try:
+                prepared = prepare_markdown_content(joined, self._escape_xml)
+                markdown = Markdown(
+                    prepared,
+                    code_theme=self.code_style,
+                    style="dim italic",
+                )
+                return Group(markdown, Text("\n"))
+            except Exception as exc:
+                logger.exception(
+                    "Failed to render reasoning markdown",
+                    data={"error": str(exc)},
+                )
+
         text = joined
         if not text.endswith("\n"):
             text += "\n"
@@ -699,7 +725,7 @@ async def show_assistant_message(
         # Extract text from PromptMessageExtended if needed
         from fast_agent.types import PromptMessageExtended
 
-        pre_content: Text | None = None
+        pre_content: Text | Group | None = None
 
         if isinstance(message_text, PromptMessageExtended):
             display_text = message_text.last_text() or ""
@@ -899,7 +925,7 @@ def show_user_message(
         right_info = f"[dim]{' '.join(right_parts)}[/dim]" if right_parts else ""
 
         # Build attachment indicator as pre_content
-        pre_content: Text | None = None
+        pre_content: Text | Group | None = None
         if attachments:
             pre_content = Text()
             pre_content.append("🔗 ", style="dim")
diff --git a/src/fast_agent/ui/markdown_truncator.py b/src/fast_agent/ui/markdown_truncator.py
index a38394eaf..11d65842d 100644
--- a/src/fast_agent/ui/markdown_truncator.py
+++ b/src/fast_agent/ui/markdown_truncator.py
@@ -1,1010 +1,127 @@
-"""Smart markdown truncation that preserves markdown context.
+"""Markdown truncation optimized for streaming displays.
 
-This module provides intelligent truncation of markdown text for streaming displays,
-ensuring that markdown structures (code blocks, lists, blockquotes) are preserved
-when possible, and gracefully degrading when single blocks are too large.
-
-KEY CONCEPT: Truncation Strategy
-=================================
-
-In STREAMING MODE (prefer_recent=True):
-  - Always show MOST RECENT content (keep end, remove beginning)
-  - Why: Users are following along as content streams in. They want to see the
-    current position, not what was written at the start.
-  - For TABLES: Show the most recent rows while preserving the header
-  - Example: Table with 100 rows - show header + last 20 rows (not first 20)
-
-In STATIC MODE (prefer_recent=False):
-  - For TABLE-DOMINANT content (>50% table lines): Show FIRST page
-  - For TEXT content: Show MOST RECENT
-  - Example: Tool output listing 100 files - show header + first 20 rows
-
-Context Preservation
-====================
-
-When truncating removes the opening of a structure, we restore it:
-- CODE BLOCKS: Prepend ```language fence (only if it was removed)
-- TABLES: Prepend header row + separator row (only if they were removed)
-
-This ensures truncated content still renders correctly as markdown.
+This module keeps the most recent portion of a markdown stream within a
+viewport budget. It preserves code block fences and table headers without
+requiring expensive render passes.
 """
 
-from dataclasses import dataclass
-from typing import Iterable
-
-from markdown_it import MarkdownIt
-from markdown_it.token import Token
-from rich.console import Console
-from rich.markdown import Markdown
-from rich.segment import Segment
-
+from __future__ import annotations
 
-@dataclass
-class TruncationPoint:
-    """Represents a position in text where truncation is safe."""
+from typing import TYPE_CHECKING
 
-    char_position: int
-    block_type: str
-    token: Token
-    is_closing: bool
+from fast_agent.ui.streaming_buffer import StreamBuffer
 
-
-@dataclass
-class CodeBlockInfo:
-    """Information about a code block in the document."""
-
-    start_pos: int
-    end_pos: int
-    fence_line: int
-    language: str
-    fence_text: str | None
-    token: Token
-
-
-@dataclass
-class TableInfo:
-    """Information about a table in the document."""
-
-    start_pos: int
-    end_pos: int
-    thead_start_pos: int
-    thead_end_pos: int
-    tbody_start_pos: int
-    tbody_end_pos: int
-    header_lines: list[str]  # Header + separator rows
+if TYPE_CHECKING:
+    from rich.console import Console
 
 
 class MarkdownTruncator:
-    """Handles intelligent truncation of markdown text while preserving context."""
-
-    def __init__(self, target_height_ratio: float = 0.8):
-        """Initialize the truncator.
+    """Handles lightweight markdown truncation for streaming output."""
 
-        Args:
-            target_height_ratio: Target height as ratio of terminal height (0.0-1.0).
-                After truncation, aim to keep content at this ratio of terminal height.
-        """
+    def __init__(self, target_height_ratio: float = 0.8) -> None:
+        if not 0 < target_height_ratio <= 1:
+            raise ValueError("target_height_ratio must be between 0 and 1")
         self.target_height_ratio = target_height_ratio
-        self.parser = MarkdownIt().enable("strikethrough").enable("table")
-        # Cache for streaming mode to avoid redundant work
-        self._last_full_text: str | None = None
-        self._last_truncated_text: str | None = None
-        self._last_terminal_height: int | None = None
-        # Markdown parse cache
-        self._cache_source: str | None = None
-        self._cache_tokens: list[Token] | None = None
-        self._cache_lines: list[str] | None = None
-        self._cache_safe_points: list[TruncationPoint] | None = None
-        self._cache_code_blocks: list[CodeBlockInfo] | None = None
-        self._cache_tables: list[TableInfo] | None = None
+        self._buffer = StreamBuffer(target_height_ratio=target_height_ratio)
 
     def truncate(
         self,
         text: str,
         terminal_height: int,
-        console: Console,
+        console: Console | None,
         code_theme: str = "monokai",
         prefer_recent: bool = False,
     ) -> str:
-        """Truncate markdown text to fit within terminal height.
-
-        This method attempts to truncate at safe block boundaries (between paragraphs,
-        after code blocks, etc.). If no safe boundary works (e.g., single block is
-        too large), it falls back to character-based truncation.
+        """Return the most recent portion of text that fits the viewport.
 
         Args:
             text: The markdown text to truncate.
             terminal_height: Height of the terminal in lines.
-            console: Rich Console instance for measuring rendered height.
-            code_theme: Theme for code syntax highlighting.
-            prefer_recent: If True, always show most recent content (streaming mode).
-                This overrides table-dominant detection to ensure streaming tables
-                show the latest rows, not the first rows.
-
-        Returns:
-            Truncated markdown text that fits within target height.
+            console: Rich Console instance used to derive width.
+            code_theme: Unused; kept for compatibility.
+            prefer_recent: Unused; kept for compatibility.
         """
+        del code_theme, prefer_recent
         if not text:
             return text
-
-        # Fast path for streaming: use incremental truncation
-        if prefer_recent:
-            return self._truncate_streaming(text, terminal_height, console, code_theme)
-
-        # Measure current height
-        current_height = self._measure_rendered_height(text, console, code_theme)
-
-        if current_height <= terminal_height:
-            # No truncation needed
-            return text
-
-        target_height = int(terminal_height * self.target_height_ratio)
-
-        # Find safe truncation points (block boundaries)
-        safe_points = self._find_safe_truncation_points(text)
-
-        if not safe_points:
-            # No safe points found, fall back to character truncation
-            truncated = self._truncate_by_characters(text, target_height, console, code_theme)
-            # Ensure code fence is preserved if we truncated within a code block
-            truncated = self._ensure_code_fence_if_needed(text, truncated)
-            # Ensure table header is preserved if we truncated within a table body
-            return self._ensure_table_header_if_needed(text, truncated)
-
-        # Determine truncation strategy BEFORE finding best point
-        # This is needed because _find_best_truncation_point needs to know
-        # which direction to test (keep beginning vs keep end)
-        is_table_content = False if prefer_recent else self._is_primary_content_table(text)
-
-        # Try to find the best truncation point
-        best_point = self._find_best_truncation_point(
-            text, safe_points, target_height, console, code_theme, keep_beginning=is_table_content
+        terminal_width = console.size.width if console else None
+        return self._buffer.truncate_text(
+            text,
+            terminal_height=terminal_height,
+            terminal_width=terminal_width,
+            add_closing_fence=False,
         )
 
-        if best_point is None:
-            # No safe point works, fall back to character truncation
-            truncated = self._truncate_by_characters(text, target_height, console, code_theme)
-            # Ensure code fence is preserved if we truncated within a code block
-            truncated = self._ensure_code_fence_if_needed(text, truncated)
-            # Ensure table header is preserved if we truncated within a table body
-            return self._ensure_table_header_if_needed(text, truncated)
-
-        # ============================================================================
-        # TRUNCATION STRATEGY: Two Different Behaviors
-        # ============================================================================
-        #
-        # We use different truncation strategies depending on content type:
-        #
-        # 1. TABLES: Show FIRST page (keep beginning, remove end)
-        #    - Rationale: Tables are structured data where the header defines meaning.
-        #      Users need to see the header and first rows to understand the data.
-        #      Showing the "most recent" rows without context is meaningless.
-        #    - Example: A file listing table - seeing the last 10 files without the
-        #      header columns (name, size, date) is useless.
-        #    - NOTE: This is overridden when prefer_recent=True (streaming mode)
-        #
-        # 2. STREAMING TEXT: Show MOST RECENT (keep end, remove beginning)
-        #    - Rationale: In streaming assistant responses, the most recent content
-        #      is usually the most relevant. The user is following along as text
-        #      appears, so they want to see "where we are now" not "where we started".
-        #    - Example: A code explanation - seeing the conclusion is more valuable
-        #      than seeing the introduction paragraph that scrolled off.
-        #
-        # Detection: Content is considered "table-dominant" if >50% of lines are
-        # part of table structures (see _is_primary_content_table).
-        # OVERRIDE: When prefer_recent=True, always use "show most recent" strategy.
-        # ============================================================================
-
-        # Note: is_table_content was already determined above before calling _find_best_truncation_point
-
-        if is_table_content:
-            # For tables: keep BEGINNING, truncate END (show first N rows)
-            # Use safe point as END boundary, keep everything before it
-            truncated_text = text[: best_point.char_position]
-
-            # ========================================================================
-            # TABLE HEADER INTEGRITY CHECK
-            # ========================================================================
-            # Markdown tables require both a header row AND a separator line:
-            #
-            #   | Name    | Size | Date       |   <-- Header row
-            #   |---------|------|------------|   <-- Separator (required!)
-            #   | file.py | 2KB  | 2024-01-15 |   <-- Data rows
-            #
-            # If we truncate between the header and separator, the table won't
-            # render at all in markdown. So we need to ensure both are present.
-            # ========================================================================
-            if truncated_text.strip() and "|" in truncated_text:
-                lines_result = truncated_text.split("\n")
-                # Check if we have header but missing separator (dashes)
-                has_header = any("|" in line and "---" not in line for line in lines_result)
-                has_separator = any("---" in line for line in lines_result)
-
-                if has_header and not has_separator:
-                    # We cut off the separator! Find it in original and include it
-                    original_lines = text.split("\n")
-                    for i, line in enumerate(original_lines):
-                        if "---" in line and "|" in line:
-                            # Found separator line - include up to and including this line
-                            truncated_text = "\n".join(original_lines[: i + 1])
-                            break
-        else:
-            # ========================================================================
-            # STREAMING TEXT: Keep END, truncate BEGINNING (show most recent)
-            # ========================================================================
-            # This is the primary use case: assistant is streaming a response, and
-            # the terminal can't show all of it. We want to show what's currently
-            # being written (the end), not what was written minutes ago (the start).
-            # ========================================================================
-            truncated_text = text[best_point.char_position :]
-
-            # ========================================================================
-            # CONTEXT PRESERVATION for Truncated Structures
-            # ========================================================================
-            # When truncating removes the beginning of a structure (code block or
-            # table), we need to restore the opening context so it renders properly.
-            #
-            # CODE BLOCKS: If we truncate mid-block, prepend the opening fence
-            #   Original:  ```python\ndef foo():\n  return 42\n```
-            #   Truncate:  [```python removed] def foo():\n  return 42\n```
-            #   Fixed:     ```python\ndef foo():\n  return 42\n```
-            #
-            # TABLES: If we truncate table data rows, prepend the header
-            #   Original:  | Name | Size |\n|------|------|\n| a | 1 |\n| b | 2 |
-            #   Truncate:  [header removed] | b | 2 |
-            #   Fixed:     | Name | Size |\n|------|------|\n| b | 2 |
-            # ========================================================================
-
-            # Get code block info once for efficient position-based checks
-            code_blocks = self._get_code_block_info(text)
-
-            # Find which code block (if any) contains the truncation point
-            containing_code_block = None
-            for block in code_blocks:
-                if block.start_pos < best_point.char_position < block.end_pos:
-                    containing_code_block = block
-                    break
-
-            # Check if we need special handling for code blocks
-            if containing_code_block:
-                truncated_text = self._handle_code_block_truncation(
-                    containing_code_block, best_point, truncated_text
-                )
-
-            # Get table info once for efficient position-based checks
-            tables = self._get_table_info(text)
-
-            # Find ANY table whose content is in the truncated text but whose header was removed
-            for table in tables:
-                # Check if we truncated somewhere within this table (after the start)
-                # and the truncated text still contains part of this table
-                if (
-                    best_point.char_position > table.start_pos
-                    and best_point.char_position < table.end_pos
-                ):
-                    # We truncated within this table
-                    # Check if the header was removed
-                    # Use >= because if we truncate AT thead_end_pos, the header is already gone
-                    if best_point.char_position >= table.thead_end_pos:
-                        # Header was removed - prepend it
-                        header_text = "\n".join(table.header_lines) + "\n"
-                        truncated_text = header_text + truncated_text
-                        break  # Only restore one table header
-
-        return truncated_text
-
-    def _ensure_parse_cache(self, text: str) -> None:
-        if self._cache_source == text:
-            return
-
-        tokens = self.parser.parse(text)
-        self._cache_source = text
-        self._cache_tokens = tokens
-        self._cache_lines = text.split("\n")
-        self._cache_safe_points = None
-        self._cache_code_blocks = None
-        self._cache_tables = None
-
-    def _find_safe_truncation_points(self, text: str) -> list[TruncationPoint]:
-        """Find safe positions to truncate at (block boundaries).
-
-        Args:
-            text: The markdown text to analyze.
-
-        Returns:
-            List of TruncationPoint objects representing safe truncation positions.
-        """
-        self._ensure_parse_cache(text)
-        if self._cache_safe_points is not None:
-            return list(self._cache_safe_points)
-
-        assert self._cache_tokens is not None
-        assert self._cache_lines is not None
-
-        safe_points: list[TruncationPoint] = []
-        tokens = self._cache_tokens
-        lines = self._cache_lines
-
-        for token in tokens:
-            # We're interested in block-level tokens with map information
-            # Opening tokens (nesting=1) and self-closing tokens (nesting=0) have map info
-            if token.map is not None:
-                # token.map gives [start_line, end_line] (0-indexed)
-                end_line = token.map[1]
-
-                # Calculate character position at end of this block
-                if end_line <= len(lines):
-                    char_pos = sum(len(line) + 1 for line in lines[:end_line])
-
-                    safe_points.append(
-                        TruncationPoint(
-                            char_position=char_pos,
-                            block_type=token.type,
-                            token=token,
-                            is_closing=(token.nesting == 0),  # Self-closing or block end
-                        )
-                    )
-        self._cache_safe_points = safe_points
-        return list(safe_points)
-
-    def _get_code_block_info(self, text: str) -> list[CodeBlockInfo]:
-        """Extract code block positions and metadata using markdown-it.
-
-        Uses same technique as prepare_markdown_content in markdown_helpers.py:
-        parse once with markdown-it, extract exact positions from tokens.
-
-        Args:
-            text: The markdown text to analyze.
-
-        Returns:
-            List of CodeBlockInfo objects with position and language metadata.
-        """
-        self._ensure_parse_cache(text)
-        if self._cache_code_blocks is not None:
-            return list(self._cache_code_blocks)
-
-        assert self._cache_tokens is not None
-        assert self._cache_lines is not None
-
-        tokens = self._cache_tokens
-        lines = self._cache_lines
-        code_blocks: list[CodeBlockInfo] = []
-
-        for token in self._flatten_tokens(tokens):
-            if token.type in ("fence", "code_block") and token.map:
-                start_line = token.map[0]
-                end_line = token.map[1]
-                start_pos = sum(len(line) + 1 for line in lines[:start_line])
-                end_pos = sum(len(line) + 1 for line in lines[:end_line])
-                language = token.info or "" if hasattr(token, "info") else ""
-                fence_text: str | None = None
-                if token.type == "fence":
-                    fence_text = lines[start_line] if 0 <= start_line < len(lines) else None
-
-                code_blocks.append(
-                    CodeBlockInfo(
-                        start_pos=start_pos,
-                        end_pos=end_pos,
-                        fence_line=start_line,
-                        language=language,
-                        fence_text=fence_text,
-                        token=token,
-                    )
-                )
-        self._cache_code_blocks = code_blocks
-        return list(code_blocks)
-
-    def _build_code_block_prefix(self, block: CodeBlockInfo) -> str | None:
-        """Construct the opening fence text for a code block if applicable."""
-        token = block.token
-
-        if token.type == "fence":
-            if block.fence_text:
-                fence_line = block.fence_text
-            else:
-                markup = getattr(token, "markup", "") or "```"
-                info = (getattr(token, "info", "") or "").strip()
-                fence_line = f"{markup}{info}" if info else markup
-            return fence_line if fence_line.endswith("\n") else fence_line + "\n"
-
-        if token.type == "code_block":
-            info = (getattr(token, "info", "") or "").strip()
-            if info:
-                return f"```{info}\n"
-            if block.language:
-                return f"```{block.language}\n"
-            return "```\n"
-
-        return None
-
-    def _get_table_info(self, text: str) -> list[TableInfo]:
-        """Extract table positions and metadata using markdown-it.
-
-        Uses same technique as _get_code_block_info: parse once with markdown-it,
-        extract exact positions from tokens.
-
-        Args:
-            text: The markdown text to analyze.
-
-        Returns:
-            List of TableInfo objects with position and header metadata.
-        """
-        self._ensure_parse_cache(text)
-        if self._cache_tables is not None:
-            return list(self._cache_tables)
-
-        assert self._cache_tokens is not None
-        assert self._cache_lines is not None
-
-        tokens = self._cache_tokens
-        lines = self._cache_lines
-        tables: list[TableInfo] = []
-
-        for i, token in enumerate(tokens):
-            if token.type == "table_open" and token.map:
-                # Find thead and tbody within this table
-                thead_start_line = None
-                thead_end_line = None
-                tbody_start_line = None
-                tbody_end_line = None
-
-                # Look ahead in tokens to find thead and tbody
-                for j in range(i + 1, len(tokens)):
-                    if tokens[j].type == "thead_open" and tokens[j].map:
-                        token_map = tokens[j].map
-                        assert token_map is not None  # Type narrowing
-                        thead_start_line = token_map[0]
-                        thead_end_line = token_map[1]
-                    elif tokens[j].type == "tbody_open" and tokens[j].map:
-                        token_map = tokens[j].map
-                        assert token_map is not None  # Type narrowing
-                        tbody_start_line = token_map[0]
-                        tbody_end_line = token_map[1]
-                    elif tokens[j].type == "table_close":
-                        # End of this table
-                        break
-
-                # Check if we have both thead and tbody
-                if (
-                    thead_start_line is not None
-                    and thead_end_line is not None
-                    and tbody_start_line is not None
-                    and tbody_end_line is not None
-                ):
-                    # Calculate character positions
-                    table_start_line = token.map[0]
-                    table_end_line = token.map[1]
-
-                    # markdown-it reports table_start_line as pointing to the HEADER ROW,
-                    # not the separator. So table_start_line should already be correct.
-                    # We just need to capture from table_start_line to tbody_start_line
-                    # to get both the header row and separator row.
-                    actual_table_start_line = table_start_line
-
-                    table_start_pos = sum(len(line) + 1 for line in lines[:actual_table_start_line])
-                    table_end_pos = sum(len(line) + 1 for line in lines[:table_end_line])
-                    thead_start_pos = sum(len(line) + 1 for line in lines[:thead_start_line])
-                    thead_end_pos = sum(len(line) + 1 for line in lines[:thead_end_line])
-                    tbody_start_pos = sum(len(line) + 1 for line in lines[:tbody_start_line])
-                    tbody_end_pos = sum(len(line) + 1 for line in lines[:tbody_end_line])
-
-                    # Extract header lines (header row + separator)
-                    # table_start_line points to the header row,
-                    # and tbody_start_line is where data rows start.
-                    # So lines[table_start_line:tbody_start_line] gives us both header and separator
-                    header_lines = lines[actual_table_start_line:tbody_start_line]
-
-                    tables.append(
-                        TableInfo(
-                            start_pos=table_start_pos,
-                            end_pos=table_end_pos,
-                            thead_start_pos=thead_start_pos,
-                            thead_end_pos=thead_end_pos,
-                            tbody_start_pos=tbody_start_pos,
-                            tbody_end_pos=tbody_end_pos,
-                            header_lines=header_lines,
-                        )
-                    )
-        self._cache_tables = tables
-        return list(tables)
+    def measure_rendered_height(
+        self, text: str, console: Console, code_theme: str = "monokai"
+    ) -> int:
+        """Estimate how many terminal rows the markdown will occupy (0 for empty text)."""
+        del code_theme  # accepted for signature compatibility; not used by the estimate
+        if not text:
+            return 0
+        width = console.size.width
+        if width <= 0:  # no usable width: fall back to counting raw newline-separated lines
+            return len(text.split("\n"))
+        return self._buffer.estimate_display_lines(text, width)
 
-    def _find_best_truncation_point(
+    def truncate_to_height(
         self,
         text: str,
-        safe_points: list[TruncationPoint],
-        target_height: int,
-        console: Console,
-        code_theme: str,
-        keep_beginning: bool = False,
-    ) -> TruncationPoint | None:
-        """Find the truncation point that gets closest to target height.
-
-        Args:
-            text: The full markdown text.
-            safe_points: List of potential truncation points.
-            target_height: Target height in terminal lines.
-            console: Rich Console for measuring.
-            code_theme: Code syntax highlighting theme.
-            keep_beginning: If True, test keeping text BEFORE point (table mode).
-                           If False, test keeping text AFTER point (streaming mode).
-
-        Returns:
-            The best TruncationPoint, or None if none work.
-        """
-        best_point = None
-        best_diff = float("inf")
-
-        for point in safe_points:
-            # Test truncating at this point
-            # Direction depends on truncation strategy
-            if keep_beginning:
-                # Table mode: keep beginning, remove end
-                truncated = text[: point.char_position]
-            else:
-                # Streaming mode: keep end, remove beginning
-                truncated = text[point.char_position :]
-
-            # Skip if truncation would result in empty or nearly empty text
-            if not truncated.strip():
-                continue
-
-            height = self._measure_rendered_height(truncated, console, code_theme)
-
-            # Calculate how far we are from target
-            diff = abs(height - target_height)
-
-            # We prefer points that keep us at or below target
-            if height <= target_height and diff < best_diff:
-                best_point = point
-                best_diff = diff
-
-        return best_point
-
-    def _truncate_by_characters(
-        self, text: str, target_height: int, console: Console, code_theme: str
+        *,
+        terminal_height: int,
+        console: Console | None,
     ) -> str:
-        """Fall back to character-based truncation using binary search.
-
-        This is used when no safe block boundary works (e.g., single block too large).
-
-        Args:
-            text: The markdown text to truncate.
-            target_height: Target height in terminal lines.
-            console: Rich Console for measuring.
-            code_theme: Code syntax highlighting theme.
-
-        Returns:
-            Truncated text that fits within target height.
-        """
+        """Truncate markdown via self._buffer with target_ratio=1.0; empty text is unchanged."""
         if not text:
             return text
-
-        # Binary search on character position
-        left, right = 0, len(text) - 1
-        best_pos = None
-
-        while left <= right:
-            mid = (left + right) // 2
-            test_text = text[mid:]
-
-            if not test_text.strip():
-                # Skip empty results
-                right = mid - 1
-                continue
-
-            height = self._measure_rendered_height(test_text, console, code_theme)
-
-            if height <= target_height:
-                # Can keep more text - try removing less
-                best_pos = mid
-                right = mid - 1
-            else:
-                # Need to truncate more
-                left = mid + 1
-
-        # If nothing fits at all, return the last portion of text that's minimal
-        if best_pos is None:
-            # Return last few characters or lines that might fit
-            # Take approximately the last 20% of the text as a fallback
-            fallback_pos = int(len(text) * 0.8)
-            return text[fallback_pos:] if fallback_pos < len(text) else text
-
-        return text[best_pos:]
-
-    def measure_rendered_height(
-        self, text: str, console: Console, code_theme: str = "monokai"
-    ) -> int:
-        """Public helper that measures rendered height for markdown content."""
-        return self._measure_rendered_height(text, console, code_theme)
-
-    def _handle_code_block_truncation(
-        self, code_block: CodeBlockInfo, truncation_point: TruncationPoint, truncated_text: str
-    ) -> str:
-        """Handle truncation within a code block by preserving the opening fence.
-
-        When truncating within a code block, we need to ensure the opening fence
-        (```language) is preserved so the remaining content renders correctly.
-
-        This uses a simple position-based approach: if the truncation point is after
-        the fence's starting position, the fence has scrolled off and needs to be
-        prepended. Otherwise, it's still on screen.
-
-        Args:
-            code_block: The CodeBlockInfo for the block being truncated.
-            truncation_point: Where we're truncating.
-            truncated_text: The text after truncation.
-
-        Returns:
-            Modified truncated text with code fence preserved if needed.
-        """
-        # Simple check: did we remove the opening fence?
-        # If truncation happened after the fence line, it scrolled off
-        if truncation_point.char_position > code_block.start_pos:
-            # Check if fence is already at the beginning (avoid duplicates)
-            fence = self._build_code_block_prefix(code_block)
-            if fence and not truncated_text.startswith(fence):
-                # Fence scrolled off - prepend it
-                return fence + truncated_text
-
-        # Fence still on screen or already prepended - keep as-is
-        return truncated_text
-
-    def _ensure_code_fence_if_needed(self, original_text: str, truncated_text: str) -> str:
-        """Ensure code fence is prepended if truncation happened within a code block.
-
-        This is used after character-based truncation to check if we need to add
-        a code fence to the beginning of the truncated text.
-
-        Uses the same position-based approach as _handle_code_block_truncation.
-
-        Args:
-            original_text: The original full text before truncation.
-            truncated_text: The truncated text.
-
-        Returns:
-            Truncated text with code fence prepended if needed.
-        """
-        if not truncated_text or truncated_text == original_text:
-            return truncated_text
-
-        # Find where the truncated text starts in the original
-        truncation_pos = original_text.rfind(truncated_text)
-        if truncation_pos == -1:
-            truncation_pos = max(0, len(original_text) - len(truncated_text))
-
-        # Get code block info using markdown-it parser
-        code_blocks = self._get_code_block_info(original_text)
-
-        # Find which code block (if any) contains the truncation point
-        for block in code_blocks:
-            if block.start_pos < truncation_pos < block.end_pos:
-                # Truncated within this code block
-                # Simple check: did truncation remove the fence?
-                if truncation_pos > block.start_pos:
-                    fence = self._build_code_block_prefix(block)
-                    if fence and not truncated_text.startswith(fence):
-                        return fence + truncated_text
-                # Fence still on screen or already prepended
-                return truncated_text
-
-        return truncated_text
+        terminal_width = console.size.width if console else None  # None when no console given
+        return self._buffer.truncate_text(
+            text,
+            terminal_height=terminal_height,
+            terminal_width=terminal_width,
+            add_closing_fence=False,
+            target_ratio=1.0,
+        )
 
     def _ensure_table_header_if_needed(self, original_text: str, truncated_text: str) -> str:
-        """Ensure table header is prepended if truncation happened within a table body.
-
-        When truncating within a table body, we need to preserve the header row(s)
-        so the remaining table rows have context and meaning.
-
-        Uses the same position-based approach as code block handling.
-
-        Args:
-            original_text: The original full text before truncation.
-            truncated_text: The truncated text.
-
-        Returns:
-            Truncated text with table header prepended if needed.
-        """
+        """Ensure table header is prepended if truncation removed it."""
         if not truncated_text or truncated_text == original_text:
             return truncated_text
 
-        # Find where the truncated text starts in the original
         truncation_pos = original_text.rfind(truncated_text)
         if truncation_pos == -1:
             truncation_pos = max(0, len(original_text) - len(truncated_text))
 
-        # Get table info using markdown-it parser
-        tables = self._get_table_info(original_text)
+        tables = self._buffer._find_tables(original_text)
+        if not tables:
+            return truncated_text
 
-        # Find which table (if any) contains the truncation point in tbody
+        lines = original_text.split("\n")
         for table in tables:
-            # Check if truncation happened within tbody (after thead)
-            if table.thead_end_pos <= truncation_pos < table.tbody_end_pos:
-                # Truncated within table body
-                # Simple check: did truncation remove the header?
-                # Use >= because if we truncate AT thead_end_pos, the header is already gone
-                if truncation_pos >= table.thead_end_pos:
-                    # Header completely scrolled off - prepend it
-                    header_text = "\n".join(table.header_lines) + "\n"
-                    truncated_lines = truncated_text.splitlines()
-                    header_lines = [line.rstrip() for line in table.header_lines]
-                    if len(truncated_lines) >= len(header_lines):
-                        candidate = [line.rstrip() for line in truncated_lines[: len(header_lines)]]
-                        if candidate == header_lines:
-                            return truncated_text
-                    return header_text + truncated_text
-                else:
-                    # Header still on screen
-                    return truncated_text
-
-        return truncated_text
-
-    def _is_primary_content_table(self, text: str) -> bool:
-        """Check if the document's primary content is a table.
-
-        This heuristic determines if we should use "show first page" truncation
-        (for tables) vs "show most recent" truncation (for streaming text).
-
-        Detection Logic:
-        ----------------
-        A document is considered "table-dominant" if MORE THAN 50% of its lines
-        are part of table structures.
-
-        Why 50%?
-        - Below 50%: Content is mostly text with some tables mixed in.
-                     Show most recent (standard streaming behavior).
-        - Above 50%: Content is primarily tabular data.
-                     Show beginning so users see the header defining the columns.
-
-        Examples:
-        ---------
-        TABLE-DOMINANT (>50%, will show first page):
-          | Name | Size |
-          |------|------|
-          | a    | 1    |
-          | b    | 2    |
-          | c    | 3    |
-          (5 lines, 5 table lines = 100% table)
-
-        NOT TABLE-DOMINANT (≤50%, will show most recent):
-          Here's a file listing:
-          | Name | Size |
-          |------|------|
-          | a    | 1    |
-          This shows the files in the directory.
-          (6 lines, 3 table lines = 50% table)
-
-        Args:
-            text: The full markdown text.
-
-        Returns:
-            True if document is primarily a table (table content > 50% of lines).
-        """
-        if not text.strip():
-            return False
-
-        tokens = self.parser.parse(text)
-        lines = text.split("\n")
-        total_lines = len(lines)
-
-        if total_lines == 0:
-            return False
-
-        # Count lines that are part of tables
-        table_lines = 0
-        for token in tokens:
-            if token.type == "table_open" and token.map:
-                start_line = token.map[0]
-                end_line = token.map[1]
-                table_lines += end_line - start_line
-
-        # If more than 50% of content is table, consider it table-dominant
-        return table_lines > (total_lines * 0.5)
-
-    def _measure_rendered_height(self, text: str, console: Console, code_theme: str) -> int:
-        """Measure how many terminal lines the rendered markdown takes.
-
-        Args:
-            text: The markdown text to measure.
-            console: Rich Console for rendering.
-            code_theme: Code syntax highlighting theme.
-
-        Returns:
-            Height in terminal lines.
-        """
-        if not text.strip():
-            return 0
-
-        md = Markdown(text, code_theme=code_theme)
-        options = console.options
-        lines = console.render_lines(md, options)
-        _, height = Segment.get_shape(lines)
-
-        return height
-
-    def _truncate_streaming(
-        self,
-        text: str,
-        terminal_height: int,
-        console: Console,
-        code_theme: str = "monokai",
-    ) -> str:
-        """Fast truncation optimized for streaming mode.
-
-        This method uses a line-based rolling window approach that avoids
-        redundant parsing and rendering. It's designed for the common case
-        where content is continuously growing and we want to show the most
-        recent portion.
-
-        Key optimizations:
-        1. Incremental: Only processes new content since last call
-        2. Line-based: Uses fast line counting instead of full renders
-        3. Single-pass: Only one render at the end to verify fit
-
-        Args:
-            text: The markdown text to truncate.
-            terminal_height: Height of the terminal in lines.
-            console: Rich Console for rendering.
-            code_theme: Code syntax highlighting theme.
-
-        Returns:
-            Truncated text showing the most recent content.
-        """
-        if not text:
-            return text
-
-        target_height = int(terminal_height * self.target_height_ratio)
-
-        # Check if we can use cached result
-        if (
-            self._last_full_text is not None
-            and text.startswith(self._last_truncated_text or "")
-            and self._last_terminal_height == terminal_height
-        ):
-            # Text only grew at the end, we can be more efficient
-            # But for simplicity in first version, just proceed with normal flow
-            pass
-
-        # Fast line-based estimation
-        # Strategy: Keep approximately 2x target lines as a generous buffer
-        # This avoids most cases where we need multiple render passes
-        lines = text.split('\n')
-        total_lines = len(lines)
-
-        # Rough heuristic: markdown usually expands by 1.5-2x due to formatting
-        # So to get target_height rendered lines, keep ~target_height raw lines
-        estimated_raw_lines = int(target_height * 1.2)  # Conservative estimate
-
-        if total_lines <= estimated_raw_lines:
-            # Likely fits, just verify with single render
-            height = self._measure_rendered_height(text, console, code_theme)
-            if height <= terminal_height:
-                self._update_cache(text, text, terminal_height)
-                return text
-            # Didn't fit, fall through to truncation
-
-        # Keep last N lines as initial guess
-        keep_lines = min(estimated_raw_lines, total_lines)
-        truncated_lines = lines[-keep_lines:]
-        truncated_text = '\n'.join(truncated_lines)
-
-        # Check for incomplete structures and fix them
-        truncated_text = self._fix_incomplete_structures(text, truncated_text)
-
-        # Verify it fits (single render)
-        height = self._measure_rendered_height(truncated_text, console, code_theme)
-
-        # If it doesn't fit, trim more aggressively
-        if height > terminal_height:
-            # Binary search on line count (much faster than character-based)
-            left, right = 0, keep_lines
-            best_lines = None
-
-            while left <= right:
-                mid = (left + right) // 2
-                test_lines = lines[-mid:] if mid > 0 else []
-                test_text = '\n'.join(test_lines)
-
-                if not test_text.strip():
-                    right = mid - 1
-                    continue
-
-                # Fix structures before measuring
-                test_text = self._fix_incomplete_structures(text, test_text)
-                test_height = self._measure_rendered_height(test_text, console, code_theme)
-
-                if test_height <= terminal_height:
-                    best_lines = mid
-                    left = mid + 1  # Try to keep more
-                else:
-                    right = mid - 1  # Need to keep less
-
-            if best_lines is not None and best_lines > 0:
-                truncated_lines = lines[-best_lines:]
-                truncated_text = '\n'.join(truncated_lines)
-                truncated_text = self._fix_incomplete_structures(text, truncated_text)
-            else:
-                # Extreme case: even one line is too much
-                # Keep last 20% of text as fallback
-                fallback_pos = int(len(text) * 0.8)
-                truncated_text = text[fallback_pos:]
-                truncated_text = self._fix_incomplete_structures(text, truncated_text)
-
-        self._update_cache(text, truncated_text, terminal_height)
-        return truncated_text
-
-    def _fix_incomplete_structures(self, original_text: str, truncated_text: str) -> str:
-        """Fix incomplete markdown structures after line-based truncation.
+            if not (table.start_pos < truncation_pos < table.end_pos):
+                continue
 
-        Handles:
-        - Code blocks missing opening fence
-        - Tables missing headers
+            table_start_line = original_text[: table.start_pos].count("\n")
+            data_start_line = table_start_line + len(table.header_lines)
+            data_start_pos = sum(len(line) + 1 for line in lines[:data_start_line])
 
-        Args:
-            original_text: The original full text.
-            truncated_text: The truncated text that may have incomplete structures.
+            if truncation_pos >= data_start_pos:
+                header_text = "\n".join(table.header_lines) + "\n"
+                if truncated_text.startswith(header_text):
+                    return truncated_text
+                truncated_lines = truncated_text.splitlines()
+                header_lines = [line.rstrip() for line in table.header_lines]
+                if len(truncated_lines) >= len(header_lines):
+                    candidate = [
+                        line.rstrip() for line in truncated_lines[: len(header_lines)]
+                    ]
+                    if candidate == header_lines:
+                        return truncated_text
+                return header_text + truncated_text
 
-        Returns:
-            Fixed truncated text with structures completed.
-        """
-        if not truncated_text or truncated_text == original_text:
             return truncated_text
 
-        original_fragment = truncated_text
-
-        # Find where the truncated text starts in the original
-        truncation_pos = original_text.rfind(original_fragment)
-        if truncation_pos == -1:
-            truncation_pos = max(0, len(original_text) - len(original_fragment))
-
-        code_blocks = self._get_code_block_info(original_text)
-        active_block = None
-        for block in code_blocks:
-            if block.start_pos <= truncation_pos < block.end_pos:
-                active_block = block
-
-        if active_block:
-            fence = self._build_code_block_prefix(active_block)
-            if fence and not truncated_text.startswith(fence):
-                truncated_text = fence + truncated_text
-
-        # Check for incomplete tables when not inside a code block
-        if active_block is None and '|' in truncated_text:
-            tables = self._get_table_info(original_text)
-            for table in tables:
-                if table.thead_end_pos <= truncation_pos < table.tbody_end_pos:
-                    # We're in the table body, header was removed
-                    header_text = "\n".join(table.header_lines) + "\n"
-                    if not truncated_text.startswith(header_text):
-                        truncated_text = header_text + truncated_text
-                    break
-
         return truncated_text
 
-    def _update_cache(self, full_text: str, truncated_text: str, terminal_height: int) -> None:
-        """Update the cache for streaming mode.
 
-        Args:
-            full_text: The full text that was truncated.
-            truncated_text: The resulting truncated text.
-            terminal_height: The terminal height used.
-        """
-        self._last_full_text = full_text
-        self._last_truncated_text = truncated_text
-        self._last_terminal_height = terminal_height
-
-    def _flatten_tokens(self, tokens: Iterable[Token]) -> Iterable[Token]:
-        """Flatten nested token structure.
-
-        Args:
-            tokens: Iterable of Token objects from markdown-it.
-
-        Yields:
-            Flattened tokens.
-        """
-        for token in tokens:
-            is_fence = token.type == "fence"
-            is_image = token.tag == "img"
-            if token.children and not (is_image or is_fence):
-                yield from self._flatten_tokens(token.children)
-            else:
-                yield token
+__all__ = ["MarkdownTruncator"]
diff --git a/src/fast_agent/ui/stream_segments.py b/src/fast_agent/ui/stream_segments.py
new file mode 100644
index 000000000..698c19665
--- /dev/null
+++ b/src/fast_agent/ui/stream_segments.py
@@ -0,0 +1,484 @@
+"""Segmented streaming buffer for assistant output and tool events."""
+
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, Any, Literal
+
+from fast_agent.utils.reasoning_stream_parser import ReasoningSegment, ReasoningStreamParser
+
+if TYPE_CHECKING:
+    from fast_agent.llm.stream_types import StreamChunk
+
+SegmentKind = Literal["markdown", "plain", "reasoning", "tool"]
+
+
+@dataclass
+class StreamSegment:
+    """One run of streamed output that is rendered with a single mode (``kind``)."""
+
+    kind: SegmentKind
+    text: str
+    tool_name: str | None = None
+    tool_use_id: str | None = None
+
+    def append(self, text: str) -> None:
+        """Extend this segment's text in place."""
+        self.text = self.text + text
+
+    def copy_with_text(self, text: str) -> "StreamSegment":
+        """Return a new segment with ``text`` and this segment's kind and tool fields."""
+        clone = StreamSegment(self.kind, text)
+        clone.tool_name = self.tool_name
+        clone.tool_use_id = self.tool_use_id
+        return clone
+
+
+class LiteralNewlineDecoder:
+    """Turn literal backslash-n escapes into newlines across chunk boundaries."""
+
+    def __init__(self) -> None:
+        self._pending_backslashes = ""  # trailing run carried over to the next decode()
+
+    def decode(self, chunk: str) -> str:
+        """Return ``chunk`` with each literal ``\\n`` escape replaced by a real newline.
+
+        A backslash run that ends the input is held back and prepended to the next
+        call, because the character that follows decides whether it is an escape.
+        """
+        if not chunk:
+            return chunk
+
+        text = self._pending_backslashes + chunk
+        self._pending_backslashes = ""
+
+        pieces: list[str] = []
+        end = len(text)
+        pos = 0
+        while pos < end:
+            if text[pos] != "\\":
+                pieces.append(text[pos])
+                pos += 1
+                continue
+
+            run_start = pos
+            while pos < end and text[pos] == "\\":
+                pos += 1
+            run = pos - run_start
+
+            if pos >= end:
+                self._pending_backslashes = "\\" * run
+                break
+            if text[pos] == "n" and run % 2 == 1:
+                # Odd run before "n": the final backslash escapes the newline.
+                pieces.append("\\" * (run - 1))
+                pieces.append("\n")
+                pos += 1
+            else:
+                pieces.append("\\" * run)
+
+        return "".join(pieces)
+
+
+class StreamSegmentBuffer:
+    """Collect streaming content while keeping markdown/table boundaries intact."""
+
+    def __init__(self, base_kind: SegmentKind) -> None:
+        if base_kind not in ("markdown", "plain"):
+            raise ValueError("base_kind must be 'markdown' or 'plain'")
+        self._base_kind = base_kind
+        self._segments: list[StreamSegment] = []
+        self._pending_table_row = ""
+        self._reasoning_separator_pending = False
+        self._plain_decoder = LiteralNewlineDecoder()
+        self._reasoning_decoder = LiteralNewlineDecoder()
+
+    @property
+    def segments(self) -> list[StreamSegment]:
+        """The live segment list (not a copy)."""
+        return self._segments
+
+    @property
+    def pending_table_row(self) -> str:
+        """Markdown table-row text held back until its line is complete."""
+        return self._pending_table_row
+
+    def mark_reasoning_boundary(self) -> None:
+        """Request a separating gap before the next non-reasoning output."""
+        self._reasoning_separator_pending = True
+
+    def ensure_separator(self) -> None:
+        """Insert a newline before switching into a plain segment if needed."""
+        if self._pending_table_row:
+            return
+        if not self._segments:
+            return
+        if self._segments[-1].text.endswith("\n"):
+            return
+        self._append_to_segment(self._base_kind, "\n")
+
+    def append_content(self, text: str) -> bool:
+        """Append answer text in the base kind; False if it was empty or only buffered."""
+        if self._base_kind == "plain":
+            return self._append_plain(text, kind="plain", decoder=self._plain_decoder)
+        return self._append_markdown(text)
+
+    def append_reasoning(self, text: str) -> bool:
+        """Append reasoning text; False if nothing was added to a segment."""
+        return self._append_plain(text, kind="reasoning", decoder=self._reasoning_decoder)
+
+    def append_segment(self, segment: StreamSegment) -> None:
+        """Add a pre-built segment at the end of the list."""
+        self._segments.append(segment)
+
+    def consume_reasoning_gap(self) -> None:
+        """Emit the pending reasoning separator, if one is due, as plain text."""
+        gap = self._consume_reasoning_gap()
+        if gap:
+            self._append_to_segment("plain", gap)
+
+    def _append_plain(
+        self, text: str, *, kind: SegmentKind, decoder: LiteralNewlineDecoder
+    ) -> bool:
+        """Decode literal newlines in ``text`` and append the result to a ``kind`` segment."""
+        if not text:
+            return False
+        processed = decoder.decode(text)
+        if not processed:
+            return False
+        if kind != "reasoning":
+            self.consume_reasoning_gap()
+        self._append_to_segment(kind, processed)
+        return True
+
+    def _append_markdown(self, text: str) -> bool:
+        """Append markdown text, holding back table rows until their line is complete.
+
+        Text belonging to an unterminated ``|`` row is parked in ``pending_table_row``
+        (returns False); it is released into the segment once a newline arrives.
+        """
+        if not text:
+            return False
+        self.consume_reasoning_gap()
+
+        if self._pending_table_row:
+            if "\n" not in text:
+                self._pending_table_row += text
+                return False
+            text = self._pending_table_row + text
+            self._pending_table_row = ""
+
+        last_segment = self._last_segment(kind="markdown")
+        text_so_far = last_segment.text if last_segment else ""
+        ends_with_newline = text_so_far.endswith("\n")
+        last_line = "" if ends_with_newline else (text_so_far.split("\n")[-1] if text_so_far else "")
+        currently_in_table = bool(last_segment) and last_line.strip().startswith("|")
+        starts_table_row = text.lstrip().startswith("|")
+
+        if "\n" not in text and (currently_in_table or starts_table_row):
+            pending_seed = ""
+            if currently_in_table and last_segment:
+                # Pull the partial row back out of the segment so it is buffered whole.
+                split_index = text_so_far.rfind("\n")
+                if split_index == -1:
+                    pending_seed = text_so_far
+                    last_segment.text = ""
+                else:
+                    pending_seed = text_so_far[split_index + 1 :]
+                    last_segment.text = text_so_far[: split_index + 1]
+                if last_segment.text == "":
+                    self._segments.pop()
+            self._pending_table_row = pending_seed + text
+            return False
+
+        # No pending row can remain here (merged above or already returned): append directly.
+        self._append_to_segment("markdown", text)
+        return True
+
+    def _consume_reasoning_gap(self) -> str:
+        """Clear the pending reasoning boundary and return the newline gap it requires."""
+        if not self._reasoning_separator_pending:
+            return ""
+        # Every path below resolves the boundary, whether or not a gap is emitted.
+        self._reasoning_separator_pending = False
+        if self._pending_table_row or not self._segments:
+            return ""
+
+        last_text = self._segments[-1].text
+        if not last_text:
+            return ""
+        # Do not push blank lines after a table row that has not been terminated.
+        if last_text.split("\n")[-1].strip().startswith("|"):
+            return ""
+
+        # Top up the trailing newlines so the existing text ends with a blank line.
+        if last_text.endswith("\n\n"):
+            return ""
+        return "\n" if last_text.endswith("\n") else "\n\n"
+
+    def _append_to_segment(self, kind: SegmentKind, text: str) -> None:
+        """Extend the last segment when it has ``kind``; otherwise start a new segment."""
+        if not text:
+            return
+        last_segment = self._last_segment(kind=kind)
+        if last_segment is not None:
+            last_segment.append(text)
+        else:
+            self._segments.append(StreamSegment(kind=kind, text=text))
+
+    def _last_segment(self, *, kind: SegmentKind) -> StreamSegment | None:
+        """Return the final segment if its kind matches, else ``None``."""
+        if not self._segments:
+            return None
+        last_segment = self._segments[-1]
+        if last_segment.kind != kind:
+            return None
+        return last_segment
+
+
+@dataclass
+class ToolStreamState:
+    """Accumulated arguments and bookkeeping for one streamed tool call."""
+
+    tool_use_id: str
+    tool_name: str
+    segment_index: int
+    raw_text: str = ""
+    display_text: str = ""
+    completed: bool = False
+    decoder: LiteralNewlineDecoder = field(default_factory=LiteralNewlineDecoder)
+
+    def append(self, chunk: str) -> None:
+        """Record a fragment: verbatim in ``raw_text``, newline-decoded in ``display_text``."""
+        if chunk:
+            self.raw_text += chunk
+            self.display_text += self.decoder.decode(chunk)
+
+    def render_text(self, *, prefix: str, pretty: bool) -> str:
+        """Return the "Calling <tool>" header plus arguments (JSON-indented if ``pretty``)."""
+        header = f"{prefix} Calling {self.tool_name}\n"
+        if not pretty:
+            return header + self.display_text
+        body = self.display_text
+        if self.raw_text.strip():
+            formatted = _format_json(self.raw_text)
+            if formatted is not None:
+                body = formatted
+        # Pretty output always ends with a newline, even when there are no arguments.
+        if not body.endswith("\n"):
+            body += "\n"
+        return header + body
+
+
+def _format_json(raw_text: str) -> str | None:
+    """Return ``raw_text`` re-serialized as indented JSON, or ``None`` if it does not parse."""
+    if not raw_text:
+        return None
+    try:
+        return json.dumps(json.loads(raw_text), indent=2, ensure_ascii=True)
+    except (ValueError, RecursionError):  # malformed or pathologically nested input
+        return None
+
+
+class StreamSegmentAssembler:
+    """Route streamed chunks into markdown/reasoning/tool segments."""
+
+    def __init__(self, *, base_kind: SegmentKind, tool_prefix: str) -> None:
+        self._buffer = StreamSegmentBuffer(base_kind)
+        self._reasoning_parser = ReasoningStreamParser()
+        self._reasoning_active = False
+        self._tool_prefix = tool_prefix
+        self._tool_states: dict[str, ToolStreamState] = {}
+        self._fallback_tool_counter = 0
+        self._last_tool_id: str | None = None
+
+    @property
+    def segments(self) -> list[StreamSegment]:
+        return self._buffer.segments
+
+    @property
+    def pending_table_row(self) -> str:
+        return self._buffer.pending_table_row
+
+    def handle_stream_chunk(self, chunk: StreamChunk) -> bool:
+        """Route a typed chunk to the reasoning or content stream."""
+        if not chunk.text:
+            return False
+        if not chunk.is_reasoning and self._process_reasoning_tags(chunk.text):
+            return True
+
+        if chunk.is_reasoning:
+            if not self._reasoning_active:
+                self._buffer.ensure_separator()
+                self._reasoning_active = True
+            return self._buffer.append_reasoning(chunk.text)
+
+        if self._reasoning_active:
+            self._reasoning_active = False
+            self._buffer.mark_reasoning_boundary()
+
+        return self._buffer.append_content(chunk.text)
+
+    def handle_text(self, chunk: str) -> bool:
+        """Handle untyped text: inline think tags are parsed, anything else is content."""
+        if not chunk:
+            return False
+        if self._process_reasoning_tags(chunk):
+            return True
+        if self._reasoning_active:
+            self._reasoning_active = False
+            self._buffer.mark_reasoning_boundary()
+        return self._buffer.append_content(chunk)
+
+    def flush(self) -> bool:
+        """Drain the reasoning parser if it is still inside a think block."""
+        if not self._reasoning_parser.in_think:
+            return False
+        return self._handle_reasoning_segments(self._reasoning_parser.flush())
+
+    def handle_tool_event(self, event_type: str, info: dict[str, Any] | None) -> bool:
+        """Apply a tool "start"/"delta"/"stop" event; returns False for any other type."""
+        given_name = str(info.get("tool_name") or "") if info else ""
+        tool_name = given_name or "tool"
+        tool_use_id = str(info.get("tool_use_id")) if info and info.get("tool_use_id") else ""
+
+        if not tool_use_id:
+            # No id: "start" opens a fresh tool, other events attach to the latest one.
+            if event_type == "start":
+                tool_use_id = self._fallback_tool_id()
+            else:
+                tool_use_id = self._last_tool_id or self._fallback_tool_id()
+        self._last_tool_id = tool_use_id
+
+        state = self._tool_states.get(tool_use_id)
+        # Rename only from an explicitly supplied name: the "tool" placeholder used for
+        # events without one must not overwrite a real name learned earlier.
+        if state is not None and given_name and state.tool_name != given_name:
+            state.tool_name = given_name
+
+        if event_type not in ("start", "delta", "stop"):
+            return False
+        if state is None:
+            state = self._start_tool(tool_use_id, tool_name)
+
+        if event_type == "start":
+            state.completed = False
+            self._update_tool_segment(state, pretty=False)
+        elif event_type == "delta":
+            state.append(str(info.get("chunk") or "") if info else "")
+            self._update_tool_segment(state, pretty=False)
+        else:
+            # "stop": render the final form and drop the per-tool state.
+            state.completed = True
+            self._update_tool_segment(state, pretty=True)
+            self._tool_states.pop(tool_use_id, None)
+            if self._last_tool_id == tool_use_id:
+                self._last_tool_id = None
+        return True
+
+    def compact(self, window_segments: list[StreamSegment]) -> None:
+        """Drop stored segments that precede the given tail window.
+
+        Skipped while any tool call is still streaming (tool state refers to its segment
+        by list index, which deleting would shift). If the window's first entry is not the
+        stored object, its text replaces the stored segment's text before the cut.
+        """
+        if not window_segments or self._tool_states:
+            return
+        segments = self._buffer.segments
+        if not segments:
+            return
+        filtered = [(idx, segment) for idx, segment in enumerate(segments) if segment.text]
+        if not filtered:
+            return
+        last_window = window_segments[-1]
+        last_pos = next(
+            (pos for pos, (_, segment) in enumerate(filtered) if segment is last_window),
+            None,
+        )
+        if last_pos is None:
+            last_pos = len(filtered) - 1
+            last_index = filtered[last_pos][0]
+            last_segment = segments[last_index]
+            if (
+                last_segment.kind != last_window.kind
+                or last_segment.tool_use_id != last_window.tool_use_id
+                or not last_segment.text.endswith(last_window.text)
+            ):
+                return
+        start_pos = last_pos - (len(window_segments) - 1)
+        if start_pos <= 0:
+            return
+        start_index = filtered[start_pos][0]
+        first_window = window_segments[0]
+        original_first = segments[start_index]
+        if first_window is not original_first:
+            original_first.text = first_window.text
+        del segments[:start_index]
+
+    def _start_tool(self, tool_use_id: str, tool_name: str) -> ToolStreamState:
+        """Open an empty tool segment at the end of the buffer and register its state."""
+        self._buffer.consume_reasoning_gap()
+        self._buffer.ensure_separator()
+        segment = StreamSegment(kind="tool", text="", tool_name=tool_name, tool_use_id=tool_use_id)
+        self._buffer.append_segment(segment)
+        state = ToolStreamState(
+            tool_use_id=tool_use_id,
+            tool_name=tool_name,
+            segment_index=len(self._buffer.segments) - 1,
+        )
+        self._tool_states[tool_use_id] = state
+        return state
+
+    def _update_tool_segment(self, state: ToolStreamState, *, pretty: bool) -> None:
+        segment = self._buffer.segments[state.segment_index]
+        segment.text = state.render_text(prefix=self._tool_prefix, pretty=pretty)
+
+    def _fallback_tool_id(self) -> str:
+        self._fallback_tool_counter += 1
+        return f"tool-{self._fallback_tool_counter}"
+
+    def _process_reasoning_tags(self, chunk: str) -> bool:
+        """Feed ``chunk`` to the think-tag parser when relevant; True means it was consumed."""
+        parser = self._reasoning_parser
+        if not (parser.in_think or "<think>" in chunk or "</think>" in chunk):
+            return False
+        # The parser now owns the text: report it consumed even when nothing was emitted,
+        # otherwise callers fall through and append the raw chunk a second time.
+        self._handle_reasoning_segments(parser.feed(chunk))
+        return True
+
+    def _handle_reasoning_segments(self, segments: list[ReasoningSegment]) -> bool:
+        """Append parsed think/content pieces in order; True if any append returned True."""
+        if not segments:
+            return False
+        handled = False
+        saw_content = False
+
+        for segment in segments:
+            if segment.is_thinking:
+                if not self._reasoning_active:
+                    self._buffer.ensure_separator()
+                    self._reasoning_active = True
+                handled = self._buffer.append_reasoning(segment.text) or handled
+            else:
+                if self._reasoning_active:
+                    self._reasoning_active = False
+                    self._buffer.mark_reasoning_boundary()
+                saw_content = True
+                handled = self._buffer.append_content(segment.text) or handled
+
+        if self._reasoning_active and not (self._reasoning_parser.in_think or saw_content):
+            self._reasoning_active = False
+            self._buffer.mark_reasoning_boundary()
+
+        return handled
+
+
+__all__ = [
+    "SegmentKind",
+    "StreamSegment",
+    "StreamSegmentAssembler",
+    "StreamSegmentBuffer",
+]
diff --git a/src/fast_agent/ui/stream_viewport.py b/src/fast_agent/ui/stream_viewport.py
new file mode 100644
index 000000000..31937212a
--- /dev/null
+++ b/src/fast_agent/ui/stream_viewport.py
@@ -0,0 +1,121 @@
+"""Viewport calculations for streaming segment windows."""
+
+from __future__ import annotations
+
+import math
+from typing import TYPE_CHECKING, Iterable
+
+if TYPE_CHECKING:
+    from rich.console import Console
+
+    from fast_agent.ui.markdown_truncator import MarkdownTruncator
+    from fast_agent.ui.plain_text_truncator import PlainTextTruncator
+    from fast_agent.ui.stream_segments import StreamSegment
+
+
+def estimate_plain_text_height(text: str, width: int) -> int:
+    """Return the number of terminal rows ``text`` needs when wrapped at ``width`` columns."""
+    if not text:
+        return 0
+    columns = max(width, 1)
+    # Tabs count at their expanded width; every line, even an empty one, takes a row.
+    return sum(
+        max(1, math.ceil(len(row.expandtabs()) / columns))
+        for row in text.split("\n")
+    )
+
+
+class StreamViewport:
+    """Select a tail window of segments that fit within the viewport budget.
+
+    Markdown and reasoning segments are measured and trimmed with the markdown
+    truncator; every other kind uses the plain-text estimate and truncator.
+    """
+
+    def __init__(
+        self,
+        *,
+        markdown_truncator: MarkdownTruncator,
+        plain_truncator: PlainTextTruncator,
+    ) -> None:
+        """Keep the two truncators used to size and trim segments."""
+        self._markdown_truncator = markdown_truncator
+        self._plain_truncator = plain_truncator
+
+    def slice_segments(
+        self,
+        segments: Iterable[StreamSegment],
+        *,
+        terminal_height: int,
+        console: Console,
+        target_ratio: float,
+    ) -> list[StreamSegment]:
+        """Return the trailing segments that fit in ``terminal_height * target_ratio`` rows.
+
+        Empty segments are dropped. When everything fits the full list is returned;
+        otherwise whole segments are kept from the end and the first one that
+        overflows is truncated to the rows that remain.
+        """
+        # Without a positive height there is no budget; return everything unfiltered.
+        if terminal_height <= 0:
+            return list(segments)
+
+        width = max(1, console.size.width)
+        visible = [segment for segment in segments if segment.text]
+        if not visible:
+            return []
+
+        budget = max(1, int(terminal_height * target_ratio))
+        heights = [self._segment_height(seg, console=console, width=width) for seg in visible]
+        # Fast path: nothing needs trimming.
+        if sum(heights) <= budget:
+            return visible
+
+        tail: list[StreamSegment] = []
+        index = len(visible) - 1
+        while index >= 0 and budget > 0:
+            segment, height = visible[index], heights[index]
+            if height > budget:
+                # Partial fit: keep only what the remaining rows can show, then stop.
+                trimmed = self._truncate_segment(
+                    segment, terminal_height=budget, terminal_width=width, console=console
+                )
+                if trimmed.text:
+                    tail.append(trimmed)
+                break
+            tail.append(segment)
+            budget -= height
+            index -= 1
+
+        return tail[::-1]
+
+    def _segment_height(self, segment: StreamSegment, *, console: Console, width: int) -> int:
+        """Measure ``segment`` in rows: rendered for markdown/reasoning, estimated otherwise."""
+        if segment.kind not in ("markdown", "reasoning"):
+            return estimate_plain_text_height(segment.text, width)
+        measure = self._markdown_truncator.measure_rendered_height
+        return measure(segment.text, console, code_theme="monokai")
+
+    def _truncate_segment(
+        self,
+        segment: StreamSegment,
+        *,
+        terminal_height: int,
+        terminal_width: int,
+        console: Console,
+    ) -> StreamSegment:
+        """Return a copy of ``segment`` whose text is cut to fit ``terminal_height`` rows."""
+        if terminal_height <= 0 or not segment.text:
+            return segment.copy_with_text("")
+        if segment.kind not in ("markdown", "reasoning"):
+            shortened = self._plain_truncator.truncate(
+                segment.text, terminal_height=terminal_height, terminal_width=terminal_width
+            )
+            return segment.copy_with_text(shortened)
+        shortened = self._markdown_truncator.truncate_to_height(
+            segment.text, terminal_height=terminal_height, console=console
+        )
+        return segment.copy_with_text(shortened)
+
+
+__all__ = ["StreamViewport", "estimate_plain_text_height"]
diff --git a/src/fast_agent/ui/streaming.py b/src/fast_agent/ui/streaming.py
index c30e4833e..0c90913b0 100644
--- a/src/fast_agent/ui/streaming.py
+++ b/src/fast_agent/ui/streaming.py
@@ -1,8 +1,8 @@
 from __future__ import annotations
 
 import asyncio
-import math
 import time
+from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any, Protocol
 
 from rich.console import Group
@@ -16,21 +16,30 @@
 from fast_agent.ui.markdown_helpers import prepare_markdown_content
 from fast_agent.ui.markdown_truncator import MarkdownTruncator
 from fast_agent.ui.plain_text_truncator import PlainTextTruncator
-from fast_agent.utils.reasoning_stream_parser import ReasoningSegment, ReasoningStreamParser
+from fast_agent.ui.stream_segments import StreamSegmentAssembler
+from fast_agent.ui.stream_viewport import StreamViewport, estimate_plain_text_height
 
 if TYPE_CHECKING:
+    from rich.console import RenderableType
+
     from fast_agent.mcp.prompt_message_extended import PromptMessageExtended
     from fast_agent.ui.console_display import ConsoleDisplay
 
 
 logger = get_logger(__name__)
 
-MARKDOWN_STREAM_TARGET_RATIO = 0.75
+MARKDOWN_STREAM_TARGET_RATIO = 0.85
 MARKDOWN_STREAM_REFRESH_PER_SECOND = 4
-MARKDOWN_STREAM_HEIGHT_FUDGE = 1
-PLAIN_STREAM_TARGET_RATIO = 0.9
+MARKDOWN_STREAM_HEIGHT_FUDGE = 2
+PLAIN_STREAM_TARGET_RATIO = 0.92
 PLAIN_STREAM_REFRESH_PER_SECOND = 20
-PLAIN_STREAM_HEIGHT_FUDGE = 1
+PLAIN_STREAM_HEIGHT_FUDGE = 2
+
+
+@dataclass(frozen=True)
+class _ToolStreamEvent:
+    event_type: str  # event name; presumably "start"/"delta"/"stop" — TODO confirm with producer
+    info: dict[str, Any] | None  # optional payload accompanying the event
 
 
 class NullStreamingHandle:
@@ -72,16 +81,29 @@ def __init__(
         self._highlight_index = highlight_index
         self._max_item_length = max_item_length
         self._use_plain_text = use_plain_text
-        self._preferred_plain_text = use_plain_text
-        self._plain_text_override_count = 0
         self._header_left = header_left
         self._header_right = header_right
         self._progress_display = progress_display
         self._progress_paused = False
-        self._buffer: list[str] = []
         self._plain_text_style: str | None = None
-        self._convert_literal_newlines = False
-        self._pending_literal_backslashes = ""
+        base_kind = "plain" if use_plain_text else "markdown"
+        self._render_reasoning_markdown = not use_plain_text
+        self._segment_assembler = StreamSegmentAssembler(
+            base_kind=base_kind,
+            tool_prefix="→",
+        )
+        self._markdown_truncator = MarkdownTruncator(target_height_ratio=1.0)
+        self._plain_truncator = PlainTextTruncator(target_height_ratio=1.0)
+        self._viewport = StreamViewport(
+            markdown_truncator=self._markdown_truncator,
+            plain_truncator=self._plain_truncator,
+        )
+        self._stream_target_ratio = (
+            PLAIN_STREAM_TARGET_RATIO if use_plain_text else MARKDOWN_STREAM_TARGET_RATIO
+        )
+        self._height_fudge = (
+            PLAIN_STREAM_HEIGHT_FUDGE if use_plain_text else MARKDOWN_STREAM_HEIGHT_FUDGE
+        )
         initial_renderable = (
             Text("", style=self._plain_text_style or "") if self._use_plain_text else Markdown("")
         )
@@ -110,21 +132,7 @@ def __init__(
         self._live_started = False
         self._active = True
         self._finalized = False
-        self._in_table = False
-        self._pending_table_row = ""
-        self._truncator = MarkdownTruncator(target_height_ratio=MARKDOWN_STREAM_TARGET_RATIO)
-        self._plain_truncator = (
-            PlainTextTruncator(target_height_ratio=PLAIN_STREAM_TARGET_RATIO)
-            if self._use_plain_text
-            else None
-        )
         self._max_render_height = 0
-        self._reasoning_parser = ReasoningStreamParser()
-        self._styled_buffer: list[tuple[str, bool]] = []
-        self._has_reasoning = False
-        self._reasoning_active = False
-        self._tool_active = False
-        self._render_reasoning_stream = True
 
         if self._async_mode and self._loop and self._queue is not None:
             self._worker_task = self._loop.create_task(self._render_worker())
@@ -214,170 +222,14 @@ def _close_incomplete_code_blocks(self, text: str) -> str:
 
         return text
 
-    def _trim_to_displayable(self, text: str) -> str:
-        if not text:
-            return text
-
-        terminal_height = console.console.size.height - 1
-
-        if self._use_plain_text and self._plain_truncator:
-            terminal_width = console.console.size.width
-            return self._plain_truncator.truncate(
-                text,
-                terminal_height=terminal_height,
-                terminal_width=terminal_width,
-            )
-
-        return self._truncator.truncate(
-            text,
-            terminal_height=terminal_height,
-            console=console.console,
-            code_theme=self._display.code_style,
-            prefer_recent=True,
-        )
-
-    def _switch_to_plain_text(self, style: str | None = "dim") -> None:
-        if not self._use_plain_text:
-            self._use_plain_text = True
-        if not self._plain_truncator:
-            self._plain_truncator = PlainTextTruncator(
-                target_height_ratio=PLAIN_STREAM_TARGET_RATIO
-            )
-        self._plain_text_style = style
-        self._convert_literal_newlines = True
-
-    def _switch_to_markdown(self) -> None:
-        self._use_plain_text = False
-        self._plain_text_style = None
-        self._convert_literal_newlines = False
-        self._pending_literal_backslashes = ""
-
-    def _insert_mode_switch_newline(self) -> None:
-        if self._pending_table_row:
-            return
-        if not self._buffer:
-            return
-        if self._buffer[-1].endswith("\n"):
-            return
-        self._buffer.append("\n")
-        if self._has_reasoning:
-            self._styled_buffer.append(("\n", False))
-
-    def _set_use_plain_text(self, use_plain_text: bool, *, insert_newline: bool) -> None:
-        if use_plain_text == self._use_plain_text:
-            return
-        if insert_newline:
-            self._insert_mode_switch_newline()
-        if use_plain_text:
-            self._switch_to_plain_text(style=None)
-        else:
-            self._switch_to_markdown()
-
-    def _begin_plain_text_override(self) -> None:
-        self._plain_text_override_count += 1
-        if self._plain_text_override_count == 1:
-            self._set_use_plain_text(True, insert_newline=True)
-
-    def _end_plain_text_override(self) -> None:
-        if self._plain_text_override_count == 0:
-            return
-        self._plain_text_override_count -= 1
-        if self._plain_text_override_count == 0:
-            self._set_use_plain_text(self._preferred_plain_text, insert_newline=True)
-
-    def _begin_reasoning_mode(self) -> None:
-        if self._reasoning_active:
-            return
-        self._reasoning_active = True
-        if self._buffer and not self._styled_buffer:
-            self._styled_buffer.append(("".join(self._buffer), False))
-        self._has_reasoning = True
-        self._begin_plain_text_override()
-
-    def _end_reasoning_mode(self) -> None:
-        if not self._reasoning_active:
-            return
-        self._reasoning_active = False
-        self._end_plain_text_override()
-
-    def _begin_tool_mode(self) -> None:
-        if self._tool_active:
-            return
-        self._tool_active = True
-        self._begin_plain_text_override()
-
-    def _end_tool_mode(self) -> None:
-        if not self._tool_active:
-            return
-        self._tool_active = False
-        self._end_plain_text_override()
-
-    def _append_plain_text(self, text: str, *, is_reasoning: bool | None = None) -> bool:
-        processed = text
-        if self._convert_literal_newlines:
-            processed = self._decode_literal_newlines(processed)
-            if not processed:
-                return False
-        processed = self._wrap_plain_chunk(processed)
-        if self._pending_table_row:
-            self._buffer.append(self._pending_table_row)
-            if self._has_reasoning:
-                self._styled_buffer.append((self._pending_table_row, False))
-            self._pending_table_row = ""
-        self._buffer.append(processed)
-        if self._has_reasoning:
-            self._styled_buffer.append((processed, bool(is_reasoning)))
-        return True
-
-    def _append_text_in_current_mode(self, text: str) -> bool:
-        if not text:
-            return False
-        if self._use_plain_text:
-            return self._append_plain_text(text)
-
-        text_so_far = "".join(self._buffer)
-        ends_with_newline = text_so_far.endswith("\n")
-        lines = text_so_far.split("\n") if text_so_far else []
-        last_line = "" if ends_with_newline else (lines[-1] if lines else "")
-        currently_in_table = last_line.strip().startswith("|")
-        if self._pending_table_row:
-            if "\n" not in text:
-                self._pending_table_row += text
-                return False
-            text = self._pending_table_row + text
-            self._pending_table_row = ""
-
-        starts_table_row = text.lstrip().startswith("|")
-        if "\n" not in text and (currently_in_table or starts_table_row):
-            pending_seed = ""
-            if currently_in_table:
-                split_index = text_so_far.rfind("\n")
-                if split_index == -1:
-                    pending_seed = text_so_far
-                    self._buffer = []
-                else:
-                    pending_seed = text_so_far[split_index + 1 :]
-                    prefix = text_so_far[: split_index + 1]
-                    self._buffer = [prefix] if prefix else []
-            self._pending_table_row = pending_seed + text
-            return False
-
-        if self._pending_table_row:
-            self._buffer.append(self._pending_table_row)
-            self._pending_table_row = ""
-
-        self._buffer.append(text)
-        if self._has_reasoning:
-            self._styled_buffer.append((text, False))
-        return True
-
     def finalize(self, _message: "PromptMessageExtended | str") -> None:
         if not self._active or self._finalized:
             return
 
         # Flush any buffered reasoning content before closing the live view
-        self._process_reasoning_chunk("")
-        if self._buffer:
+        if self._segment_assembler.flush():
+            self._render_current_buffer()
+        elif self._segment_assembler.segments:
             self._render_current_buffer()
 
         self._finalized = True
@@ -417,120 +269,6 @@ def close(self) -> None:
         self._shutdown_live_resources()
         self._max_render_height = 0
 
-    def _extract_trailing_paragraph(self, text: str) -> str:
-        if not text:
-            return ""
-        double_break = text.rfind("\n\n")
-        if double_break != -1:
-            candidate = text[double_break + 2 :]
-        else:
-            candidate = text
-        if "\n" in candidate:
-            candidate = candidate.split("\n")[-1]
-        return candidate
-
-    def _wrap_plain_chunk(self, chunk: str) -> str:
-        width = max(1, console.console.size.width)
-        if not chunk or width <= 1:
-            return chunk
-
-        result_segments: list[str] = []
-        start = 0
-        length = len(chunk)
-
-        while start < length:
-            newline_pos = chunk.find("\n", start)
-            if newline_pos == -1:
-                line = chunk[start:]
-                delimiter = ""
-                start = length
-            else:
-                line = chunk[start:newline_pos]
-                delimiter = "\n"
-                start = newline_pos + 1
-
-            if len(line.expandtabs()) > width:
-                wrapped = self._wrap_plain_line(line, width)
-                result_segments.append("\n".join(wrapped))
-            else:
-                result_segments.append(line)
-
-            result_segments.append(delimiter)
-
-        return "".join(result_segments)
-
-    @staticmethod
-    def _wrap_plain_line(line: str, width: int) -> list[str]:
-        if not line:
-            return [""]
-
-        segments: list[str] = []
-        remaining = line
-
-        while len(remaining) > width:
-            break_at = remaining.rfind(" ", 0, width)
-            if break_at == -1 or break_at < width // 2:
-                break_at = width
-                segments.append(remaining[:break_at])
-                remaining = remaining[break_at:]
-            else:
-                segments.append(remaining[:break_at])
-                remaining = remaining[break_at + 1 :]
-        segments.append(remaining)
-        return segments
-
-    def _decode_literal_newlines(self, chunk: str) -> str:
-        if not chunk:
-            return chunk
-
-        text = chunk
-        if self._pending_literal_backslashes:
-            text = self._pending_literal_backslashes + text
-            self._pending_literal_backslashes = ""
-
-        result: list[str] = []
-        length = len(text)
-        index = 0
-
-        while index < length:
-            char = text[index]
-            if char == "\\":
-                start = index
-                while index < length and text[index] == "\\":
-                    index += 1
-                count = index - start
-
-                if index >= length:
-                    self._pending_literal_backslashes = "\\" * count
-                    break
-
-                next_char = text[index]
-                if next_char == "n" and count % 2 == 1:
-                    if count > 1:
-                        result.append("\\" * (count - 1))
-                    result.append("\n")
-                    index += 1
-                else:
-                    result.append("\\" * count)
-                    continue
-            else:
-                result.append(char)
-                index += 1
-
-        return "".join(result)
-
-    def _estimate_plain_render_height(self, text: str) -> int:
-        if not text:
-            return 0
-
-        width = max(1, console.console.size.width)
-        lines = text.split("\n")
-        total = 0
-        for line in lines:
-            expanded_len = len(line.expandtabs())
-            total += max(1, math.ceil(expanded_len / width)) if expanded_len else 1
-        return total
-
     def _enqueue_chunk(self, chunk: object) -> None:
         if not self._queue or not self._loop:
             return
@@ -560,133 +298,16 @@ def _enqueue_chunk(self, chunk: object) -> None:
                     data={"error": str(exc), "chunk_repr": repr(chunk)},
                 )
 
-    def _process_reasoning_chunk(self, chunk: str) -> bool:
-        """
-        Detect and style reasoning-tagged content (<think>...</think>) when present.
-
-        Returns True if the chunk was handled by reasoning-aware processing.
-        """
-        should_process = (
-            self._reasoning_parser.in_think or "<think>" in chunk or "</think>" in chunk
-        )
-        if not should_process and not self._has_reasoning:
-            return False
-        previous_in_think = self._reasoning_parser.in_think
-        segments: list[ReasoningSegment] = []
-        if chunk:
-            segments = self._reasoning_parser.feed(chunk)
-        elif self._reasoning_parser.in_think:
-            segments = self._reasoning_parser.flush()
-
-        if not segments:
-            return False
-        handled = False
-        emitted_non_thinking = False
-
-        for segment in segments:
-            if segment.is_thinking:
-                if self._render_reasoning_stream:
-                    self._begin_reasoning_mode()
-                    self._append_plain_text(segment.text, is_reasoning=True)
-                handled = True
-            else:
-                if self._reasoning_active:
-                    self._end_reasoning_mode()
-                if self._render_reasoning_stream and self._has_reasoning:
-                    self._drop_reasoning_stream()
-                emitted_non_thinking = True
-                self._append_text_in_current_mode(segment.text)
-                handled = True
-
-        if (
-            previous_in_think
-            and not self._reasoning_parser.in_think
-            and self._reasoning_active
-            and not emitted_non_thinking
-        ):
-            self._end_reasoning_mode()
-
-        return handled
-
     def _handle_stream_chunk(self, chunk: StreamChunk) -> bool:
         """Process a typed stream chunk with explicit reasoning flag."""
-        if not chunk.text:
-            return False
-        if not chunk.is_reasoning and self._process_reasoning_chunk(chunk.text):
-            return True
-
-        if chunk.is_reasoning:
-            if self._render_reasoning_stream:
-                self._begin_reasoning_mode()
-                return self._append_plain_text(chunk.text, is_reasoning=True)
-            return False
-
-        if self._render_reasoning_stream and self._has_reasoning:
-            self._drop_reasoning_stream()
-        if self._reasoning_active:
-            self._end_reasoning_mode()
-
-        return self._append_text_in_current_mode(chunk.text)
-
-    def _drop_reasoning_stream(self) -> None:
-        if not self._has_reasoning:
-            return
-        if self._styled_buffer:
-            kept = [text for text, is_reasoning in self._styled_buffer if not is_reasoning]
-            rebuilt = "".join(kept)
-            self._buffer = [rebuilt] if rebuilt else []
-        self._styled_buffer.clear()
-        self._render_reasoning_stream = False
-        self._has_reasoning = False
-        self._reasoning_active = False
+        return self._segment_assembler.handle_stream_chunk(chunk)
 
     def _handle_chunk(self, chunk: str) -> bool:
-        if not chunk:
-            return False
-
-        if self._process_reasoning_chunk(chunk):
-            return True
-        return self._append_text_in_current_mode(chunk)
-
-    def _slice_styled_segments(self, target_text: str) -> list[tuple[str, bool]]:
-        """Trim styled buffer to the tail matching the provided text length."""
-        if not self._styled_buffer:
-            return []
-
-        remaining = len(target_text)
-        selected: list[tuple[str, bool]] = []
-
-        for text, is_thinking in reversed(self._styled_buffer):
-            if remaining <= 0:
-                break
-            if len(text) <= remaining:
-                selected.append((text, is_thinking))
-                remaining -= len(text)
-            else:
-                selected.append((text[-remaining:], is_thinking))
-                remaining = 0
-
-        selected.reverse()
-        return selected
-
-    def _build_styled_text(self, text: str) -> Text:
-        """Build a Rich Text object with dim/italic styling for reasoning segments."""
-        if not self._has_reasoning or not self._styled_buffer:
-            return (
-                Text(text, style=self._plain_text_style) if self._plain_text_style else Text(text)
-            )
-
-        segments = self._slice_styled_segments(text)
-        self._styled_buffer = segments
-
-        styled_text = Text()
-        for segment_text, is_thinking in segments:
-            style = "dim italic" if is_thinking else self._plain_text_style
-            styled_text.append(segment_text, style=style)
-        return styled_text
+        return self._segment_assembler.handle_text(chunk)
 
     def _render_current_buffer(self) -> None:
-        if not self._buffer:
+        segments = self._segment_assembler.segments
+        if not segments:
             return
 
         self._ensure_started()
@@ -694,71 +315,74 @@ def _render_current_buffer(self) -> None:
         if not self._live:
             return
 
-        # Consolidate buffer if it gets fragmented (>10 items)
-        # Then check if we need to truncate to keep only recent content
-        if len(self._buffer) > 10:
-            text = "".join(self._buffer)
-            trimmed = self._trim_to_displayable(text)
-            # Only update buffer if truncation actually reduced content
-            # This keeps buffer size manageable for continuous scrolling
-            if len(trimmed) < len(text):
-                self._buffer = [trimmed]
-                if self._has_reasoning:
-                    self._styled_buffer = self._slice_styled_segments(trimmed)
-            else:
-                self._buffer = [text]
-
-        text = "".join(self._buffer)
-
-        # Check if trailing paragraph is too long and needs trimming
-        trailing_paragraph = self._extract_trailing_paragraph(text)
-        if trailing_paragraph and "\n" not in trailing_paragraph:
-            width = max(1, console.console.size.width)
-            target_ratio = (
-                PLAIN_STREAM_TARGET_RATIO if self._use_plain_text else MARKDOWN_STREAM_TARGET_RATIO
-            )
-            target_rows = max(1, int(console.console.size.height * target_ratio) - 1)
-            estimated_rows = math.ceil(len(trailing_paragraph.expandtabs()) / width)
-            if estimated_rows > target_rows:
-                trimmed = self._trim_to_displayable(text)
-                if len(trimmed) < len(text):
-                    text = trimmed
-                    self._buffer = [trimmed]
-                    if self._has_reasoning:
-                        self._styled_buffer = self._slice_styled_segments(trimmed)
-
         header = self._build_header()
         max_allowed_height = max(1, console.console.size.height - 2)
-        self._max_render_height = min(self._max_render_height, max_allowed_height)
-
-        if self._use_plain_text:
-            content_height = self._estimate_plain_render_height(text)
-            budget_height = min(content_height + PLAIN_STREAM_HEIGHT_FUDGE, max_allowed_height)
+        window_segments = self._viewport.slice_segments(
+            segments,
+            terminal_height=max_allowed_height,
+            console=console.console,
+            target_ratio=self._stream_target_ratio,
+        )
+        if not window_segments:
+            return
+        self._segment_assembler.compact(window_segments)
 
-            if budget_height > self._max_render_height:
-                self._max_render_height = budget_height
+        renderables: list[RenderableType] = []
+        content_height = 0
+        width = console.console.size.width
 
-            padding_lines = max(0, self._max_render_height - content_height)
-            content = self._build_styled_text(text)
-            if padding_lines:
-                content.append("\n" * padding_lines)
-        else:
-            prepared = prepare_markdown_content(text, self._display._escape_xml)
-            prepared_for_display = self._close_incomplete_code_blocks(prepared)
+        for segment in window_segments:
+            if segment.kind == "markdown":
+                prepared = prepare_markdown_content(segment.text, self._display._escape_xml)
+                prepared_for_display = self._close_incomplete_code_blocks(prepared)
+                if prepared_for_display:
+                    renderables.append(
+                        Markdown(prepared_for_display, code_theme=self._display.code_style)
+                    )
+                    content_height += self._markdown_truncator.measure_rendered_height(
+                        prepared_for_display,
+                        console.console,
+                        self._display.code_style,
+                    )
+                else:
+                    renderables.append(Text(""))
+            elif segment.kind == "reasoning":
+                if self._render_reasoning_markdown:
+                    prepared = prepare_markdown_content(segment.text, self._display._escape_xml)
+                    prepared_for_display = self._close_incomplete_code_blocks(prepared)
+                    markdown = Markdown(
+                        prepared_for_display,
+                        code_theme=self._display.code_style,
+                        style="dim italic",
+                    )
+                    renderables.append(markdown)
+                    content_height += self._markdown_truncator.measure_rendered_height(
+                        prepared_for_display,
+                        console.console,
+                        self._display.code_style,
+                    )
+                else:
+                    renderables.append(Text(segment.text, style="dim italic"))
+                    content_height += estimate_plain_text_height(segment.text, width)
+            else:
+                renderables.append(Text(segment.text))
+                content_height += estimate_plain_text_height(segment.text, width)
 
-            content_height = self._truncator.measure_rendered_height(
-                prepared_for_display, console.console, self._display.code_style
-            )
-            budget_height = min(content_height + MARKDOWN_STREAM_HEIGHT_FUDGE, max_allowed_height)
+        self._max_render_height = min(self._max_render_height, max_allowed_height)
+        budget_height = min(content_height + self._height_fudge, max_allowed_height)
 
-            if budget_height > self._max_render_height:
-                self._max_render_height = budget_height
+        if budget_height > self._max_render_height:
+            self._max_render_height = budget_height
 
-            padding_lines = max(0, self._max_render_height - content_height)
-            if padding_lines:
-                prepared_for_display = prepared_for_display + ("\n" * padding_lines)
+        padding_lines = max(0, self._max_render_height - content_height)
+        if padding_lines:
+            renderables.append(Text("\n" * padding_lines))
 
-            content = Markdown(prepared_for_display, code_theme=self._display.code_style)
+        content = (
+            Group(*renderables)
+            if len(renderables) > 1
+            else (renderables[0] if renderables else Text(""))
+        )
 
         header_with_spacing = header.copy()
         header_with_spacing.append("\n", style="default")
@@ -804,6 +428,11 @@ async def _render_worker(self) -> None:
                         should_render = self._handle_stream_chunk(chunk) or should_render
                     elif isinstance(chunk, str):
                         should_render = self._handle_chunk(chunk) or should_render
+                    elif isinstance(chunk, _ToolStreamEvent):
+                        should_render = (
+                            self._segment_assembler.handle_tool_event(chunk.event_type, chunk.info)
+                            or should_render
+                        )
 
                 if should_render:
                     self._render_current_buffer()
@@ -837,16 +466,13 @@ def handle_tool_event(self, event_type: str, info: dict[str, Any] | None = None)
             if not self._active:
                 return
 
-            tool_name = info.get("tool_name", "unknown") if info else "unknown"
-
-            if event_type == "start":
-                self._begin_tool_mode()
-                self.update(f"→ Calling {tool_name}\n")
+            event = _ToolStreamEvent(event_type=event_type, info=info)
+            if self._async_mode and self._queue is not None:
+                self._enqueue_chunk(event)
                 return
-            if event_type == "delta":
-                self.update(info.get("chunk", "") if info else "")
-            elif event_type == "stop":
-                self._end_tool_mode()
+
+            if self._segment_assembler.handle_tool_event(event_type, info):
+                self._render_current_buffer()
         except Exception as exc:
             logger.warning(
                 "Error handling tool event",
diff --git a/src/fast_agent/ui/streaming_buffer.py b/src/fast_agent/ui/streaming_buffer.py
index b417e3b83..01528c1c1 100644
--- a/src/fast_agent/ui/streaming_buffer.py
+++ b/src/fast_agent/ui/streaming_buffer.py
@@ -1,4 +1,4 @@
-"""Streaming buffer for markdown content with intelligent truncation.
+"""Streaming buffer for markdown content with lightweight truncation.
 
 This module provides a simple, robust streaming buffer that:
 1. Accumulates streaming chunks from LLM responses
@@ -6,15 +6,11 @@
 3. Preserves markdown context when truncating:
    - Code blocks: retains opening ```language fence
    - Tables: retains header + separator rows
-   - Code blocks: adds closing ``` if unclosed
-
-Design Philosophy
-=================
-KISS (Keep It Simple, Stupid):
-- No binary search (streaming is linear)
-- No dual modes (streaming always keeps recent content)
-- Parse once per truncation (not per chunk)
-- Position-based tracking (clear, testable)
+4. Optionally adds a closing ``` fence for unclosed code blocks
+
+Design philosophy:
+- Keep the logic linear and easy to reason about.
+- Avoid expensive render passes; use width-based line estimation.
 """
 
 from dataclasses import dataclass
@@ -54,9 +50,12 @@ class StreamBuffer:
             render(display_text)
     """
 
-    def __init__(self):
+    def __init__(self, target_height_ratio: float = 0.7):
         """Initialize the stream buffer."""
+        if not 0 < target_height_ratio <= 1:  # accepted range is (0, 1]
+            raise ValueError("target_height_ratio must be between 0 and 1")
         self._chunks: list[str] = []
+        self._target_height_ratio = target_height_ratio  # fallback when target_ratio is None
         self._parser = MarkdownIt().enable("table")
 
     def append(self, chunk: str) -> None:
@@ -79,21 +78,18 @@ def get_full_text(self) -> str:
     def get_display_text(
         self,
         terminal_height: int,
-        target_ratio: float = 0.7,
+        target_ratio: float | None = None,
         terminal_width: int | None = None,
+        *,
+        add_closing_fence: bool = False,
     ) -> str:
         """Get text for display, truncated to fit terminal.
 
-        This applies intelligent truncation when content exceeds terminal height:
-        1. Keeps most recent content (last N lines)
-        2. Preserves code block fences if truncated mid-block
-        3. Preserves table headers if truncated in table data
-        4. Adds closing fence if code block is unclosed
-
         Args:
             terminal_height: Height of terminal in lines
-            target_ratio: Keep this multiple of terminal height (default 1.5)
+            target_ratio: Ratio of terminal height to keep (defaults to instance ratio)
             terminal_width: Optional terminal width for estimating wrapped lines
+            add_closing_fence: Append a closing fence for unclosed code blocks
 
         Returns:
             Text ready for display (truncated if needed)
@@ -101,8 +97,34 @@ def get_display_text(
         full_text = self.get_full_text()
         if not full_text:
             return full_text
+        ratio = target_ratio if target_ratio is not None else self._target_height_ratio
         return self._truncate_for_display(
-            full_text, terminal_height, target_ratio, terminal_width
+            full_text,
+            terminal_height,
+            ratio,
+            terminal_width,
+            add_closing_fence=add_closing_fence,
+        )
+
+    def truncate_text(
+        self,
+        text: str,
+        terminal_height: int,
+        terminal_width: int | None = None,
+        *,
+        add_closing_fence: bool = False,
+        target_ratio: float | None = None,
+    ) -> str:
+        """Truncate the provided text without mutating the internal buffer."""
+        if not text:
+            return text  # empty input is returned unchanged
+        ratio = target_ratio if target_ratio is not None else self._target_height_ratio
+        return self._truncate_for_display(
+            text,
+            terminal_height,
+            ratio,  # explicit target_ratio, else the instance default
+            terminal_width,
+            add_closing_fence=add_closing_fence,
         )
 
     def clear(self) -> None:
@@ -115,6 +137,8 @@ def _truncate_for_display(
         terminal_height: int,
         target_ratio: float,
         terminal_width: int | None,
+        *,
+        add_closing_fence: bool = False,
     ) -> str:
         """Truncate text to fit display with context preservation.
 
@@ -134,13 +158,11 @@ def _truncate_for_display(
         Returns:
             Truncated text with preserved context
         """
-        lines = text.split("\n")
+        if terminal_height <= 0:
+            return text
 
-        if target_ratio <= 1:
-            extra_lines = 0
-        else:
-            extra_lines = int(ceil(terminal_height * (target_ratio - 1)))
-        raw_target_lines = terminal_height + extra_lines
+        lines = text.split("\n")
+        target_lines = max(1, int(terminal_height * target_ratio))
 
         # Estimate how many rendered lines the text will occupy
         if terminal_width and terminal_width > 0:
@@ -153,16 +175,10 @@ def _truncate_for_display(
 
         # Fast path: no truncation needed if content still fits the viewport
         if total_display_lines <= terminal_height:
-            # Still need to check for unclosed code blocks
-            return self._add_closing_fence_if_needed(text)
+            return self._add_closing_fence_if_needed(text) if add_closing_fence else text
 
         # Determine how many display lines we want to keep after truncation
-        desired_display_lines = min(total_display_lines, raw_target_lines)
-        if desired_display_lines > terminal_height:
-            window_lines = max(1, terminal_height // 5)  # keep ~20% headroom
-            desired_display_lines = max(terminal_height, desired_display_lines - window_lines)
-        else:
-            desired_display_lines = terminal_height
+        desired_display_lines = min(total_display_lines, target_lines)
 
         # Determine how many logical lines we can keep based on estimated display rows
         if display_counts:
@@ -203,8 +219,9 @@ def _truncate_for_display(
             text, truncated_text, truncation_pos, tables
         )
 
-        # Add closing fence if code block is unclosed
-        truncated_text = self._add_closing_fence_if_needed(truncated_text)
+        # Add closing fence if code block is unclosed (display-only)
+        if add_closing_fence:
+            truncated_text = self._add_closing_fence_if_needed(truncated_text)
 
         return truncated_text
 
@@ -351,6 +368,14 @@ def _preserve_table_context(
                 # If we truncated in the data section, restore header
                 if truncation_pos >= data_start_pos:
                     header_text = "\n".join(table.header_lines) + "\n"
+                    if truncated_text.startswith(header_text):
+                        return truncated_text
+                    truncated_lines = truncated_text.splitlines()
+                    header_lines = [line.rstrip() for line in table.header_lines]
+                    if len(truncated_lines) >= len(header_lines):
+                        candidate = [line.rstrip() for line in truncated_lines[: len(header_lines)]]
+                        if candidate == header_lines:
+                            return truncated_text
                     return header_text + truncated_text
 
                 # Found the relevant table, no need to check others
@@ -404,11 +429,11 @@ def _flatten_tokens(self, tokens: list[Token]) -> Generator[Token, None, None]:
     def _estimate_display_counts(self, lines: list[str], terminal_width: int) -> list[int]:
         """Estimate how many terminal rows each logical line will occupy."""
         return [
-            max(1, ceil(len(line) / terminal_width)) if line else 1
+            max(1, ceil(len(line.expandtabs()) / terminal_width)) if line else 1
             for line in lines
         ]
 
-    def _estimate_display_lines(self, text: str, terminal_width: int) -> int:
+    def estimate_display_lines(self, text: str, terminal_width: int) -> int:
         """Estimate how many terminal rows the given text will occupy."""
         if not text:
             return 0
@@ -426,7 +451,7 @@ def _trim_within_line_if_needed(
         """Trim additional characters when a single line exceeds the viewport."""
         current_pos = truncation_pos
         current_text = truncated_text
-        estimated_lines = self._estimate_display_lines(current_text, terminal_width)
+        estimated_lines = self.estimate_display_lines(current_text, terminal_width)
 
         while estimated_lines > max_display_lines and current_pos < len(text):
             excess_display = estimated_lines - max_display_lines
@@ -446,6 +471,6 @@ def _trim_within_line_if_needed(
 
             current_pos = candidate_pos
             current_text = text[current_pos:]
-            estimated_lines = self._estimate_display_lines(current_text, terminal_width)
+            estimated_lines = self.estimate_display_lines(current_text, terminal_width)
 
         return current_text, current_pos
diff --git a/src/fast_agent/utils/async_utils.py b/src/fast_agent/utils/async_utils.py
index de051009e..1c906fe49 100644
--- a/src/fast_agent/utils/async_utils.py
+++ b/src/fast_agent/utils/async_utils.py
@@ -1,12 +1,147 @@
-from __future__ import annotations
-
 import asyncio
-from typing import TYPE_CHECKING, TypeVar
-
-if TYPE_CHECKING:
-    from collections.abc import Awaitable, Iterable
+import concurrent.futures
+import os
+import sys
+from collections.abc import Awaitable, Callable, Iterable
+from typing import ParamSpec, TypeVar
 
 T = TypeVar("T")
+P = ParamSpec("P")
+
+# Outcome of the first configure_uvloop() call; None until it has run.
+_UVLOOP_REQUESTED: bool | None = None
+_UVLOOP_CONFIGURED: bool | None = None
+
+
+def _env_value(name: str) -> bool | None:
+    """Read an environment variable as a tri-state boolean flag.
+
+    Returns None when the variable is unset, True when its value is one of
+    "1", "true", "yes" or "on" (case-insensitive, surrounding whitespace
+    ignored), and False for any other value, including the empty string.
+    """
+    value = os.getenv(name)
+    if value is None:
+        return None
+    return value.strip().lower() in {"1", "true", "yes", "on"}
+
+
+def configure_uvloop(
+    env_var: str = "FAST_AGENT_UVLOOP",
+    disable_env_var: str = "FAST_AGENT_DISABLE_UV_LOOP",
+) -> tuple[bool, bool]:
+    """
+    Install the uvloop event loop policy unless the environment opts out.
+
+    uvloop is attempted by default on non-Windows platforms. It is skipped when
+    ``disable_env_var`` is truthy or ``env_var`` is set to a non-truthy value,
+    and it stays disabled when uvloop cannot be imported.
+
+    The outcome of the first call is cached at module level; later calls return
+    the cached tuple without re-reading the environment, even when different
+    variable names are passed.
+
+    Returns a tuple of (requested, enabled): ``requested`` is True only when
+    ``env_var`` is explicitly truthy and ``disable_env_var`` is not, while
+    ``enabled`` is True when the uvloop policy was actually installed.
+    """
+    global _UVLOOP_REQUESTED, _UVLOOP_CONFIGURED
+    if _UVLOOP_REQUESTED is not None and _UVLOOP_CONFIGURED is not None:
+        return _UVLOOP_REQUESTED, _UVLOOP_CONFIGURED
+
+    explicit_enable = _env_value(env_var)
+    explicit_disable = _env_value(disable_env_var)
+    requested = explicit_enable is True and explicit_disable is not True
+    opted_out = explicit_disable is True or explicit_enable is False
+    enabled = False
+
+    if not opted_out and not sys.platform.startswith("win"):
+        try:
+            import uvloop
+        except Exception:
+            # Best effort: any import failure falls back to the default asyncio loop.
+            enabled = False
+        else:
+            asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
+            enabled = True
+
+    _UVLOOP_REQUESTED = requested
+    _UVLOOP_CONFIGURED = enabled
+    return requested, enabled
+
+
+def create_event_loop() -> asyncio.AbstractEventLoop:
+    """Create a new event loop under the configured policy and make it current."""
+    configure_uvloop()
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+    return loop
+
+
+def ensure_event_loop() -> asyncio.AbstractEventLoop:
+    """Return a usable event loop, creating one if needed.
+
+    Preference order: the loop running in this thread, then whatever
+    asyncio.get_event_loop() returns if it is still open, otherwise a fresh loop.
+    """
+    try:
+        return asyncio.get_running_loop()
+    except RuntimeError:
+        pass
+
+    try:
+        loop = asyncio.get_event_loop()
+    except RuntimeError:
+        # asyncio could not supply a current loop for this thread.
+        return create_event_loop()
+    if loop.is_closed():
+        return create_event_loop()
+    return loop
+
+
+def run_sync(
+    func: Callable[P, Awaitable[T]], *args: P.args, **kwargs: P.kwargs
+) -> T:
+    """
+    Run an async callable from sync code using the shared loop policy.
+
+    When no loop is running in this thread, the callable runs to completion on the
+    loop returned by ensure_event_loop(). If a loop is already running, the
+    coroutine runs on a fresh loop in a worker thread and the calling thread
+    blocks until it finishes.
+
+    Returns whatever the awaited callable returns; its exceptions propagate.
+    """
+    try:
+        asyncio.get_running_loop()
+    except RuntimeError:
+        loop = ensure_event_loop()
+        if not loop.is_running():
+            return loop.run_until_complete(func(*args, **kwargs))
+    # A loop is already running, so drive the coroutine on a separate loop and thread.
+    return _run_in_new_loop(func, *args, **kwargs)
+
+
+def _run_in_new_loop(func: Callable[P, Awaitable[T]], *args: P.args, **kwargs: P.kwargs) -> T:
+    """Run ``func(*args, **kwargs)`` on a private event loop in a worker thread.
+
+    Blocks the caller until the worker finishes, then returns its result or
+    re-raises its exception.
+    """
+
+    def runner() -> T:
+        loop = create_event_loop()
+        try:
+            return loop.run_until_complete(func(*args, **kwargs))
+        finally:
+            # Finalize async generators first, but close the loop even if that fails.
+            try:
+                loop.run_until_complete(loop.shutdown_asyncgens())
+            finally:
+                loop.close()
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
+        return pool.submit(runner).result()
 
 
 async def gather_with_cancel(aws: Iterable[Awaitable[T]]) -> list[T | BaseException]:
diff --git a/tests/integration/acp/test_acp_slash_commands.py b/tests/integration/acp/test_acp_slash_commands.py
index f9a78dbfd..30fe58db0 100644
--- a/tests/integration/acp/test_acp_slash_commands.py
+++ b/tests/integration/acp/test_acp_slash_commands.py
@@ -199,6 +199,28 @@ class AgentWithInstruction(StubAgent):
     assert stub_agent.instruction in response
 
 
+@pytest.mark.integration
+@pytest.mark.asyncio
+async def test_slash_command_status_system_prefers_session_instruction() -> None:
+    """A session-resolved instruction must win over the agent's template in /status system."""
+
+    @dataclass
+    class AgentWithInstruction(StubAgent):
+        name: str = "test-agent"
+        instruction: str = "Template instruction with {{env}}."
+
+    session_text = "Resolved instruction with env."
+    templated_agent = AgentWithInstruction(message_history=[], llm=None)
+    agent_instance = StubAgentInstance(agents={"test-agent": templated_agent})
+    handler = _handler(agent_instance, session_instructions={"test-agent": session_text})
+
+    # The handler should report the session override, not the raw template.
+    output = await handler.execute_command("status", "system")
+
+    assert session_text in output
+    assert templated_agent.instruction not in output
+
+
 @pytest.mark.integration
 @pytest.mark.asyncio
 async def test_slash_command_status_system_without_instruction() -> None:
diff --git a/tests/unit/fast_agent/ui/test_console_display_plain_wrap.py b/tests/unit/fast_agent/ui/test_console_display_plain_wrap.py
index bece66ca5..45924d4dd 100644
--- a/tests/unit/fast_agent/ui/test_console_display_plain_wrap.py
+++ b/tests/unit/fast_agent/ui/test_console_display_plain_wrap.py
@@ -1,27 +1,19 @@
-from fast_agent.ui import console
-from fast_agent.ui.console_display import _StreamingMessageHandle
+from fast_agent.ui.stream_viewport import estimate_plain_text_height
 
 
-def test_wrap_plain_line_breaks_on_space() -> None:
-    result = _StreamingMessageHandle._wrap_plain_line("hello world again", 6)
-    assert result == ["hello", "world", "again"]
+def test_estimate_plain_text_height_wraps_long_line() -> None:
+    """Seventeen characters at width 6 are expected to take three rows."""
+    rows = estimate_plain_text_height("hello world again", 6)
+    assert rows == 3
 
 
-def test_wrap_plain_line_handles_long_word() -> None:
-    result = _StreamingMessageHandle._wrap_plain_line("abcdefghij", 4)
-    assert result == ["abcd", "efgh", "ij"]
+def test_estimate_plain_text_height_handles_long_word() -> None:
+    """A ten-character word without spaces at width 4 is expected to take three rows."""
+    rows = estimate_plain_text_height("abcdefghij", 4)
+    assert rows == 3
 
 
-def test_wrap_plain_chunk_inserts_newlines() -> None:
-    handle = object.__new__(_StreamingMessageHandle)
-    handle._use_plain_text = True
-    original_width = getattr(console.console, "_width", None)
-    try:
-        console.console._width = 8
-        wrapped = _StreamingMessageHandle._wrap_plain_chunk(handle, "abcdefghijk")
-        assert wrapped == "abcdefgh\nijk"
-    finally:
-        if original_width is None:
-            delattr(console.console, "_width")
-        else:
-            console.console._width = original_width
+def test_estimate_plain_text_height_counts_lines() -> None:
+    """Multi-line text with one over-wide line and a trailing newline should take four rows."""
+    rows = estimate_plain_text_height("abc\ndefghij\n", 4)
+    assert rows == 4
diff --git a/tests/unit/fast_agent/ui/test_markdown_truncator_streaming.py b/tests/unit/fast_agent/ui/test_markdown_truncator_streaming.py
index ae87da94d..c02ef5ddf 100644
--- a/tests/unit/fast_agent/ui/test_markdown_truncator_streaming.py
+++ b/tests/unit/fast_agent/ui/test_markdown_truncator_streaming.py
@@ -279,3 +279,88 @@ def test_streaming_truncation_avoids_duplicate_table_header() -> None:
 
     result = truncator._ensure_table_header_if_needed(original, truncated)
     assert result.count("| Mission | Date |") == 1
+
+
+def test_streaming_table_scrolls_latest_rows() -> None:
+    """A truncated long table keeps its header and a contiguous tail ending at the last row."""
+    height_ratio = 0.75
+    truncator = MarkdownTruncator(target_height_ratio=height_ratio)
+    test_console = Console(width=200)
+
+    header = (
+        "| Rank | Airport Name | IATA | ICAO | City/Region | Country | Elevation (m) | "
+        "Elevation (ft) |"
+    )
+    separator = (
+        "|------|--------------|------|------|-------------|---------|---------------|"
+        "----------------|"
+    )
+    rows = [
+        "| 1 | Daocheng Yading Airport | DCY | ZUDC | Daocheng | China | 4,411 | 14,472 |",
+        "| 2 | Qamdo Bamda Airport | BPX | ZUBD | Qamdo | China | 4,334 | 14,219 |",
+        "| 3 | Kangding Airport | KGT | ZUKD | Kangding | China | 4,280 | 14,042 |",
+        "| 4 | Ngari Gunsa Airport | NGQ | ZUAS | Ngari | China | 4,274 | 14,022 |",
+        "| 5 | El Alto International Airport | LPB | SLLP | La Paz | Bolivia | 4,061 | 13,325 |",
+        "| 6 | Yushu Batang Airport | YUS | ZLYS | Yushu | China | 3,890 | 12,762 |",
+        "| 7 | Inca Manco Capac International Airport | JUL | SPJL | Juliaca | Peru | 3,826 | 12,552 |",
+        "| 8 | Shigatse Peace Airport | RKZ | ZURK | Shigatse | China | 3,782 | 12,408 |",
+        "| 9 | Lhasa Gonggar Airport | LXA | ZULS | Lhasa | China | 3,570 | 11,710 |",
+        "| 10 | Leh Kushok Bakula Rimpochee Airport | IXL | VILH | Leh | India | 3,256 | 10,682 |",
+        "| 11 | Alejandro Velasco Astete International Airport | CUZ | SPZO | Cusco | Peru | 3,199 | 10,489 |",
+        "| 12 | Tenzing-Hillary Airport | LUA | VNLK | Lukla | Nepal | 2,860 | 9,383 |",
+        "| 13 | Alcantari Airport (Sucre) | SRE | SLET | Sucre | Bolivia | 2,834 | 9,301 |",
+        "| 14 | Toluca International Airport | TLC | MMTO | Toluca | Mexico | 2,580 | 8,465 |",
+        "| 15 | Arequipa Airport | AQP | SPQU | Arequipa | Peru | 2,560 | 8,400 |",
+        "| 16 | Jorge Wilstermann International Airport | CBB | SLCB | Cochabamba | Bolivia | 2,548 | 8,360 |",
+        "| 17 | El Dorado International Airport | BOG | SKBO | Bogota | Colombia | 2,548 | 8,360 |",
+        "| 18 | Mariscal Sucre International Airport | UIO | SEQM | Quito | Ecuador | 2,400 | 7,873 |",
+        "| 19 | Addis Ababa Bole International Airport | ADD | HAAB | Addis Ababa | Ethiopia | 2,334 | 7,625 |",
+        "| 20 | Mexico City International Airport | MEX | MMMX | Mexico City | Mexico | 2,230 | 7,316 |",
+        "| 21 | Puebla International Airport | PBC | MMPB | Puebla | Mexico | 2,204 | 7,230 |",
+        "| 22 | Kunming Changshui International Airport | KMG | ZPPP | Kunming | China | 2,103 | 6,896 |",
+        "| 23 | Sanaa International Airport | SAH | OYSN | Sanaa | Yemen | 2,200 | 7,218 |",
+        "| 24 | Lanzhou Zhongchuan International Airport | LHW | ZLLL | Lanzhou | China | 1,967 | 6,450 |",
+        "| 25 | Kabul International Airport | KBL | OAKB | Kabul | Afghanistan | 1,791 | 5,877 |",
+        "| 26 | Denver International Airport | DEN | KDEN | Denver | USA | 1,655 | 5,431 |",
+        "| 27 | O.R. Tambo International Airport | JNB | FAOR | Johannesburg | South Africa | 1,694 | 5,558 |",
+        "| 28 | Tehran Imam Khomeini International Airport | IKA | OIIE | Tehran | Iran | 1,007 | 3,305 |",
+        "| 29 | Urumqi Diwopu International Airport | URC | ZWWW | Urumqi | China | 648 | 2,126 |",
+        "| 30 | Silao International Airport (Bajio) | BJX | MMLO | Silao | Mexico | 1,815 | 5,955 |",
+    ]
+
+    intro = "Here is a table of the highest elevation airports worldwide:\n\n"
+    text = intro + "\n".join([header, separator, *rows])
+
+    total_rows = len(rows)
+    total_lines = total_rows + 2  # data rows plus the header and separator lines
+
+    def expected_start_row(height: int, ratio: float) -> int:
+        """First data row expected to survive truncation at this terminal height."""
+        target_lines = max(1, int(height * ratio))
+        if target_lines >= total_lines:
+            return 1
+        return max(1, total_lines - target_lines - 1)
+
+    for height in (12, 16, 20):
+        truncated = truncator.truncate(
+            text,
+            terminal_height=height,
+            console=test_console,
+            code_theme="native",
+            prefer_recent=True,
+        )
+
+        visible = [line for line in truncated.splitlines() if line.strip()]
+        assert header in visible
+        assert separator in visible
+
+        first_cells = [
+            line.split("|")[1].strip() for line in visible if line.startswith("|")
+        ]
+        row_numbers = [int(cell) for cell in first_cells if cell.isdigit()]
+
+        assert row_numbers, "expected table rows to be present in truncated output"
+        start_row = expected_start_row(height, height_ratio)
+        assert row_numbers[0] == start_row
+        assert row_numbers[-1] == total_rows
+        assert row_numbers == list(range(start_row, total_rows + 1))
diff --git a/tests/unit/fast_agent/ui/test_streaming_mode_switch.py b/tests/unit/fast_agent/ui/test_streaming_mode_switch.py
index 83f96b1f9..20d9de122 100644
--- a/tests/unit/fast_agent/ui/test_streaming_mode_switch.py
+++ b/tests/unit/fast_agent/ui/test_streaming_mode_switch.py
@@ -4,6 +4,7 @@
 from fast_agent.llm.stream_types import StreamChunk
 from fast_agent.ui import console
 from fast_agent.ui.console_display import ConsoleDisplay, _StreamingMessageHandle
+from fast_agent.ui.stream_segments import StreamSegmentAssembler
 
 
 def _set_console_size(width: int = 80, height: int = 24) -> tuple[object | None, object | None]:
@@ -46,53 +47,37 @@ def _make_handle(
 
 
 def test_reasoning_stream_switches_back_to_markdown() -> None:
-    original_width, original_height = _set_console_size()
-    handle = _make_handle("markdown")
-    try:
-        handle._handle_stream_chunk(StreamChunk("Intro"))
-        assert handle._use_plain_text is False
-
-        handle._handle_stream_chunk(StreamChunk("Thinking", is_reasoning=True))
-        assert handle._use_plain_text is True
-        assert handle._reasoning_active is True
-
-        handle._handle_stream_chunk(StreamChunk("Answer"))
-        assert handle._use_plain_text is False
-        assert handle._reasoning_active is False
-
-        text = "".join(handle._buffer)
-        intro_idx = text.find("Intro")
-        answer_idx = text.find("Answer")
-        assert intro_idx != -1
-        assert answer_idx != -1
-        assert text.find("Thinking") == -1
-        assert "\n" in text[intro_idx + len("Intro") : answer_idx]
-    finally:
-        _restore_console_size(original_width, original_height)
-
-
-def test_tool_mode_switches_back_to_markdown() -> None:
-    original_width, original_height = _set_console_size()
-    handle = _make_handle("markdown")
-    try:
-        handle._handle_chunk("Intro")
-        handle._begin_tool_mode()
-        assert handle._use_plain_text is True
-
-        handle._handle_chunk("Calling tool")
-        handle._end_tool_mode()
-        assert handle._use_plain_text is False
-
-        handle._handle_chunk("Result")
-
-        text = "".join(handle._buffer)
-        intro_idx = text.find("Intro")
-        tool_idx = text.find("Calling tool")
-        result_idx = text.find("Result")
-        assert intro_idx != -1
-        assert tool_idx != -1
-        assert result_idx != -1
-        assert "\n" in text[intro_idx + len("Intro") : tool_idx]
-        assert "\n" in text[tool_idx + len("Calling tool") : result_idx]
-    finally:
-        _restore_console_size(original_width, original_height)
+    """Intro, reasoning and answer chunks all land in the segments, newline-separated."""
+    assembler = StreamSegmentAssembler(base_kind="markdown", tool_prefix="->")
+    stream = (
+        StreamChunk("Intro"),
+        StreamChunk("Thinking", is_reasoning=True),
+        StreamChunk("Answer"),
+    )
+    for chunk in stream:
+        assembler.handle_stream_chunk(chunk)
+
+    rendered = "".join(segment.text for segment in assembler.segments)
+    intro_at, thinking_at, answer_at = (
+        rendered.find(word) for word in ("Intro", "Thinking", "Answer")
+    )
+    assert -1 not in (intro_at, thinking_at, answer_at)
+    assert "\n" in rendered[intro_at + len("Intro") : thinking_at]
+    assert "\n\n" in rendered[thinking_at + len("Thinking") : answer_at]
+
+
+def test_reasoning_stream_handles_multiple_blocks() -> None:
+    """Text from alternating reasoning and answer chunks is all retained."""
+    assembler = StreamSegmentAssembler(base_kind="markdown", tool_prefix="->")
+    chunks = [
+        StreamChunk("Think1", is_reasoning=True),
+        StreamChunk("Answer1"),
+        StreamChunk("Think2", is_reasoning=True),
+        StreamChunk("Answer2"),
+    ]
+    for chunk in chunks:
+        assembler.handle_stream_chunk(chunk)
+
+    rendered = "".join(segment.text for segment in assembler.segments)
+    for expected in ("Think1", "Answer1", "Think2", "Answer2"):
+        assert expected in rendered
diff --git a/tests/unit/fast_agent/ui/test_streaming_table_chunking.py b/tests/unit/fast_agent/ui/test_streaming_table_chunking.py
index 944ec7893..e02cc3a1e 100644
--- a/tests/unit/fast_agent/ui/test_streaming_table_chunking.py
+++ b/tests/unit/fast_agent/ui/test_streaming_table_chunking.py
@@ -1,98 +1,40 @@
-from fast_agent.config import Settings
 from fast_agent.llm.stream_types import StreamChunk
-from fast_agent.ui import console
-from fast_agent.ui.console_display import ConsoleDisplay, _StreamingMessageHandle
-
-
-def _make_handle() -> _StreamingMessageHandle:
-    settings = Settings()
-    settings.logger.streaming = "markdown"
-    display = ConsoleDisplay(settings)
-    return _StreamingMessageHandle(
-        display=display,
-        bottom_items=None,
-        highlight_index=None,
-        max_item_length=None,
-        use_plain_text=False,
-        header_left="",
-        header_right="",
-        progress_display=None,
-    )
+from fast_agent.ui.stream_segments import StreamSegmentAssembler
 
 
 def test_table_rows_do_not_duplicate_when_streaming_in_parts() -> None:
-    original_width = getattr(console.console, "_width", None)
-    original_height = getattr(console.console, "_height", None)
-    console.console._width = 80
-    console.console._height = 24
-    handle = _make_handle()
-
-    try:
-        chunks = ["| Mission | ", "Landing Date |", "\n"]
-        for chunk in chunks:
-            handle._handle_chunk(chunk)
+    """A table row streamed in three pieces reassembles to exactly the streamed text."""
+    pieces = ["| Mission | ", "Landing Date |", "\n"]
+    assembler = StreamSegmentAssembler(base_kind="markdown", tool_prefix="->")
+    for piece in pieces:
+        assembler.handle_text(piece)
 
-        text = "".join(handle._buffer)
-        assert text == "".join(chunks)
-    finally:
-        if original_width is None:
-            delattr(console.console, "_width")
-        else:
-            console.console._width = original_width
-        if original_height is None:
-            delattr(console.console, "_height")
-        else:
-            console.console._height = original_height
+    rendered = "".join(segment.text for segment in assembler.segments)
+    assert rendered == "".join(pieces)
 
 
 def test_table_rows_do_not_duplicate_when_reasoning_interrupts() -> None:
-    original_width = getattr(console.console, "_width", None)
-    original_height = getattr(console.console, "_height", None)
-    console.console._width = 80
-    console.console._height = 24
-    handle = _make_handle()
+    """A reasoning chunk arriving mid-row must not cause the partial row to repeat."""
+    stream = StreamSegmentAssembler(base_kind="markdown", tool_prefix="->")
 
-    try:
-        handle._handle_chunk("| Mission ")
-        handle._handle_stream_chunk(StreamChunk("thinking", is_reasoning=True))
-        handle._handle_stream_chunk(StreamChunk(" done", is_reasoning=False))
-        handle._handle_chunk("Mission | | Landing Date |\n")
+    stream.handle_text("| Mission ")
+    stream.handle_stream_chunk(StreamChunk("thinking", is_reasoning=True))
+    stream.handle_stream_chunk(StreamChunk(" done", is_reasoning=False))
+    stream.handle_text("Mission | | Landing Date |\n")
 
-        text = "".join(handle._buffer)
-        assert text.count("| Mission Mission |") == 0
-        assert text.count("| Mission ") == 1
-    finally:
-        if original_width is None:
-            delattr(console.console, "_width")
-        else:
-            console.console._width = original_width
-        if original_height is None:
-            delattr(console.console, "_height")
-        else:
-            console.console._height = original_height
+    rendered = "".join(segment.text for segment in stream.segments)
+    assert "| Mission Mission |" not in rendered
+    assert rendered.count("| Mission ") == 1
 
 
 def test_table_pending_row_not_duplicated_after_reasoning() -> None:
-    original_width = getattr(console.console, "_width", None)
-    original_height = getattr(console.console, "_height", None)
-    console.console._width = 80
-    console.console._height = 24
-    handle = _make_handle()
+    """A row fragment streamed after reasoning is held as pending, then completed once."""
+    stream = StreamSegmentAssembler(base_kind="markdown", tool_prefix="->")
 
-    try:
-        handle._handle_stream_chunk(StreamChunk("thinking", is_reasoning=True))
-        handle._handle_stream_chunk(StreamChunk(" |", is_reasoning=False))
-        assert handle._pending_table_row == " |"
+    stream.handle_stream_chunk(StreamChunk("thinking", is_reasoning=True))
+    stream.handle_stream_chunk(StreamChunk(" |", is_reasoning=False))
+    assert stream.pending_table_row == " |"
 
-        handle._handle_stream_chunk(StreamChunk(" Fact |\n", is_reasoning=False))
-        text = "".join(handle._buffer)
-        assert text.endswith(" | Fact |\n")
-    finally:
-        if original_width is None:
-            delattr(console.console, "_width")
-        else:
-            console.console._width = original_width
-        if original_height is None:
-            delattr(console.console, "_height")
-        else:
-            console.console._height = original_height
+    stream.handle_stream_chunk(StreamChunk(" Fact |\n", is_reasoning=False))
+    rendered = "".join(segment.text for segment in stream.segments)
+    assert rendered.endswith(" | Fact |\n")
diff --git a/tests/unit/fast_agent/ui/test_streaming_tool_events.py b/tests/unit/fast_agent/ui/test_streaming_tool_events.py
new file mode 100644
index 000000000..15640ba16
--- /dev/null
+++ b/tests/unit/fast_agent/ui/test_streaming_tool_events.py
@@ -0,0 +1,22 @@
+from fast_agent.ui.stream_segments import StreamSegmentAssembler
+
+
+def _make_assembler() -> StreamSegmentAssembler:
+    """Build an assembler with base_kind="markdown" and tool_prefix="->"."""
+    return StreamSegmentAssembler(base_kind="markdown", tool_prefix="->")
+
+
+def test_tool_stream_delta_bootstraps_mode() -> None:
+    """A "delta" tool event received first still yields the call banner and its payload."""
+    tool_ref = {"tool_name": "search", "tool_use_id": "tool-1"}
+    assembler = _make_assembler()
+
+    assembler.handle_tool_event("delta", {**tool_ref, "chunk": "{\"q\":1}"})
+    streamed = "".join(part.text for part in assembler.segments)
+    assert "Calling search" in streamed
+    assert "{\"q\":1}" in streamed
+
+    # After "stop" the arguments are expected with a space after the colon.
+    assembler.handle_tool_event("stop", {**tool_ref})
+    finished = "".join(part.text for part in assembler.segments)
+    assert "\"q\": 1" in finished
diff --git a/uv.lock b/uv.lock
index 5c0305dc3..0e03731aa 100644
--- a/uv.lock
+++ b/uv.lock
@@ -518,6 +518,7 @@ dependencies = [
     { name = "rich" },
     { name = "tiktoken" },
     { name = "typer" },
+    { name = "uvloop" },
 ]
 
 [package.optional-dependencies]
@@ -589,6 +590,7 @@ requires-dist = [
     { name = "textual", marker = "extra == 'textual'", specifier = ">=6.2.1" },
     { name = "tiktoken", specifier = ">=0.12.0" },
     { name = "typer", specifier = ">=0.20.0" },
+    { name = "uvloop", specifier = ">=0.22.1" },
 ]
 provides-extras = ["azure", "bedrock", "tensorzero", "textual", "all-providers"]
 
@@ -2370,6 +2372,20 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/ee/d9/d88e73ca598f4f6ff671fb5fde8a32925c2e08a637303a1d12883c7305fa/uvicorn-0.38.0-py3-none-any.whl", hash = "sha256:48c0afd214ceb59340075b4a052ea1ee91c16fbc2a9b1469cca0e54566977b02", size = 68109, upload-time = "2025-10-18T13:46:42.958Z" },
 ]
 
+[[package]]
+name = "uvloop"
+version = "0.22.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/06/f0/18d39dbd1971d6d62c4629cc7fa67f74821b0dc1f5a77af43719de7936a7/uvloop-0.22.1.tar.gz", hash = "sha256:6c84bae345b9147082b17371e3dd5d42775bddce91f885499017f4607fdaf39f", size = 2443250, upload-time = "2025-10-16T22:17:19.342Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/89/8c/182a2a593195bfd39842ea68ebc084e20c850806117213f5a299dfc513d9/uvloop-0.22.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:561577354eb94200d75aca23fbde86ee11be36b00e52a4eaf8f50fb0c86b7705", size = 1358611, upload-time = "2025-10-16T22:16:36.833Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/14/e301ee96a6dc95224b6f1162cd3312f6d1217be3907b79173b06785f2fe7/uvloop-0.22.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1cdf5192ab3e674ca26da2eada35b288d2fa49fdd0f357a19f0e7c4e7d5077c8", size = 751811, upload-time = "2025-10-16T22:16:38.275Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/02/654426ce265ac19e2980bfd9ea6590ca96a56f10c76e63801a2df01c0486/uvloop-0.22.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e2ea3d6190a2968f4a14a23019d3b16870dd2190cd69c8180f7c632d21de68d", size = 4288562, upload-time = "2025-10-16T22:16:39.375Z" },
+    { url = "https://files.pythonhosted.org/packages/15/c0/0be24758891ef825f2065cd5db8741aaddabe3e248ee6acc5e8a80f04005/uvloop-0.22.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0530a5fbad9c9e4ee3f2b33b148c6a64d47bbad8000ea63704fa8260f4cf728e", size = 4366890, upload-time = "2025-10-16T22:16:40.547Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/53/8369e5219a5855869bcee5f4d317f6da0e2c669aecf0ef7d371e3d084449/uvloop-0.22.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bc5ef13bbc10b5335792360623cc378d52d7e62c2de64660616478c32cd0598e", size = 4119472, upload-time = "2025-10-16T22:16:41.694Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/ba/d69adbe699b768f6b29a5eec7b47dd610bd17a69de51b251126a801369ea/uvloop-0.22.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1f38ec5e3f18c8a10ded09742f7fb8de0108796eb673f30ce7762ce1b8550cad", size = 4239051, upload-time = "2025-10-16T22:16:43.224Z" },
+]
+
 [[package]]
 name = "virtualenv"
 version = "20.35.4"