1 change: 1 addition & 0 deletions src/mcp_agent/config.py
@@ -249,6 +249,7 @@ class LoggerSettings(BaseModel):
"""Show MCP Sever tool calls on the console"""
truncate_tools: bool = True
"""Truncate display of long tool calls"""
enable_markup: bool = True


class Settings(BaseSettings):
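Note: the new field is a plain Pydantic boolean with a True default, so existing configurations keep Rich markup enabled unless they opt out explicitly. A minimal sketch of the intended usage, assuming the remaining LoggerSettings fields all have defaults:

from mcp_agent.config import LoggerSettings

default_logger = LoggerSettings()                   # enable_markup defaults to True
quiet_logger = LoggerSettings(enable_markup=False)  # opt out of Rich markup parsing
print(default_logger.enable_markup, quiet_logger.enable_markup)  # True False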
12 changes: 6 additions & 6 deletions src/mcp_agent/ui/console_display.py
@@ -46,7 +46,7 @@ def show_tool_result(self, result: CallToolResult) -> None:
if len(str(result.content)) > 360:
panel.height = 8

-console.console.print(panel)
+console.console.print(panel, markup=self.config.logger.enable_markup)
console.console.print("\n")

def show_oai_tool_result(self, result) -> None:
@@ -67,7 +67,7 @@ def show_oai_tool_result(self, result) -> None:
if len(str(result)) > 360:
panel.height = 8

-console.console.print(panel)
+console.console.print(panel, markup=self.config.logger.enable_markup)
console.console.print("\n")

def show_tool_call(self, available_tools, tool_name, tool_args) -> None:
@@ -92,7 +92,7 @@ def show_tool_call(self, available_tools, tool_name, tool_args) -> None:
if len(str(tool_args)) > 360:
panel.height = 8

-console.console.print(panel)
+console.console.print(panel, markup=self.config.logger.enable_markup)
console.console.print("\n")

def _format_tool_list(self, available_tools, selected_tool_name):
@@ -172,7 +172,7 @@ async def show_assistant_message(
subtitle=display_server_list,
subtitle_align="left",
)
-console.console.print(panel)
+console.console.print(panel, markup=self.config.logger.enable_markup)
console.console.print("\n")

def show_user_message(
@@ -196,7 +196,7 @@ def show_user_message(
subtitle=subtitle_text,
subtitle_align="left",
)
-console.console.print(panel)
+console.console.print(panel, markup=self.config.logger.enable_markup)
console.console.print("\n")

async def show_prompt_loaded(
@@ -270,5 +270,5 @@ async def show_prompt_loaded(
subtitle_align="left",
)

-console.console.print(panel)
+console.console.print(panel, markup=self.config.logger.enable_markup)
console.console.print("\n")
@@ -2,6 +2,8 @@ logging:
dir_path: null
file_path: null
level: DEBUG
logger:
enable_markup: false

mcp:
name: evaluator_optimizer_tests
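For reference, a rough sketch of how a nested logger block like the one above could surface on the settings object that ConsoleDisplay reads as self.config.logger.enable_markup. The file name and loading code here are assumptions for illustration, not the project's own loader:

import yaml
from mcp_agent.config import Settings  # assumed to expose a `logger: LoggerSettings` field

# Hypothetical file name, used only for this example.
with open("fastagent.config.yaml") as f:
    raw = yaml.safe_load(f)

settings = Settings(logger=raw.get("logger", {}))
print(settings.logger.enable_markup)  # False for the config above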
@@ -233,6 +233,73 @@ async def agent_function():

await agent_function()

@pytest.mark.integration
@pytest.mark.asyncio
async def test_check_markup_config(fast_agent):
"""Test that evaluator-optimizer stops when quality threshold is met."""
fast = fast_agent

@fast.agent(name="generator_quality", model="passthrough")
@fast.agent(name="evaluator_quality", model="passthrough")
@fast.evaluator_optimizer(
name="optimizer_quality",
generator="generator_quality",
evaluator="evaluator_quality",
min_rating=QualityRating.GOOD, # Stop when reaching GOOD quality
max_refinements=5,
)
async def agent_function():
async with fast.run() as agent:
# Initial generation
initial_response = f"{FIXED_RESPONSE_INDICATOR} Initial draft."
await agent.generator_quality._llm.generate([Prompt.user(initial_response)])

# First evaluation - needs improvement (FAIR is below GOOD threshold)
first_eval = {
"rating": "FAIR",
"feedback": "Needs improvement.",
"needs_improvement": True,
"focus_areas": ["Be more specific"],
}
first_eval_json = json.dumps(first_eval)
await agent.evaluator_quality._llm.generate(
[Prompt.user(f"{FIXED_RESPONSE_INDICATOR} {first_eval_json}")]
)

# First refinement
first_refinement = f"{FIXED_RESPONSE_INDICATOR} First refinement with more details."
await agent.generator_quality._llm.generate([Prompt.user(first_refinement)])

# Second evaluation - meets quality threshold (GOOD)
second_eval = {
"rating": "GOOD",
"feedback": "Much better!",
"needs_improvement": False,
"focus_areas": [],
}
second_eval_json = json.dumps(second_eval)
await agent.evaluator_quality._llm.generate(
[Prompt.user(f"{FIXED_RESPONSE_INDICATOR} {second_eval_json}")]
)

# Additional refinement response (should not be used because we hit quality threshold)
unused_response = f"{FIXED_RESPONSE_INDICATOR} This refinement should never be used."
await agent.generator_quality._llm.generate([Prompt.user(unused_response)])

# Send input containing Rich markup characters ('[/]') and get optimized output
result = await agent.optimizer_quality.send("'[/]Write something")

# Just check we got a non-empty result - we don't need to check the exact content
# since what matters is that the markup characters in the request didn't break display
assert result is not None
assert len(result) > 0 # Should have some content

# Verify early stopping
history = agent.optimizer_quality.refinement_history
assert len(history) <= 2 # Should not have more than 2 iterations

await agent_function()


@pytest.mark.integration
@pytest.mark.asyncio