1 change: 1 addition & 0 deletions src/mcp_agent/config.py
@@ -249,6 +249,7 @@ class LoggerSettings(BaseModel):
"""Show MCP Sever tool calls on the console"""
truncate_tools: bool = True
"""Truncate display of long tool calls"""
enable_markup: bool = True


class Settings(BaseSettings):
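Note: the new field is a plain Pydantic boolean with a True default, so existing configurations keep Rich markup enabled unless they opt out explicitly. A minimal sketch of the intended usage, assuming the remaining LoggerSettings fields all have defaults:

from mcp_agent.config import LoggerSettings

default_logger = LoggerSettings()                   # enable_markup defaults to True
quiet_logger = LoggerSettings(enable_markup=False)  # opt out of Rich markup parsing
print(default_logger.enable_markup, quiet_logger.enable_markup)  # True False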
12 changes: 6 additions & 6 deletions src/mcp_agent/ui/console_display.py
@@ -46,7 +46,7 @@ def show_tool_result(self, result: CallToolResult) -> None:
if len(str(result.content)) > 360:
panel.height = 8

-console.console.print(panel)
+console.console.print(panel, markup=self.config.logger.enable_markup)
console.console.print("\n")

def show_oai_tool_result(self, result) -> None:
@@ -67,7 +67,7 @@ def show_oai_tool_result(self, result) -> None:
if len(str(result)) > 360:
panel.height = 8

-console.console.print(panel)
+console.console.print(panel, markup=self.config.logger.enable_markup)
console.console.print("\n")

def show_tool_call(self, available_tools, tool_name, tool_args) -> None:
@@ -92,7 +92,7 @@ def show_tool_call(self, available_tools, tool_name, tool_args) -> None:
if len(str(tool_args)) > 360:
panel.height = 8

-console.console.print(panel)
+console.console.print(panel, markup=self.config.logger.enable_markup)
console.console.print("\n")

def _format_tool_list(self, available_tools, selected_tool_name):
@@ -172,7 +172,7 @@ async def show_assistant_message(
subtitle=display_server_list,
subtitle_align="left",
)
-console.console.print(panel)
+console.console.print(panel, markup=self.config.logger.enable_markup)
console.console.print("\n")

def show_user_message(
@@ -196,7 +196,7 @@ def show_user_message(
subtitle=subtitle_text,
subtitle_align="left",
)
-console.console.print(panel)
+console.console.print(panel, markup=self.config.logger.enable_markup)
console.console.print("\n")

async def show_prompt_loaded(
@@ -270,5 +270,5 @@ async def show_prompt_loaded(
subtitle_align="left",
)

-console.console.print(panel)
+console.console.print(panel, markup=self.config.logger.enable_markup)
console.console.print("\n")
@@ -2,6 +2,8 @@ logging:
dir_path: null
file_path: null
level: DEBUG
logger:
enable_markup: false

mcp:
name: evaluator_optimizer_tests
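For reference, a rough sketch of how a nested logger block like the one above could surface on the settings object that ConsoleDisplay reads as self.config.logger.enable_markup. The file name and loading code here are assumptions for illustration, not the project's own loader:

import yaml
from mcp_agent.config import Settings  # assumed to expose a `logger: LoggerSettings` field

# Hypothetical file name, used only for this example.
with open("fastagent.config.yaml") as f:
    raw = yaml.safe_load(f)

settings = Settings(logger=raw.get("logger", {}))
print(settings.logger.enable_markup)  # False for the config above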
@@ -233,6 +233,73 @@ async def agent_function():

await agent_function()

@pytest.mark.integration
@pytest.mark.asyncio
async def test_check_markup_config(fast_agent):
"""Test that evaluator-optimizer stops when quality threshold is met."""
fast = fast_agent

@fast.agent(name="generator_quality", model="passthrough")
@fast.agent(name="evaluator_quality", model="passthrough")
@fast.evaluator_optimizer(
name="optimizer_quality",
generator="generator_quality",
evaluator="evaluator_quality",
min_rating=QualityRating.GOOD, # Stop when reaching GOOD quality
max_refinements=5,
)
async def agent_function():
async with fast.run() as agent:
# Initial generation
initial_response = f"{FIXED_RESPONSE_INDICATOR} Initial draft."
await agent.generator_quality._llm.generate([Prompt.user(initial_response)])

# First evaluation - needs improvement (FAIR is below GOOD threshold)
first_eval = {
"rating": "FAIR",
"feedback": "Needs improvement.",
"needs_improvement": True,
"focus_areas": ["Be more specific"],
}
first_eval_json = json.dumps(first_eval)
await agent.evaluator_quality._llm.generate(
[Prompt.user(f"{FIXED_RESPONSE_INDICATOR} {first_eval_json}")]
)

# First refinement
first_refinement = f"{FIXED_RESPONSE_INDICATOR} First refinement with more details."
await agent.generator_quality._llm.generate([Prompt.user(first_refinement)])

# Second evaluation - meets quality threshold (GOOD)
second_eval = {
"rating": "GOOD",
"feedback": "Much better!",
"needs_improvement": False,
"focus_areas": [],
}
second_eval_json = json.dumps(second_eval)
await agent.evaluator_quality._llm.generate(
[Prompt.user(f"{FIXED_RESPONSE_INDICATOR} {second_eval_json}")]
)

# Additional refinement response (should not be used because we hit quality threshold)
unused_response = f"{FIXED_RESPONSE_INDICATOR} This refinement should never be used."
await agent.generator_quality._llm.generate([Prompt.user(unused_response)])

# Send input containing Rich markup characters ('[/]') and get optimized output
result = await agent.optimizer_quality.send("'[/]Write something")

# Just check we got a non-empty result - we don't need to check the exact content
# since what matters is that the markup characters in the request didn't break display
assert result is not None
assert len(result) > 0 # Should have some content

# Verify early stopping
history = agent.optimizer_quality.refinement_history
assert len(history) <= 2 # Should not have more than 2 iterations

await agent_function()


@pytest.mark.integration
@pytest.mark.asyncio