Skip to content

Commit

Permalink
reliable way to reproduce error
Browse files Browse the repository at this point in the history
  • Loading branch information
xingyaoww committed Jan 18, 2025
1 parent 501824a commit 867f672
Showing 1 changed file with 170 additions and 2 deletions.
172 changes: 170 additions & 2 deletions tests/runtime/test_stress_remote_runtime.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,22 @@
"""Bash-related tests for the EventStreamRuntime, which connects to the ActionExecutor running in the sandbox."""
"""Stress tests for the remote runtime.
Example usage:
```bash
export ALLHANDS_API_KEY="YOUR_API_KEY"
export RUNTIME=remote
export SANDBOX_REMOTE_RUNTIME_API_URL="https://runtime.staging.all-hands.dev"
poetry run pytest -vvxss tests/runtime/test_stress_remote_runtime.py
```
"""

import asyncio
import json
import os
import tempfile
import time
from datetime import datetime
from unittest.mock import MagicMock

import pandas as pd
Expand Down Expand Up @@ -41,6 +55,8 @@
'CodeActAgent': codeact_user_response,
}

# Opt-in switch taken from the SAVE_PERF_DEBUG environment variable. Only the
# values 'true' and '1' (compared case-insensitively) enable it; an unset
# variable falls back to 'false'.
SAVE_PERF_DEBUG = os.getenv('SAVE_PERF_DEBUG', 'false').lower() in ('true', '1')


def get_config() -> AppConfig:
assert (
Expand Down Expand Up @@ -140,7 +156,7 @@ def _process_instance(
else:
logger.info(f'Starting evaluation for instance {instance.instance_id}.')

runtime = create_runtime(config, headless_mode=False)
runtime = create_runtime(config, headless_mode=True)
call_async_from_sync(runtime.connect)

try:
Expand Down Expand Up @@ -224,3 +240,155 @@ def next_command(*args, **kwargs):
)

run_evaluation(instances, metadata, output_file, n_eval_workers, _process_instance)


@pytest.mark.skipif(
    TEST_IN_CI,
    reason='This test should only be run locally, not in CI.',
)
def test_stress_remote_runtime_long_output_with_soft_and_hard_timeout():
    """Stress test for the remote runtime.

    Installs ``stress-ng`` in the sandbox, then repeats the following 10 times:

    1. Sample memory statistics (``free`` totals, the top 5 processes by
       resident memory, tmux, and the action execution server).
    2. Run an interactive ``read -p`` prompt, check that it is reported as
       still running (exit code -1), then answer it with ``Y``.
    3. Run a long-output command under a 2 second hard timeout, check that the
       terminal rejects the next command, and reset it with Ctrl+C.
    4. Run ``stress-ng --all 1 -t 1m`` under a 120 second hard timeout.

    When ``SAVE_PERF_DEBUG`` is truthy, the per-iteration statistics are
    appended as one JSON object per line to
    ``terminal_perf_analysis_result_<timestamp>.jsonl`` in the current
    working directory.
    """
    config = get_config()
    runtime = create_runtime(config, headless_mode=True)
    call_async_from_sync(runtime.connect)

    _time_for_test = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

    try:
        # The install step lives inside the try block so that the runtime is
        # still closed when the installation (or its assertion) fails.
        action = CmdRunAction(
            command='sudo apt-get update && sudo apt-get install -y stress-ng'
        )
        logger.info(action, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert obs.exit_code == 0

        # Run a command that generates long output multiple times
        for i in range(10):
            start_time = time.time()
            iteration_stats = {
                'iteration': i,
                'timestamp': start_time,
            }

            # Check overall system memory usage
            mem_action = CmdRunAction(
                'free -k | grep "Mem:" | awk \'{printf "Total: %8.1f MB, Used: %8.1f MB, Free: %8.1f MB, Available: %8.1f MB\\n", $2/1024, $3/1024, $4/1024, $7/1024}\''
            )
            mem_obs = runtime.run_action(mem_action)
            assert mem_obs.exit_code == 0
            logger.info(
                f'System memory usage (iteration {i}): {mem_obs.content.strip()}'
            )
            # Parse memory values from output: each comma-separated part has
            # the form "<Name>: <number> MB" (see the awk format string above).
            for part in mem_obs.content.strip().split(','):
                key, value = part.strip().split(':', 1)
                iteration_stats[f'memory_{key.lower()}'] = float(
                    value.replace('MB', '').strip()
                )

            # Check top memory-consuming processes
            mem_action = CmdRunAction(
                'ps aux | awk \'{printf "%8.1f MB %s\\n", $6/1024, $0}\' | sort -nr | head -n 5'
            )
            mem_obs = runtime.run_action(mem_action)
            assert mem_obs.exit_code == 0
            _top_processes = [
                line.strip() for line in mem_obs.content.strip().split('\n')
            ]
            # Build the bullet list outside the f-string: a backslash inside an
            # f-string replacement field is a SyntaxError before Python 3.12.
            _top_processes_text = '- ' + '\n- '.join(_top_processes)
            logger.info(
                f'Top 5 memory-consuming processes (iteration {i}):\n{_top_processes_text}'
            )
            iteration_stats['top_processes'] = _top_processes

            # Check tmux memory usage (in MB: the awk stage divides column 6
            # of `ps aux` by 1024 and labels the result "MB")
            mem_action = CmdRunAction(
                'ps aux | awk \'{printf "%8.1f MB %s\\n", $6/1024, $0}\' | sort -nr | grep "/usr/bin/tmux" | grep -v grep | awk \'{print $1}\''
            )
            mem_obs = runtime.run_action(mem_action)
            assert mem_obs.exit_code == 0
            logger.info(
                f'Tmux memory usage (iteration {i}): {mem_obs.content.strip()} MB'
            )
            try:
                iteration_stats['tmux_memory_mb'] = float(mem_obs.content.strip())
            except (ValueError, AttributeError):
                # Output that is not a single number (e.g. empty or several
                # lines) is recorded as missing rather than failing the test.
                iteration_stats['tmux_memory_mb'] = None

            # Check action_execution_server mem
            mem_action = CmdRunAction(
                'ps aux | awk \'{printf "%8.1f MB %s\\n", $6/1024, $0}\' | sort -nr | grep "action_execution_server" | grep "/openhands/poetry" | grep -v grep | awk \'{print $1}\''
            )
            mem_obs = runtime.run_action(mem_action)
            assert mem_obs.exit_code == 0
            logger.info(
                f'Action execution server memory usage (iteration {i}): {mem_obs.content.strip()} MB'
            )
            try:
                iteration_stats['action_server_memory_mb'] = float(
                    mem_obs.content.strip()
                )
            except (ValueError, AttributeError):
                iteration_stats['action_server_memory_mb'] = None

            # Test soft timeout
            action = CmdRunAction(
                'read -p "Do you want to continue? [Y/n] " answer; if [[ $answer == "Y" ]]; then echo "Proceeding with operation..."; echo "Operation completed successfully!"; else echo "Operation cancelled."; exit 1; fi'
            )
            obs = runtime.run_action(action)
            assert 'Do you want to continue?' in obs.content
            assert obs.exit_code == -1  # Command is still running, waiting for input

            # Send the confirmation
            action = CmdRunAction('Y', is_input=True)
            obs = runtime.run_action(action)
            assert 'Proceeding with operation...' in obs.content
            assert 'Operation completed successfully!' in obs.content
            assert obs.exit_code == 0
            assert '[The command completed with exit code 0.]' in obs.metadata.suffix

            # Test hard timeout w/ long output
            # Generate long output with 1000 asterisks per line
            action = CmdRunAction(
                f'export i={i}; for j in $(seq 1 100); do echo "Line $j - Iteration $i - $(printf \'%1000s\' | tr " " "*")"; sleep 1; done'
            )
            action.set_hard_timeout(2)
            obs = runtime.run_action(action)

            # Verify the output
            assert obs.exit_code == -1
            assert f'Line 1 - Iteration {i}' in obs.content

            # Because the hard timeout was triggered, the terminal will be in
            # a weird state where it will not accept any new commands.
            obs = runtime.run_action(CmdRunAction('ls'))
            assert obs.exit_code == -1
            assert 'The previous command is still running' in obs.metadata.suffix

            # We need to send a Ctrl+C to reset the terminal.
            obs = runtime.run_action(CmdRunAction('C-c', is_input=True))
            assert obs.exit_code == 130

            # Now make sure the terminal is in a good state
            obs = runtime.run_action(CmdRunAction('ls'))
            assert obs.exit_code == 0

            # run stress-ng stress tests for 1 minute
            action = CmdRunAction(command='stress-ng --all 1 -t 1m')
            action.set_hard_timeout(120)
            logger.info(action, extra={'msg_type': 'ACTION'})
            obs = runtime.run_action(action)
            logger.info(obs, extra={'msg_type': 'OBSERVATION'})

            duration = time.time() - start_time
            iteration_stats['duration'] = duration
            logger.info(f'Completed iteration {i} in {duration:.2f} seconds')

            # Save stats to JSONL file
            if SAVE_PERF_DEBUG:
                with open(
                    f'terminal_perf_analysis_result_{_time_for_test}.jsonl',
                    'a',
                    encoding='utf-8',
                ) as f:
                    json.dump(iteration_stats, f)
                    f.write('\n')
    finally:
        runtime.close()

0 comments on commit 867f672

Please sign in to comment.