def _parse_memory_summary(summary):
    """Parse ``'Total: 1.0 MB, Used: 2.0 MB'`` into ``{'memory_total': 1.0, ...}``."""
    stats = {}
    for part in summary.strip().split(','):
        key, value = part.strip().split(':')
        stats[f'memory_{key.lower()}'] = float(value.replace('MB', '').strip())
    return stats


def _parse_float_or_none(text):
    """Return ``float(text)``, or ``None`` when *text* is not a single number."""
    try:
        return float(text.strip())
    except (ValueError, AttributeError):
        # Empty output (no matching process) or several matching lines.
        return None


@pytest.mark.skipif(
    TEST_IN_CI,
    reason='This test should only be run locally, not in CI.',
)
def test_stress_remote_runtime_long_output_with_soft_and_hard_timeout():
    """Stress test for the remote runtime.

    Installs ``stress-ng`` in the sandbox, then runs ten iterations. Each
    iteration:

    1. records memory statistics (overall, top processes, tmux, action
       execution server);
    2. runs an interactive command that waits for input (soft timeout) and
       answers it;
    3. runs a long-output command under a 2 second hard timeout, checks that
       the terminal rejects new commands, and recovers it with Ctrl+C;
    4. runs ``stress-ng`` for one minute.

    When ``SAVE_PERF_DEBUG`` is truthy, the per-iteration statistics are
    appended to ``terminal_perf_analysis_result_<timestamp>.jsonl``.
    """
    config = get_config()
    runtime = create_runtime(config, headless_mode=True)
    call_async_from_sync(runtime.connect)

    _time_for_test = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

    # Everything after connect runs inside the try block so the runtime is
    # closed even when the setup step fails.
    try:
        action = CmdRunAction(
            command='sudo apt-get update && sudo apt-get install -y stress-ng'
        )
        logger.info(action, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert obs.exit_code == 0

        # Run a command that generates long output multiple times
        for i in range(10):
            start_time = time.time()
            iteration_stats = {
                'iteration': i,
                'timestamp': start_time,
            }

            # Check overall system memory usage
            mem_action = CmdRunAction(
                'free -k | grep "Mem:" | awk \'{printf "Total: %8.1f MB, Used: %8.1f MB, Free: %8.1f MB, Available: %8.1f MB\\n", $2/1024, $3/1024, $4/1024, $7/1024}\''
            )
            mem_obs = runtime.run_action(mem_action)
            assert mem_obs.exit_code == 0
            logger.info(
                f'System memory usage (iteration {i}): {mem_obs.content.strip()}'
            )
            # Parse memory values from output
            iteration_stats.update(_parse_memory_summary(mem_obs.content))

            # Check top memory-consuming processes
            mem_action = CmdRunAction(
                'ps aux | awk \'{printf "%8.1f MB %s\\n", $6/1024, $0}\' | sort -nr | head -n 5'
            )
            mem_obs = runtime.run_action(mem_action)
            assert mem_obs.exit_code == 0
            top_processes = [
                line.strip() for line in mem_obs.content.strip().split('\n')
            ]
            # Build the bullet list outside the f-string: a backslash inside an
            # f-string replacement field is a SyntaxError before Python 3.12.
            process_list = '- ' + '\n- '.join(top_processes)
            logger.info(
                f'Top 5 memory-consuming processes (iteration {i}):\n{process_list}'
            )
            iteration_stats['top_processes'] = top_processes

            # Check tmux memory usage (in MB: the awk script divides RSS by 1024)
            mem_action = CmdRunAction(
                'ps aux | awk \'{printf "%8.1f MB %s\\n", $6/1024, $0}\' | sort -nr | grep "/usr/bin/tmux" | grep -v grep | awk \'{print $1}\''
            )
            mem_obs = runtime.run_action(mem_action)
            assert mem_obs.exit_code == 0
            logger.info(
                f'Tmux memory usage (iteration {i}): {mem_obs.content.strip()} MB'
            )
            iteration_stats['tmux_memory_mb'] = _parse_float_or_none(
                mem_obs.content
            )

            # Check action_execution_server mem
            mem_action = CmdRunAction(
                'ps aux | awk \'{printf "%8.1f MB %s\\n", $6/1024, $0}\' | sort -nr | grep "action_execution_server" | grep "/openhands/poetry" | grep -v grep | awk \'{print $1}\''
            )
            mem_obs = runtime.run_action(mem_action)
            assert mem_obs.exit_code == 0
            logger.info(
                f'Action execution server memory usage (iteration {i}): {mem_obs.content.strip()} MB'
            )
            iteration_stats['action_server_memory_mb'] = _parse_float_or_none(
                mem_obs.content
            )

            # Test soft timeout
            action = CmdRunAction(
                'read -p "Do you want to continue? [Y/n] " answer; if [[ $answer == "Y" ]]; then echo "Proceeding with operation..."; echo "Operation completed successfully!"; else echo "Operation cancelled."; exit 1; fi'
            )
            obs = runtime.run_action(action)
            assert 'Do you want to continue?' in obs.content
            assert obs.exit_code == -1  # Command is still running, waiting for input

            # Send the confirmation
            action = CmdRunAction('Y', is_input=True)
            obs = runtime.run_action(action)
            assert 'Proceeding with operation...' in obs.content
            assert 'Operation completed successfully!' in obs.content
            assert obs.exit_code == 0
            assert '[The command completed with exit code 0.]' in obs.metadata.suffix

            # Test hard timeout w/ long output
            # Generate long output with 1000 asterisks per line
            action = CmdRunAction(
                f'export i={i}; for j in $(seq 1 100); do echo "Line $j - Iteration $i - $(printf \'%1000s\' | tr " " "*")"; sleep 1; done'
            )
            action.set_hard_timeout(2)
            obs = runtime.run_action(action)

            # Verify the output
            assert obs.exit_code == -1
            assert f'Line 1 - Iteration {i}' in obs.content

            # Because the hard timeout is triggered, the terminal will be in a
            # weird state where it will not accept any new commands.
            obs = runtime.run_action(CmdRunAction('ls'))
            assert obs.exit_code == -1
            assert 'The previous command is still running' in obs.metadata.suffix

            # We need to send a Ctrl+C to reset the terminal.
            obs = runtime.run_action(CmdRunAction('C-c', is_input=True))
            assert obs.exit_code == 130

            # Now make sure the terminal is in a good state
            obs = runtime.run_action(CmdRunAction('ls'))
            assert obs.exit_code == 0

            # Run stress-ng stress tests for 1 minute.
            # NOTE(review): the exit code is not asserted here, matching the
            # original test -- confirm whether that is deliberate.
            action = CmdRunAction(command='stress-ng --all 1 -t 1m')
            action.set_hard_timeout(120)
            logger.info(action, extra={'msg_type': 'ACTION'})
            obs = runtime.run_action(action)
            logger.info(obs, extra={'msg_type': 'OBSERVATION'})

            duration = time.time() - start_time
            iteration_stats['duration'] = duration
            logger.info(f'Completed iteration {i} in {duration:.2f} seconds')

            # Save stats to JSONL file
            if SAVE_PERF_DEBUG:
                with open(
                    f'terminal_perf_analysis_result_{_time_for_test}.jsonl',
                    'a',
                    encoding='utf-8',
                ) as f:
                    json.dump(iteration_stats, f)
                    f.write('\n')
    finally:
        runtime.close()