autoforge/autonomous_agent_demo.py at master · AutoForgeAI/autoforge · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
#!/usr/bin/env python3
"""
Autonomous Coding Agent Demo
============================

A minimal harness demonstrating long-running autonomous coding with Claude.
This script implements a unified orchestrator pattern that handles:
- Initialization (creating features from app_spec)
- Coding agents (implementing features)
- Testing agents (regression testing)

Example Usage:
    # Using absolute path directly
    python autonomous_agent_demo.py --project-dir C:/Projects/my-app

    # Using registered project name (looked up from registry)
    python autonomous_agent_demo.py --project-dir my-app

    # Limit iterations for testing (when running as subprocess)
    python autonomous_agent_demo.py --project-dir my-app --max-iterations 5

    # YOLO mode: rapid prototyping without testing agents
    python autonomous_agent_demo.py --project-dir my-app --yolo

    # Parallel execution with 3 concurrent coding agents
    python autonomous_agent_demo.py --project-dir my-app --concurrency 3

    # Single agent mode (orchestrator with concurrency=1, the default)
    python autonomous_agent_demo.py --project-dir my-app

    # Run as specific agent type (used by orchestrator to spawn subprocesses)
    python autonomous_agent_demo.py --project-dir my-app --agent-type initializer
    python autonomous_agent_demo.py --project-dir my-app --agent-type coding --feature-id 42
    python autonomous_agent_demo.py --project-dir my-app --agent-type testing
"""

import argparse
import asyncio
from pathlib import Path

from dotenv import load_dotenv

# Load environment variables from .env file (if it exists)
# IMPORTANT: Must be called BEFORE importing other modules that read env vars at load time
load_dotenv()

import os

from agent import run_autonomous_agent
from registry import DEFAULT_MODEL, get_effective_sdk_env, get_project_path


def parse_args() -> argparse.Namespace:
    """Build the command-line interface and parse the process arguments.

    Options are declared in an ordered table and registered in a single loop,
    so the order shown by ``--help`` follows the table order.

    Returns:
        The namespace produced by ``argparse`` for the current command line.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="Autonomous Coding Agent Demo - Unified orchestrator pattern",
        epilog="""
Examples:
  # Use absolute path directly (single agent, default)
  python autonomous_agent_demo.py --project-dir C:/Projects/my-app

  # Use registered project name (looked up from registry)
  python autonomous_agent_demo.py --project-dir my-app

  # Parallel execution with 3 concurrent agents
  python autonomous_agent_demo.py --project-dir my-app --concurrency 3

  # YOLO mode: rapid prototyping without testing agents
  python autonomous_agent_demo.py --project-dir my-app --yolo

  # Configure testing agent ratio (2 testing agents per coding agent)
  python autonomous_agent_demo.py --project-dir my-app --testing-ratio 2

  # Disable testing agents (similar to YOLO but with verification)
  python autonomous_agent_demo.py --project-dir my-app --testing-ratio 0

Authentication:
  Uses Claude CLI authentication. API key (ANTHROPIC_API_KEY) is recommended.
  Alternatively run 'claude login', but note Anthropic's policy may restrict subscription auth.
        """,
    )

    # Each entry is (option strings, add_argument keyword arguments).
    option_table = [
        # --- Project / run selection ---
        (
            ("--project-dir",),
            {
                "type": str,
                "required": True,
                "help": "Project directory path (absolute) or registered project name",
            },
        ),
        (
            ("--max-iterations",),
            {
                "type": int,
                "default": None,
                "help": "Maximum number of agent iterations (default: unlimited, typically 1 for subprocesses)",
            },
        ),
        (
            ("--model",),
            {
                "type": str,
                "default": DEFAULT_MODEL,
                "help": f"Claude model to use (default: {DEFAULT_MODEL})",
            },
        ),
        (
            ("--yolo",),
            {
                "action": "store_true",
                "default": False,
                "help": "Enable YOLO mode: skip testing agents for rapid prototyping",
            },
        ),
        # --- Unified orchestrator mode (replaces --parallel) ---
        (
            ("--concurrency", "-c"),
            {
                "type": int,
                "default": 1,
                "help": "Number of concurrent coding agents (default: 1, max: 5)",
            },
        ),
        # Backward compatibility: --parallel is a deprecated alias for
        # --concurrency; a bare "--parallel" (no value) yields const=3.
        (
            ("--parallel", "-p"),
            {
                "type": int,
                "nargs": "?",
                "const": 3,
                "default": None,
                "metavar": "N",
                "help": "DEPRECATED: Use --concurrency instead. Alias for --concurrency.",
            },
        ),
        # --- Feature targeting for coding agents ---
        (
            ("--feature-id",),
            {
                "type": int,
                "default": None,
                "help": "Work on a specific feature ID only (used by orchestrator for coding agents)",
            },
        ),
        (
            ("--feature-ids",),
            {
                "type": str,
                "default": None,
                "help": "Comma-separated feature IDs to implement in batch (e.g., '5,8,12')",
            },
        ),
        # --- Agent type for subprocess mode ---
        (
            ("--agent-type",),
            {
                "choices": ["initializer", "coding", "testing"],
                "default": None,
                "help": "Agent type (used by orchestrator to spawn specialized subprocesses)",
            },
        ),
        (
            ("--testing-feature-id",),
            {
                "type": int,
                "default": None,
                "help": "Feature ID to regression test (used by orchestrator for testing agents, legacy single mode)",
            },
        ),
        (
            ("--testing-feature-ids",),
            {
                "type": str,
                "default": None,
                "help": "Comma-separated feature IDs to regression test in batch (e.g., '5,12,18')",
            },
        ),
        # --- Testing agent configuration ---
        (
            ("--testing-ratio",),
            {
                "type": int,
                "default": 1,
                "help": "Testing agents per coding agent (0-3, default: 1). Set to 0 to disable testing agents.",
            },
        ),
        (
            ("--testing-batch-size",),
            {
                "type": int,
                "default": 3,
                "help": "Number of features per testing batch (1-15, default: 3)",
            },
        ),
        (
            ("--batch-size",),
            {
                "type": int,
                "default": 3,
                "help": "Max features per coding agent batch (1-15, default: 3)",
            },
        ),
        # --- Auto-improve mode ---
        (
            ("--auto-improve",),
            {
                "action": "store_true",
                "default": False,
                "help": (
                    "Run in auto-improve mode: a single agent session that analyses "
                    "the codebase, creates one improvement feature, implements it, "
                    "verifies with lint/typecheck/build, commits, and exits."
                ),
            },
        ),
    ]

    for option_strings, settings in option_table:
        cli.add_argument(*option_strings, **settings)

    return cli.parse_args()


def _parse_id_list(raw: str | None, flag: str) -> list[int] | None:
    """Convert a comma-separated ID string into a list of integers.

    Args:
        raw: Raw option value (e.g. ``"5, 8,12"``), or ``None``/empty when the
            option was not supplied.
        flag: Option name used in the error message (e.g. ``"--feature-ids"``).

    Returns:
        ``None`` when *raw* is empty or missing; otherwise the parsed IDs, with
        blank entries (such as those produced by a trailing comma) skipped.

    Raises:
        SystemExit: With status 1, after printing an error, when any entry is
            not a valid integer.
    """
    if not raw:
        return None
    try:
        return [int(part.strip()) for part in raw.split(",") if part.strip()]
    except ValueError:
        print(f"Error: {flag} must be comma-separated integers, got: {raw}")
        # Exit non-zero so a parent process can tell the run failed.
        raise SystemExit(1) from None


def main() -> None:
    """Main entry point.

    Parses the command line, applies provider settings to the environment,
    resolves and migrates the project directory, then dispatches to one of
    three modes:

    - ``--auto-improve``: a single coding-agent session limited to one iteration.
    - ``--agent-type``: subprocess mode, running one agent of the given type.
    - otherwise: the parallel orchestrator, after cleaning stale temp files.

    Raises:
        SystemExit: With status 1 when the project directory cannot be
            resolved or a feature-ID list is malformed.
        Exception: Any error raised while running the agent or orchestrator is
            reported and re-raised. ``KeyboardInterrupt`` is reported and
            swallowed so the run can be resumed later.
    """
    print("[ENTRY] autonomous_agent_demo.py starting...", flush=True)
    args = parse_args()

    # Note: Authentication is handled by start.bat/start.sh before this script runs.
    # The Claude SDK auto-detects credentials from ~/.claude/.credentials.json

    # Apply UI-configured provider settings to this process's environment.
    # This ensures CLI-launched agents respect Settings UI provider config (GLM, Ollama, etc.).
    # Uses setdefault so explicit env vars / .env file take precedence.
    sdk_overrides = get_effective_sdk_env()
    for key, value in sdk_overrides.items():
        if value:  # Only set non-empty values (empty values are used to clear conflicts)
            os.environ.setdefault(key, value)

    # Handle deprecated --parallel flag
    if args.parallel is not None:
        print("WARNING: --parallel is deprecated. Use --concurrency instead.", flush=True)
        args.concurrency = args.parallel

    # Resolve project directory:
    # 1. If absolute path, use as-is
    # 2. Otherwise, look up from registry by name
    project_dir_input = args.project_dir
    project_dir = Path(project_dir_input)

    if project_dir.is_absolute():
        # Absolute path provided - use directly
        if not project_dir.exists():
            print(f"Error: Project directory does not exist: {project_dir}")
            raise SystemExit(1)
        if not project_dir.is_dir():
            # A regular file would otherwise slip through and fail later on.
            print(f"Error: Project path is not a directory: {project_dir}")
            raise SystemExit(1)
    else:
        # Treat as a project name - look up from registry
        registered_path = get_project_path(project_dir_input)
        if not registered_path:
            print(f"Error: Project '{project_dir_input}' not found in registry")
            print("Use an absolute path or register the project first.")
            raise SystemExit(1)
        project_dir = registered_path

    # Migrate project layout to .autoforge/ if needed (idempotent, safe)
    from autoforge_paths import migrate_project_layout
    migrated = migrate_project_layout(project_dir)
    if migrated:
        print(f"Migrated project files to .autoforge/: {', '.join(migrated)}", flush=True)

    # Migrate project to current AutoForge version (idempotent, safe)
    from prompts import migrate_project_to_current
    version_migrated = migrate_project_to_current(project_dir)
    if version_migrated:
        print(f"Upgraded project: {', '.join(version_migrated)}", flush=True)

    # Parse batch feature IDs (comma-separated string -> list[int]).
    # Testing IDs are validated first, matching the original error precedence.
    testing_feature_ids = _parse_id_list(args.testing_feature_ids, "--testing-feature-ids")
    coding_feature_ids = _parse_id_list(args.feature_ids, "--feature-ids")

    try:
        if args.auto_improve:
            # Auto-improve mode: single agent session, one improvement per run.
            # Bypasses the parallel orchestrator entirely — auto-improve is
            # always single-agent, single-feature, and exits after one commit.
            print("[AUTO-IMPROVE] Starting single-session improvement run...", flush=True)
            asyncio.run(
                run_autonomous_agent(
                    project_dir=project_dir,
                    model=args.model,
                    max_iterations=1,
                    yolo_mode=args.yolo,
                    agent_type="coding",
                    auto_improve=True,
                )
            )
        elif args.agent_type:
            # Subprocess mode - spawned by orchestrator for a specific role
            asyncio.run(
                run_autonomous_agent(
                    project_dir=project_dir,
                    model=args.model,
                    # A missing (or zero) limit falls back to a single iteration.
                    max_iterations=args.max_iterations or 1,
                    yolo_mode=args.yolo,
                    feature_id=args.feature_id,
                    feature_ids=coding_feature_ids,
                    agent_type=args.agent_type,
                    testing_feature_id=args.testing_feature_id,
                    testing_feature_ids=testing_feature_ids,
                )
            )
        else:
            # Entry point mode - always use unified orchestrator
            # Clean up stale temp files before starting (prevents temp folder bloat)
            from temp_cleanup import cleanup_stale_temp
            cleanup_stats = cleanup_stale_temp()
            if cleanup_stats["dirs_deleted"] > 0 or cleanup_stats["files_deleted"] > 0:
                mb_freed = cleanup_stats["bytes_freed"] / (1024 * 1024)
                print(
                    f"[CLEANUP] Removed {cleanup_stats['dirs_deleted']} dirs, "
                    f"{cleanup_stats['files_deleted']} files ({mb_freed:.1f} MB freed)",
                    flush=True,
                )

            from parallel_orchestrator import run_parallel_orchestrator

            # Clamp concurrency to valid range (1-5)
            concurrency = max(1, min(args.concurrency, 5))
            if concurrency != args.concurrency:
                print(f"Clamping concurrency to valid range: {concurrency}", flush=True)

            asyncio.run(
                run_parallel_orchestrator(
                    project_dir=project_dir,
                    max_concurrency=concurrency,
                    model=args.model,
                    yolo_mode=args.yolo,
                    testing_agent_ratio=args.testing_ratio,
                    testing_batch_size=args.testing_batch_size,
                    batch_size=args.batch_size,
                )
            )
    except KeyboardInterrupt:
        print("\n\nInterrupted by user")
        print("To resume, run the same command again")
    except Exception as e:
        # Report the failure, then re-raise so the traceback and a non-zero
        # exit status still reach the caller.
        print(f"\nFatal error: {e}")
        raise


# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()