@@ -556,29 +556,41 @@ def _truncate(self, s: str, max_len: int = 4000) -> str:
556556
def _build_user_message(self, task: SweTask) -> str:
    """Build the initial user message for the agent.

    The message lists the repository, language, truncated PR description
    and truncated diff, followed by a fixed five-step workflow and a set
    of reminders.

    Args:
        task: The task to describe. Its ``repo``, ``language``, ``prompt``
            and ``patch`` attributes are read.

    Returns:
        The fully formatted prompt text.
    """
    # The workflow used to hard-code Python tooling (apt-get python3, pytest),
    # which is wrong for tasks in any other language. Pull the per-language
    # suggestions back in so the agent gets commands that match task.language.
    build_cmds, test_cmds = self._test_commands_for_language(task.language)
    # An empty command list would otherwise render as a dangling "label: ".
    build_hint = " && ".join(build_cmds) or "(none suggested)"
    test_hint = " && ".join(test_cmds) or "(none suggested)"

    return f"""Repository: {task.repo}
Language: {task.language}
PR description: {self._truncate(task.prompt, 1000)}

Diff (truncated):
```
{self._truncate(task.patch, 4000)}
```

== WORKFLOW (FOLLOW EXACTLY) ==

Step 1: Install dependencies
- Suggested build for {task.language}: {build_hint}
- For Python repos, run: apt-get update && apt-get install -y python3 python3-pip git
- Check the project's build files (e.g. pyproject.toml/setup.py) for install commands
- Run install commands via `shell`

Step 2: Explore the changed code
- Use `read_file` to read the files mentioned in the diff
- Use `list_dir` to understand project structure

Step 3: WRITE TEST FILES (DO THIS NOW!)
- Use `write_file` to create test files (for Python: test_swe_<feature>.py)
- Write behavioral tests that EXERCISE the changed functionality

Step 4: Run tests to validate
- Suggested test command for {task.language}: {test_hint}
- For Python, use `shell` to run: pytest -c /dev/null test_swe_<feature>.py -v

Step 5: SUBMIT (MUST CALL THIS!)
- Call `submit_tests` with your test files and install commands

== CRITICAL REMINDERS ==
- Do NOT just explore forever - WRITE TEST FILES using `write_file`
- Do NOT end without calling `submit_tests`
- Tests MUST be behavioral (import, call functions, check values)
- Do NOT read source and assert on file content."""
582594
583595 def _test_commands_for_language (self , language : str ) -> tuple [list [str ], list [str ]]:
584596 """Get suggested build and test commands for a language.
0 commit comments