Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 83 additions & 5 deletions apps/backend/core/workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -743,21 +743,99 @@ def _try_smart_merge_inner(
"preview": preview,
}

# All conflicts can be auto-merged or no conflicts
print(muted(" All changes compatible, proceeding with merge..."))
# All conflicts can be auto-merged or no conflicts - execute git merge
spec_branch = f"auto-claude/{spec_name}"
print(muted(f" All changes compatible, merging {spec_branch}..."))

# Always use --no-commit so we can inspect staged files with git diff --cached.
# If the caller did NOT request no_commit, we commit explicitly after inspection.
merge_cmd = ["merge", "--no-commit", "--no-ff", spec_branch]

# Execute git merge
merge_result = run_git(
merge_cmd,
cwd=project_dir,
)

# 'Already up to date' is returned with exit code 0, but check both
# stdout locations defensively in case git behaviour varies.
stdout_lower = merge_result.stdout.lower() if merge_result.stdout else ""
if "already up to date" in stdout_lower:
# Already up to date - treat as success
if progress_callback is not None:
progress_callback(
MergeProgressStage.COMPLETE,
100,
"Merge complete (already up to date)",
)
return {
"success": True,
"resolved_files": [],
"stats": {
"files_merged": 0,
"conflicts_resolved": 0,
"ai_assisted": 0,
"auto_merged": 0,
"git_merge": True,
},
}

if merge_result.returncode != 0:
# Merge failed - abort to restore clean state and return failure
abort_result = run_git(["merge", "--abort"], cwd=project_dir)
if abort_result.returncode != 0:
debug_error(
MODULE,
"Failed to abort merge - repo may be in inconsistent state",
stderr=abort_result.stderr,
)
return None # Trigger fallback to avoid operating on inconsistent state
return {
"success": False,
"error": f"Git merge failed: {merge_result.stderr}",
"conflicts": [],
}
Comment on lines +783 to +797
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

rg -n -B 5 -A 15 'def merge_existing_build' apps/backend/core/workspace.py

Repository: AndyMik90/Auto-Claude

Length of output: 731


🏁 Script executed:

rg -n -B 2 -A 25 'smart_result is not None' apps/backend/core/workspace.py

Repository: AndyMik90/Auto-Claude

Length of output: 1766


🏁 Script executed:

rg -n -B 2 -A 45 'if smart_result is not None:' apps/backend/core/workspace.py | head -100

Repository: AndyMik90/Auto-Claude

Length of output: 2914


🏁 Script executed:

rg -n -B 2 -A 80 'if smart_result is not None:' apps/backend/core/workspace.py | tail -80

Repository: AndyMik90/Auto-Claude

Length of output: 4381


🏁 Script executed:

rg -n -B 5 -A 30 'def _try_smart_merge' apps/backend/core/workspace.py

Repository: AndyMik90/Auto-Claude

Length of output: 2503


🏁 Script executed:

rg -n -B 3 -A 3 'return None' apps/backend/core/workspace.py | grep -A 6 -B 6 '792'

Repository: AndyMik90/Auto-Claude

Length of output: 650


🏁 Script executed:

rg -n -B 10 'return None.*Trigger fallback' apps/backend/core/workspace.py

Repository: AndyMik90/Auto-Claude

Length of output: 1290


🏁 Script executed:

rg -n -B 2 -A 20 'Fall back to standard git merge' apps/backend/core/workspace.py

Repository: AndyMik90/Auto-Claude

Length of output: 1078


Return structured error instead of None when merge abort fails.

When git merge --abort fails (lines 786–792), the function returns None to "trigger fallback to avoid operating on inconsistent state." However, this fallback at line 340 calls manager.merge_worktree() on the already-inconsistent repo, contradicting the stated intent. The second merge attempt will likely fail identically or worse.

Instead, return a structured error dict with an explicit "repo may be in an inconsistent state" message (matching the pattern at lines 793–797). This allows the caller to handle the error gracefully within the if smart_result is not None: block (line 265) rather than triggering a problematic retry.

The same issue exists at line 715.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@apps/backend/core/workspace.py` around lines 783 - 797, The code currently
returns None when run_git(["merge", "--abort"]) fails (abort_result.returncode
!= 0) which causes the caller to retry (manager.merge_worktree()) on an
inconsistent repo; instead, change both places that return None (the abort
failure handling around abort_result and the similar case at the earlier
occurrence) to return a structured error dict matching the existing pattern
(e.g., {"success": False, "error": "Git merge failed: repo may be in an
inconsistent state", "conflicts": []}) so callers can handle the failure instead
of retrying; update the blocks that reference merge_result, abort_result,
run_git, and debug_error (and where manager.merge_worktree() may be retried) to
return that error dict consistently.


# Get list of files that were actually merged (staged but not yet committed)
diff_result = run_git(
["diff", "--cached", "--name-only"],
cwd=project_dir,
)
merged_files = [
Comment on lines +801 to +804

This comment was marked as outdated.

f.strip()
for f in diff_result.stdout.splitlines()
if f.strip() and not _is_auto_claude_file(f.strip())
]

# If caller wants a commit, create it now
if not no_commit:
commit_result = run_git(
["commit", "--no-edit"],
cwd=project_dir,
)
if commit_result.returncode != 0:
debug_warning(
MODULE,
"Failed to commit merge",
stderr=commit_result.stderr,
)
Comment on lines +811 to +821
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Commit failure is silently swallowed — callers will believe the merge succeeded.

When no_commit=False, a failed git commit --no-edit logs a debug warning but execution continues, returning success: True with git_merge: True. The caller in merge_existing_build (line 278) treats git_merge_used as a completed operation and prints a success message. The user will see "merge succeeded" while the repo is left with a pending uncommitted merge.

At minimum, surface the failure in the returned dict (e.g., set a commit_failed flag or downgrade success), or propagate the error so the caller can inform the user.

Proposed fix — propagate commit failure
         # If caller wants a commit, create it now
         if not no_commit:
             commit_result = run_git(
                 ["commit", "--no-edit"],
                 cwd=project_dir,
             )
             if commit_result.returncode != 0:
-                debug_warning(
+                debug_error(
                     MODULE,
                     "Failed to commit merge",
                     stderr=commit_result.stderr,
                 )
+                # Abort the merge to leave the repo in a clean state
+                run_git(["merge", "--abort"], cwd=project_dir)
+                return {
+                    "success": False,
+                    "error": f"Merge staged successfully but commit failed: {commit_result.stderr}",
+                    "conflicts": [],
+                }
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@apps/backend/core/workspace.py` around lines 811 - 821, The commit failure is
currently only logged via debug_warning in the block using run_git (when
no_commit is False) inside the merge logic, but the function still returns
success=True and git_merge=True; update the code in that commit block to
propagate the failure: when commit_result.returncode != 0, either set a returned
flag like commit_failed=True (and include stderr) or set success=False (or raise
an exception) so callers such as merge_existing_build can detect the failed
commit; ensure the returned dict includes the new commit_failed/stderr fields or
that an exception is raised so the caller will not report the merge as
succeeded.


if progress_callback is not None:
progress_callback(
MergeProgressStage.COMPLETE,
100,
f"Analysis complete ({files_to_merge} files compatible)",
f"Merge complete ({len(merged_files)} files merged)",
)

return {
"success": True,
"resolved_files": merged_files,
"stats": {
"files_merged": files_to_merge,
"auto_resolved": auto_mergeable,
"files_merged": len(merged_files),
"conflicts_resolved": 0,
"ai_assisted": 0,
"auto_merged": len(merged_files),
"git_merge": True,
},
Comment on lines +836 to 839
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: A failed git commit is not propagated as an error, causing the function to return a success status while files remain staged but uncommitted.
Severity: MEDIUM

Suggested Fix

After the run_git call for the commit, check if commit_result.returncode is not equal to 0. If the commit has failed, the function should return a dictionary indicating failure, for example {"success": False, "error": "Failed to commit changes"}, instead of continuing and returning success.

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent.
Verify if this is a real issue. If it is, propose a fix; if not, explain why it's not
valid.

Location: apps/backend/core/workspace.py#L836-L839

Potential issue: When a git commit operation fails within the
`apply_changes_and_resolve_conflicts` function, the code only logs a warning using
`debug_warning` and does not propagate the failure. The function proceeds to return a
success dictionary, such as `{"success": True, ...}`. This misleads the caller into
believing the commit was successful, when in reality the files have only been staged and
not committed, leading to a silent failure and a potentially inconsistent repository
state.

}

Expand Down
125 changes: 87 additions & 38 deletions tests/test_cli_workspace_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,9 +303,12 @@ def test_fallback_debug_functions_no_error(self):
workspace_commands.debug_section("test", "message")

def test_fallback_is_debug_enabled_returns_false(self):
"""Fallback is_debug_enabled returns False."""
result = workspace_commands.is_debug_enabled()
assert result is False
"""Fallback is_debug_enabled returns False when debug is not enabled."""
# Ensure DEBUG env var is not set so is_debug_enabled() returns False
# (both fallback and real implementation return False when DEBUG is unset)
with patch.dict("os.environ", {"DEBUG": ""}, clear=False):
result = workspace_commands.is_debug_enabled()
assert result is False


# =============================================================================
Expand Down Expand Up @@ -652,6 +655,15 @@ def test_fallback_debug_functions_with_kwargs(
if 'cli.workspace_commands' in sys.modules:
del sys.modules['cli.workspace_commands']

# Ensure SDK mocks are in place before reimport (conftest cleanup may have removed them)
from unittest.mock import MagicMock as _MagicMock
sdk_modules_to_mock = ['claude_agent_sdk', 'claude_agent_sdk.types',
'claude_code_sdk', 'claude_code_sdk.types', 'dotenv']
saved_sdk_modules = {k: sys.modules.get(k) for k in sdk_modules_to_mock}
for mod_name in sdk_modules_to_mock:
if mod_name not in sys.modules:
sys.modules[mod_name] = _MagicMock()

try:
import cli.workspace_commands as wc

Expand All @@ -663,14 +675,21 @@ def test_fallback_debug_functions_with_kwargs(
wc.debug_error("test", "error", code=500)
wc.debug_section("test", "section")

# Verify is_debug_enabled works
assert wc.is_debug_enabled() is False
# Verify is_debug_enabled works (clear DEBUG env var for deterministic result)
with patch.dict("os.environ", {"DEBUG": ""}, clear=False):
assert wc.is_debug_enabled() is False

finally:
if debug_module:
sys.modules['debug'] = debug_module
if original_module:
sys.modules['cli.workspace_commands'] = original_module
# Restore SDK modules to their pre-test state
for mod_name, original_val in saved_sdk_modules.items():
if original_val is None:
sys.modules.pop(mod_name, None)
else:
sys.modules[mod_name] = original_val

@patch("subprocess.run")
def test_get_changed_files_first_exception_tries_fallback(
Expand Down Expand Up @@ -833,37 +852,52 @@ def test_fallback_debug_functions_when_debug_unavailable(self):
# Get the apps/backend directory
backend_dir = Path(__file__).parent.parent / "apps" / "backend"

# Run in subprocess with debug module hidden
# This triggers the except ImportError block at lines 335-363
# Run in subprocess with all required deps mocked (not installed in test venv)
# Verifies that workspace_commands debug functions work in any sandboxed environment
code = """
import sys
import os
from unittest.mock import MagicMock
import types

os.chdir(sys.argv[1])
sys.path.insert(0, sys.argv[1])

# Block debug module import
class DebugBlocker:
def find_module(self, fullname, path=None):
if fullname == 'debug' or fullname.startswith('debug.'):
return self
return None
def load_module(self, fullname):
raise ImportError(f"Blocked import of {fullname}")

sys.meta_path.insert(0, DebugBlocker())

# Now import - should use fallback functions (lines 335-363)
# Clear DEBUG env var so is_debug_enabled() returns False
os.environ.pop('DEBUG', None)

# Pre-mock core.debug so all modules that import from it get a no-op version
fake_core_debug = MagicMock()
fake_core_debug.is_debug_enabled = MagicMock(return_value=False)
sys.modules['core.debug'] = fake_core_debug

# Pre-mock 'debug' wrapper module (used by workspace_commands line 28 and 326)
fake_debug = types.ModuleType('debug')
for attr in ['debug', 'debug_detailed', 'debug_verbose', 'debug_success',
'debug_error', 'debug_section', 'debug_warning', 'debug_info',
'is_debug_enabled', 'get_debug_level', 'debug_env_status']:
setattr(fake_debug, attr, lambda *a, **kw: None)
fake_debug.is_debug_enabled = lambda: False
sys.modules['debug'] = fake_debug

# Pre-mock SDK and other modules not installed in the test venv
for mod_name in ['claude_agent_sdk', 'claude_agent_sdk.types',
'claude_code_sdk', 'claude_code_sdk.types', 'dotenv',
'graphiti_providers', 'graphiti_config']:
sys.modules[mod_name] = MagicMock()

# Now import - should work with all deps mocked
from cli.workspace_commands import debug, debug_verbose, debug_success, debug_error, debug_section, is_debug_enabled

# Verify fallback functions work without error
# Verify debug functions work without error
debug('test', 'message')
debug_verbose('test', 'verbose')
debug_success('test', 'success')
debug_error('test', 'error')
debug_section('test', 'section')
result = is_debug_enabled()

# Fallback is_debug_enabled returns False (line 363)
# is_debug_enabled returns False (DEBUG env var cleared above)
assert result == False, f"Expected False, got {result}"
print('OK')
"""
Expand All @@ -876,7 +910,7 @@ def load_module(self, fullname):
timeout=10,
)

# Verify subprocess succeeded - this validates fallback functions work
# Verify subprocess succeeded - this validates debug functions work in a sandboxed env
assert result.returncode == 0, f"Subprocess failed: stderr={result.stderr}"
assert "OK" in result.stdout, f"Expected 'OK' in output, got: {result.stdout}"

Expand Down Expand Up @@ -1232,41 +1266,56 @@ def test_fallback_functions_with_debug_blocked(self):

backend_dir = Path(__file__).parent.parent / "apps" / "backend"

# Run in subprocess with debug module completely blocked
# This is the same approach as test_fallback_debug_functions_when_debug_unavailable
# Run in subprocess with all required deps mocked (not installed in test venv)
# Verifies that workspace_commands debug functions work in any sandboxed environment
code = """
import sys
import os
from unittest.mock import MagicMock
import types

os.chdir(sys.argv[1])
sys.path.insert(0, sys.argv[1])

# Block debug module import completely
class DebugBlocker:
def find_module(self, fullname, path=None):
if fullname == 'debug' or fullname.startswith('debug.'):
return self
return None
def load_module(self, fullname):
raise ImportError(f"Blocked import of {fullname}")

sys.meta_path.insert(0, DebugBlocker())

# Now import workspace_commands - should trigger fallback functions (lines 335-363)
# Clear DEBUG env var so is_debug_enabled() returns False
os.environ.pop('DEBUG', None)

# Pre-mock core.debug so all modules that import from it get a no-op version
fake_core_debug = MagicMock()
fake_core_debug.is_debug_enabled = MagicMock(return_value=False)
sys.modules['core.debug'] = fake_core_debug

# Pre-mock 'debug' wrapper module (used by workspace_commands line 28 and 326)
fake_debug = types.ModuleType('debug')
for attr in ['debug', 'debug_detailed', 'debug_verbose', 'debug_success',
'debug_error', 'debug_section', 'debug_warning', 'debug_info',
'is_debug_enabled', 'get_debug_level', 'debug_env_status']:
setattr(fake_debug, attr, lambda *a, **kw: None)
fake_debug.is_debug_enabled = lambda: False
sys.modules['debug'] = fake_debug

# Pre-mock SDK and other modules not installed in the test venv
for mod_name in ['claude_agent_sdk', 'claude_agent_sdk.types',
'claude_code_sdk', 'claude_code_sdk.types', 'dotenv',
'graphiti_providers', 'graphiti_config']:
sys.modules[mod_name] = MagicMock()

# Now import workspace_commands - all deps are mocked
from cli.workspace_commands import (
debug, debug_detailed, debug_verbose,
debug_success, debug_error, debug_section,
is_debug_enabled
)

# Verify fallback functions work without error
# Verify debug functions work without error
debug('MODULE', 'test message')
debug_detailed('MODULE', 'detailed')
debug_verbose('MODULE', 'verbose')
debug_success('MODULE', 'success')
debug_error('MODULE', 'error')
debug_section('MODULE', 'section')

# Test is_debug_enabled returns False (line 363)
# Test is_debug_enabled returns False (DEBUG env var cleared above)
result = is_debug_enabled()
assert result == False, f"Expected False, got {result}"
print('OK')
Expand Down
Loading
Loading