Skip to content

Commit b9e1c52

Browse files
feat: Add --test-mode for resilient bootstrap with failure handling
Add a `--test-mode` flag that enables resilient bootstrapping by marking failed packages as pre-built and continuing until all packages are processed. Uses optimal n+1 retry logic with comprehensive failure reporting, including exception types, messages, and per-package context.

Benefits:
- Discover all build failures in one run rather than stopping on the first failure
- Support mixed source/binary dependency workflows
- Better error context for debugging failed builds
- Cleaner API boundaries between configuration and runtime context

Fixes #713

Co-developed-with: Cursor IDE with Claude 4.0 Sonnet
Signed-off-by: Lalatendu Mohanty <[email protected]>
1 parent a7ee0f8 commit b9e1c52

File tree

10 files changed

+530
-209
lines changed

10 files changed

+530
-209
lines changed

src/fromager/bootstrapper.py

Lines changed: 271 additions & 149 deletions
Large diffs are not rendered by default.

src/fromager/commands/bootstrap.py

Lines changed: 92 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,13 @@ def _get_requirements_from_args(
9797
default=False,
9898
help="Skip generating constraints.txt file to allow building collections with conflicting versions",
9999
)
100+
@click.option(
101+
"--test-mode",
102+
"test_mode",
103+
is_flag=True,
104+
default=False,
105+
help="Test mode: mark failed packages as pre-built and continue, report failures at end",
106+
)
100107
@click.argument("toplevel", nargs=-1)
101108
@click.pass_obj
102109
def bootstrap(
@@ -106,6 +113,7 @@ def bootstrap(
106113
cache_wheel_server_url: str | None,
107114
sdist_only: bool,
108115
skip_constraints: bool,
116+
test_mode: bool,
109117
toplevel: list[str],
110118
) -> None:
111119
"""Compute and build the dependencies of a set of requirements recursively
@@ -116,6 +124,11 @@ def bootstrap(
116124
"""
117125
logger.info(f"cache wheel server url: {cache_wheel_server_url}")
118126

127+
if test_mode:
128+
logger.info(
129+
"test mode enabled: will mark failed packages as pre-built and continue"
130+
)
131+
119132
to_build = _get_requirements_from_args(toplevel, requirements_files)
120133
if not to_build:
121134
raise RuntimeError(
@@ -148,6 +161,7 @@ def bootstrap(
148161
prev_graph,
149162
cache_wheel_server_url,
150163
sdist_only=sdist_only,
164+
test_mode=test_mode,
151165
)
152166

153167
# we need to resolve all the top level dependencies before we start bootstrapping.
@@ -183,9 +197,29 @@ def bootstrap(
183197

184198
for req in to_build:
185199
token = requirement_ctxvar.set(req)
186-
bt.bootstrap(req, requirements_file.RequirementType.TOP_LEVEL)
187-
progressbar.update()
188-
requirement_ctxvar.reset(token)
200+
try:
201+
bt.bootstrap(req, requirements_file.RequirementType.TOP_LEVEL)
202+
progressbar.update()
203+
if test_mode:
204+
logger.info("Successfully processed: %s", req)
205+
except Exception as err:
206+
if test_mode:
207+
# Test mode: record error, log, and continue processing
208+
logger.error(
209+
"test mode: failed to process %s: %s",
210+
req,
211+
err,
212+
exc_info=True, # Full traceback to debug log
213+
)
214+
bt.failed_builds.append(
215+
bootstrapper.BuildResult.failure(req=req, exception=err)
216+
)
217+
progressbar.update() # Update progress even on failure
218+
else:
219+
# Normal mode: re-raise the exception (fail-fast)
220+
raise
221+
finally:
222+
requirement_ctxvar.reset(token)
189223

190224
constraints_filename = wkctx.work_dir / "constraints.txt"
191225
if skip_constraints:
@@ -200,7 +234,57 @@ def bootstrap(
200234

201235
logger.debug("match_py_req LRU cache: %r", resolver.match_py_req.cache_info())
202236

203-
metrics.summarize(wkctx, "Bootstrapping")
237+
# Test mode summary reporting
238+
if test_mode:
239+
if bt.failed_builds:
240+
# Use repository's logging pattern for error reporting
241+
logger.error("test mode: the following packages failed to build:")
242+
for failure in sorted(
243+
bt.failed_builds, key=lambda f: str(f.req) if f.req else ""
244+
):
245+
if failure.req and failure.resolved_version:
246+
logger.error(
247+
" - %s==%s",
248+
failure.req,
249+
failure.resolved_version,
250+
)
251+
if failure.exception_type:
252+
logger.error(
253+
" Error: %s: %s",
254+
failure.exception_type,
255+
failure.exception_message,
256+
)
257+
else:
258+
logger.error(" - unknown package (missing context)")
259+
260+
# Categorize failures by exception type for better analysis
261+
failure_types: dict[str, list[str]] = {}
262+
for failure in bt.failed_builds:
263+
exc_type = failure.exception_type or "Unknown"
264+
pkg_name = (
265+
f"{failure.req}=={failure.resolved_version}"
266+
if failure.req and failure.resolved_version
267+
else "unknown"
268+
)
269+
failure_types.setdefault(exc_type, []).append(pkg_name)
270+
271+
logger.error("")
272+
logger.error("test mode: failure breakdown by type:")
273+
for exc_type, packages in sorted(failure_types.items()):
274+
logger.error(" %s: %d package(s)", exc_type, len(packages))
275+
276+
logger.error(
277+
"test mode: %d package(s) failed to build", len(bt.failed_builds)
278+
)
279+
# Follow repository's error exit pattern like __main__.py and lint.py
280+
raise SystemExit(
281+
f"Test mode completed with {len(bt.failed_builds)} build failures"
282+
)
283+
else:
284+
logger.info("test mode: all packages built successfully")
285+
metrics.summarize(wkctx, "Test Mode Bootstrapping")
286+
else:
287+
metrics.summarize(wkctx, "Bootstrapping")
204288

205289

206290
def write_constraints_file(
@@ -480,6 +564,9 @@ def bootstrap_parallel(
480564
remaining wheels in parallel. The bootstrap step downloads sdists
481565
and builds build-time dependency in serial. The build-parallel step
482566
builds the remaining wheels in parallel.
567+
568+
Note: --test-mode is not supported with bootstrap-parallel. Use
569+
'bootstrap --test-mode' for comprehensive failure testing.
483570
"""
484571
# Do not remove build environments in bootstrap phase to speed up the
485572
# parallel build phase.
@@ -495,6 +582,7 @@ def bootstrap_parallel(
495582
cache_wheel_server_url=cache_wheel_server_url,
496583
sdist_only=True,
497584
skip_constraints=skip_constraints,
585+
test_mode=False,
498586
toplevel=toplevel,
499587
)
500588

src/fromager/commands/graph.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ def _find_customized_nodes(
125125
"""Filter nodes to find only those with customizations."""
126126
customized_nodes: list[DependencyNode] = []
127127
for node in nodes:
128-
pbi = wkctx.settings.package_build_info(node.canonicalized_name)
128+
pbi = wkctx.package_build_info(node.canonicalized_name)
129129
if node.canonicalized_name != ROOT and pbi.has_customizations:
130130
customized_nodes.append(node)
131131
return customized_nodes
@@ -161,7 +161,7 @@ def _find_customized_dependencies_for_node(
161161
continue
162162

163163
child = edge.destination_node
164-
child_pbi = wkctx.settings.package_build_info(child.canonicalized_name)
164+
child_pbi = wkctx.package_build_info(child.canonicalized_name)
165165
new_path = path + [current_node.key]
166166

167167
# Use the first requirement we encounter in the path
@@ -277,7 +277,7 @@ def get_node_id(node: str) -> str:
277277
if not name:
278278
node_type.append("toplevel")
279279
else:
280-
pbi = wkctx.settings.package_build_info(name)
280+
pbi = wkctx.package_build_info(name)
281281
all_patches: PatchMap = pbi.get_all_patches()
282282

283283
if node.pre_built:

src/fromager/commands/list_overrides.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ def list_overrides(
6565
export_data = []
6666

6767
for name in overridden_packages:
68-
pbi = wkctx.settings.package_build_info(name)
68+
pbi = wkctx.package_build_info(name)
6969
ps = wkctx.settings.package_setting(name)
7070

7171
plugin_hooks: list[str] = []

src/fromager/context.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ def package_build_info(
164164
name = package.name
165165
else:
166166
name = package
167-
return self.settings.package_build_info(name)
167+
return self.settings.package_build_info(name, self)
168168

169169
def setup(self) -> None:
170170
# The work dir must already exist, so don't try to create it.

src/fromager/packagesettings.py

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -624,12 +624,26 @@ def get_available_memory_gib() -> float:
624624

625625

626626
class PackageBuildInfo:
627-
"""Package build information
627+
"""Variant-aware package build configuration and metadata.
628628
629-
Public API for PackageSettings with i
629+
Primary public API for accessing package-specific settings during the build
630+
process. Combines static configuration from YAML files with runtime context
631+
to provide variant-specific (cpu, cuda, etc.) build information.
632+
633+
Key responsibilities:
634+
- Determine if package should be built or use pre-built wheels
635+
- Provide patches to apply for specific versions
636+
- Configure build environment (parallel jobs, environment variables)
637+
- Manage package customizations (plugins, custom download URLs)
638+
- Calculate build tags from changelogs for wheel versioning
639+
640+
Instances are cached per package and accessed via ``WorkContext.package_build_info()``.
630641
"""
631642

632-
def __init__(self, settings: Settings, ps: PackageSettings) -> None:
643+
def __init__(
644+
self, settings: Settings, ps: PackageSettings, ctx: context.WorkContext
645+
) -> None:
646+
self._ctx = ctx
633647
self._variant = typing.cast(Variant, settings.variant)
634648
self._patches_dir = settings.patches_dir
635649
self._variant_changelog = settings.variant_changelog()
@@ -744,7 +758,7 @@ def has_customizations(self) -> bool:
744758

745759
@property
746760
def pre_built(self) -> bool:
747-
"""Does the variant use pre-build wheels?"""
761+
"""Does the variant use pre-built wheels?"""
748762
vi = self._ps.variants.get(self.variant)
749763
if vi is not None:
750764
return vi.pre_built
@@ -1146,23 +1160,27 @@ def package_setting(self, package: str | Package) -> PackageSettings:
11461160
self._package_settings[package] = ps
11471161
return ps
11481162

1149-
def package_build_info(self, package: str | Package) -> PackageBuildInfo:
1163+
def package_build_info(
1164+
self, package: str | Package, ctx: context.WorkContext
1165+
) -> PackageBuildInfo:
11501166
"""Get (cached) PackageBuildInfo for package and current variant"""
11511167
package = Package(canonicalize_name(package, validate=True))
11521168
pbi = self._pbi_cache.get(package)
11531169
if pbi is None:
11541170
ps = self.package_setting(package)
1155-
pbi = PackageBuildInfo(self, ps)
1171+
pbi = PackageBuildInfo(self, ps, ctx)
11561172
self._pbi_cache[package] = pbi
11571173
return pbi
11581174

11591175
def list_pre_built(self) -> set[Package]:
1160-
"""List packages marked as pre-built"""
1161-
return set(
1162-
name
1163-
for name in self._package_settings
1164-
if self.package_build_info(name).pre_built
1165-
)
1176+
"""List packages marked as pre-built by configuration"""
1177+
result = set()
1178+
for name in self._package_settings:
1179+
ps = self._package_settings[name]
1180+
vi = ps.variants.get(self._variant)
1181+
if vi is not None and vi.pre_built:
1182+
result.add(name)
1183+
return result
11661184

11671185
def list_overrides(self) -> set[Package]:
11681186
"""List packages with overrides

tests/test_bootstrap_test_mode.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
"""Tests for bootstrap --test-mode functionality.
2+
3+
Tests for test mode failure tracking and BuildResult.
4+
"""
5+
6+
from unittest import mock
7+
8+
import pytest
9+
from packaging.requirements import Requirement
10+
from packaging.version import Version
11+
12+
from fromager import bootstrapper
13+
from fromager.context import WorkContext
14+
15+
16+
class MockBuildError(Exception):
17+
"""Mock exception for simulating build failures."""
18+
19+
pass
20+
21+
22+
def test_test_mode_tracks_complete_failures(tmp_context: WorkContext) -> None:
23+
"""Test that test mode tracks failures with full context when both build and fallback fail."""
24+
bt = bootstrapper.Bootstrapper(tmp_context, test_mode=True)
25+
26+
# Mock to always fail
27+
def mock_build_wheel_and_sdist(req, version, pbi, build_sdist_only):
28+
raise MockBuildError(f"Build failed for {req.name}")
29+
30+
with mock.patch.object(
31+
bt, "_build_wheel_and_sdist", side_effect=mock_build_wheel_and_sdist
32+
):
33+
req = Requirement("broken-package==1.0")
34+
version = Version("1.0")
35+
pbi = tmp_context.package_build_info(req)
36+
37+
result = bt._build_package(req, version, pbi, build_sdist_only=False)
38+
39+
# Verify complete failure is tracked with full context
40+
assert result.failed
41+
assert result.req == req
42+
assert result.resolved_version == version
43+
assert result.exception_type == "MockBuildError"
44+
assert result.exception_message is not None
45+
assert "Build failed for broken-package" in result.exception_message
46+
47+
# Verify failure is in failed_builds list
48+
assert len(bt.failed_builds) == 1
49+
failed_build = bt.failed_builds[0]
50+
assert failed_build.req is not None
51+
assert failed_build.req.name == "broken-package"
52+
53+
54+
def test_normal_mode_still_fails_fast(tmp_context: WorkContext) -> None:
55+
"""Test that normal mode (test_mode=False) still raises exceptions immediately."""
56+
bt = bootstrapper.Bootstrapper(tmp_context, test_mode=False)
57+
58+
def mock_build_wheel_and_sdist(req, version, pbi, build_sdist_only):
59+
raise MockBuildError(f"Build failed for {req.name}")
60+
61+
with mock.patch.object(
62+
bt, "_build_wheel_and_sdist", side_effect=mock_build_wheel_and_sdist
63+
):
64+
req = Requirement("failing-package==1.0")
65+
version = Version("1.0")
66+
pbi = tmp_context.package_build_info(req)
67+
68+
# Should raise immediately in normal mode
69+
with pytest.raises(MockBuildError, match="Build failed for failing-package"):
70+
bt._build_package(req, version, pbi, build_sdist_only=False)
71+
72+
73+
def test_build_result_captures_exception_context() -> None:
74+
"""Test that BuildResult.failure() properly captures exception context."""
75+
req = Requirement("test-package>=1.0")
76+
version = Version("1.2.3")
77+
exception = ValueError("Something went wrong")
78+
79+
result = bootstrapper.BuildResult.failure(
80+
req=req, resolved_version=version, exception=exception
81+
)
82+
83+
# Verify all context is captured
84+
assert result.failed
85+
assert result.req == req
86+
assert result.resolved_version == version
87+
assert result.exception is exception
88+
assert result.exception_type == "ValueError"
89+
assert result.exception_message == "Something went wrong"
90+
assert result.wheel_filename is None
91+
assert result.sdist_filename is None

tests/test_commands.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,7 @@ def test_bootstrap_parallel_options() -> None:
1717
# graph_file internally.
1818
expected.discard("sdist_only")
1919
expected.discard("graph_file")
20+
# test_mode is not supported in bootstrap-parallel (serial mode only)
21+
expected.discard("test_mode")
2022

2123
assert set(get_option_names(bootstrap.bootstrap_parallel)) == expected

0 commit comments

Comments
 (0)