Skip to content

Commit 409554d

Browse files
feat: Add --test-mode for resilient bootstrap with failure handling
Add --test-mode flag that enables resilient bootstrapping by marking failed packages as pre-built and continuing until all packages are processed. Uses optimal n+1 retry logic with comprehensive failure reporting including exception types, messages, and per-package context.

Major changes:
- Enhanced BuildResult dataclass with req, resolved_version, and exception tracking for detailed failure analysis
- Refactored pre_built_override from Settings to WorkContext for proper separation of static config vs runtime state
- Introduced public WorkContext.package_build_info() API, replacing direct Settings access across commands (bootstrap, build, graph, list-overrides)
- Fixed build-parallel command to use new public API
- Added 4 essential test scenarios in test_bootstrap_test_mode.py

Benefits:
- Discover all build failures in one run rather than stopping on first failure
- Support mixed source/binary dependency workflows
- Better error context for debugging failed builds
- Cleaner API boundaries between configuration and runtime context

Fixes #713

Co-developed-with: Cursor IDE with Claude 4.0 Sonnet
Signed-off-by: Lalatendu Mohanty <[email protected]>
1 parent 222f3f8 commit 409554d

File tree

11 files changed

+574
-206
lines changed

11 files changed

+574
-206
lines changed

src/fromager/bootstrapper.py

Lines changed: 279 additions & 144 deletions
Large diffs are not rendered by default.

src/fromager/commands/bootstrap.py

Lines changed: 89 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,13 @@ def _get_requirements_from_args(
9797
default=False,
9898
help="Skip generating constraints.txt file to allow building collections with conflicting versions",
9999
)
100+
@click.option(
101+
"--test-mode",
102+
"test_mode",
103+
is_flag=True,
104+
default=False,
105+
help="Test mode: mark failed packages as pre-built and continue, report failures at end",
106+
)
100107
@click.argument("toplevel", nargs=-1)
101108
@click.pass_obj
102109
def bootstrap(
@@ -106,6 +113,7 @@ def bootstrap(
106113
cache_wheel_server_url: str | None,
107114
sdist_only: bool,
108115
skip_constraints: bool,
116+
test_mode: bool,
109117
toplevel: list[str],
110118
) -> None:
111119
"""Compute and build the dependencies of a set of requirements recursively
@@ -116,6 +124,11 @@ def bootstrap(
116124
"""
117125
logger.info(f"cache wheel server url: {cache_wheel_server_url}")
118126

127+
if test_mode:
128+
logger.info(
129+
"test mode enabled: will mark failed packages as pre-built and continue"
130+
)
131+
119132
to_build = _get_requirements_from_args(toplevel, requirements_files)
120133
if not to_build:
121134
raise RuntimeError(
@@ -148,6 +161,7 @@ def bootstrap(
148161
prev_graph,
149162
cache_wheel_server_url,
150163
sdist_only=sdist_only,
164+
test_mode=test_mode,
151165
)
152166

153167
# we need to resolve all the top level dependencies before we start bootstrapping.
@@ -183,9 +197,26 @@ def bootstrap(
183197

184198
for req in to_build:
185199
token = requirement_ctxvar.set(req)
186-
bt.bootstrap(req, requirements_file.RequirementType.TOP_LEVEL)
187-
progressbar.update()
188-
requirement_ctxvar.reset(token)
200+
try:
201+
bt.bootstrap(req, requirements_file.RequirementType.TOP_LEVEL)
202+
progressbar.update()
203+
if test_mode:
204+
logger.info("Successfully processed: %s", req)
205+
except Exception as err:
206+
if test_mode:
207+
# Test mode: log error but continue processing
208+
logger.error(
209+
"test mode: failed to process %s: %s",
210+
req,
211+
err,
212+
exc_info=True, # Full traceback to debug log
213+
)
214+
progressbar.update() # Update progress even on failure
215+
else:
216+
# Normal mode: re-raise the exception (fail-fast)
217+
raise
218+
finally:
219+
requirement_ctxvar.reset(token)
189220

190221
constraints_filename = wkctx.work_dir / "constraints.txt"
191222
if skip_constraints:
@@ -200,7 +231,57 @@ def bootstrap(
200231

201232
logger.debug("match_py_req LRU cache: %r", resolver.match_py_req.cache_info())
202233

203-
metrics.summarize(wkctx, "Bootstrapping")
234+
# Test mode summary reporting
235+
if test_mode:
236+
if bt.failed_builds:
237+
# Use repository's logging pattern for error reporting
238+
logger.error("test mode: the following packages failed to build:")
239+
for failure in sorted(
240+
bt.failed_builds, key=lambda f: str(f.req) if f.req else ""
241+
):
242+
if failure.req and failure.resolved_version:
243+
logger.error(
244+
" - %s==%s",
245+
failure.req,
246+
failure.resolved_version,
247+
)
248+
if failure.exception_type:
249+
logger.error(
250+
" Error: %s: %s",
251+
failure.exception_type,
252+
failure.exception_message,
253+
)
254+
else:
255+
logger.error(" - unknown package (missing context)")
256+
257+
# Categorize failures by exception type for better analysis
258+
failure_types: dict[str, list[str]] = {}
259+
for failure in bt.failed_builds:
260+
exc_type = failure.exception_type or "Unknown"
261+
pkg_name = (
262+
f"{failure.req}=={failure.resolved_version}"
263+
if failure.req and failure.resolved_version
264+
else "unknown"
265+
)
266+
failure_types.setdefault(exc_type, []).append(pkg_name)
267+
268+
logger.error("")
269+
logger.error("test mode: failure breakdown by type:")
270+
for exc_type, packages in sorted(failure_types.items()):
271+
logger.error(" %s: %d package(s)", exc_type, len(packages))
272+
273+
logger.error(
274+
"test mode: %d package(s) failed to build", len(bt.failed_builds)
275+
)
276+
# Follow repository's error exit pattern like __main__.py and lint.py
277+
raise SystemExit(
278+
f"Test mode completed with {len(bt.failed_builds)} build failures"
279+
)
280+
else:
281+
logger.info("test mode: all packages built successfully")
282+
metrics.summarize(wkctx, "Test Mode Bootstrapping")
283+
else:
284+
metrics.summarize(wkctx, "Bootstrapping")
204285

205286

206287
def write_constraints_file(
@@ -480,6 +561,9 @@ def bootstrap_parallel(
480561
remaining wheels in parallel. The bootstrap step downloads sdists
481562
and builds build-time dependency in serial. The build-parallel step
482563
builds the remaining wheels in parallel.
564+
565+
Note: --test-mode is not supported with bootstrap-parallel. Use
566+
'bootstrap --test-mode' for comprehensive failure testing.
483567
"""
484568
# Do not remove build environments in bootstrap phase to speed up the
485569
# parallel build phase.
@@ -495,6 +579,7 @@ def bootstrap_parallel(
495579
cache_wheel_server_url=cache_wheel_server_url,
496580
sdist_only=True,
497581
skip_constraints=skip_constraints,
582+
test_mode=False,
498583
toplevel=toplevel,
499584
)
500585

src/fromager/commands/build.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -675,9 +675,7 @@ def update_progressbar_cb(future: concurrent.futures.Future) -> None:
675675
exclusive_nodes: DependencyNodeList = [
676676
node
677677
for node in buildable_nodes
678-
if wkctx.settings.package_build_info(
679-
node.canonicalized_name
680-
).exclusive_build
678+
if wkctx.package_build_info(node.canonicalized_name).exclusive_build
681679
]
682680
if exclusive_nodes:
683681
# Only build the first exclusive node this round

src/fromager/commands/graph.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ def _find_customized_nodes(
125125
"""Filter nodes to find only those with customizations."""
126126
customized_nodes: list[DependencyNode] = []
127127
for node in nodes:
128-
pbi = wkctx.settings.package_build_info(node.canonicalized_name)
128+
pbi = wkctx.package_build_info(node.canonicalized_name)
129129
if node.canonicalized_name != ROOT and pbi.has_customizations:
130130
customized_nodes.append(node)
131131
return customized_nodes
@@ -161,7 +161,7 @@ def _find_customized_dependencies_for_node(
161161
continue
162162

163163
child = edge.destination_node
164-
child_pbi = wkctx.settings.package_build_info(child.canonicalized_name)
164+
child_pbi = wkctx.package_build_info(child.canonicalized_name)
165165
new_path = path + [current_node.key]
166166

167167
# Use the first requirement we encounter in the path
@@ -277,7 +277,7 @@ def get_node_id(node: str) -> str:
277277
if not name:
278278
node_type.append("toplevel")
279279
else:
280-
pbi = wkctx.settings.package_build_info(name)
280+
pbi = wkctx.package_build_info(name)
281281
all_patches: PatchMap = pbi.get_all_patches()
282282

283283
if node.pre_built:

src/fromager/commands/list_overrides.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ def list_overrides(
6565
export_data = []
6666

6767
for name in overridden_packages:
68-
pbi = wkctx.settings.package_build_info(name)
68+
pbi = wkctx.package_build_info(name)
6969
ps = wkctx.settings.package_setting(name)
7070

7171
plugin_hooks: list[str] = []

src/fromager/context.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,9 +95,19 @@ def __init__(
9595

9696
self._parallel_builds = False
9797

98+
self._pre_built_override: set[NormalizedName] = set()
99+
98100
def enable_parallel_builds(self) -> None:
99101
self._parallel_builds = True
100102

103+
def add_pre_built_override(self, package_name: str | NormalizedName) -> None:
104+
"""Mark a package as pre-built at runtime."""
105+
self._pre_built_override.add(canonicalize_name(package_name))
106+
107+
def is_pre_built_override(self, package_name: str | NormalizedName) -> bool:
108+
"""Check if a package is marked as pre-built override."""
109+
return canonicalize_name(package_name) in self._pre_built_override
110+
101111
@property
102112
def wheels_build(self) -> pathlib.Path:
103113
# when parallel builds are enabled, return a path that is unique for the
@@ -164,7 +174,7 @@ def package_build_info(
164174
name = package.name
165175
else:
166176
name = package
167-
return self.settings.package_build_info(name)
177+
return self.settings.package_build_info(name, self)
168178

169179
def setup(self) -> None:
170180
# The work dir must already exist, so don't try to create it.

src/fromager/packagesettings.py

Lines changed: 34 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -624,12 +624,26 @@ def get_available_memory_gib() -> float:
624624

625625

626626
class PackageBuildInfo:
627-
"""Package build information
627+
"""Variant-aware package build configuration and metadata.
628628
629-
Public API for PackageSettings with i
629+
Primary public API for accessing package-specific settings during the build
630+
process. Combines static configuration from YAML files with runtime context
631+
to provide variant-specific (cpu, cuda, etc.) build information.
632+
633+
Key responsibilities:
634+
- Determine if package should be built or use pre-built wheels
635+
- Provide patches to apply for specific versions
636+
- Configure build environment (parallel jobs, environment variables)
637+
- Manage package customizations (plugins, custom download URLs)
638+
- Calculate build tags from changelogs for wheel versioning
639+
640+
Instances are cached per package and accessed via ``WorkContext.package_build_info()``.
630641
"""
631642

632-
def __init__(self, settings: Settings, ps: PackageSettings) -> None:
643+
def __init__(
644+
self, settings: Settings, ps: PackageSettings, ctx: context.WorkContext
645+
) -> None:
646+
self._ctx = ctx
633647
self._variant = typing.cast(Variant, settings.variant)
634648
self._patches_dir = settings.patches_dir
635649
self._variant_changelog = settings.variant_changelog()
@@ -745,6 +759,11 @@ def has_customizations(self) -> bool:
745759
@property
746760
def pre_built(self) -> bool:
747761
"""Does the variant use pre-build wheels?"""
762+
# Check if package is in runtime pre_built_override
763+
if self._ctx.is_pre_built_override(self.package):
764+
return True
765+
766+
# Check variant configuration
748767
vi = self._ps.variants.get(self.variant)
749768
if vi is not None:
750769
return vi.pre_built
@@ -1146,23 +1165,27 @@ def package_setting(self, package: str | Package) -> PackageSettings:
11461165
self._package_settings[package] = ps
11471166
return ps
11481167

1149-
def package_build_info(self, package: str | Package) -> PackageBuildInfo:
1168+
def package_build_info(
1169+
self, package: str | Package, ctx: context.WorkContext
1170+
) -> PackageBuildInfo:
11501171
"""Get (cached) PackageBuildInfo for package and current variant"""
11511172
package = Package(canonicalize_name(package, validate=True))
11521173
pbi = self._pbi_cache.get(package)
11531174
if pbi is None:
11541175
ps = self.package_setting(package)
1155-
pbi = PackageBuildInfo(self, ps)
1176+
pbi = PackageBuildInfo(self, ps, ctx)
11561177
self._pbi_cache[package] = pbi
11571178
return pbi
11581179

11591180
def list_pre_built(self) -> set[Package]:
1160-
"""List packages marked as pre-built"""
1161-
return set(
1162-
name
1163-
for name in self._package_settings
1164-
if self.package_build_info(name).pre_built
1165-
)
1181+
"""List packages marked as pre-built by configuration"""
1182+
result = set()
1183+
for name in self._package_settings:
1184+
ps = self._package_settings[name]
1185+
vi = ps.variants.get(self._variant)
1186+
if vi is not None and vi.pre_built:
1187+
result.add(name)
1188+
return result
11661189

11671190
def list_overrides(self) -> set[Package]:
11681191
"""List packages with overrides

0 commit comments

Comments
 (0)