diff --git a/.buckconfig b/.buckconfig new file mode 100644 index 00000000000..75702aa3955 --- /dev/null +++ b/.buckconfig @@ -0,0 +1,30 @@ +[cells] + root = . + toolchains = codex-rs/toolchains + prelude = prelude + none = none + +[cell_aliases] + # Buck2 prelude expects some common aliases to exist in some environments. + config = prelude + ovr_config = prelude + fbcode = none + fbsource = none + fbcode_macros = none + buck = none + +# Use the Buck2 prelude bundled with the buck2 binary. +[external_cells] + prelude = bundled + +[parser] + target_platform_detector_spec = target://...->prelude//platforms:default + +[codex] + # Local-only knob used by codex-rs/buck2 to approximate Cargo profiles. + # Override on the command line with: + # ./scripts/buck2 build -c codex.rust_profile=release //codex-rs/cli:codex + rust_profile = dev + +[build] + execution_platforms = prelude//platforms:default diff --git a/.buckroot b/.buckroot new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/.buckroot @@ -0,0 +1 @@ + diff --git a/.github/workflows/buck2.yml b/.github/workflows/buck2.yml new file mode 100644 index 00000000000..3949eae60a0 --- /dev/null +++ b/.github/workflows/buck2.yml @@ -0,0 +1,56 @@ +name: Buck2 (Experimental, non-blocking for PRs) + +on: + pull_request: {} + push: + branches: + - main + workflow_dispatch: + +concurrency: + group: buck2-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + buck2-test: + name: buck2 test //codex-rs/... + runs-on: ubuntu-24.04 + # Non-blocking while Buck2 support is still experimental. + # continue-on-error: true + timeout-minutes: 30 + + steps: + - name: Checkout + uses: actions/checkout@v4 + + # scripts/buck2, scripts/reindeer, etc. are DotSlash wrappers. 
+ - name: Install DotSlash + uses: facebook/install-dotslash@v2 + + - name: Setup Rust toolchain + uses: dtolnay/rust-toolchain@1.90 + with: + # Match codex-rs/rust-toolchain.toml (and include rust-src for Buck2 toolchains). + components: rustfmt, clippy, rust-src + + - name: Install system deps (Linux) + shell: bash + run: | + set -euxo pipefail + sudo apt-get update + sudo apt-get install -y pkg-config libssl-dev + + - name: Setup Buck2 (local) + shell: bash + run: | + set -euxo pipefail + ./scripts/setup_buck2_local.sh + + - name: Run Buck2 tests + shell: bash + run: | + set -euxo pipefail + ./scripts/buck2 test -c test.rule_timeout_ms=1800000 --test-executor-stdout=- --test-executor-stderr=- //codex-rs/... diff --git a/.gitignore b/.gitignore index 07bc15ccdd1..c50686a9043 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,12 @@ build/ out/ storybook-static/ +# buck2 +buck-out/ +**/BUCK +codex-rs/third-party/ +codex-rs/cli/Cargo.lock + # ignore README for publishing codex-cli/README.md @@ -89,4 +95,3 @@ CHANGELOG.ignore.md # Python bytecode files __pycache__/ *.pyc - diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 70902b4bd59..5acd78c9f90 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -1502,6 +1502,7 @@ dependencies = [ "clap", "codex-core", "codex-utils-absolute-path", + "codex-utils-cargo-bin", "landlock", "libc", "seccompiler", diff --git a/codex-rs/buck2/codex_rust_toolchain.bzl b/codex-rs/buck2/codex_rust_toolchain.bzl new file mode 100644 index 00000000000..9d3b2825177 --- /dev/null +++ b/codex-rs/buck2/codex_rust_toolchain.bzl @@ -0,0 +1,99 @@ +load("@prelude//rust:rust_toolchain.bzl", "PanicRuntime", "RustToolchainInfo") + +_DEFAULT_TRIPLE = select({ + "prelude//os:linux": select({ + "prelude//cpu:arm64": "aarch64-unknown-linux-gnu", + "prelude//cpu:riscv64": "riscv64gc-unknown-linux-gnu", + "prelude//cpu:x86_64": "x86_64-unknown-linux-gnu", + }), + "prelude//os:macos": select({ + "prelude//cpu:arm64": 
"aarch64-apple-darwin", + "prelude//cpu:x86_64": "x86_64-apple-darwin", + }), + "prelude//os:windows": select({ + "prelude//cpu:arm64": select({ + # Rustup's default ABI for the host on Windows is MSVC, not GNU. + "DEFAULT": "aarch64-pc-windows-msvc", + "prelude//abi:gnu": "aarch64-pc-windows-gnu", + "prelude//abi:msvc": "aarch64-pc-windows-msvc", + }), + "prelude//cpu:x86_64": select({ + "DEFAULT": "x86_64-pc-windows-msvc", + "prelude//abi:gnu": "x86_64-pc-windows-gnu", + "prelude//abi:msvc": "x86_64-pc-windows-msvc", + }), + }), +}) + + +def _codex_rust_toolchain_impl(ctx): + # Buck doesn't have a built-in notion of "Cargo profiles", but it's useful + # to provide a simple local knob that roughly matches `cargo build` vs + # `cargo build --release`. + # + # Default is "dev" to match local development expectations. + rust_profile = read_config("codex", "rust_profile", "dev") + extra_rustc_flags = [] + if rust_profile == "release": + # Roughly mirrors Cargo's release defaults (not a perfect match). 
+ extra_rustc_flags = [ + "-C", + "opt-level=3", + "-C", + "debuginfo=0", + ] + + return [ + DefaultInfo(), + RustToolchainInfo( + allow_lints = ctx.attrs.allow_lints, + clippy_driver = RunInfo(args = [ctx.attrs.clippy_driver]), + clippy_toml = ctx.attrs.clippy_toml[DefaultInfo].default_outputs[0] if ctx.attrs.clippy_toml else None, + compiler = RunInfo(args = [ctx.attrs.rustc]), + default_edition = ctx.attrs.default_edition, + deny_lints = ctx.attrs.deny_lints, + doctests = ctx.attrs.doctests, + nightly_features = ctx.attrs.nightly_features, + panic_runtime = PanicRuntime("unwind"), + report_unused_deps = ctx.attrs.report_unused_deps, + rustc_binary_flags = ctx.attrs.rustc_binary_flags, + rustc_flags = ctx.attrs.rustc_flags + extra_rustc_flags, + rustc_target_triple = ctx.attrs.rustc_target_triple, + rustc_test_flags = ctx.attrs.rustc_test_flags, + rustdoc = RunInfo(args = [ctx.attrs.rustdoc]), + rustdoc_flags = ctx.attrs.rustdoc_flags, + warn_lints = ctx.attrs.warn_lints, + # Enable the prelude's "metadata-only rlib" behavior consistently + # across the crate graph. This avoids rustc "found possibly newer + # version of crate ..." (E0460) mismatches between binaries and + # libraries in large Rust graphs. + advanced_unstable_linking = ctx.attrs.advanced_unstable_linking, + ), + ] + + +codex_rust_toolchain = rule( + impl = _codex_rust_toolchain_impl, + attrs = { + "advanced_unstable_linking": attrs.bool(default = True), + "allow_lints": attrs.list(attrs.string(), default = []), + # Prefer explicit tool paths so the Buck execution directory doesn't + # affect rustup toolchain resolution. 
+ "clippy_driver": attrs.string(default = "clippy-driver"), + "clippy_toml": attrs.option(attrs.dep(providers = [DefaultInfo]), default = None), + "default_edition": attrs.option(attrs.string(), default = None), + "deny_lints": attrs.list(attrs.string(), default = []), + "doctests": attrs.bool(default = False), + "nightly_features": attrs.bool(default = False), + "report_unused_deps": attrs.bool(default = False), + "rustc": attrs.string(default = "rustc"), + "rustc_binary_flags": attrs.list(attrs.arg(), default = []), + "rustc_flags": attrs.list(attrs.arg(), default = []), + "rustc_target_triple": attrs.string(default = _DEFAULT_TRIPLE), + "rustc_test_flags": attrs.list(attrs.arg(), default = []), + "rustdoc": attrs.string(default = "rustdoc"), + "rustdoc_flags": attrs.list(attrs.arg(), default = []), + "warn_lints": attrs.list(attrs.string(), default = []), + }, + is_toolchain_rule = True, +) diff --git a/codex-rs/buck2/reindeer_macros.bzl b/codex-rs/buck2/reindeer_macros.bzl new file mode 100644 index 00000000000..72ae2491323 --- /dev/null +++ b/codex-rs/buck2/reindeer_macros.bzl @@ -0,0 +1,111 @@ +load("@prelude//rust:cargo_buildscript.bzl", _prelude_buildscript_run = "buildscript_run") +load("@prelude//rust:cargo_package.bzl", "cargo") + + +def codex_noop_alias(**_kwargs): + # Reindeer normally emits aliases like `alias(name = "rand", actual = ":rand-0.8.5")` + # to provide stable unversioned target names. In a non-trivial workspace it's + # common to have multiple versions of the same crate in one graph, which + # leads to duplicate alias target names and buckification failures. + # + # For local Buck experiments (where we don't check in generated third-party + # BUCK files), it's simplest to disable these aliases and depend on + # versioned targets directly. 
+ pass + + +def _codex_extra_srcs_for_manifest_dir(manifest_dir): + if not manifest_dir: + return [] + + # Use per-crate globs rooted at the crate's vendored manifest dir, which is + # passed through by Reindeer as CARGO_MANIFEST_DIR (e.g. vendor/foo-1.2.3). + # + # We include *all* files under the crate root so `include_str!` and + # `include_bytes!` work without per-crate Reindeer fixups. This is local-only + # buckification, so we prefer robustness over a minimal srcs list. + return glob( + ["{}/**".format(manifest_dir)], + exclude = [ + "{}/target/**".format(manifest_dir), + "{}/.git/**".format(manifest_dir), + ], + ) + + +def codex_rust_library(**kwargs): + # Make generated third-party targets consumable from anywhere in the repo. + kwargs["visibility"] = ["PUBLIC"] + env = kwargs.get("env", {}) + manifest_dir = env.get("CARGO_MANIFEST_DIR") + srcs = list(kwargs.get("srcs", [])) + srcs.extend(_codex_extra_srcs_for_manifest_dir(manifest_dir)) + kwargs["srcs"] = srcs + cargo.rust_library(**kwargs) + + +def codex_rust_binary(**kwargs): + kwargs["visibility"] = ["PUBLIC"] + env = kwargs.get("env", {}) + manifest_dir = env.get("CARGO_MANIFEST_DIR") + srcs = list(kwargs.get("srcs", [])) + srcs.extend(_codex_extra_srcs_for_manifest_dir(manifest_dir)) + kwargs["srcs"] = srcs + cargo.rust_binary(**kwargs) + + +def codex_buildscript_run(**kwargs): + # Many build scripts (especially those using `cc`/`cc-rs`) expect Cargo to + # provide a handful of profile env vars. Buck does not set these by default. + env = dict(kwargs.get("env", {})) + rust_profile = read_config("codex", "rust_profile", "dev") + if rust_profile == "release": + env.setdefault("OPT_LEVEL", "3") + env.setdefault("PROFILE", "release") + env.setdefault("DEBUG", "false") + else: + env.setdefault("OPT_LEVEL", "0") + env.setdefault("PROFILE", "debug") + env.setdefault("DEBUG", "true") + + # Provide common Cargo cfg env vars that some build scripts expect. 
+ env.setdefault( + "CARGO_CFG_TARGET_OS", + select({ + "prelude//os:linux": "linux", + "prelude//os:macos": "macos", + "prelude//os:windows": "windows", + "DEFAULT": "", + }), + ) + env.setdefault( + "CARGO_CFG_TARGET_ARCH", + select({ + "prelude//cpu:arm64": "aarch64", + "prelude//cpu:x86_64": "x86_64", + "DEFAULT": "", + }), + ) + env.setdefault("CARGO_CFG_TARGET_ENDIAN", "little") + env.setdefault( + "CARGO_CFG_TARGET_ENV", + select({ + "prelude//os:linux": "gnu", + "DEFAULT": "", + }), + ) + + # Forward native link directives emitted by build scripts into rustc flags. + # Without this, crates like `ring` and `tree-sitter-*` will compile but fail + # to link due to missing native symbols. + kwargs.setdefault("rustc_link_lib", True) + kwargs.setdefault("rustc_link_search", True) + + # `CARGO_MANIFEST_DIR` is expected by many build scripts. We can usually + # derive it from the `manifest_dir` parameter that buildscript_run uses. + manifest_dir = kwargs.get("manifest_dir") + if type(manifest_dir) == type(""): + env.setdefault("CARGO_MANIFEST_DIR", "$(location {})".format(manifest_dir)) + + kwargs["env"] = env + _prelude_buildscript_run(**kwargs) diff --git a/codex-rs/linux-sandbox/Cargo.toml b/codex-rs/linux-sandbox/Cargo.toml index 1009791aa05..fe52781b44f 100644 --- a/codex-rs/linux-sandbox/Cargo.toml +++ b/codex-rs/linux-sandbox/Cargo.toml @@ -24,6 +24,7 @@ libc = { workspace = true } seccompiler = { workspace = true } [target.'cfg(target_os = "linux")'.dev-dependencies] +codex-utils-cargo-bin = { workspace = true } tempfile = { workspace = true } tokio = { workspace = true, features = [ "io-std", diff --git a/codex-rs/linux-sandbox/tests/suite/landlock.rs b/codex-rs/linux-sandbox/tests/suite/landlock.rs index a4868ec057a..15cf3fe8131 100644 --- a/codex-rs/linux-sandbox/tests/suite/landlock.rs +++ b/codex-rs/linux-sandbox/tests/suite/landlock.rs @@ -60,8 +60,10 @@ async fn run_cmd(cmd: &[&str], writable_roots: &[PathBuf], timeout_ms: u64) { 
exclude_tmpdir_env_var: true,
         exclude_slash_tmp: true,
     };
-    let sandbox_program = env!("CARGO_BIN_EXE_codex-linux-sandbox");
-    let codex_linux_sandbox_exe = Some(PathBuf::from(sandbox_program));
+    let codex_linux_sandbox_exe = Some(
+        codex_utils_cargo_bin::cargo_bin("codex-linux-sandbox")
+            .expect("codex-linux-sandbox binary should be available for tests"),
+    );
     let res = process_exec_tool_call(
         params,
         &sandbox_policy,
@@ -154,8 +156,10 @@ async fn assert_network_blocked(cmd: &[&str]) {
     };

     let sandbox_policy = SandboxPolicy::new_read_only_policy();
-    let sandbox_program = env!("CARGO_BIN_EXE_codex-linux-sandbox");
-    let codex_linux_sandbox_exe: Option<PathBuf> = Some(PathBuf::from(sandbox_program));
+    let codex_linux_sandbox_exe: Option<PathBuf> = Some(
+        codex_utils_cargo_bin::cargo_bin("codex-linux-sandbox")
+            .expect("codex-linux-sandbox binary should be available for tests"),
+    );
     let result = process_exec_tool_call(
         params,
         &sandbox_policy,
diff --git a/codex-rs/reindeer.toml b/codex-rs/reindeer.toml
new file mode 100644
index 00000000000..d579d49719d
--- /dev/null
+++ b/codex-rs/reindeer.toml
@@ -0,0 +1,56 @@
+##
+## Reindeer config for codex-rs (Cargo workspace) + Buck2.
+##
+## This file is checked in, but generated Buck artifacts (BUCK files and
+## vendored crates) are intentionally gitignored for now.
+##
+
+# Focus buckification on the main CLI to avoid pulling in platform-specific
+# workspace members that can create duplicate "public alias" names.
+manifest_path = "cli/Cargo.toml"
+
+# Place generated output under codex-rs/third-party/.
+third_party_dir = "third-party"
+
+# Use vendored crates (no crates.io downloads during `buck2 build`).
+vendor = true
+
+# Provide common Cargo env vars for crates/build scripts that use `env!()`.
+cargo_env = [ + "CARGO_CRATE_NAME", + "CARGO_MANIFEST_DIR", + "CARGO_PKG_AUTHORS", + "CARGO_PKG_DESCRIPTION", + "CARGO_PKG_NAME", + "CARGO_PKG_REPOSITORY", + "CARGO_PKG_VERSION", + "CARGO_PKG_VERSION_MAJOR", + "CARGO_PKG_VERSION_MINOR", + "CARGO_PKG_VERSION_PATCH", + "CARGO_PKG_VERSION_PRE", +] + +[buck] +file_name = "BUCK" + +# These resolve to macros defined in codex-rs/buck2/reindeer_macros.bzl. +alias = "codex_noop_alias" +alias_with_platforms = "codex_noop_alias" +rust_library = "codex_rust_library" +rust_binary = "codex_rust_binary" +buildscript_genrule = "codex_buildscript_run" + +buckfile_imports = """ +load("//codex-rs/buck2:reindeer_macros.bzl", "codex_buildscript_run", "codex_noop_alias", "codex_rust_binary", "codex_rust_library") +""" + +generated_file_header = """ +## +## @generated by reindeer +## Do not edit by hand. +## +## To regenerate: +## (cd codex-rs && reindeer vendor && reindeer buckify) +## +""" + diff --git a/codex-rs/scripts/gen_buck_first_party.py b/codex-rs/scripts/gen_buck_first_party.py new file mode 100644 index 00000000000..e895adac8c7 --- /dev/null +++ b/codex-rs/scripts/gen_buck_first_party.py @@ -0,0 +1,636 @@ +#!/usr/bin/env python3 +""" +Generate BUCK files for first-party Rust crates in the codex-rs workspace. + +Reindeer generates third-party targets (in `codex-rs/third-party/BUCK`). This +script generates Buck targets for workspace members (local crates) that depend +on those third-party targets. + +The generated BUCK files are intentionally gitignored for now. +""" + +from __future__ import annotations + +import json +import pathlib +import re +import subprocess +import sys +from dataclasses import dataclass +from typing import Any, Optional + + +CODEX_RS_ROOT = pathlib.Path(__file__).resolve().parents[1] +REPO_ROOT = CODEX_RS_ROOT.parent +BUILDIFIER = REPO_ROOT / "scripts" / "buildifier" + +# All labels are rooted at the repo root Buck project. 
+CODEX_RS_LABEL_PREFIX = "//codex-rs" + +# Some integration tests rely on Cargo's behavior of allowing `include_str!` +# paths that traverse outside of a package directory. Buck does not allow `..` +# paths in `srcs`, so we skip those tests for now. +SKIP_INTEGRATION_TESTS_BY_PACKAGE: dict[str, set[str]] = { + "codex-core": {"all"}, +} + + +def cargo_metadata() -> dict[str, Any]: + out = subprocess.check_output( + ["cargo", "metadata", "--format-version=1", "--locked"], + cwd=CODEX_RS_ROOT, + text=True, + ) + return json.loads(out) + + +@dataclass(frozen=True) +class BuckDep: + label: str + local_name: str + crate_name: str + cfg: Optional[str] + + +def parse_dep_cfg(cfg: Optional[str]) -> list[str]: + """ + Map a subset of Cargo cfg(...) strings to Buck2 constraint keys. + + Returns a list of constraint keys that should include the dependency. + An empty list means "unconditional". + """ + if cfg is None: + return [] + + cfg = cfg.strip() + m = re.fullmatch(r'cfg\(target_os\s*=\s*"([^"]+)"\)', cfg) + if m: + os_name = m.group(1) + if os_name in ("linux", "macos", "windows"): + return [f"prelude//os:{os_name}"] + + if cfg == "cfg(windows)": + return ["prelude//os:windows"] + + if cfg == "cfg(unix)": + return ["prelude//os:linux", "prelude//os:macos"] + + # If we don't recognize the expression, treat it as unconditional to avoid + # silently dropping deps. This may cause platform build issues, in which + # case we can extend this mapping. + return [] + + +def buckify_features(features: list[str]) -> list[str]: + # Cargo metadata includes lots of feature names, including "default". + # Keep stable output for diffs. 
+ return sorted(features) + + +def package_lib_target(pkg: dict[str, Any]) -> Optional[dict[str, Any]]: + for t in pkg.get("targets") or []: + kind = t.get("kind") or [] + if "proc-macro" in kind or "lib" in kind: + return t + return None + + +def package_bin_targets(pkg: dict[str, Any]) -> list[dict[str, Any]]: + return [t for t in (pkg.get("targets") or []) if "bin" in (t.get("kind") or [])] + +def package_test_targets(pkg: dict[str, Any]) -> list[dict[str, Any]]: + # These correspond to Cargo integration tests under `tests/`. + return [t for t in (pkg.get("targets") or []) if "test" in (t.get("kind") or [])] + + +def relpath_from_codex_rs(path: str) -> str: + return str(pathlib.Path(path).resolve().relative_to(CODEX_RS_ROOT)) + + +def write_buck_file(crate_dir: pathlib.Path, content: str) -> None: + buck_path = crate_dir / "BUCK" + buck_path.write_text(content, encoding="utf-8") + +def buildifier(paths: list[pathlib.Path]) -> None: + if not paths: + return + if not BUILDIFIER.exists(): + # Generated BUCK files are local-only; keep generation working even if + # buildifier isn't available. + return + + # Run buildifier from the repo root so it can infer workspace-relative paths. 
+ proc = subprocess.run( + [str(BUILDIFIER), "-lint=off", "-mode=fix", *[str(p) for p in paths]], + cwd=REPO_ROOT, + text=True, + ) + if proc.returncode != 0: + print(f"warning: buildifier failed (exit {proc.returncode}); leaving BUCK files unformatted", file=sys.stderr) + + +def starlark_list(items: list[str], indent: str = " ") -> str: + if not items: + return "[]" + lines = ["["] + for it in items: + lines.append(f'{indent}"{it}",') + lines.append("]") + return "\n".join(lines) + + +def starlark_dict(d: dict[str, str], indent: str = " ") -> str: + if not d: + return "{}" + lines = ["{"] # insertion order is stable for our dict construction + for k, v in d.items(): + lines.append(f'{indent}"{k}": "{v}",') + lines.append("}") + return "\n".join(lines) + + +def starlark_deps_expr(base: list[str], conditional: dict[str, list[str]]) -> str: + """ + Render deps as a list plus (optional) `select()` fragments. + """ + expr = starlark_list(base) + for constraint, deps in sorted(conditional.items()): + expr = ( + f"{expr} + select({{\n" + f' "{constraint}": {starlark_list(deps, indent=" ")},\n' + f' "DEFAULT": [],\n' + "})" + ) + return expr + + +def starlark_str(s: str) -> str: + # Conservative escaping for readability (most strings here are already safe). + return s.replace("\\", "\\\\").replace('"', '\\"') + + +def parse_semver(version: str) -> tuple[str, str, str, str]: + m = re.match(r"^(\d+)\.(\d+)\.(\d+)(?:-(.*))?$", version) + if not m: + return ("0", "0", "0", "") + return (m.group(1), m.group(2), m.group(3), m.group(4) or "") + +def group_deps(deps: list[BuckDep]) -> tuple[list[str], dict[str, list[str]], dict[str, str]]: + """ + Group deps into (base_deps, conditional_deps, named_deps) for emitting in BUCK. 
+ """ + base_deps: list[str] = [] + conditional_deps: dict[str, list[str]] = {} + named_deps: dict[str, str] = {} + for d in deps: + dep_constraints = parse_dep_cfg(d.cfg) + if dep_constraints: + for c in dep_constraints: + conditional_deps.setdefault(c, []).append(d.label) + else: + base_deps.append(d.label) + + # Handle renamed deps (Cargo `package = ...` style). + if d.local_name != d.crate_name: + named_deps[d.local_name] = d.label + + base_deps = sorted(set(base_deps)) + for k in list(conditional_deps.keys()): + conditional_deps[k] = sorted(set(conditional_deps[k])) + + return (base_deps, conditional_deps, named_deps) + + +def label_for_workspace_pkg(crate_rel_dir: str, rule_name: str) -> str: + # crate_rel_dir is relative to codex-rs/, but buck labels are rooted at the + # repo root, so we prefix with //codex-rs. + return f"{CODEX_RS_LABEL_PREFIX}/{crate_rel_dir}:{rule_name}" + + +def label_for_third_party(pkg_name: str, version: str) -> str: + # Reindeer generates versioned third-party targets and we disable the + # unversioned alias layer. + return f"{CODEX_RS_LABEL_PREFIX}/third-party:{pkg_name}-{version}" + +def collect_snap_resources(crate_dir: pathlib.Path) -> dict[str, str]: + """ + Collect `.snap` files for insta snapshot tests. + + Buck's rust_test uses an external runner that executes tests from the + project root with project-relative paths. Many snapshot tests expect to find + their `.snap` files under `codex-rs//...` from that root, which is + exactly where the rust rules place `resources`. + """ + resources: dict[str, str] = {} + for p in sorted(crate_dir.rglob("*.snap")): + rel = str(p.relative_to(crate_dir)) + resources[rel] = rel + return resources + +def buck_bin_rule_name(pkg_name: str, bin_name: str) -> str: + # Avoid collisions with the package's rust_library target, which we name + # after the package (pkg_name). 
+ if bin_name == pkg_name: + return f"{bin_name}-bin" + return bin_name + + +def main() -> int: + meta = cargo_metadata() + packages = meta.get("packages", []) + + # Package ID -> package json + by_id: dict[str, Any] = {p["id"]: p for p in packages} + + # Workspace member IDs -> crate directory relative to codex-rs/ + workspace_members: list[str] = list(meta.get("workspace_members") or []) + workspace_dirs: dict[str, str] = {} + for pkg_id in workspace_members: + pkg = by_id[pkg_id] + manifest_path = pkg["manifest_path"] + crate_rel = relpath_from_codex_rs(manifest_path) + crate_dir = str(pathlib.Path(crate_rel).parent) + workspace_dirs[pkg_id] = crate_dir + + # Resolve graph nodes for dependency edges (including cfg/platform edges). + resolve = meta.get("resolve") or {} + nodes = resolve.get("nodes") or [] + node_by_id: dict[str, Any] = {n["id"]: n for n in nodes} + + # Build a workspace-wide mapping of Cargo binary name -> Buck label for the + # corresponding rust_binary target. Many integration tests use + # assert_cmd/escargot helpers that look for CARGO_BIN_EXE_* env vars (set by + # Cargo) to find binaries; we emulate that under Buck. 
+ cargo_bin_to_label: dict[str, str] = {} + for pkg_id in workspace_members: + pkg = by_id[pkg_id] + crate_dir_rel = workspace_dirs[pkg_id] + for bin_t in package_bin_targets(pkg): + bin_name = bin_t["name"] + cargo_bin_to_label[bin_name] = label_for_workspace_pkg( + crate_dir_rel, + buck_bin_rule_name(pkg["name"], bin_name), + ) + + generated_buck_files: list[pathlib.Path] = [] + + for pkg_id in workspace_members: + pkg = by_id[pkg_id] + crate_dir_rel = workspace_dirs[pkg_id] + crate_dir = CODEX_RS_ROOT / crate_dir_rel + + lib_t = package_lib_target(pkg) + bin_ts = package_bin_targets(pkg) + test_ts = package_test_targets(pkg) + if lib_t is None and not bin_ts: + continue + + node = node_by_id.get(pkg_id) or {} + + normal_deps: list[BuckDep] = [] + dev_deps: list[BuckDep] = [] + for dep in node.get("deps") or []: + dep_id = dep["pkg"] + if dep_id == pkg_id: + # Cargo sometimes models "self dependencies" (commonly as a + # dev-dependency to enable features for tests). Buck should not + # treat this as a real crate edge, and using it creates cycles. + continue + dep_pkg = by_id.get(dep_id) + if not dep_pkg: + continue + + dep_kinds = dep.get("dep_kinds") or [] + + local_name = dep.get("name") or dep_pkg["name"] + crate_name = dep_pkg.get("targets", [{}])[0].get("name") or local_name + + if dep_id in workspace_dirs: + dep_label = label_for_workspace_pkg(workspace_dirs[dep_id], dep_pkg["name"]) + else: + dep_label = label_for_third_party(dep_pkg["name"], dep_pkg["version"]) + + # A single dependency may appear multiple times with different Cargo + # cfg(...) selectors (e.g. target.'cfg(target_os = "...")'). Cargo + # metadata encodes these as multiple dep_kind entries, so we emit a + # Buck dep entry for each one. 
+ if not dep_kinds: + dep_kinds = [{"kind": None, "target": None}] + + for k in dep_kinds: + kind = k.get("kind") + if kind not in (None, "normal", "dev"): + continue + + dep_entry = BuckDep( + label=dep_label, + local_name=local_name, + crate_name=crate_name, + cfg=k.get("target"), + ) + if kind is None or kind == "normal": + normal_deps.append(dep_entry) + elif kind == "dev": + dev_deps.append(dep_entry) + + # Deduplicate while preserving stable output. + normal_deps = list({(d.label, d.local_name, d.crate_name, d.cfg): d for d in normal_deps}.values()) + dev_deps = list({(d.label, d.local_name, d.crate_name, d.cfg): d for d in dev_deps}.values()) + + base_deps, conditional_deps, named_deps = group_deps(normal_deps) + test_base_deps, test_conditional_deps, test_named_deps = group_deps(normal_deps + dev_deps) + + # Enabled features for this package (as resolved by Cargo). + features = buckify_features(list(node.get("features") or [])) + + edition = pkg.get("edition") or "2021" + ver_major, ver_minor, ver_patch, ver_pre = parse_semver(pkg.get("version") or "0.0.0") + + # Emulate Cargo's CARGO_BIN_EXE_* env vars for all known workspace + # binaries. This avoids test helpers trying to discover binaries via a + # Cargo target directory layout. + # + # These are runtime-only and should be provided via `run_env` so we + # don't accidentally make compile actions depend on executable outputs. + cargo_bin_env = {f"CARGO_BIN_EXE_{k}": f"$(location {v})" for k, v in sorted(cargo_bin_to_label.items())} + cargo_run_env = cargo_bin_env + + lines: list[str] = [] + lines.append("# @generated by scripts/gen_buck_first_party.py") + lines.append("# Regenerate with: (cd codex-rs && python3 scripts/gen_buck_first_party.py)") + lines.append("") + + # NOTE: We use a broad src glob to support include_str!/include_bytes! + # and other build-time file reads without crate-specific buckification. 
+ src_glob = 'glob(["**"], exclude = ["BUCK", "target/**"])' + snap_resources = collect_snap_resources(crate_dir) + + deps_expr = starlark_deps_expr(base_deps, conditional_deps) + + if lib_t is not None: + crate_root = relpath_from_codex_rs(lib_t["src_path"]) + crate_root_rel = str(pathlib.Path(crate_root).relative_to(crate_dir.relative_to(CODEX_RS_ROOT))) + + lines.append("rust_library(") + lines.append(f' name = "{pkg["name"]}",') + lines.append(f' crate = "{lib_t["name"]}",') + lines.append(f' crate_root = "{crate_root_rel}",') + lines.append(f" srcs = {src_glob},") + lines.append(f' edition = "{edition}",') + lines.append( + " env = " + + starlark_dict( + { + "CARGO_CRATE_NAME": lib_t["name"], + "CARGO_MANIFEST_DIR": ".", + # Insta snapshots use a compile-time workspace root + # (option_env!("INSTA_WORKSPACE_ROOT")) and otherwise + # fall back to cargo metadata, which doesn't work under + # Buck's test sandbox. Pin this to repo root. + "INSTA_WORKSPACE_ROOT": ".", + "CARGO_PKG_AUTHORS": "", + "CARGO_PKG_DESCRIPTION": "", + "CARGO_PKG_NAME": pkg["name"], + "CARGO_PKG_REPOSITORY": "", + "CARGO_PKG_VERSION": pkg.get("version") or "0.0.0", + "CARGO_PKG_VERSION_MAJOR": ver_major, + "CARGO_PKG_VERSION_MINOR": ver_minor, + "CARGO_PKG_VERSION_PATCH": ver_patch, + "CARGO_PKG_VERSION_PRE": ver_pre, + }, + indent=" ", + ) + + "," + ) + if features: + lines.append(f" features = {starlark_list(features)},") + lines.append(f" deps = {deps_expr},") + if named_deps: + items = [f'"{k}": "{v}"' for k, v in sorted(named_deps.items())] + lines.append(" named_deps = {") + for it in items: + lines.append(f" {it},") + lines.append(" },") + lines.append(' visibility = ["PUBLIC"],') + lines.append(")") + + # Unit tests for the library (i.e., `#[cfg(test)]` within src/). 
+ test_deps_expr = starlark_deps_expr(test_base_deps, test_conditional_deps) + lines.append("") + lines.append("rust_test(") + lines.append(f' name = "{pkg["name"]}-unit-tests",') + lines.append(f' crate = "{lib_t["name"]}",') + lines.append(f' crate_root = "{crate_root_rel}",') + lines.append(f" srcs = {src_glob},") + lines.append(f' edition = "{edition}",') + lines.append( + " env = " + + starlark_dict( + { + "CARGO_CRATE_NAME": lib_t["name"], + "CARGO_MANIFEST_DIR": ".", + "INSTA_WORKSPACE_ROOT": ".", + "CARGO_PKG_AUTHORS": "", + "CARGO_PKG_DESCRIPTION": "", + "CARGO_PKG_NAME": pkg["name"], + "CARGO_PKG_REPOSITORY": "", + "CARGO_PKG_VERSION": pkg.get("version") or "0.0.0", + "CARGO_PKG_VERSION_MAJOR": ver_major, + "CARGO_PKG_VERSION_MINOR": ver_minor, + "CARGO_PKG_VERSION_PATCH": ver_patch, + "CARGO_PKG_VERSION_PRE": ver_pre, + }, + indent=" ", + ) + + "," + ) + if cargo_run_env: + lines.append(" run_env = " + starlark_dict(cargo_run_env, indent=" ") + ",") + if snap_resources: + lines.append(" resources = " + starlark_dict(snap_resources, indent=" ") + ",") + if features: + lines.append(f" features = {starlark_list(features)},") + lines.append(f" deps = {test_deps_expr},") + if test_named_deps: + items = [f'"{k}": "{v}"' for k, v in sorted(test_named_deps.items())] + lines.append(" named_deps = {") + for it in items: + lines.append(f" {it},") + lines.append(" },") + lines.append(' visibility = ["PUBLIC"],') + lines.append(")") + + for bin_t in bin_ts: + bin_rule_name = buck_bin_rule_name(pkg["name"], bin_t["name"]) + crate_root = relpath_from_codex_rs(bin_t["src_path"]) + crate_root_rel = str(pathlib.Path(crate_root).relative_to(crate_dir.relative_to(CODEX_RS_ROOT))) + + # Cargo makes the package's library available to binaries in the + # same package, and package dependencies are also visible. 
+ bin_base_deps = base_deps + if lib_t is not None: + bin_base_deps = [f":{pkg['name']}"] + base_deps + bin_deps_expr = starlark_deps_expr(bin_base_deps, conditional_deps) + + lines.append("") + lines.append("rust_binary(") + lines.append(f' name = "{bin_rule_name}",') + lines.append(f' crate = "{bin_t["name"]}",') + lines.append(f' crate_root = "{crate_root_rel}",') + lines.append(f" srcs = {src_glob},") + lines.append(f' edition = "{edition}",') + lines.append( + " env = " + + starlark_dict( + { + "CARGO_CRATE_NAME": bin_t["name"], + "CARGO_MANIFEST_DIR": ".", + "INSTA_WORKSPACE_ROOT": ".", + "CARGO_PKG_AUTHORS": "", + "CARGO_PKG_DESCRIPTION": "", + "CARGO_PKG_NAME": pkg["name"], + "CARGO_PKG_REPOSITORY": "", + "CARGO_PKG_VERSION": pkg.get("version") or "0.0.0", + "CARGO_PKG_VERSION_MAJOR": ver_major, + "CARGO_PKG_VERSION_MINOR": ver_minor, + "CARGO_PKG_VERSION_PATCH": ver_patch, + "CARGO_PKG_VERSION_PRE": ver_pre, + }, + indent=" ", + ) + + "," + ) + if features: + lines.append(f" features = {starlark_list(features)},") + lines.append(f" deps = {bin_deps_expr},") + if named_deps: + items = [f'"{k}": "{v}"' for k, v in sorted(named_deps.items())] + lines.append(" named_deps = {") + for it in items: + lines.append(f" {it},") + lines.append(" },") + lines.append(' visibility = ["PUBLIC"],') + lines.append(")") + + # Unit tests for the binary (i.e., `#[cfg(test)]` within src/main.rs). + bin_test_base_deps = list(test_base_deps) + if lib_t is not None: + # Cargo makes the package's library available to binaries; the + # unit test harness for `src/main.rs` needs that dependency too. 
+ bin_test_base_deps = [f":{pkg['name']}"] + bin_test_base_deps + test_deps_expr = starlark_deps_expr(bin_test_base_deps, test_conditional_deps) + lines.append("") + lines.append("rust_test(") + lines.append(f' name = "{bin_rule_name}-unit-tests",') + lines.append(f' crate = "{bin_t["name"]}",') + lines.append(f' crate_root = "{crate_root_rel}",') + lines.append(f" srcs = {src_glob},") + lines.append(f' edition = "{edition}",') + lines.append( + " env = " + + starlark_dict( + { + "CARGO_CRATE_NAME": bin_t["name"], + "CARGO_MANIFEST_DIR": ".", + "INSTA_WORKSPACE_ROOT": ".", + "CARGO_PKG_AUTHORS": "", + "CARGO_PKG_DESCRIPTION": "", + "CARGO_PKG_NAME": pkg["name"], + "CARGO_PKG_REPOSITORY": "", + "CARGO_PKG_VERSION": pkg.get("version") or "0.0.0", + "CARGO_PKG_VERSION_MAJOR": ver_major, + "CARGO_PKG_VERSION_MINOR": ver_minor, + "CARGO_PKG_VERSION_PATCH": ver_patch, + "CARGO_PKG_VERSION_PRE": ver_pre, + }, + indent=" ", + ) + + "," + ) + if cargo_run_env: + lines.append(" run_env = " + starlark_dict(cargo_run_env, indent=" ") + ",") + if snap_resources: + lines.append(" resources = " + starlark_dict(snap_resources, indent=" ") + ",") + if features: + lines.append(f" features = {starlark_list(features)},") + lines.append(f" deps = {test_deps_expr},") + if test_named_deps: + items = [f'"{k}": "{v}"' for k, v in sorted(test_named_deps.items())] + lines.append(" named_deps = {") + for it in items: + lines.append(f" {it},") + lines.append(" },") + lines.append(' visibility = ["PUBLIC"],') + lines.append(")") + + # Integration tests under `tests/` (Cargo `kind = ["test"]`). + # + # Cargo gives these crates access to both normal + dev dependencies, and + # also makes the package's library available if it exists. 
+ if test_ts: + test_deps_expr = starlark_deps_expr(test_base_deps, test_conditional_deps) + for test_t in test_ts: + test_name = test_t["name"] + if test_name in SKIP_INTEGRATION_TESTS_BY_PACKAGE.get(pkg["name"], set()): + continue + crate_root = relpath_from_codex_rs(test_t["src_path"]) + crate_root_rel = str(pathlib.Path(crate_root).relative_to(crate_dir.relative_to(CODEX_RS_ROOT))) + + integration_base_deps = list(test_base_deps) + if lib_t is not None: + integration_base_deps = [f":{pkg['name']}"] + integration_base_deps + integration_deps_expr = starlark_deps_expr(integration_base_deps, test_conditional_deps) + + lines.append("") + lines.append("rust_test(") + lines.append(f' name = "{test_name}-integration-test",') + lines.append(f' crate = "{test_name}",') + lines.append(f' crate_root = "{crate_root_rel}",') + lines.append(f" srcs = {src_glob},") + lines.append(f' edition = "{edition}",') + lines.append( + " env = " + + starlark_dict( + { + "CARGO_CRATE_NAME": test_name, + "CARGO_MANIFEST_DIR": ".", + "INSTA_WORKSPACE_ROOT": ".", + "CARGO_PKG_AUTHORS": "", + "CARGO_PKG_DESCRIPTION": "", + "CARGO_PKG_NAME": pkg["name"], + "CARGO_PKG_REPOSITORY": "", + "CARGO_PKG_VERSION": pkg.get("version") or "0.0.0", + "CARGO_PKG_VERSION_MAJOR": ver_major, + "CARGO_PKG_VERSION_MINOR": ver_minor, + "CARGO_PKG_VERSION_PATCH": ver_patch, + "CARGO_PKG_VERSION_PRE": ver_pre, + }, + indent=" ", + ) + + "," + ) + if cargo_run_env: + lines.append(" run_env = " + starlark_dict(cargo_run_env, indent=" ") + ",") + if snap_resources: + lines.append(" resources = " + starlark_dict(snap_resources, indent=" ") + ",") + if features: + lines.append(f" features = {starlark_list(features)},") + lines.append(f" deps = {integration_deps_expr},") + if test_named_deps: + items = [f'"{k}": "{v}"' for k, v in sorted(test_named_deps.items())] + lines.append(" named_deps = {") + for it in items: + lines.append(f" {it},") + lines.append(" },") + lines.append(' visibility = ["PUBLIC"],') + 
lines.append(")") + + write_buck_file(crate_dir, "\n".join(lines) + "\n") + generated_buck_files.append(crate_dir / "BUCK") + + buildifier(generated_buck_files) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/codex-rs/scripts/gen_reindeer_fixups.py b/codex-rs/scripts/gen_reindeer_fixups.py new file mode 100644 index 00000000000..e71eb28deb0 --- /dev/null +++ b/codex-rs/scripts/gen_reindeer_fixups.py @@ -0,0 +1,351 @@ +#!/usr/bin/env python3 +""" +Generate Reindeer fixups for third-party crates. + +Reindeer requires an explicit decision for each crate with a build script: +either run it or ignore it. For most third-party crates we run build scripts. + +We intentionally do not generate `extra_srcs` fixups here: + - Reindeer validates fixup globs against its chosen crate source directory, + and with vendoring enabled this can be a filtered view of the package that + does not include top-level docs/fixtures (README, tests data, etc). + - Instead, we include common non-Rust sources via the `codex_rust_*` wrapper + macros in `codex-rs/buck2/reindeer_macros.bzl` using Buck `glob(...)`. + +This script is checked in; its outputs are not. + +The generated fixups live under `codex-rs/third-party/fixups/` and are +intentionally gitignored for now (see the repo root `.gitignore`, which ignores +`codex-rs/third-party/`). This script is designed to be re-run and will +overwrite any existing generated fixups. 
def reindeer_reachable_package_ids(meta: dict[str, Any]) -> set[str]:
    """
    Return the ids of packages reachable from the workspace members.

    The walk starts at every entry of `workspace_members` and follows the
    edges recorded in `resolve.nodes[*].deps`. An edge whose `dep_kinds` are
    all `dev` is not followed; an edge with no `dep_kinds` at all is followed.
    The workspace members themselves are always part of the result.
    """

    graph: dict[str, Any] = {
        entry["id"]: entry
        for entry in (meta.get("resolve") or {}).get("nodes") or []
    }

    def is_dev_only(edge: dict[str, Any]) -> bool:
        # An empty kind list is treated as "not dev-only".
        kinds = edge.get("dep_kinds") or []
        return bool(kinds) and all(kind.get("kind") == "dev" for kind in kinds)

    seen: set[str] = set()
    pending = list(meta.get("workspace_members") or [])
    while pending:
        current = pending.pop()
        if current in seen:
            continue
        seen.add(current)

        entry = graph.get(current)
        if not entry:
            # Ids without a resolve node are recorded but have no edges.
            continue

        pending.extend(
            edge["pkg"]
            for edge in entry.get("deps") or []
            if not is_dev_only(edge)
        )

    return seen
+ """ + + cargo_toml = VENDOR_DIR / f"{name}-{version}" / "Cargo.toml" + if not cargo_toml.exists(): + return None + + text = cargo_toml.read_text(encoding="utf-8", errors="replace") + + # Very small parser: search within the [package] section first, then fall back. + pkg_idx = text.find("[package]") + if pkg_idx != -1: + rest = text[pkg_idx:] + next_table = rest.find("\n[", 1) + pkg_block = rest if next_table == -1 else rest[:next_table] + m = re.search(r'(?m)^\s*links\s*=\s*"([^"]+)"\s*$', pkg_block) + if m: + return m.group(1) + + m = re.search(r'(?m)^\s*links\s*=\s*"([^"]+)"\s*$', text) + if m: + return m.group(1) + return None + + +def openssl_dep_version_number_hex() -> str | None: + """ + Derive a DEP_OPENSSL_VERSION_NUMBER value (hex string, no 0x prefix). + + Under Cargo, openssl-sys emits a `cargo:version_number=...` metadata line + and Cargo converts it into DEP_OPENSSL_VERSION_NUMBER for dependents. + Buck's buildscript runner does not currently propagate those DEP_* env vars, + so we approximate the value here for Buck builds. + """ + + def pkg_config_modversion() -> str | None: + try: + out = subprocess.check_output( + ["pkg-config", "--modversion", "openssl"], + cwd=CODEX_RS_ROOT, + text=True, + stderr=subprocess.DEVNULL, + ) + except (OSError, subprocess.CalledProcessError): + return None + v = out.strip() + return v if v else None + + def parse_openssl_version( + v: str, + ) -> tuple[int, int, int, str | None] | None: + """ + Parse versions like: + - 3.0.13 + - 1.1.1w + - 1.1.0h + """ + + m = re.match(r"^(\d+)\.(\d+)\.(\d+)([a-z])?$", v) + if not m: + return None + major, minor, patch = (int(m.group(1)), int(m.group(2)), int(m.group(3))) + suffix = m.group(4) + return (major, minor, patch, suffix) + + v = pkg_config_modversion() + parsed = parse_openssl_version(v) if v else None + + # In CI we install libssl-dev, so pkg-config should generally be available. + # If it isn't, guess OpenSSL 3.x on modern Linux distros. 
def openssl_dep_conf() -> str | None:
    """
    Derive a DEP_OPENSSL_CONF value (comma-separated macro names).

    Under Cargo, openssl-sys emits `cargo:conf=...` and Cargo converts it into
    DEP_OPENSSL_CONF for dependents. The `openssl` crate's build script reads
    DEP_OPENSSL_CONF and re-emits per-macro `osslconf="..."` cfgs so it can
    conditionalize APIs that are disabled in the system OpenSSL build (e.g.
    OPENSSL_NO_IDEA).

    Returns:
        The sorted `OPENSSL_NO_*` macro names joined by ",". This is the empty
        string when the header defines none of them. Returns None when
        `pkg-config` fails or no C compiler could preprocess the header.
    """

    def pkg_config_cflags() -> list[str] | None:
        # None means "pkg-config missing or openssl.pc not found"; an empty
        # list is a valid answer (headers live on the default include path).
        try:
            out = subprocess.check_output(
                ["pkg-config", "--cflags", "openssl"],
                cwd=CODEX_RS_ROOT,
                text=True,
                stderr=subprocess.DEVNULL,
            )
        except (OSError, subprocess.CalledProcessError):
            return None
        return out.split()

    def preprocess_opensslconf(cflags: list[str]) -> str | None:
        # Dump every macro visible after including the OpenSSL configuration
        # header. `-dM -E` prints `#define NAME VALUE` lines.
        snippet = "#include <openssl/opensslconf.h>\n"
        for cc in ("cc", "clang"):
            try:
                return subprocess.check_output(
                    [cc, "-dM", "-E", "-xc", "-", *cflags],
                    cwd=CODEX_RS_ROOT,
                    input=snippet,
                    text=True,
                    stderr=subprocess.DEVNULL,
                )
            except (OSError, subprocess.CalledProcessError):
                continue
        return None

    cflags = pkg_config_cflags()
    # Only bail out when pkg-config itself failed. An empty flag list must
    # still reach the preprocessor, otherwise systems with OpenSSL in the
    # default include path would never report their real configuration.
    if cflags is None:
        return None

    out = preprocess_opensslconf(cflags)
    if not out:
        return None

    # Match the behavior expected by openssl/build.rs: it splits DEP_OPENSSL_CONF
    # by ',' and emits cfgs `osslconf="..."` for each token.
    conf: set[str] = set()
    for line in out.splitlines():
        # Lines look like: "#define OPENSSL_NO_IDEA 1"
        m = re.match(r"^#define\s+(OPENSSL_[A-Z0-9_]+)\b", line)
        if not m:
            continue
        macro = m.group(1)
        if macro.startswith("OPENSSL_NO_"):
            conf.add(macro)
    return ",".join(sorted(conf))
+ buildscript_run_overrides: dict[str, bool] = { + # build.rs only adds rerun-if-changed + "codex-execpolicy-legacy": False, + # Windows-only resource compilation + "codex-windows-sandbox": False, + } + + # Packages where Cargo metadata reports a build script, but Reindeer does not + # require/accept a buildscript fixup in this workspace. + skip_packages: set[str] = { + "indexmap", + "quinn", + } + + reachable_pkgs = [p for p in packages if p["id"] in reachable_ids] + + # name -> version -> has_buildscript + by_name: dict[str, dict[str, bool]] = {} + for pkg in reachable_pkgs: + name = pkg["name"] + version = pkg["version"] + has_buildscript = any( + "custom-build" in (t.get("kind") or []) for t in (pkg.get("targets") or []) + ) + by_name.setdefault(name, {})[version] = has_buildscript + + # Nuke and regenerate: fixups are gitignored and generated. + if FIXUPS_DIR.exists(): + shutil.rmtree(FIXUPS_DIR) + FIXUPS_DIR.mkdir(parents=True, exist_ok=True) + + wrote = 0 + for name, versions in sorted(by_name.items()): + if name in skip_packages: + continue + + buildscript_versions = sorted([v for v, has in versions.items() if has]) + if not buildscript_versions: + continue + + run = buildscript_run_overrides.get(name, True) + + fixup_path = FIXUPS_DIR / name / "fixups.toml" + fixup_path.parent.mkdir(parents=True, exist_ok=True) + + stanzas: list[str] = [] + for v in buildscript_versions: + if run: + links = vendored_links_value(name, v) + stanzas.append(f"['cfg(version = \"={v}\")'.buildscript.run]") + stanzas.append("rustc_link_lib = true") + stanzas.append("rustc_link_search = true") + env_vars: dict[str, str] = {} + # Some crates rely on this being set when `links = "..."` + # exists in Cargo.toml (Cargo passes it to build scripts). + if links: + env_vars["CARGO_MANIFEST_LINKS"] = links + # The `openssl` crate's build script derives cfgs from + # DEP_OPENSSL_VERSION_NUMBER (emitted by openssl-sys). 
Buck's + # buildscript runner does not currently propagate those DEP_* + # env vars, so approximate the value for Buck builds. + if name == "openssl" and v == "0.10.73": + dep_ver = openssl_dep_version_number_hex() + if dep_ver: + env_vars["DEP_OPENSSL_VERSION_NUMBER"] = dep_ver + dep_conf = openssl_dep_conf() + if dep_conf is not None: + # Empty string is meaningful: "no conf macros enabled". + env_vars["DEP_OPENSSL_CONF"] = dep_conf + elif sys.platform.startswith("linux"): + # If we can't determine the conf macros, be conservative + # on Linux (Ubuntu CI commonly disables IDEA). + env_vars["DEP_OPENSSL_CONF"] = "OPENSSL_NO_IDEA" + if env_vars: + stanzas.append(f"env = {toml_inline_table(env_vars)}") + stanzas.append("") + else: + stanzas.append(f"['cfg(version = \"={v}\")']") + stanzas.append("buildscript.run = false") + stanzas.append("") + + content = "\n".join(stanzas).rstrip() + "\n" + fixup_path.write_text(content, encoding="utf-8") + wrote += 1 + + if wrote: + print( + f"Wrote buildscript fixups.toml for {wrote} crates under " + f"{os.path.relpath(FIXUPS_DIR, CODEX_RS_ROOT)}" + ) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/codex-rs/scripts/patch_third_party_buck_for_tests.py b/codex-rs/scripts/patch_third_party_buck_for_tests.py new file mode 100644 index 00000000000..8c401581eb4 --- /dev/null +++ b/codex-rs/scripts/patch_third_party_buck_for_tests.py @@ -0,0 +1,510 @@ +#!/usr/bin/env python3 +""" +Patch the Reindeer-generated `codex-rs/third-party/BUCK` to add a small overlay +of third-party crates that are only used as *dev-dependencies* in the Cargo +workspace. + +Reindeer currently generates Buck targets for Cargo "normal" deps (plus build +deps for build scripts), but it does not emit targets for dev-dependencies +because it does not generate rules for Cargo test/example/bench targets. 
def parse_semver(version: str) -> tuple[str, str, str, str]:
    """
    Split a package version into `(major, minor, patch, pre)` strings.

    The pieces are used for the `CARGO_PKG_VERSION_*` env entries of the
    generated rules. SemVer build metadata (the `+...` suffix, as in
    `2.0.15+zstd.1.5.7`) is accepted and discarded: it belongs neither to the
    numeric components nor to the pre-release identifier.

    Returns `("0", "0", "0", "")` when `version` does not look like
    `MAJOR.MINOR.PATCH[-PRE][+BUILD]`.
    """
    m = re.match(r"^(\d+)\.(\d+)\.(\d+)(?:-([^+]*))?(?:\+.*)?$", version)
    if not m:
        return ("0", "0", "0", "")
    return (m.group(1), m.group(2), m.group(3), m.group(4) or "")
def starlark_list(items: list[str], indent: str = " ") -> str:
    """
    Render `items` as a multi-line Starlark list of string literals.

    An empty input renders as `[]`. Otherwise each item is emitted on its own
    line, prefixed with `indent`, escaped with `starlark_str` (the same
    escaping `starlark_dict` applies) and followed by a trailing comma.
    """
    if not items:
        return "[]"
    lines = ["["]
    lines.extend(f'{indent}"{starlark_str(it)}",' for it in items)
    lines.append("]")
    return "\n".join(lines)


def starlark_str(s: str) -> str:
    """Escape `s` for use inside a double-quoted Starlark string literal."""
    # Backslashes must be doubled first so the escapes added below are not
    # escaped a second time.
    return (
        s.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\r", "\\r")
        .replace("\n", "\\n")
    )
def strip_existing_overlay(text: str) -> str:
    """
    Remove the first generated overlay section from `text`, markers included.

    Text without a BEGIN marker is returned unchanged. A BEGIN marker with no
    END marker after it aborts the script instead of guessing where the
    section ends.
    """
    before, begin, after = text.partition(BEGIN_MARKER)
    if not begin:
        return text

    _overlay, end, remainder = after.partition(END_MARKER)
    if not end:
        # If the file is corrupted, be conservative and do not try to patch.
        raise SystemExit(f"Found BEGIN marker but not END marker in {THIRD_PARTY_BUCK}")
    return before + remainder
def present_target_names(text: str) -> set[str]:
    """
    Return the set of `name = "..."` values found at the start of lines in `text`.

    This is intentionally simple: it is only used to decide whether a target
    already exists so it is not duplicated. Both plain (`name = "foo"`) and
    backslash-escaped (`name = \\"foo\\"`) quoting are accepted.
    """
    # The captured name excludes backslashes as well as quotes. Otherwise the
    # backslash that precedes an escaped closing quote would be swallowed into
    # the name (`foo\` instead of `foo`) and existing targets would never
    # compare equal to `<name>-<version>`.
    return set(re.findall(r'(?m)^\s*name\s*=\s*\\?"([^"\\]+)\\?"', text))
+ dev_needed_pkg_ids: set[str] = set() + for pkg_id in workspace_members: + node = node_by_id.get(pkg_id) or {} + for dep in node.get("deps") or []: + dep_id = dep["pkg"] + if dep_id in workspace_members: + continue + dep_kinds = dep.get("dep_kinds") or [] + if not any(k.get("kind") == "dev" for k in dep_kinds): + continue + dep_pkg = by_id.get(dep_id) + if not dep_pkg: + continue + # Only patch registry crates. Workspace members have source=None. + if dep_pkg.get("source") is None: + continue + dev_needed_pkg_ids.add(dep_id) + + # Reindeer buckify does not emit targets for dev-deps. We patch in any dev + # deps that are missing *and* any of their transitive normal deps that are + # also missing (e.g., proc-macro helper crates). + to_add: set[str] = {pid for pid in dev_needed_pkg_ids if target_name_for_pkg_id(pid) not in existing_targets} + queue = list(to_add) + + while queue: + pid = queue.pop() + node = node_by_id.get(pid) or {} + for dep in node.get("deps") or []: + dep_id = dep["pkg"] + dep_pkg = by_id.get(dep_id) + if not dep_pkg: + continue + if dep_pkg.get("source") is None: + continue + + dep_kinds = dep.get("dep_kinds") or [] + if not any(k.get("kind") is None or k.get("kind") == "normal" for k in dep_kinds): + continue + + tgt = target_name_for_pkg_id(dep_id) + if tgt in existing_targets or dep_id in to_add: + continue + + to_add.add(dep_id) + queue.append(dep_id) + + missing_pkg_ids = sorted(to_add, key=lambda pid: (by_id[pid]["name"], by_id[pid]["version"])) + if os.environ.get("CODEX_PATCH_THIRD_PARTY_DEBUG") == "1": + missing_targets = [target_name_for_pkg_id(pid) for pid in missing_pkg_ids] + preview = existing_buck[:120].replace("\n", "\\n") + print(f"patch_third_party_buck_for_tests: buck_preview={preview!r}") + print(f"patch_third_party_buck_for_tests: existing_targets={len(existing_targets)}") + print(f"patch_third_party_buck_for_tests: dev_needed={len(dev_needed_pkg_ids)} missing={len(missing_targets)}") + for t in sorted(missing_targets)[:50]: 
+ print(f" missing: {t}") + if not missing_pkg_ids: + return existing_buck + + lines: list[str] = [] + lines.append("\n" + BEGIN_MARKER.rstrip("\n")) + lines.append("# This section is appended by codex-rs/scripts/patch_third_party_buck_for_tests.py") + lines.append("# to make Buck `rust_test()` targets usable for first-party dev-deps.") + lines.append("") + + for pkg_id in missing_pkg_ids: + pkg = by_id[pkg_id] + name = pkg["name"] + version = pkg["version"] + rule_name = f"{name}-{version}" + if rule_name in existing_targets: + continue + + # Find the lib target to determine the Rust crate name and edition. + lib_targets = [ + t + for t in (pkg.get("targets") or []) + if "lib" in (t.get("kind") or []) or "proc-macro" in (t.get("kind") or []) + ] + if not lib_targets: + # These dev deps should all be libraries. + continue + lib_t = lib_targets[0] + crate_name = lib_t["name"] + edition = pkg.get("edition") or "2021" + ver_major, ver_minor, ver_patch, ver_pre = parse_semver(version) + proc_macro = "proc-macro" in (lib_t.get("kind") or []) + + # Compute deps for the library (normal deps only). + node = node_by_id.get(pkg_id) or {} + deps: list[BuckDep] = [] + for dep in node.get("deps") or []: + dep_id = dep["pkg"] + dep_pkg = by_id.get(dep_id) + if not dep_pkg: + continue + + dep_kinds = dep.get("dep_kinds") or [] + local_name = dep.get("name") or dep_pkg["name"] + dep_crate_name = dep_pkg.get("targets", [{}])[0].get("name") or local_name + dep_label = f":{dep_pkg['name']}-{dep_pkg['version']}" + + # One dependency may have multiple cfg(...) selectors; include them all. 
+ for k in dep_kinds: + kind = k.get("kind") + if kind is None or kind == "normal": + deps.append( + BuckDep( + label=dep_label, + local_name=local_name, + crate_name=dep_crate_name, + cfg=k.get("target"), + ) + ) + + base_deps, conditional_deps, named_deps = group_deps(deps) + deps_expr = starlark_deps_expr(base_deps, conditional_deps) + + crate_root_rel = vendor_crate_root(name, version) + + env = { + "CARGO_CRATE_NAME": crate_name, + "CARGO_MANIFEST_DIR": f"vendor/{name}-{version}", + "CARGO_PKG_AUTHORS": ":".join(pkg.get("authors") or []), + "CARGO_PKG_DESCRIPTION": pkg.get("description") or "", + "CARGO_PKG_NAME": name, + "CARGO_PKG_REPOSITORY": pkg.get("repository") or "", + "CARGO_PKG_VERSION": version, + "CARGO_PKG_VERSION_MAJOR": ver_major, + "CARGO_PKG_VERSION_MINOR": ver_minor, + "CARGO_PKG_VERSION_PATCH": ver_patch, + "CARGO_PKG_VERSION_PRE": ver_pre, + } + + has_build_rs = (VENDOR_DIR / f"{name}-{version}" / "build.rs").exists() + if has_build_rs: + build_bin = f"{rule_name}-build-script-build" + build_run = f"{rule_name}-build-script-run" + build_rs_rel = pathlib.Path(f"vendor/{name}-{version}/build.rs") + + # Build deps for compiling the build script itself. 
+ build_deps: list[BuckDep] = [] + for dep in node.get("deps") or []: + dep_id = dep["pkg"] + dep_pkg = by_id.get(dep_id) + if not dep_pkg: + continue + dep_kinds = dep.get("dep_kinds") or [] + local_name = dep.get("name") or dep_pkg["name"] + dep_crate_name = dep_pkg.get("targets", [{}])[0].get("name") or local_name + dep_label = f":{dep_pkg['name']}-{dep_pkg['version']}" + for k in dep_kinds: + if k.get("kind") == "build": + build_deps.append( + BuckDep( + label=dep_label, + local_name=local_name, + crate_name=dep_crate_name, + cfg=k.get("target"), + ) + ) + build_base_deps, build_conditional_deps, build_named_deps = group_deps(build_deps) + build_deps_expr = starlark_deps_expr(build_base_deps, build_conditional_deps) + + lines.append("codex_rust_binary(") + lines.append(f' name = "{build_bin}",') + lines.append(f' srcs = ["{build_rs_rel}"],') + lines.append(' crate = "build_script_build",') + lines.append(f' crate_root = "{build_rs_rel}",') + lines.append(f' edition = "{edition}",') + lines.append(" env = " + starlark_dict({**env, "CARGO_CRATE_NAME": "build_script_build"}, indent=" ") + ",") + lines.append(' visibility = [],') + if build_base_deps or build_conditional_deps: + lines.append(f" deps = {build_deps_expr},") + if build_named_deps: + items = [f'"{k}": "{v}"' for k, v in sorted(build_named_deps.items())] + lines.append(" named_deps = {") + for it in items: + lines.append(f" {it},") + lines.append(" },") + lines.append(")") + lines.append("") + + lines.append("codex_buildscript_run(") + lines.append(f' name = "{build_run}",') + lines.append(f' package_name = "{name}",') + lines.append(f' buildscript_rule = ":{build_bin}",') + # Keep env small; codex_buildscript_run fills in common profile vars. 
def main() -> int:
    """
    Patch the Reindeer-generated third-party BUCK file in place.

    Steps: drop any overlay left by a previous run, apply the feature patch to
    the Reindeer output, append a fresh overlay, write the result back when it
    differs from what is on disk, and finally format the file with buildifier
    when the pinned wrapper exists.

    Returns 0 on success. Exits via SystemExit when the BUCK file is missing.
    """
    if not THIRD_PARTY_BUCK.exists():
        raise SystemExit(f"Missing {THIRD_PARTY_BUCK}; run `reindeer buckify` first.")
    meta = cargo_metadata()
    on_disk = THIRD_PARTY_BUCK.read_text(encoding="utf-8")
    base = strip_existing_overlay(on_disk)
    base = patch_reindeer_output_for_test_features(base)
    patched = append_overlay(meta, base)
    # Compare with the text that is actually on disk, not with the
    # intermediate `base`. Otherwise a run that needs no overlay would
    # silently discard the stripped overlay and the feature patch.
    if patched != on_disk:
        THIRD_PARTY_BUCK.write_text(patched, encoding="utf-8")
    if BUILDIFIER.exists():
        # Keep the local-only generated file readable.
        proc = subprocess.run(
            [str(BUILDIFIER), "-lint=off", "-mode=fix", str(THIRD_PARTY_BUCK)],
            cwd=REPO_ROOT,
            text=True,
        )
        if proc.returncode != 0:
            # Formatting is best-effort; an unformatted file is still usable.
            print(
                f"warning: buildifier failed (exit {proc.returncode}); leaving {THIRD_PARTY_BUCK} unformatted",
                file=sys.stderr,
            )
    return 0
+- Buck2 + Reindeer are pinned via: + - `./scripts/buck2` + - `./scripts/reindeer` + +## One-Time Setup (Generates Local-Only Files) + +Run: + +```sh +./scripts/setup_buck2_local.sh +``` + +This script will: + +- vendor third-party crates into `codex-rs/third-party/vendor/` (large) +- run `reindeer` to produce third-party Buck targets (gitignored) +- generate first-party `BUCK` files for workspace members in `codex-rs/**/BUCK` (gitignored) +- format generated `BUCK` files with the pinned `./scripts/buildifier` + +Notes: + +- `codex-rs/third-party/` is currently large (on the order of GBs) due to vendoring. +- All generated artifacts live under gitignored paths (see `.gitignore`). + +## Building + +Buck targets are rooted at the repo root. + +Dev build (default): + +```sh +./scripts/buck2 build //codex-rs/cli:codex +``` + +Release-ish build: + +```sh +./scripts/buck2 build -c codex.rust_profile=release //codex-rs/cli:codex +``` + +The `codex.rust_profile` config knob is defined in `.buckconfig` and is used to +approximate Cargo profiles. + +## Testing + +This setup generates `rust_test()` rules for first-party crates, so you can run: + +```sh +./scripts/buck2 test //codex-rs/... +``` + +Targeted runs are often more useful: + +```sh +./scripts/buck2 test //codex-rs/cli:codex-cli-unit-tests +``` + +### Current Limitations + +This is still a work-in-progress, so some tests may fail under Buck2 even if +they pass under Cargo. Common reasons: + +- Integration tests that assume `cargo test` semantics (e.g. using `escargot` to + invoke Cargo during the test). +- Snapshot tests (via `insta`) can behave differently because Buck2 executes + tests under an isolated sandbox with project-relative paths. + +If you see failures, prefer running a smaller target (like a single crate’s unit +tests) while we iterate on broader compatibility. 
+ +## Cleaning Up + +To delete Buck2 outputs/caches for this repo: + +```sh +./scripts/buck2 clean +``` + +To reclaim disk space from the vendored crates, remove: + +```sh +rm -rf codex-rs/third-party +``` + +## Repo Layout Notes + +- The repo root is the Buck root (`.buckroot`). +- `prelude/` is a tiny placeholder directory so `.buckconfig` can declare the + `prelude` cell while using Buck2’s bundled prelude implementation. diff --git a/prelude/README.md b/prelude/README.md new file mode 100644 index 00000000000..07c0bebe640 --- /dev/null +++ b/prelude/README.md @@ -0,0 +1,6 @@ +This directory exists solely to give Buck2 a stable in-repo path for the +`prelude` cell declared in `.buckconfig`. + +The actual prelude implementation is provided by the Buck2 binary itself via +`[external_cells] prelude = bundled`. + diff --git a/scripts/buck2 b/scripts/buck2 new file mode 100755 index 00000000000..82979561dd9 --- /dev/null +++ b/scripts/buck2 @@ -0,0 +1,91 @@ +#!/usr/bin/env dotslash + +{ + "name": "buck2", + "platforms": { + "macos-aarch64": { + "size": 33146442, + "hash": "blake3", + "digest": "2940f7436659784c8e6e628ebdbe01c02b565901e37c0ccc22d910a549c8f56a", + "format": "zst", + "path": "buck2-aarch64-apple-darwin", + "providers": [ + { + "url": "https://github.com/facebook/buck2/releases/download/2025-12-15/buck2-aarch64-apple-darwin.zst" + } + ] + }, + "windows-aarch64": { + "size": 28937384, + "hash": "blake3", + "digest": "1f91d6fd0db612b6ce62b18d9267dec87ea076d83f6bd316d29a3264f9dad4f7", + "format": "zst", + "path": "buck2-aarch64-pc-windows-msvc.exe", + "providers": [ + { + "url": "https://github.com/facebook/buck2/releases/download/2025-12-15/buck2-aarch64-pc-windows-msvc.exe.zst" + } + ] + }, + "linux-aarch64": { + "size": 34435966, + "hash": "blake3", + "digest": "0e0a599cf3d9da6d81fe8b615fefa0d57d0b4c3117cbc9984a74d45aaf061cad", + "format": "zst", + "path": "buck2-aarch64-unknown-linux-musl", + "providers": [ + { + "url": 
"https://github.com/facebook/buck2/releases/download/2025-12-15/buck2-aarch64-unknown-linux-musl.zst" + } + ] + }, + "linux-riscv64": { + "size": 36811888, + "hash": "blake3", + "digest": "52dcb7df8353283d95858cc03eb4acb4014d4ae06c3ad244f2c32c9ff5e4343c", + "format": "zst", + "path": "buck2-riscv64gc-unknown-linux-gnu", + "providers": [ + { + "url": "https://github.com/facebook/buck2/releases/download/2025-12-15/buck2-riscv64gc-unknown-linux-gnu.zst" + } + ] + }, + "macos-x86_64": { + "size": 35331653, + "hash": "blake3", + "digest": "3834c662779ac9c3de22a8b0e6b85aacc2df21a716fa5a9316374e676ab69e64", + "format": "zst", + "path": "buck2-x86_64-apple-darwin", + "providers": [ + { + "url": "https://github.com/facebook/buck2/releases/download/2025-12-15/buck2-x86_64-apple-darwin.zst" + } + ] + }, + "windows-x86_64": { + "size": 30576683, + "hash": "blake3", + "digest": "9df9a89e9ffba06eef0f3808dd802280380922cf26eba87aebb6313847c4754e", + "format": "zst", + "path": "buck2-x86_64-pc-windows-msvc.exe", + "providers": [ + { + "url": "https://github.com/facebook/buck2/releases/download/2025-12-15/buck2-x86_64-pc-windows-msvc.exe.zst" + } + ] + }, + "linux-x86_64": { + "size": 35814139, + "hash": "blake3", + "digest": "3c78e830016fd68156913a830a5376b278ae366ffc976f155ba3d4b455a2f6e9", + "format": "zst", + "path": "buck2-x86_64-unknown-linux-musl", + "providers": [ + { + "url": "https://github.com/facebook/buck2/releases/download/2025-12-15/buck2-x86_64-unknown-linux-musl.zst" + } + ] + } + } +} diff --git a/scripts/buildifier b/scripts/buildifier new file mode 100755 index 00000000000..7faca665480 --- /dev/null +++ b/scripts/buildifier @@ -0,0 +1,80 @@ +#!/usr/bin/env dotslash + +// Buildifier is published as raw binaries (not archived), so this manifest omits +// the optional `format` field. +// +// TODO: Add `windows-aarch64` once the buildtools releases include a Windows +// arm64 artifact. 
Support was added in https://github.com/bazelbuild/buildtools/issues/874, +// but as of v8.2.1, only windows-amd64 is available. +{ + "name": "buildifier", + "platforms": { + "macos-aarch64": { + "size": 7717890, + "hash": "blake3", + "digest": "1f01da46c314bd22890d92731b44bcfc555331c87bc113c72c9be13eaa27cb59", + "path": "buildifier", + "providers": [ + { + "url": "https://github.com/bazelbuild/buildtools/releases/download/v8.2.1/buildifier-darwin-arm64" + } + ] + }, + "linux-aarch64": { + "size": 7755744, + "hash": "blake3", + "digest": "9a35407ecf3d4aa0daf493a364c1a68a2a469f0f96c295f85ab29deb46265f16", + "path": "buildifier", + "providers": [ + { + "url": "https://github.com/bazelbuild/buildtools/releases/download/v8.2.1/buildifier-linux-arm64" + } + ] + }, + "linux-riscv64": { + "size": 7895176, + "hash": "blake3", + "digest": "90abd14b0eac635de23273588d2224624541cf6a45f548334252182a45a2f9a6", + "path": "buildifier", + "providers": [ + { + "url": "https://github.com/bazelbuild/buildtools/releases/download/v8.2.1/buildifier-linux-riscv64" + } + ] + }, + "macos-x86_64": { + "size": 7776816, + "hash": "blake3", + "digest": "f367d5e52779571e4da8363b5db2cb60b64c1c97d83ab53de3eaa60d598e8bf1", + "path": "buildifier", + "providers": [ + { + "url": "https://github.com/bazelbuild/buildtools/releases/download/v8.2.1/buildifier-darwin-amd64" + } + ] + }, + "windows-x86_64": { + "size": 8020480, + "hash": "blake3", + "digest": "1aa436cdcbda55ab32ac18bf76940c8bc83d548c424e9e7db660ee38fb9a91fc", + "path": "buildifier.exe", + "providers": [ + { + "url": "https://github.com/bazelbuild/buildtools/releases/download/v8.2.1/buildifier-windows-amd64.exe" + } + ] + }, + "linux-x86_64": { + "size": 7882884, + "hash": "blake3", + "digest": "0405f14324a9dae109a9fffb556b49adff9d6fd710f2648d2a6787e000f6b8f9", + "path": "buildifier", + "providers": [ + { + "url": "https://github.com/bazelbuild/buildtools/releases/download/v8.2.1/buildifier-linux-amd64" + } + ] + } + } +} + diff --git 
a/scripts/reindeer b/scripts/reindeer new file mode 100755 index 00000000000..73f8fe7d001 --- /dev/null +++ b/scripts/reindeer @@ -0,0 +1,67 @@ +#!/usr/bin/env dotslash + +{ + "name": "reindeer", + "platforms": { + "macos-aarch64": { + "size": 10333626, + "hash": "blake3", + "digest": "087035eba5af3addbbd79354b48ba8c3b5696d1d301ee68023b386e113a68b13", + "format": "zst", + "path": "reindeer-aarch64-apple-darwin", + "providers": [ + { + "url": "https://github.com/facebookincubator/reindeer/releases/download/v2025.11.24.00/reindeer-aarch64-apple-darwin.zst" + } + ] + }, + "linux-aarch64": { + "size": 12023922, + "hash": "blake3", + "digest": "1c41919ddadbdd2390b43a6d4a97267e079038fcb3c42db14868627b0b6eaa6d", + "format": "zst", + "path": "reindeer-aarch64-unknown-linux-musl", + "providers": [ + { + "url": "https://github.com/facebookincubator/reindeer/releases/download/v2025.11.24.00/reindeer-aarch64-unknown-linux-musl.zst" + } + ] + }, + "macos-x86_64": { + "size": 10444977, + "hash": "blake3", + "digest": "8242100bf3b07bd5665c07f42e082a2546b39a2d096c556c19db62d879604c3c", + "format": "zst", + "path": "reindeer-x86_64-apple-darwin", + "providers": [ + { + "url": "https://github.com/facebookincubator/reindeer/releases/download/v2025.11.24.00/reindeer-x86_64-apple-darwin.zst" + } + ] + }, + "windows-x86_64": { + "size": 9285431, + "hash": "blake3", + "digest": "c90e79a55f15b20021eb5ee6ddd4149cb1f6c013cfca4d7ab3902e94b1de0dd3", + "format": "zst", + "path": "reindeer-x86_64-pc-windows-msvc.exe", + "providers": [ + { + "url": "https://github.com/facebookincubator/reindeer/releases/download/v2025.11.24.00/reindeer-x86_64-pc-windows-msvc.exe.zst" + } + ] + }, + "linux-x86_64": { + "size": 12135295, + "hash": "blake3", + "digest": "ad5cdd028d244d47f6ba2368f9c8a9ab1c19451a046b917ce84b07acc7fee319", + "format": "zst", + "path": "reindeer-x86_64-unknown-linux-musl", + "providers": [ + { + "url": 
#!/usr/bin/env bash
set -euo pipefail

REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

# Print the command-line help text to stdout.
usage() {
  cat <<'EOF'
Usage: scripts/setup_buck2_local.sh [--build]

Sets up local-only Buck2 + Reindeer artifacts for codex-rs:
  - vendors crates into codex-rs/third-party/vendor/
  - generates codex-rs/third-party/BUCK via reindeer buckify
  - generates codex-rs/**/BUCK for workspace crates
  - generates a toolchain definition under codex-rs/toolchains/BUCK

All generated Buck artifacts are expected to be gitignored (BUCK files,
codex-rs/third-party, buck-out).

Options:
  --build         After setup, build //codex-rs/cli:codex
  -h, --help      Show this help and exit

Environment:
  REINDEER_BIN    Optional path to reindeer binary (default: scripts/reindeer)
  BUCK2_BIN       Optional path to buck2 binary (default: scripts/buck2)
  BUILDIFIER_BIN  Optional path to buildifier binary (default: scripts/buildifier)

If invoked from within Codex CLI, you may want:
  env -u BASH_EXEC_WRAPPER -u CODEX_ESCALATE_SOCKET buck2 build //codex-rs/cli:codex
EOF
}

# resolve_tool NAME
# Print the path of the pinned wrapper scripts/NAME when it is executable,
# otherwise the NAME found on PATH. Returns 1 (printing nothing) when neither
# is available. The wrappers are executed later, so the executable bit is
# required rather than mere existence.
resolve_tool() {
  local name="$1"
  if [[ -x "${REPO_ROOT}/scripts/${name}" ]]; then
    printf '%s\n' "${REPO_ROOT}/scripts/${name}"
  elif command -v "${name}" >/dev/null 2>&1; then
    command -v "${name}"
  else
    return 1
  fi
}

# Only the first argument is inspected.
DO_BUILD=0
if [[ "${1:-}" == "--help" || "${1:-}" == "-h" ]]; then
  usage
  exit 0
elif [[ "${1:-}" == "--build" ]]; then
  DO_BUILD=1
elif [[ -n "${1:-}" ]]; then
  echo "Unknown argument: $1" >&2
  usage >&2
  exit 2
fi

# reindeer and buck2 are mandatory; an explicit *_BIN override always wins.
REINDEER_BIN="${REINDEER_BIN:-}"
if [[ -z "${REINDEER_BIN}" ]]; then
  if ! REINDEER_BIN="$(resolve_tool reindeer)"; then
    echo "Could not find reindeer. Set REINDEER_BIN or use scripts/reindeer." >&2
    exit 1
  fi
fi

BUCK2_BIN="${BUCK2_BIN:-}"
if [[ -z "${BUCK2_BIN}" ]]; then
  if ! BUCK2_BIN="$(resolve_tool buck2)"; then
    echo "Could not find buck2. Set BUCK2_BIN or use scripts/buck2." >&2
    exit 1
  fi
fi

# buildifier is optional: an empty BUILDIFIER_BIN means "not available".
BUILDIFIER_BIN="${BUILDIFIER_BIN:-}"
if [[ -z "${BUILDIFIER_BIN}" ]]; then
  BUILDIFIER_BIN="$(resolve_tool buildifier)" || BUILDIFIER_BIN=""
fi

cd "${REPO_ROOT}"

# Resolve the Rust toolchain used by codex-rs so Buck uses the same compiler
# regardless of the working directory Buck actions run in.
RUST_TOOLCHAIN="$(
  python3 - <<'PY'
import pathlib, re
text = pathlib.Path("codex-rs/rust-toolchain.toml").read_text(encoding="utf-8")
m = re.search(r'(?m)^channel\s*=\s*"([^"]+)"\s*$', text)
if not m:
    raise SystemExit("Could not find toolchain channel in codex-rs/rust-toolchain.toml")
print(m.group(1))
PY
)"

if command -v rustup >/dev/null 2>&1; then
  RUSTC_PATH="$(rustup which rustc --toolchain "${RUST_TOOLCHAIN}")"
  RUSTDOC_PATH="$(rustup which rustdoc --toolchain "${RUST_TOOLCHAIN}")"
  # clippy is an optional component; fall back to whatever is on PATH.
  if rustup which clippy-driver --toolchain "${RUST_TOOLCHAIN}" >/dev/null 2>&1; then
    CLIPPY_DRIVER_PATH="$(rustup which clippy-driver --toolchain "${RUST_TOOLCHAIN}")"
  else
    CLIPPY_DRIVER_PATH="clippy-driver"
  fi
else
  echo "rustup not found; falling back to rustc/rustdoc from PATH (may not match codex-rs toolchain)." >&2
  RUSTC_PATH="rustc"
  RUSTDOC_PATH="rustdoc"
  CLIPPY_DRIVER_PATH="clippy-driver"
fi

# Reindeer canonicalizes third_party_dir up-front, so it must exist.
mkdir -p codex-rs/third-party
-e "codex-rs/cli/Cargo.lock" ]]; then + ( + cd codex-rs/cli + # Prefer a symlink (fast), but fall back to copying if symlinks are unsupported. + if ln -s ../Cargo.lock Cargo.lock 2>/dev/null; then + true + else + cp ../Cargo.lock Cargo.lock + fi + ) +fi + +# Ensure Buck can load bzl files from codex-rs/buck2 by creating an (ignored) +# BUCK file to define the package. +mkdir -p codex-rs/buck2 +if [[ ! -e "codex-rs/buck2/BUCK" ]]; then + : > codex-rs/buck2/BUCK +fi + +# Generate a minimal toolchain package under the toolchains cell. +mkdir -p codex-rs/toolchains +# This BUCK file is local-only (gitignored), so we bake in absolute tool paths. +cat > codex-rs/toolchains/BUCK </dev/null 2>&1; then + du -sh codex-rs/third-party 2>/dev/null || true + du -sh codex-rs/third-party/vendor 2>/dev/null || true +fi + +echo "" +echo "Next:" +echo " ${BUCK2_BIN} build //codex-rs/cli:codex" + +if [[ "${DO_BUILD}" -eq 1 ]]; then + echo "" + echo "Building //codex-rs/cli:codex ..." + # When running inside Codex CLI, these wrapper env vars can interfere with buck2. + env -u BASH_EXEC_WRAPPER -u CODEX_ESCALATE_SOCKET "${BUCK2_BIN}" build //codex-rs/cli:codex +fi