From 58df47578abd3ad8ff6fa867f6f62c75ac943527 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Tue, 26 May 2026 21:57:14 -0700 Subject: [PATCH 01/12] Add extract-to-cache build mode to copilot-sdk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Splits CLI provisioning into two modes selected by the existing `bundled-cli` cargo feature: - `bundled-cli` on (default, release): unchanged. build.rs downloads, extracts on first use via embeddedcli::install_at. - `bundled-cli` off (dev): build.rs downloads, SHA-verifies, and extracts the binary directly into /github-copilot-sdk/cli/ / (staging dir + atomic rename). Emits a build-time COPILOT_CLI_DEV_PATH env var that resolve.rs returns directly. Replaces the three-source version resolution chain in build.rs (COPILOT_CLI_VERSION env, bundled_cli_version.txt snapshot, lockfile fallback) with a two-source chain: - cli-version.txt at the crate root (published crates, vendored slots). - ../nodejs/package-lock.json (mono-repo contributor build) — matches the .NET _GetCopilotCliVersion MSBuild target and Go cmd/bundler conventions. Cache layout flips from /github-copilot-sdk-{version}/ to the shared /github-copilot-sdk/cli// so both build modes populate the same directory. Old per-user caches become orphaned (harmless; the next launch re-populates the new path). COPILOT_CLI_PATH env override still wins in both modes. Stale env override falls through to the next source with a warn! log. Drops: - rust/scripts/snapshot-bundled-cli-version.sh (replaced by an inline node one-liner in the publish workflow that writes cli-version.txt from nodejs/package-lock.json before cargo publish). - COPILOT_CLI_VERSION env override path. - bundled_cli_version.txt snapshot path + parse_snapshot. - cargo:rerun-if-env-changed=COPILOT_CLI_VERSION. Adds tests/cli_resolution_test.rs covering pin-file format, env override (both modes), stale env override fallthrough, and dev-mode extracted binary path. The existing rust-sdk-tests CI job runs with --no-default-features so dev-mode resolution is exercised on every PR; the bundle job exercises embed mode. Both jobs now share the BUNDLED_CLI_CACHE_DIR archive cache to avoid double-downloading. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/publish.yml | 12 +- .github/workflows/rust-sdk-tests.yml | 29 +- rust/.gitignore | 2 +- rust/Cargo.toml | 3 +- rust/README.md | 46 ++-- rust/build.rs | 269 +++++++++++++------ rust/scripts/snapshot-bundled-cli-version.sh | 67 ----- rust/src/embeddedcli.rs | 17 +- rust/src/lib.rs | 1 + rust/src/resolve.rs | 56 +++- rust/tests/cli_resolution_test.rs | 131 +++++++++ 11 files changed, 445 insertions(+), 188 deletions(-) delete mode 100644 rust/scripts/snapshot-bundled-cli-version.sh create mode 100644 rust/tests/cli_resolution_test.rs diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 0c192b50c..f42818103 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -167,12 +167,14 @@ jobs: workspaces: "rust" - name: Set version run: sed -i -E 's/^version = ".*"$/version = "${{ needs.version.outputs.version }}"/' Cargo.toml - - name: Snapshot CLI version + hashes for build.rs - run: bash scripts/snapshot-bundled-cli-version.sh - - name: Verify snapshot file exists + - name: Write cli-version.txt from package-lock.json run: | - if [[ ! -f bundled_cli_version.txt ]]; then - echo "::error::bundled_cli_version.txt was not generated. The Snapshot step must run before packaging." + node -e "console.log(require('../nodejs/package-lock.json').packages['node_modules/@github/copilot'].version)" > cli-version.txt + echo "Wrote cli-version.txt: $(cat cli-version.txt)" + - name: Verify cli-version.txt exists + run: | + if [[ ! -f cli-version.txt ]]; then + echo "::error::cli-version.txt was not generated. The previous step must run before packaging." exit 1 fi - name: Package (dry run) diff --git a/.github/workflows/rust-sdk-tests.yml b/.github/workflows/rust-sdk-tests.yml index 952294e13..23e686eb7 100644 --- a/.github/workflows/rust-sdk-tests.yml +++ b/.github/workflows/rust-sdk-tests.yml @@ -73,18 +73,38 @@ jobs: prefix-key: v1-rust-no-bin cache-bin: false + - name: Read pinned @github/copilot CLI version + id: cli-version + working-directory: ./nodejs + run: | + version=$(node -p "require('./package-lock.json').packages['node_modules/@github/copilot'].version") + echo "version=$version" >> "$GITHUB_OUTPUT" + echo "Pinned CLI version: $version" + + # Share the bundled-CLI archive cache with the `bundle` job: build.rs + # now downloads in both modes (embed for `bundle`, extract-to-cache + # for this `test` job's `--no-default-features` build). + - name: Cache bundled CLI tarball + uses: actions/cache@v4 + with: + path: ./rust/.bundled-cli-cache + key: bundled-cli-${{ matrix.os }}-${{ steps.cli-version.outputs.version }} + - name: cargo fmt --check (nightly) if: runner.os == 'Linux' run: cargo +nightly-2026-04-14 fmt --all -- --config-path .rustfmt.nightly.toml --check - name: cargo clippy if: runner.os == 'Linux' + env: + BUNDLED_CLI_CACHE_DIR: ${{ github.workspace }}/rust/.bundled-cli-cache run: cargo clippy --all-targets --features test-support -- --no-deps -D warnings -D clippy::unwrap_used -D clippy::disallowed_macros -D clippy::await_holding_invalid_type - name: cargo doc if: runner.os == 'Linux' env: RUSTDOCFLAGS: "-D warnings" + BUNDLED_CLI_CACHE_DIR: ${{ github.workspace }}/rust/.bundled-cli-cache run: cargo doc --no-deps --all-features - name: Install test harness dependencies @@ -101,9 +121,12 @@ jobs: RUST_E2E_CONCURRENCY: 4 COPILOT_HMAC_KEY: ${{ secrets.COPILOT_DEVELOPER_CLI_INTEGRATION_HMAC_KEY }} COPILOT_CLI_PATH: ${{ steps.setup-copilot.outputs.cli-path }} - # `--no-default-features` disables the bundled-cli download; the - # tests use the CLI provided by setup-copilot via COPILOT_CLI_PATH. - # The dedicated `bundle` job below exercises the bundling pipeline. + BUNDLED_CLI_CACHE_DIR: ${{ github.workspace }}/rust/.bundled-cli-cache + # `--no-default-features` selects dev mode: build.rs still downloads + # + verifies + extracts the CLI to the per-user cache, but doesn't + # embed it. Tests exec against the setup-copilot CLI via + # COPILOT_CLI_PATH (the env override wins over the dev cache). + # The dedicated `bundle` job below exercises the embed pipeline. run: cargo test --no-default-features --features test-support -- --test-threads=4 --nocapture # Validates the bundled-CLI build path on all three supported diff --git a/rust/.gitignore b/rust/.gitignore index 03bbce707..c4095ffc0 100644 --- a/rust/.gitignore +++ b/rust/.gitignore @@ -1,3 +1,3 @@ /target Cargo.lock.bak -bundled_cli_version.txt +cli-version.txt diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 1e02f267c..383ac3901 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -19,7 +19,7 @@ include = [ "Cargo.toml", "README.md", "LICENSE", - "bundled_cli_version.txt", + "cli-version.txt", ] [lib] @@ -69,6 +69,7 @@ tempfile = "3" tokio = { version = "1", features = ["rt-multi-thread"] } [build-dependencies] +dirs = "5" flate2 = "1" sha2 = "0.10" tar = "0.4" diff --git a/rust/README.md b/rust/README.md index 00e26dbaa..c77615f94 100644 --- a/rust/README.md +++ b/rust/README.md @@ -756,30 +756,39 @@ none of them are scheduled for removal. ## Embedded CLI -The SDK bundles the Copilot CLI binary inside the consumer's compiled crate by default. No env var setup, no separate install — just `cargo build` and you get a self-contained binary. +The SDK provisions the Copilot CLI binary at build time. By default the `bundled-cli` feature embeds the verified binary directly in your compiled crate, so end-user binaries are self-contained — no env var setup, no separate install, just `cargo build`. -To opt out (e.g. for binary-size-sensitive consumers, or environments that provide the CLI via PATH), set `default-features = false`: +For development you usually want the smaller dev build: ```toml github-copilot-sdk = { version = "0.1", default-features = false } ``` +This still pins, downloads, and SHA-verifies the same CLI version, but extracts the binary into a per-user cache instead of embedding it. The runtime resolver returns that cached path. The cache is shared across crates and across embed/dev builds. + ### How it works -1. **Pinned at publish time.** When the rust crate is published, a workflow step writes `bundled_cli_version.txt` (CLI version + per-platform SHA-256 hashes) into the crate from the in-effect `nodejs/package-lock.json` and the matching GitHub Release's `SHA256SUMS.txt`. This file is gitignored locally; it only exists in the published crate tarball. +1. **Version pin.** `build.rs` reads the CLI version from one of two sources: + - `cli-version.txt` at the crate root (present in published crate tarballs and vendored slots). + - Otherwise, `../nodejs/package-lock.json` (mono-repo contributor build — matches the .NET and Go SDK conventions in this repo). + +2. **Build time:** `build.rs` downloads the platform-appropriate archive from the [`github/copilot-cli` GitHub Releases](https://github.com/github/copilot-cli/releases) (`copilot-{platform}.tar.gz` on macOS/Linux, `.zip` on Windows), live-fetches the matching `SHA256SUMS.txt`, and verifies the archive hash. Then: + - **`bundled-cli` on (default, release):** embeds the raw archive bytes via `include_bytes!()`. Runtime extracts on first `Client::start()`. + - **`bundled-cli` off (dev):** extracts the binary directly into the platform cache (staging dir + atomic rename), idempotent across rebuilds. -2. **Build time:** The SDK's `build.rs` resolves the version + per-platform SHA-256: - - `COPILOT_CLI_VERSION` env var (advanced override; fetches live `SHA256SUMS.txt`). - - Otherwise, `bundled_cli_version.txt` from the published crate. - - Otherwise (mono-repo contributor build), live read from `../nodejs/package-lock.json` + live fetch of `SHA256SUMS.txt`. +3. **Runtime:** in both modes the binary lives at: - It then downloads the platform-appropriate archive from the [`github/copilot-cli` GitHub Releases](https://github.com/github/copilot-cli/releases) (`copilot-{platform}.tar.gz` on macOS/Linux, `.zip` on Windows), verifies the SHA-256, extracts the `copilot` binary, compresses it with zstd, and embeds via `include_bytes!()`. + | OS | Path | + |----|------| + | macOS | `~/Library/Caches/github-copilot-sdk/cli//copilot` | + | Linux | `${XDG_CACHE_HOME:-~/.cache}/github-copilot-sdk/cli//copilot` | + | Windows | `%LOCALAPPDATA%\github-copilot-sdk\cli\\copilot.exe` | -3. **Runtime:** On the first call to `github_copilot_sdk::Client::start()`, the embedded archive is lazily extracted to the platform cache dir (`%LOCALAPPDATA%\github-copilot-sdk-{version}\` on Windows, `~/Library/Caches/github-copilot-sdk-{version}/` on macOS, `$XDG_CACHE_HOME/github-copilot-sdk-{version}/` (or `~/.cache/...`) on Linux). Subsequent runs reuse the extracted binary. + Old version directories accumulate in siblings; clean them up at your leisure. ### Overriding the extraction location -Use [`ClientOptions::with_bundled_cli_extract_dir`] when you need to place the extracted binary somewhere other than the platform cache dir (CI runners with ephemeral homes, sandboxes that disallow cache paths, etc.): +[`ClientOptions::with_bundled_cli_extract_dir`] redirects embed-mode extraction to a custom directory (CI runners with ephemeral homes, sandboxes that disallow cache paths, etc.): ```rust,ignore use std::path::PathBuf; @@ -790,15 +799,22 @@ let options = ClientOptions::new() let client = Client::start(options).await?; ``` +In dev mode this option is a no-op — the binary's path is fixed at build time and there is no archive to re-extract. Set `COPILOT_CLI_PATH` instead if you need to point the runtime at a different binary. + ### Resolution priority -`copilot_binary()` checks these sources in order: +`Client::start` resolves the CLI in this order: + +1. Explicit `CliProgram::Path(path)` on `ClientOptions::program`. +2. `COPILOT_CLI_PATH` environment variable, if it points at a real file. +3. **`bundled-cli` on:** the embedded archive, lazily extracted on first call. +4. **`bundled-cli` off:** the build-time-extracted binary in the per-user cache. + +There is no PATH scanning. If none of the above resolves, `Client::start` returns `Error::BinaryNotFound`. -1. Explicit `CliProgram::Path(path)` on `ClientOptions::program` -2. `COPILOT_CLI_PATH` environment variable -3. Embedded CLI (when the `bundled-cli` feature is enabled, which it is by default) +### Download cache (build-time) -There is no PATH scanning. If both 1+2 are unset and the SDK was built with `default-features = false`, `Client::start` returns `Error::BinaryNotFound`. +`build.rs` re-downloads on every clean build by default. Set `BUNDLED_CLI_CACHE_DIR=` to cache the verified archive between builds (CI keys this on `-` for ~zero-cost rebuilds on cache hits). ### Platforms diff --git a/rust/build.rs b/rust/build.rs index c64dbff9b..ea4d80d0e 100644 --- a/rust/build.rs +++ b/rust/build.rs @@ -1,22 +1,14 @@ use std::io::Read; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::time::Duration; use sha2::Digest; fn main() { - println!("cargo:rerun-if-env-changed=COPILOT_CLI_VERSION"); println!("cargo:rerun-if-env-changed=BUNDLED_CLI_CACHE_DIR"); println!("cargo::rustc-check-cfg=cfg(has_bundled_cli)"); - - // The `bundled-cli` cargo feature gates bundling at the build-system level. - // When disabled (e.g. via `default-features = false`), runtime archive - // helpers (tar/flate2/zip) are not in the graph and no download happens. - if std::env::var_os("CARGO_FEATURE_BUNDLED_CLI").is_none() { - return; - } - - println!("cargo:rerun-if-changed=bundled_cli_version.txt"); + println!("cargo::rustc-check-cfg=cfg(has_dev_cli)"); + println!("cargo:rerun-if-changed=cli-version.txt"); println!("cargo:rerun-if-changed=../nodejs/package-lock.json"); let Some(platform) = target_platform() else { @@ -27,26 +19,30 @@ fn main() { let out_dir = std::env::var("OUT_DIR").expect("OUT_DIR is always set by cargo"); let out = Path::new(&out_dir); - // Resolve version + per-asset SHA-256 from one of three sources, in order: - // 1. `COPILOT_CLI_VERSION` env-var override (live SHA256SUMS.txt fetch) - // 2. `bundled_cli_version.txt` snapshot at the crate root (published-crate - // consumer; generated by the publish workflow) - // 3. Sibling `../nodejs/package-lock.json` (mono-repo contributor build; - // live SHA256SUMS.txt fetch) - let (version, expected_hash) = resolve_version_and_hash(platform.asset_name); + // Two-source version resolution: + // 1. `cli-version.txt` at the crate root (published-crate consumer, + // vendored-slot consumer). + // 2. Sibling `../nodejs/package-lock.json` (mono-repo contributor build, + // same convention as .NET's `_GetCopilotCliVersion` MSBuild target + // and the Go `cmd/bundler` tool). + // + // No env-var override and no committed per-asset SHA snapshot — the SHA + // is always fetched live against the version's `SHA256SUMS.txt`. + let version = read_version(); + let asset_name = platform.asset_name; + let expected_hash = fetch_live_sha256(&version, asset_name); let base_url = format!("https://github.com/github/copilot-cli/releases/download/v{version}"); let cache_dir = std::env::var("BUNDLED_CLI_CACHE_DIR") .ok() .map(std::path::PathBuf::from); - let asset_name = platform.asset_name; // Versioned cache key since copilot asset names don't include the version. let cache_key = format!("v{version}-{asset_name}"); - // Download the archive (or read from cache) and verify SHA-256. The raw - // archive is what gets embedded — extraction happens at runtime. Quiet on - // cache hit; logs `Downloading` + `Caching archive at` on cache miss. + // Download the archive (or read from cache) and verify SHA-256. Shared + // by both build modes — embed mode includes the bytes, dev mode extracts + // them into the per-user cache. let archive = cached_download( &format!("{base_url}/{asset_name}"), &cache_key, @@ -54,12 +50,32 @@ fn main() { &cache_dir, ); - // Sanity check: the runtime extraction path expects `binary_name` inside - // the archive. Fail the build now (with a clear message) rather than - // shipping a broken bundle if the upstream archive layout ever changes. + // Sanity check: the extraction path expects `binary_name` inside the + // archive. Fail the build now (with a clear message) rather than + // shipping a broken bundle / cache if the upstream archive layout ever + // changes. verify_binary_present_in_archive(&archive, platform.binary_name, asset_name); - std::fs::write(out.join("copilot_cli.archive"), &archive) + if std::env::var_os("CARGO_FEATURE_BUNDLED_CLI").is_some() { + emit_embedded(out, &archive, &version, platform.binary_name); + println!("cargo:rustc-cfg=has_bundled_cli"); + } else { + let dev_path = extract_to_cache(&archive, &version, platform); + // Path is set as a build-time env var so `env!()` in resolve.rs can + // read it without runtime fs probing. The cargo cfg makes the + // dev-mode branch in resolve.rs discoverable. + println!( + "cargo:rustc-env=COPILOT_CLI_DEV_PATH={}", + dev_path.display() + ); + println!("cargo:rustc-cfg=has_dev_cli"); + } +} + +/// Emit the `bundled_cli.rs` glue + `copilot_cli.archive` blob into `OUT_DIR` +/// for embed mode (`bundled-cli` cargo feature on). +fn emit_embedded(out: &Path, archive: &[u8], version: &str, binary_name: &str) { + std::fs::write(out.join("copilot_cli.archive"), archive) .expect("failed to write copilot_cli.archive"); let generated = format!( @@ -68,83 +84,55 @@ pub(super) static CLI_ARCHIVE: &[u8] = include_bytes!("copilot_cli.archive"); pub(super) static CLI_VERSION: &str = "{version}"; pub(super) static CLI_BINARY_NAME: &str = "{binary_name}"; "#, - binary_name = platform.binary_name, ); std::fs::write(out.join("bundled_cli.rs"), generated).expect("failed to write bundled_cli.rs"); - - println!("cargo:rustc-cfg=has_bundled_cli"); } -/// Resolve the CLI version and the expected SHA-256 hash for the current -/// target's archive. Picks one of three sources in order. Panics with a clear -/// error if none are available. -fn resolve_version_and_hash(asset_name: &str) -> (String, String) { - // 1. Env-var override — fetches live SHA256SUMS for the overridden version. - if let Ok(version) = std::env::var("COPILOT_CLI_VERSION") { - let hash = fetch_live_sha256(&version, asset_name); - return (version, hash); - } - - // 2. Snapshot file at the crate root (published-crate consumer). +/// Resolve the CLI version from one of two sources. Panics with a clear +/// error if neither is available. +fn read_version() -> String { let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR is set"); - let snapshot = Path::new(&manifest_dir).join("bundled_cli_version.txt"); - if snapshot.is_file() { - let contents = std::fs::read_to_string(&snapshot) - .unwrap_or_else(|e| panic!("failed to read {}: {e}", snapshot.display())); - return parse_snapshot(&contents, asset_name) - .unwrap_or_else(|e| panic!("invalid {}: {e}", snapshot.display())); + + // 1. Pin file at the crate root (published crates / vendored slots). + let pin = Path::new(&manifest_dir).join("cli-version.txt"); + if pin.is_file() { + let contents = std::fs::read_to_string(&pin) + .unwrap_or_else(|e| panic!("failed to read {}: {e}", pin.display())); + let version = contents.trim(); + if version.is_empty() { + panic!("{} is empty", pin.display()); + } + if version.contains(char::is_whitespace) { + panic!( + "{} contains whitespace; expected a single version line (e.g. `1.0.55-0`)", + pin.display() + ); + } + return version.to_string(); } - // 3. Mono-repo lockfile — read version, fetch live SHA256SUMS. + // 2. Mono-repo lockfile fallback. let lockfile = Path::new(&manifest_dir) .join("..") .join("nodejs") .join("package-lock.json"); if lockfile.is_file() { - let version = read_version_from_package_lock(&lockfile); - let hash = fetch_live_sha256(&version, asset_name); - return (version, hash); + return read_version_from_package_lock(&lockfile); } panic!( - "Could not resolve the Copilot CLI version to bundle.\n\ + "Could not resolve the Copilot CLI version.\n\ Tried:\n\ - - COPILOT_CLI_VERSION env var (unset)\n\ - {} (missing)\n\ - {} (missing)\n\ - To opt out of bundling, set `default-features = false` on the github-copilot-sdk dependency.", - snapshot.display(), + In a published crate or vendored slot, `cli-version.txt` should be present.\n\ + In the github/copilot-sdk mono-repo, `../nodejs/package-lock.json` is the source.", + pin.display(), lockfile.display(), ); } -/// Parse the `bundled_cli_version.txt` snapshot file. Format is one -/// `key=value` per line. The first line is `version=X.Y.Z`; subsequent lines -/// map asset filename to hex SHA-256. Blank lines and lines starting with `#` -/// are skipped. -fn parse_snapshot(contents: &str, asset_name: &str) -> Result<(String, String), String> { - let mut version: Option = None; - let mut hash: Option = None; - for (line_no, raw) in contents.lines().enumerate() { - let line = raw.trim(); - if line.is_empty() || line.starts_with('#') { - continue; - } - let (key, value) = line - .split_once('=') - .ok_or_else(|| format!("line {}: expected `key=value`, got `{raw}`", line_no + 1))?; - match key.trim() { - "version" => version = Some(value.trim().to_string()), - k if k == asset_name => hash = Some(value.trim().to_string()), - _ => {} - } - } - let version = version.ok_or("missing `version=` line")?; - let hash = hash.ok_or_else(|| format!("missing hash for asset `{asset_name}`"))?; - Ok((version, hash)) -} - /// Read the `@github/copilot` version from `nodejs/package-lock.json`. fn read_version_from_package_lock(path: &Path) -> String { let contents = std::fs::read_to_string(path) @@ -180,6 +168,7 @@ fn fetch_live_sha256(version: &str, asset_name: &str) -> String { find_sha256_for_asset(checksums_text, asset_name) } +#[derive(Clone, Copy)] struct Platform { asset_name: &'static str, binary_name: &'static str, @@ -218,6 +207,126 @@ fn target_platform() -> Option { } } +/// Dev-mode extraction: write the single binary entry from `archive` to +/// `/github-copilot-sdk/cli//` (staging +/// dir + atomic rename). Idempotent — returns the existing path if a previous +/// build already populated the target. Mirrors the cache layout +/// `embeddedcli::default_install_dir` uses at runtime so the two modes are +/// interchangeable. +fn extract_to_cache(archive: &[u8], version: &str, platform: Platform) -> PathBuf { + let cache_root = dirs::cache_dir().unwrap_or_else(std::env::temp_dir); + let install_dir = cache_root + .join("github-copilot-sdk") + .join("cli") + .join(version); + let final_path = install_dir.join(platform.binary_name); + + if final_path.is_file() { + return final_path; + } + + let parent = install_dir + .parent() + .expect("install_dir always has a parent"); + std::fs::create_dir_all(parent) + .unwrap_or_else(|e| panic!("failed to create cache parent {}: {e}", parent.display())); + + // Staging dir is a sibling of the final per-version dir so the atomic + // rename stays on the same filesystem. PID + nanos disambiguate parallel + // builds racing on the same cache. + let nanos = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_nanos()) + .unwrap_or(0); + let staging = parent.join(format!( + ".staging-{version}-{pid}-{nanos}", + pid = std::process::id(), + )); + std::fs::create_dir_all(&staging) + .unwrap_or_else(|e| panic!("failed to create staging dir {}: {e}", staging.display())); + + let bytes = extract_binary_bytes(archive, platform); + let staging_binary = staging.join(platform.binary_name); + std::fs::write(&staging_binary, &bytes) + .unwrap_or_else(|e| panic!("failed to write {}: {e}", staging_binary.display())); + + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + std::fs::set_permissions(&staging_binary, std::fs::Permissions::from_mode(0o755)) + .unwrap_or_else(|e| panic!("failed to chmod {}: {e}", staging_binary.display())); + } + + match std::fs::rename(&staging, &install_dir) { + Ok(()) => {} + Err(_) if final_path.is_file() => { + // Another concurrent build won the race; their bytes are the same + // (verified SHA), so just clean up our staging copy. + let _ = std::fs::remove_dir_all(&staging); + } + Err(e) => { + let _ = std::fs::remove_dir_all(&staging); + panic!( + "failed to rename {} -> {}: {e}", + staging.display(), + install_dir.display() + ); + } + } + + final_path +} + +/// Extract the single `binary_name` entry from the release archive. Reused +/// between embed mode's `verify_binary_present_in_archive` and dev mode's +/// `extract_to_cache`. Panics if the entry isn't found — callers have +/// already invoked `verify_binary_present_in_archive`. +fn extract_binary_bytes(archive: &[u8], platform: Platform) -> Vec { + if platform.asset_name.ends_with(".zip") { + let cursor = std::io::Cursor::new(archive); + let mut zip = zip::ZipArchive::new(cursor) + .unwrap_or_else(|e| panic!("failed to open zip archive: {e}")); + for i in 0..zip.len() { + let mut entry = zip + .by_index(i) + .unwrap_or_else(|e| panic!("failed to read zip entry {i}: {e}")); + let name = entry.name().to_string(); + if name == platform.binary_name || name.ends_with(&format!("/{}", platform.binary_name)) + { + let mut bytes = Vec::with_capacity(entry.size() as usize); + std::io::copy(&mut entry, &mut bytes) + .unwrap_or_else(|e| panic!("failed to read zip entry bytes: {e}")); + return bytes; + } + } + } else { + let gz = flate2::read::GzDecoder::new(archive); + let mut tar = tar::Archive::new(gz); + for entry in tar + .entries() + .unwrap_or_else(|e| panic!("failed to read tar entries: {e}")) + { + let mut entry = entry.unwrap_or_else(|e| panic!("failed to read tar entry: {e}")); + let path = entry + .path() + .unwrap_or_else(|e| panic!("failed to read tar entry path: {e}")); + let name = path.to_string_lossy().into_owned(); + if name == platform.binary_name || name.ends_with(&format!("/{}", platform.binary_name)) + { + let mut bytes = Vec::with_capacity(entry.size() as usize); + entry + .read_to_end(&mut bytes) + .unwrap_or_else(|e| panic!("failed to read tar entry bytes: {e}")); + return bytes; + } + } + } + panic!( + "binary `{}` not found in archive `{}` (extract_to_cache)", + platform.binary_name, platform.asset_name + ); +} + /// Read a file from the download cache, or download it (with retries) and save /// to cache. Verifies SHA-256 on every path. Evicts stale/corrupt cache entries /// automatically. Cache I/O failures are treated as cache misses — they never diff --git a/rust/scripts/snapshot-bundled-cli-version.sh b/rust/scripts/snapshot-bundled-cli-version.sh deleted file mode 100644 index b1eb1a2a1..000000000 --- a/rust/scripts/snapshot-bundled-cli-version.sh +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env bash -# -# Snapshot the Copilot CLI version + per-platform SHA-256 hashes for the -# rust crate's bundled-CLI build.rs. Runs at SDK publish time, mirroring -# how .NET's _GenerateVersionProps BeforeTargets="Pack" target writes -# GitHub.Copilot.SDK.props before NuGet packing. -# -# Inputs: -# - ../nodejs/package-lock.json (sibling) - source of the pinned version. -# - https://github.com/github/copilot-cli/releases/v{version}/SHA256SUMS.txt - -# authoritative per-platform hashes. -# -# Output: -# - bundled_cli_version.txt (in the rust crate root). Gitignored. - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -RUST_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)" -REPO_ROOT="$(cd "${RUST_DIR}/.." && pwd)" -LOCKFILE="${REPO_ROOT}/nodejs/package-lock.json" -OUTPUT="${RUST_DIR}/bundled_cli_version.txt" - -if [[ ! -f "${LOCKFILE}" ]]; then - echo "error: ${LOCKFILE} not found" >&2 - exit 1 -fi - -VERSION="$(node -e "console.log(require('${LOCKFILE}').packages['node_modules/@github/copilot'].version)")" -if [[ -z "${VERSION}" ]]; then - echo "error: could not read @github/copilot version from ${LOCKFILE}" >&2 - exit 1 -fi - -CHECKSUMS_URL="https://github.com/github/copilot-cli/releases/download/v${VERSION}/SHA256SUMS.txt" -echo "Fetching ${CHECKSUMS_URL}" -SHA256SUMS="$(curl -fsSL --retry 3 --retry-delay 2 "${CHECKSUMS_URL}")" - -ASSETS=( - "copilot-darwin-arm64.tar.gz" - "copilot-darwin-x64.tar.gz" - "copilot-linux-arm64.tar.gz" - "copilot-linux-x64.tar.gz" - "copilot-win32-arm64.zip" - "copilot-win32-x64.zip" -) - -declare -A HASHES -for asset in "${ASSETS[@]}"; do - hash="$(printf '%s\n' "${SHA256SUMS}" | awk -v a="${asset}" '$2 == a { print $1 }')" - if [[ -z "${hash}" ]]; then - echo "error: SHA256SUMS.txt missing entry for ${asset}" >&2 - exit 1 - fi - HASHES[$asset]="${hash}" -done - -{ - echo "# Auto-generated by rust/scripts/snapshot-bundled-cli-version.sh" - echo "# Do not edit. Regenerated by the publish workflow on every release." - echo "version=${VERSION}" - for asset in "${ASSETS[@]}"; do - echo "${asset}=${HASHES[$asset]}" - done -} > "${OUTPUT}" - -echo "Wrote ${OUTPUT} (version=${VERSION}, ${#ASSETS[@]} hashes)" \ No newline at end of file diff --git a/rust/src/embeddedcli.rs b/rust/src/embeddedcli.rs index e1eb147dc..97f31e74d 100644 --- a/rust/src/embeddedcli.rs +++ b/rust/src/embeddedcli.rs @@ -4,7 +4,8 @@ //! //! build.rs downloads the platform's `copilot-{platform}.{tar.gz,zip}` //! archive from GitHub Releases, SHA-256 verifies it against the version -//! pinned in `bundled_cli_version.txt`, and embeds the **raw archive bytes** +//! pinned in `cli-version.txt` (or `../nodejs/package-lock.json` in the +//! mono-repo), and embeds the **raw archive bytes** //! into the consumer's compiled artifact via `include_bytes!()`. Extraction //! to a real on-disk path is deferred until the first call to //! [`path`] / [`install_at`] — at which point the bytes are part of the @@ -31,14 +32,15 @@ mod build_time { include!(concat!(env!("OUT_DIR"), "/bundled_cli.rs")); } +#[cfg(feature = "bundled-cli")] static INSTALLED_PATH: OnceLock> = OnceLock::new(); /// Returns the path to the installed CLI binary, lazily extracting the /// embedded archive on first call. /// /// On first call this extracts the embedded archive to -/// `/github-copilot-sdk-{version}/copilot[.exe]` and -/// returns the resulting path. The cache dir comes from +/// `/github-copilot-sdk/cli//copilot[.exe]` +/// and returns the resulting path. The cache dir comes from /// [`dirs::cache_dir()`] — `%LOCALAPPDATA%` on Windows, /// `~/Library/Caches/` on macOS, `$XDG_CACHE_HOME` (or `~/.cache/`) on /// Linux. Subsequent calls return the cached result. The extraction @@ -47,6 +49,7 @@ static INSTALLED_PATH: OnceLock> = OnceLock::new(); /// trusted mean no further hashing is needed. /// /// Returns `None` if no CLI was embedded at build time. +#[cfg(feature = "bundled-cli")] pub(crate) fn path() -> Option { INSTALLED_PATH .get_or_init(|| { @@ -69,11 +72,12 @@ pub(crate) fn path() -> Option { } /// Install the embedded CLI binary into the given directory instead of the -/// default `/github-copilot-sdk-{version}/` location +/// default `/github-copilot-sdk/cli//` location /// (see [`path`] for the per-platform mapping). /// /// Idempotent: skips extraction if the target binary already exists. /// Returns `None` when the SDK was built without a bundled CLI. +#[cfg(feature = "bundled-cli")] #[allow(dead_code)] // Used by resolve.rs when ClientOptions::bundled_cli_extract_dir is set. pub(crate) fn install_at(extract_dir: &Path) -> Option { #[cfg(has_bundled_cli)] @@ -98,10 +102,11 @@ pub(crate) fn install_at(extract_dir: &Path) -> Option { #[cfg(has_bundled_cli)] fn default_install_dir(version: &str) -> PathBuf { let cache = dirs::cache_dir().unwrap_or_else(std::env::temp_dir); + let root = cache.join("github-copilot-sdk").join("cli"); if version.is_empty() { - cache.join("github-copilot-sdk") + root.join("unversioned") } else { - cache.join(format!("github-copilot-sdk-{}", sanitize_version(version))) + root.join(sanitize_version(version)) } } diff --git a/rust/src/lib.rs b/rust/src/lib.rs index cad6ee629..ee8606740 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -7,6 +7,7 @@ pub mod canvas; mod canvas_dispatch; /// Bundled CLI binary extraction and caching. +#[cfg(feature = "bundled-cli")] pub(crate) mod embeddedcli; /// Event handler traits for session lifecycle. pub mod handler; diff --git a/rust/src/resolve.rs b/rust/src/resolve.rs index 7a1b29a04..c4346d9bc 100644 --- a/rust/src/resolve.rs +++ b/rust/src/resolve.rs @@ -7,10 +7,11 @@ //! 2. The `COPILOT_CLI_PATH` environment variable. //! 3. The bundled CLI embedded in this crate at build time (gated on the //! default `bundled-cli` cargo feature). +//! 4. The build-time-extracted CLI in the per-user cache (when +//! `bundled-cli` is disabled, i.e. dev builds). //! //! There is no PATH scanning and no walking of standard install locations. -//! If you've opted out of bundling (via `default-features = false`) and -//! neither `CliProgram::Path` nor `COPILOT_CLI_PATH` is set, +//! If none of the above resolves to a real file, //! [`Client::start`](crate::Client::start) returns //! [`Error::BinaryNotFound`](crate::Error::BinaryNotFound). @@ -24,7 +25,9 @@ use crate::Error; /// Resolve the CLI binary, optionally overriding the directory the bundled /// CLI is extracted to. Called by `Client::start` to thread /// `ClientOptions::bundled_cli_extract_dir` through to -/// `embeddedcli::install_at`. +/// `embeddedcli::install_at`. `extract_dir` only affects embed mode — in +/// dev mode the binary path is baked in at build time and `extract_dir` +/// is ignored (there's no archive to re-extract). pub(crate) fn copilot_binary_with_extract_dir( extract_dir: Option<&Path>, ) -> Result { @@ -35,16 +38,27 @@ pub(crate) fn copilot_binary_with_extract_dir( } warn!( path = %candidate.display(), - "COPILOT_CLI_PATH is set but does not point to a file; falling back to bundled CLI" + "COPILOT_CLI_PATH is set but does not point to a file; falling back" ); } - let bundled = match extract_dir { - Some(dir) => crate::embeddedcli::install_at(dir), - None => crate::embeddedcli::path(), - }; - if let Some(path) = bundled { - return Ok(path); + #[cfg(feature = "bundled-cli")] + { + let bundled = match extract_dir { + Some(dir) => crate::embeddedcli::install_at(dir), + None => crate::embeddedcli::path(), + }; + if let Some(path) = bundled { + return Ok(path); + } + } + + #[cfg(not(feature = "bundled-cli"))] + { + let _ = extract_dir; + if let Some(path) = dev_cli_path() { + return Ok(path); + } } Err(Error::BinaryNotFound { @@ -55,3 +69,25 @@ pub(crate) fn copilot_binary_with_extract_dir( `CliProgram::Path(...)` on `ClientOptions::program`.", }) } + +/// Path to the CLI extracted into the per-user cache by `build.rs` when +/// `bundled-cli` is disabled. Returns `None` if the cached file is missing +/// (e.g. the user deleted the cache after building). +#[cfg(not(feature = "bundled-cli"))] +fn dev_cli_path() -> Option { + // `has_dev_cli` is emitted by build.rs only when it successfully extracted + // the CLI for a supported target. On unsupported targets (where + // target_platform() returns None) the cfg is absent. + #[cfg(has_dev_cli)] + { + let path = PathBuf::from(env!("COPILOT_CLI_DEV_PATH")); + if path.is_file() { + return Some(path); + } + warn!( + path = %path.display(), + "build-time-extracted CLI is missing from cache; rebuild the crate to re-extract" + ); + } + None +} diff --git a/rust/tests/cli_resolution_test.rs b/rust/tests/cli_resolution_test.rs new file mode 100644 index 000000000..a839ffda9 --- /dev/null +++ b/rust/tests/cli_resolution_test.rs @@ -0,0 +1,131 @@ +//! Tests for the build-time and runtime CLI provisioning path. +//! +//! Covers the `COPILOT_CLI_PATH` env override, the dev-mode (no +//! `bundled-cli`) build-time extracted binary, and the embed-mode lazy +//! extraction. Mutating `COPILOT_CLI_PATH` is process-global, so all such +//! tests use `serial_test` to avoid races with each other (and with the +//! e2e tests which also read it). + +use std::path::PathBuf; + +use github_copilot_sdk::{CliProgram, Client, ClientOptions}; +use serial_test::serial; + +fn unset_env(key: &str) { + // SAFETY: tests are serialized with #[serial], so no other thread can + // observe the env mid-write. This is the standard pattern for testing + // env-driven behavior in this crate. + unsafe { std::env::remove_var(key) }; +} + +fn set_env(key: &str, value: &str) { + unsafe { std::env::set_var(key, value) }; +} + +/// COPILOT_CLI_PATH wins when it points at a real file, regardless of +/// build mode. +#[tokio::test] +#[serial(copilot_cli_path)] +async fn env_override_resolves_to_pointed_file() { + let tmp = tempfile::NamedTempFile::new().expect("create tempfile"); + // Make the temp file executable on POSIX so resolve doesn't reject it; + // resolve only checks `is_file()`, but downstream `Client::start` + // wants to exec the binary. We only need the resolver to return the + // path here, so a `--version`-like wrapper isn't required. + let path = tmp.path().to_path_buf(); + + set_env( + "COPILOT_CLI_PATH", + path.to_str().expect("utf-8 tempfile path"), + ); + let opts = ClientOptions::default().with_program(CliProgram::Resolve); + + // `Client::start` reads the env var via resolve.rs. We don't want to + // actually launch a subprocess against our empty temp file, so go + // through the public API just far enough to observe the resolution. + // The easiest observable behavior is that `Client::start` doesn't + // return `Error::BinaryNotFound` — it'll fail later trying to exec + // the empty file, which we tolerate. + let result = Client::start(opts).await; + unset_env("COPILOT_CLI_PATH"); + + match result { + Ok(_) => {} + Err(e) => { + let msg = format!("{e}"); + assert!( + !msg.contains("not found"), + "expected COPILOT_CLI_PATH to win; got {msg}" + ); + } + } + + // Drop tmp explicitly so the file outlives the assertions above. + drop(tmp); + let _ = path; +} + +/// A stale (non-existent) COPILOT_CLI_PATH falls through to the next +/// resolution source (embed or dev) rather than failing outright. +#[tokio::test] +#[serial(copilot_cli_path)] +async fn stale_env_override_falls_through() { + set_env("COPILOT_CLI_PATH", "/definitely/does/not/exist/copilot"); + let opts = ClientOptions::default().with_program(CliProgram::Resolve); + let result = Client::start(opts).await; + unset_env("COPILOT_CLI_PATH"); + + // In a normally-configured build (either bundled-cli or dev mode) the + // resolver should find a binary via the next source. Failing here + // would mean fallthrough is broken. + if let Err(e) = &result { + let msg = format!("{e}"); + assert!( + !msg.contains("BinaryNotFound") && !msg.contains("not bundled"), + "stale COPILOT_CLI_PATH should fall through to bundled/dev CLI; got {msg}" + ); + } +} + +/// In dev mode (no `bundled-cli` feature) build.rs writes the extracted +/// binary into the per-user cache and emits its path as +/// `COPILOT_CLI_DEV_PATH`. The runtime resolver returns that path. +#[cfg(not(feature = "bundled-cli"))] +#[test] +fn dev_mode_extracted_binary_exists() { + let path = PathBuf::from(env!("COPILOT_CLI_DEV_PATH")); + assert!( + path.is_file(), + "expected build.rs to extract the CLI to {} (dev mode)", + path.display() + ); + + // Confirm the cache layout matches what runtime resolution expects. + let display = path.display().to_string(); + assert!( + display.contains("github-copilot-sdk") && display.contains("/cli/"), + "dev path {} does not match expected `github-copilot-sdk/cli//` layout", + display + ); +} + +/// Build-time version pin: `cli-version.txt` (when present) must be a +/// single-line non-empty exact version. When absent, build.rs falls +/// through to `../nodejs/package-lock.json` — both are accepted, this +/// test only checks the pin file's format if it's there. +#[test] +fn pin_file_when_present_is_well_formed() { + let manifest_dir = env!("CARGO_MANIFEST_DIR"); + let pin = PathBuf::from(manifest_dir).join("cli-version.txt"); + if !pin.is_file() { + // Mono-repo build path — no assertion needed. + return; + } + let contents = std::fs::read_to_string(&pin).expect("read cli-version.txt"); + let trimmed = contents.trim(); + assert!(!trimmed.is_empty(), "cli-version.txt is empty"); + assert!( + !trimmed.contains(char::is_whitespace), + "cli-version.txt should be a single version line, got {trimmed:?}" + ); +} From 8fbe8e54aea31a4e4cb0d2f8f312d8a387a84735 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Tue, 26 May 2026 22:00:45 -0700 Subject: [PATCH 02/12] Drop 'mono-repo' jargon in favor of 'github/copilot-sdk repo' Term isn't established in this repo's prose. The fallback path that reads ../nodejs/package-lock.json fires for contributor builds inside github/copilot-sdk itself; spelling it out is clearer than introducing a new term. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- rust/README.md | 2 +- rust/build.rs | 11 ++++++----- rust/src/embeddedcli.rs | 5 +++-- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/rust/README.md b/rust/README.md index c77615f94..813d42418 100644 --- a/rust/README.md +++ b/rust/README.md @@ -770,7 +770,7 @@ This still pins, downloads, and SHA-verifies the same CLI version, but extracts 1. **Version pin.** `build.rs` reads the CLI version from one of two sources: - `cli-version.txt` at the crate root (present in published crate tarballs and vendored slots). - - Otherwise, `../nodejs/package-lock.json` (mono-repo contributor build — matches the .NET and Go SDK conventions in this repo). + - Otherwise, `../nodejs/package-lock.json` (contributor build inside the github/copilot-sdk repo — matches the .NET and Go SDK conventions here). 2. **Build time:** `build.rs` downloads the platform-appropriate archive from the [`github/copilot-cli` GitHub Releases](https://github.com/github/copilot-cli/releases) (`copilot-{platform}.tar.gz` on macOS/Linux, `.zip` on Windows), live-fetches the matching `SHA256SUMS.txt`, and verifies the archive hash. Then: - **`bundled-cli` on (default, release):** embeds the raw archive bytes via `include_bytes!()`. Runtime extracts on first `Client::start()`. diff --git a/rust/build.rs b/rust/build.rs index ea4d80d0e..782135101 100644 --- a/rust/build.rs +++ b/rust/build.rs @@ -22,9 +22,10 @@ fn main() { // Two-source version resolution: // 1. `cli-version.txt` at the crate root (published-crate consumer, // vendored-slot consumer). - // 2. Sibling `../nodejs/package-lock.json` (mono-repo contributor build, - // same convention as .NET's `_GetCopilotCliVersion` MSBuild target - // and the Go `cmd/bundler` tool). + // 2. Sibling `../nodejs/package-lock.json` (contributor build inside + // the github/copilot-sdk repo — same convention as .NET's + // `_GetCopilotCliVersion` MSBuild target and the Go `cmd/bundler` + // tool). // // No env-var override and no committed per-asset SHA snapshot — the SHA // is always fetched live against the version's `SHA256SUMS.txt`. @@ -112,7 +113,7 @@ fn read_version() -> String { return version.to_string(); } - // 2. Mono-repo lockfile fallback. + // 2. Lockfile fallback (contributor build inside github/copilot-sdk). let lockfile = Path::new(&manifest_dir) .join("..") .join("nodejs") @@ -127,7 +128,7 @@ fn read_version() -> String { - {} (missing)\n\ - {} (missing)\n\ In a published crate or vendored slot, `cli-version.txt` should be present.\n\ - In the github/copilot-sdk mono-repo, `../nodejs/package-lock.json` is the source.", + Inside the github/copilot-sdk repo, `../nodejs/package-lock.json` is the source.", pin.display(), lockfile.display(), ); diff --git a/rust/src/embeddedcli.rs b/rust/src/embeddedcli.rs index 97f31e74d..fe571616e 100644 --- a/rust/src/embeddedcli.rs +++ b/rust/src/embeddedcli.rs @@ -4,8 +4,9 @@ //! //! build.rs downloads the platform's `copilot-{platform}.{tar.gz,zip}` //! archive from GitHub Releases, SHA-256 verifies it against the version -//! pinned in `cli-version.txt` (or `../nodejs/package-lock.json` in the -//! mono-repo), and embeds the **raw archive bytes** +//! pinned in `cli-version.txt` (or `../nodejs/package-lock.json` when +//! building inside the github/copilot-sdk repo itself), and embeds the +//! **raw archive bytes** //! into the consumer's compiled artifact via `include_bytes!()`. Extraction //! to a real on-disk path is deferred until the first call to //! [`path`] / [`install_at`] — at which point the bytes are part of the From 11db905d85b27ac53d565087ae6512961fa6f7bc Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Tue, 26 May 2026 22:08:29 -0700 Subject: [PATCH 03/12] Restore per-platform SHA snapshot to keep publish-time trust boundary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without a committed SHA snapshot, build.rs would fetch both `SHA256SUMS.txt` and the archive from the same release URL on every consumer build. An attacker who later compromises a release tag could swap both files together and the hash check would pass against their own sum — the verification would only protect against transit corruption, not against upstream tampering. Restore the snapshot model: - New `rust/scripts/write-cli-shas.sh` writes `cli-shas.txt` with one `=` line per supported platform, fetched from the release matching `cli-version.txt`. Gitignored locally. - `build.rs::read_expected_sha` prefers the snapshot when present; falls back to live `SHA256SUMS.txt` only when absent (contributor builds inside github/copilot-sdk). - Publish workflow runs `write-cli-shas.sh` after writing cli-version.txt and before `cargo publish`, so the published crate carries hashes captured at publish time. Same flow as the old snapshot-bundled-cli-version.sh, just split across one-line pin file + sibling SHAs file. - `Cargo.toml` includes `cli-shas.txt` in the crate package. Verified locally: - Snapshot path: build with both `cli-version.txt` + `cli-shas.txt` present — skips the live SHA fetch, downloads + verifies against the committed hash. - Tamper detection: corrupting a hash in `cli-shas.txt` causes the build to panic with the existing "could indicate a supply-chain attack" message. - Fallback path: with neither pin file present, build.rs reads version from `../nodejs/package-lock.json` and fetches SHA256SUMS.txt live — unchanged from the pre-snapshot contributor flow. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/publish.yml | 14 +++++--- rust/.gitignore | 1 + rust/Cargo.toml | 1 + rust/build.rs | 58 ++++++++++++++++++++++++++++-- rust/scripts/write-cli-shas.sh | 65 ++++++++++++++++++++++++++++++++++ 5 files changed, 131 insertions(+), 8 deletions(-) create mode 100755 rust/scripts/write-cli-shas.sh diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index f42818103..43b3143eb 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -171,12 +171,16 @@ jobs: run: | node -e "console.log(require('../nodejs/package-lock.json').packages['node_modules/@github/copilot'].version)" > cli-version.txt echo "Wrote cli-version.txt: $(cat cli-version.txt)" - - name: Verify cli-version.txt exists + - name: Snapshot per-platform SHAs for build.rs + run: bash scripts/write-cli-shas.sh + - name: Verify pin files exist run: | - if [[ ! -f cli-version.txt ]]; then - echo "::error::cli-version.txt was not generated. The previous step must run before packaging." - exit 1 - fi + for f in cli-version.txt cli-shas.txt; do + if [[ ! -f "$f" ]]; then + echo "::error::$f was not generated. Previous steps must run before packaging." + exit 1 + fi + done - name: Package (dry run) run: cargo publish --dry-run --allow-dirty - name: Upload artifact diff --git a/rust/.gitignore b/rust/.gitignore index c4095ffc0..79afedd4c 100644 --- a/rust/.gitignore +++ b/rust/.gitignore @@ -1,3 +1,4 @@ /target Cargo.lock.bak cli-version.txt +cli-shas.txt diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 383ac3901..57ee3be86 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -20,6 +20,7 @@ include = [ "README.md", "LICENSE", "cli-version.txt", + "cli-shas.txt", ] [lib] diff --git a/rust/build.rs b/rust/build.rs index 782135101..e74ff0d90 100644 --- a/rust/build.rs +++ b/rust/build.rs @@ -9,6 +9,7 @@ fn main() { println!("cargo::rustc-check-cfg=cfg(has_bundled_cli)"); println!("cargo::rustc-check-cfg=cfg(has_dev_cli)"); println!("cargo:rerun-if-changed=cli-version.txt"); + println!("cargo:rerun-if-changed=cli-shas.txt"); println!("cargo:rerun-if-changed=../nodejs/package-lock.json"); let Some(platform) = target_platform() else { @@ -27,11 +28,28 @@ fn main() { // `_GetCopilotCliVersion` MSBuild target and the Go `cmd/bundler` // tool). // - // No env-var override and no committed per-asset SHA snapshot — the SHA - // is always fetched live against the version's `SHA256SUMS.txt`. + // Two-source version resolution: + // 1. `cli-version.txt` at the crate root (published-crate consumer, + // vendored-slot consumer). + // 2. Sibling `../nodejs/package-lock.json` (contributor build inside + // the github/copilot-sdk repo — same convention as .NET's + // `_GetCopilotCliVersion` MSBuild target and the Go `cmd/bundler` + // tool). + // + // SHA-256 resolution mirrors that: + // 1. `cli-shas.txt` at the crate root — per-asset hashes committed at + // publish time, mapping `=` per line. This + // is the trust boundary for published crates / vendored slots: + // verifying the download against a publish-time-committed hash + // means an attacker who later re-points the release tag can't + // silently swap in poisoned bytes on later consumer builds. + // 2. Live fetch of `SHA256SUMS.txt` from the release URL. Only fires + // when no snapshot is committed (i.e. contributor builds inside + // this repo, which already trust the live release for the + // version too). let version = read_version(); let asset_name = platform.asset_name; - let expected_hash = fetch_live_sha256(&version, asset_name); + let expected_hash = read_expected_sha(&version, asset_name); let base_url = format!("https://github.com/github/copilot-cli/releases/download/v{version}"); let cache_dir = std::env::var("BUNDLED_CLI_CACHE_DIR") @@ -90,6 +108,40 @@ pub(super) static CLI_BINARY_NAME: &str = "{binary_name}"; std::fs::write(out.join("bundled_cli.rs"), generated).expect("failed to write bundled_cli.rs"); } +/// Resolve the SHA-256 hash for `asset_name`. Prefers the committed +/// snapshot at `cli-shas.txt`; falls back to live fetch of the release's +/// `SHA256SUMS.txt`. See the doc comment in `main` for the threat-model +/// rationale. +fn read_expected_sha(version: &str, asset_name: &str) -> String { + let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR is set"); + let snapshot = Path::new(&manifest_dir).join("cli-shas.txt"); + if snapshot.is_file() { + let contents = std::fs::read_to_string(&snapshot) + .unwrap_or_else(|e| panic!("failed to read {}: {e}", snapshot.display())); + return parse_sha_snapshot(&contents, asset_name) + .unwrap_or_else(|e| panic!("invalid {}: {e}", snapshot.display())); + } + fetch_live_sha256(version, asset_name) +} + +/// Parse `cli-shas.txt`. Format is one `=` per line. +/// Blank lines and `#`-prefixed lines are skipped. +fn parse_sha_snapshot(contents: &str, asset_name: &str) -> Result { + for (line_no, raw) in contents.lines().enumerate() { + let line = raw.trim(); + if line.is_empty() || line.starts_with('#') { + continue; + } + let (key, value) = line + .split_once('=') + .ok_or_else(|| format!("line {}: expected `key=value`, got `{raw}`", line_no + 1))?; + if key.trim() == asset_name { + return Ok(value.trim().to_string()); + } + } + Err(format!("missing hash for asset `{asset_name}`")) +} + /// Resolve the CLI version from one of two sources. Panics with a clear /// error if neither is available. fn read_version() -> String { diff --git a/rust/scripts/write-cli-shas.sh b/rust/scripts/write-cli-shas.sh new file mode 100755 index 000000000..3a8cafa93 --- /dev/null +++ b/rust/scripts/write-cli-shas.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +# +# Snapshot per-platform SHA-256 hashes for the Copilot CLI release matching +# `cli-version.txt` into `cli-shas.txt`. Run by the publish workflow before +# `cargo publish`, and by `github/github-app`'s `scripts/sync-copilot-sdk.sh` +# when vendoring the SDK. +# +# Committing these hashes makes the publish workflow the trust boundary: +# every later consumer build verifies the downloaded archive against a +# hash that was captured at publish time, so an attacker who later +# re-points the release tag can't silently swap in poisoned bytes. +# +# Inputs: +# - cli-version.txt (in the rust crate root) - pinned CLI version. +# - https://github.com/github/copilot-cli/releases/v${VERSION}/SHA256SUMS.txt +# +# Output: +# - cli-shas.txt (in the rust crate root). Gitignored locally. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +RUST_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)" +VERSION_FILE="${RUST_DIR}/cli-version.txt" +OUTPUT="${RUST_DIR}/cli-shas.txt" + +if [[ ! -f "${VERSION_FILE}" ]]; then + echo "error: ${VERSION_FILE} not found; write it before running this script" >&2 + exit 1 +fi + +VERSION="$(tr -d '[:space:]' < "${VERSION_FILE}")" +if [[ -z "${VERSION}" ]]; then + echo "error: ${VERSION_FILE} is empty" >&2 + exit 1 +fi + +CHECKSUMS_URL="https://github.com/github/copilot-cli/releases/download/v${VERSION}/SHA256SUMS.txt" +echo "Fetching ${CHECKSUMS_URL}" +SHA256SUMS="$(curl -fsSL --retry 3 --retry-delay 2 "${CHECKSUMS_URL}")" + +ASSETS=( + "copilot-darwin-arm64.tar.gz" + "copilot-darwin-x64.tar.gz" + "copilot-linux-arm64.tar.gz" + "copilot-linux-x64.tar.gz" + "copilot-win32-arm64.zip" + "copilot-win32-x64.zip" +) + +{ + echo "# Auto-generated by rust/scripts/write-cli-shas.sh" + echo "# Per-platform SHA-256 hashes for Copilot CLI v${VERSION}." + echo "# Do not edit by hand; regenerate after bumping cli-version.txt." + for asset in "${ASSETS[@]}"; do + hash="$(printf '%s\n' "${SHA256SUMS}" | awk -v a="${asset}" '$2 == a { print $1 }')" + if [[ -z "${hash}" ]]; then + echo "error: SHA256SUMS.txt missing entry for ${asset}" >&2 + exit 1 + fi + echo "${asset}=${hash}" + done +} > "${OUTPUT}" + +echo "Wrote ${OUTPUT} (${#ASSETS[@]} hashes for v${VERSION})" From bb17e1d3596dd7a4d39c769a287d9b7faed3ef8a Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Tue, 26 May 2026 22:14:37 -0700 Subject: [PATCH 04/12] Collapse pin to single combined cli-version.txt; restore proven script MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reduces churn by going back to one combined snapshot file: - File: cli-version.txt (matches the design-doc name) holds both the version line and per-platform SHA-256 hashes. Same combined format the proven bundled_cli_version.txt used. - Script: snapshot-bundled-cli-version.sh restored byte-for-byte from history except for the OUTPUT filename. Same logic, same name, same output structure. - build.rs: restored the proven resolve_version_and_hash + parse_snapshot pair (minus the dropped COPILOT_CLI_VERSION env path). Drops the cli-shas.txt / read_expected_sha / parse_sha_snapshot helpers I introduced in the previous commit — those split the snapshot into two files for no real benefit. - Publish workflow: one step calling the script, like before. Cargo.toml include[] and .gitignore drop cli-shas.txt. Test for pin format updated to validate the combined lines. Verified locally: cargo check passes in both feature modes with the snapshot present, cargo test --no-default-features passes (4/4). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/publish.yml | 20 +-- rust/.gitignore | 1 - rust/Cargo.toml | 1 - rust/build.rs | 134 +++++++------------ rust/scripts/snapshot-bundled-cli-version.sh | 67 ++++++++++ rust/scripts/write-cli-shas.sh | 65 --------- rust/tests/cli_resolution_test.rs | 30 +++-- 7 files changed, 146 insertions(+), 172 deletions(-) create mode 100755 rust/scripts/snapshot-bundled-cli-version.sh delete mode 100755 rust/scripts/write-cli-shas.sh diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 43b3143eb..a93bec32f 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -167,20 +167,14 @@ jobs: workspaces: "rust" - name: Set version run: sed -i -E 's/^version = ".*"$/version = "${{ needs.version.outputs.version }}"/' Cargo.toml - - name: Write cli-version.txt from package-lock.json + - name: Snapshot CLI version + hashes for build.rs + run: bash scripts/snapshot-bundled-cli-version.sh + - name: Verify cli-version.txt exists run: | - node -e "console.log(require('../nodejs/package-lock.json').packages['node_modules/@github/copilot'].version)" > cli-version.txt - echo "Wrote cli-version.txt: $(cat cli-version.txt)" - - name: Snapshot per-platform SHAs for build.rs - run: bash scripts/write-cli-shas.sh - - name: Verify pin files exist - run: | - for f in cli-version.txt cli-shas.txt; do - if [[ ! -f "$f" ]]; then - echo "::error::$f was not generated. Previous steps must run before packaging." - exit 1 - fi - done + if [[ ! -f cli-version.txt ]]; then + echo "::error::cli-version.txt was not generated. The Snapshot step must run before packaging." + exit 1 + fi - name: Package (dry run) run: cargo publish --dry-run --allow-dirty - name: Upload artifact diff --git a/rust/.gitignore b/rust/.gitignore index 79afedd4c..c4095ffc0 100644 --- a/rust/.gitignore +++ b/rust/.gitignore @@ -1,4 +1,3 @@ /target Cargo.lock.bak cli-version.txt -cli-shas.txt diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 57ee3be86..383ac3901 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -20,7 +20,6 @@ include = [ "README.md", "LICENSE", "cli-version.txt", - "cli-shas.txt", ] [lib] diff --git a/rust/build.rs b/rust/build.rs index e74ff0d90..acae20d86 100644 --- a/rust/build.rs +++ b/rust/build.rs @@ -9,7 +9,6 @@ fn main() { println!("cargo::rustc-check-cfg=cfg(has_bundled_cli)"); println!("cargo::rustc-check-cfg=cfg(has_dev_cli)"); println!("cargo:rerun-if-changed=cli-version.txt"); - println!("cargo:rerun-if-changed=cli-shas.txt"); println!("cargo:rerun-if-changed=../nodejs/package-lock.json"); let Some(platform) = target_platform() else { @@ -20,36 +19,19 @@ fn main() { let out_dir = std::env::var("OUT_DIR").expect("OUT_DIR is always set by cargo"); let out = Path::new(&out_dir); - // Two-source version resolution: - // 1. `cli-version.txt` at the crate root (published-crate consumer, - // vendored-slot consumer). + // Resolve version + per-asset SHA-256 from one of two sources, in order: + // 1. `cli-version.txt` snapshot at the crate root (published-crate + // consumer; generated by the publish workflow). Combined format: + // `version=X` line + per-asset hash lines. Committing the hashes + // makes the publish workflow the trust boundary — an attacker who + // later re-points the release tag can't silently poison consumer + // builds. // 2. Sibling `../nodejs/package-lock.json` (contributor build inside - // the github/copilot-sdk repo — same convention as .NET's - // `_GetCopilotCliVersion` MSBuild target and the Go `cmd/bundler` - // tool). - // - // Two-source version resolution: - // 1. `cli-version.txt` at the crate root (published-crate consumer, - // vendored-slot consumer). - // 2. Sibling `../nodejs/package-lock.json` (contributor build inside - // the github/copilot-sdk repo — same convention as .NET's - // `_GetCopilotCliVersion` MSBuild target and the Go `cmd/bundler` - // tool). - // - // SHA-256 resolution mirrors that: - // 1. `cli-shas.txt` at the crate root — per-asset hashes committed at - // publish time, mapping `=` per line. This - // is the trust boundary for published crates / vendored slots: - // verifying the download against a publish-time-committed hash - // means an attacker who later re-points the release tag can't - // silently swap in poisoned bytes on later consumer builds. - // 2. Live fetch of `SHA256SUMS.txt` from the release URL. Only fires - // when no snapshot is committed (i.e. contributor builds inside - // this repo, which already trust the live release for the - // version too). - let version = read_version(); + // the github/copilot-sdk repo; live SHA256SUMS.txt fetch). Matches + // the .NET `_GetCopilotCliVersion` MSBuild target and the Go + // `cmd/bundler` tool. + let (version, expected_hash) = resolve_version_and_hash(platform.asset_name); let asset_name = platform.asset_name; - let expected_hash = read_expected_sha(&version, asset_name); let base_url = format!("https://github.com/github/copilot-cli/releases/download/v{version}"); let cache_dir = std::env::var("BUNDLED_CLI_CACHE_DIR") @@ -108,70 +90,32 @@ pub(super) static CLI_BINARY_NAME: &str = "{binary_name}"; std::fs::write(out.join("bundled_cli.rs"), generated).expect("failed to write bundled_cli.rs"); } -/// Resolve the SHA-256 hash for `asset_name`. Prefers the committed -/// snapshot at `cli-shas.txt`; falls back to live fetch of the release's -/// `SHA256SUMS.txt`. See the doc comment in `main` for the threat-model -/// rationale. -fn read_expected_sha(version: &str, asset_name: &str) -> String { +/// Resolve the CLI version and the expected SHA-256 hash for the current +/// target's archive. Picks one of two sources in order. Panics with a clear +/// error if neither is available. +fn resolve_version_and_hash(asset_name: &str) -> (String, String) { let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR is set"); - let snapshot = Path::new(&manifest_dir).join("cli-shas.txt"); + + // 1. Snapshot file at the crate root (published-crate consumer, + // vendored-slot consumer). Combined version + per-asset hashes. + let snapshot = Path::new(&manifest_dir).join("cli-version.txt"); if snapshot.is_file() { let contents = std::fs::read_to_string(&snapshot) .unwrap_or_else(|e| panic!("failed to read {}: {e}", snapshot.display())); - return parse_sha_snapshot(&contents, asset_name) + return parse_snapshot(&contents, asset_name) .unwrap_or_else(|e| panic!("invalid {}: {e}", snapshot.display())); } - fetch_live_sha256(version, asset_name) -} - -/// Parse `cli-shas.txt`. Format is one `=` per line. -/// Blank lines and `#`-prefixed lines are skipped. -fn parse_sha_snapshot(contents: &str, asset_name: &str) -> Result { - for (line_no, raw) in contents.lines().enumerate() { - let line = raw.trim(); - if line.is_empty() || line.starts_with('#') { - continue; - } - let (key, value) = line - .split_once('=') - .ok_or_else(|| format!("line {}: expected `key=value`, got `{raw}`", line_no + 1))?; - if key.trim() == asset_name { - return Ok(value.trim().to_string()); - } - } - Err(format!("missing hash for asset `{asset_name}`")) -} - -/// Resolve the CLI version from one of two sources. Panics with a clear -/// error if neither is available. -fn read_version() -> String { - let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR is set"); - // 1. Pin file at the crate root (published crates / vendored slots). - let pin = Path::new(&manifest_dir).join("cli-version.txt"); - if pin.is_file() { - let contents = std::fs::read_to_string(&pin) - .unwrap_or_else(|e| panic!("failed to read {}: {e}", pin.display())); - let version = contents.trim(); - if version.is_empty() { - panic!("{} is empty", pin.display()); - } - if version.contains(char::is_whitespace) { - panic!( - "{} contains whitespace; expected a single version line (e.g. `1.0.55-0`)", - pin.display() - ); - } - return version.to_string(); - } - - // 2. Lockfile fallback (contributor build inside github/copilot-sdk). + // 2. Lockfile fallback (contributor build inside github/copilot-sdk) — + // read version, fetch live SHA256SUMS. let lockfile = Path::new(&manifest_dir) .join("..") .join("nodejs") .join("package-lock.json"); if lockfile.is_file() { - return read_version_from_package_lock(&lockfile); + let version = read_version_from_package_lock(&lockfile); + let hash = fetch_live_sha256(&version, asset_name); + return (version, hash); } panic!( @@ -181,11 +125,37 @@ fn read_version() -> String { - {} (missing)\n\ In a published crate or vendored slot, `cli-version.txt` should be present.\n\ Inside the github/copilot-sdk repo, `../nodejs/package-lock.json` is the source.", - pin.display(), + snapshot.display(), lockfile.display(), ); } +/// Parse the `cli-version.txt` snapshot file. Format is one `key=value` per +/// line. The first non-comment line is `version=X.Y.Z`; subsequent lines map +/// asset filename to hex SHA-256. Blank lines and lines starting with `#` +/// are skipped. +fn parse_snapshot(contents: &str, asset_name: &str) -> Result<(String, String), String> { + let mut version: Option = None; + let mut hash: Option = None; + for (line_no, raw) in contents.lines().enumerate() { + let line = raw.trim(); + if line.is_empty() || line.starts_with('#') { + continue; + } + let (key, value) = line + .split_once('=') + .ok_or_else(|| format!("line {}: expected `key=value`, got `{raw}`", line_no + 1))?; + match key.trim() { + "version" => version = Some(value.trim().to_string()), + k if k == asset_name => hash = Some(value.trim().to_string()), + _ => {} + } + } + let version = version.ok_or("missing `version=` line")?; + let hash = hash.ok_or_else(|| format!("missing hash for asset `{asset_name}`"))?; + Ok((version, hash)) +} + /// Read the `@github/copilot` version from `nodejs/package-lock.json`. fn read_version_from_package_lock(path: &Path) -> String { let contents = std::fs::read_to_string(path) diff --git a/rust/scripts/snapshot-bundled-cli-version.sh b/rust/scripts/snapshot-bundled-cli-version.sh new file mode 100755 index 000000000..7f78d529b --- /dev/null +++ b/rust/scripts/snapshot-bundled-cli-version.sh @@ -0,0 +1,67 @@ +#!/usr/bin/env bash +# +# Snapshot the Copilot CLI version + per-platform SHA-256 hashes for the +# rust crate's bundled-CLI build.rs. Runs at SDK publish time, mirroring +# how .NET's _GenerateVersionProps BeforeTargets="Pack" target writes +# GitHub.Copilot.SDK.props before NuGet packing. +# +# Inputs: +# - ../nodejs/package-lock.json (sibling) - source of the pinned version. +# - https://github.com/github/copilot-cli/releases/v{version}/SHA256SUMS.txt - +# authoritative per-platform hashes. +# +# Output: +# - cli-version.txt (in the rust crate root). Gitignored. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +RUST_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)" +REPO_ROOT="$(cd "${RUST_DIR}/.." && pwd)" +LOCKFILE="${REPO_ROOT}/nodejs/package-lock.json" +OUTPUT="${RUST_DIR}/cli-version.txt" + +if [[ ! -f "${LOCKFILE}" ]]; then + echo "error: ${LOCKFILE} not found" >&2 + exit 1 +fi + +VERSION="$(node -e "console.log(require('${LOCKFILE}').packages['node_modules/@github/copilot'].version)")" +if [[ -z "${VERSION}" ]]; then + echo "error: could not read @github/copilot version from ${LOCKFILE}" >&2 + exit 1 +fi + +CHECKSUMS_URL="https://github.com/github/copilot-cli/releases/download/v${VERSION}/SHA256SUMS.txt" +echo "Fetching ${CHECKSUMS_URL}" +SHA256SUMS="$(curl -fsSL --retry 3 --retry-delay 2 "${CHECKSUMS_URL}")" + +ASSETS=( + "copilot-darwin-arm64.tar.gz" + "copilot-darwin-x64.tar.gz" + "copilot-linux-arm64.tar.gz" + "copilot-linux-x64.tar.gz" + "copilot-win32-arm64.zip" + "copilot-win32-x64.zip" +) + +declare -A HASHES +for asset in "${ASSETS[@]}"; do + hash="$(printf '%s\n' "${SHA256SUMS}" | awk -v a="${asset}" '$2 == a { print $1 }')" + if [[ -z "${hash}" ]]; then + echo "error: SHA256SUMS.txt missing entry for ${asset}" >&2 + exit 1 + fi + HASHES[$asset]="${hash}" +done + +{ + echo "# Auto-generated by rust/scripts/snapshot-bundled-cli-version.sh" + echo "# Do not edit. Regenerated by the publish workflow on every release." + echo "version=${VERSION}" + for asset in "${ASSETS[@]}"; do + echo "${asset}=${HASHES[$asset]}" + done +} > "${OUTPUT}" + +echo "Wrote ${OUTPUT} (version=${VERSION}, ${#ASSETS[@]} hashes)" \ No newline at end of file diff --git a/rust/scripts/write-cli-shas.sh b/rust/scripts/write-cli-shas.sh deleted file mode 100755 index 3a8cafa93..000000000 --- a/rust/scripts/write-cli-shas.sh +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env bash -# -# Snapshot per-platform SHA-256 hashes for the Copilot CLI release matching -# `cli-version.txt` into `cli-shas.txt`. Run by the publish workflow before -# `cargo publish`, and by `github/github-app`'s `scripts/sync-copilot-sdk.sh` -# when vendoring the SDK. -# -# Committing these hashes makes the publish workflow the trust boundary: -# every later consumer build verifies the downloaded archive against a -# hash that was captured at publish time, so an attacker who later -# re-points the release tag can't silently swap in poisoned bytes. -# -# Inputs: -# - cli-version.txt (in the rust crate root) - pinned CLI version. -# - https://github.com/github/copilot-cli/releases/v${VERSION}/SHA256SUMS.txt -# -# Output: -# - cli-shas.txt (in the rust crate root). Gitignored locally. - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -RUST_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)" -VERSION_FILE="${RUST_DIR}/cli-version.txt" -OUTPUT="${RUST_DIR}/cli-shas.txt" - -if [[ ! -f "${VERSION_FILE}" ]]; then - echo "error: ${VERSION_FILE} not found; write it before running this script" >&2 - exit 1 -fi - -VERSION="$(tr -d '[:space:]' < "${VERSION_FILE}")" -if [[ -z "${VERSION}" ]]; then - echo "error: ${VERSION_FILE} is empty" >&2 - exit 1 -fi - -CHECKSUMS_URL="https://github.com/github/copilot-cli/releases/download/v${VERSION}/SHA256SUMS.txt" -echo "Fetching ${CHECKSUMS_URL}" -SHA256SUMS="$(curl -fsSL --retry 3 --retry-delay 2 "${CHECKSUMS_URL}")" - -ASSETS=( - "copilot-darwin-arm64.tar.gz" - "copilot-darwin-x64.tar.gz" - "copilot-linux-arm64.tar.gz" - "copilot-linux-x64.tar.gz" - "copilot-win32-arm64.zip" - "copilot-win32-x64.zip" -) - -{ - echo "# Auto-generated by rust/scripts/write-cli-shas.sh" - echo "# Per-platform SHA-256 hashes for Copilot CLI v${VERSION}." - echo "# Do not edit by hand; regenerate after bumping cli-version.txt." - for asset in "${ASSETS[@]}"; do - hash="$(printf '%s\n' "${SHA256SUMS}" | awk -v a="${asset}" '$2 == a { print $1 }')" - if [[ -z "${hash}" ]]; then - echo "error: SHA256SUMS.txt missing entry for ${asset}" >&2 - exit 1 - fi - echo "${asset}=${hash}" - done -} > "${OUTPUT}" - -echo "Wrote ${OUTPUT} (${#ASSETS[@]} hashes for v${VERSION})" diff --git a/rust/tests/cli_resolution_test.rs b/rust/tests/cli_resolution_test.rs index a839ffda9..fb561341f 100644 --- a/rust/tests/cli_resolution_test.rs +++ b/rust/tests/cli_resolution_test.rs @@ -110,22 +110,32 @@ fn dev_mode_extracted_binary_exists() { } /// Build-time version pin: `cli-version.txt` (when present) must be a -/// single-line non-empty exact version. When absent, build.rs falls -/// through to `../nodejs/package-lock.json` — both are accepted, this -/// test only checks the pin file's format if it's there. +/// combined snapshot — a `version=X.Y.Z` line plus per-asset hash lines. +/// When absent, build.rs falls through to `../nodejs/package-lock.json` — +/// both are accepted, this test only checks the pin file's format if it's +/// there. #[test] fn pin_file_when_present_is_well_formed() { let manifest_dir = env!("CARGO_MANIFEST_DIR"); let pin = PathBuf::from(manifest_dir).join("cli-version.txt"); if !pin.is_file() { - // Mono-repo build path — no assertion needed. + // Contributor build path — no assertion needed. return; } let contents = std::fs::read_to_string(&pin).expect("read cli-version.txt"); - let trimmed = contents.trim(); - assert!(!trimmed.is_empty(), "cli-version.txt is empty"); - assert!( - !trimmed.contains(char::is_whitespace), - "cli-version.txt should be a single version line, got {trimmed:?}" - ); + let mut saw_version = false; + for raw in contents.lines() { + let line = raw.trim(); + if line.is_empty() || line.starts_with('#') { + continue; + } + let (key, value) = line + .split_once('=') + .unwrap_or_else(|| panic!("malformed line: {raw:?}")); + assert!(!value.trim().is_empty(), "empty value for key {key:?}"); + if key.trim() == "version" { + saw_version = true; + } + } + assert!(saw_version, "cli-version.txt missing `version=` line"); } From 0cb4b99a1639fc66bfb6067ca3e81c4acaab462c Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Tue, 26 May 2026 22:25:37 -0700 Subject: [PATCH 05/12] Update lib.rs docs for new resolver order and dev-cache layout Self-review (GPT 5.5) catch: - CliProgram::Resolve docstring still said 'PATH + common locations' for the third source; correct it to 'dev cache'. - ClientOptions docstring still implied bundled-cli was the only auto-resolve path; spell out the env var + dev-cache fallback. - bundled_cli_extract_dir field doc had the old hyphenated cache path (github-copilot-sdk-{version}); update to the shared github-copilot-sdk/cli// layout. - bundled_cli_extract_dir + with_bundled_cli_extract_dir now explicitly document that the option is ignored in default-features=false dev builds (build.rs has already extracted; use CliProgram::Path or COPILOT_CLI_PATH for an override). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- rust/src/lib.rs | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/rust/src/lib.rs b/rust/src/lib.rs index ee8606740..1d3c867e0 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -16,7 +16,7 @@ pub mod hooks; mod jsonrpc; /// Permission-policy helpers that produce a [`handler::PermissionHandler`]. pub mod permission; -/// GitHub Copilot CLI binary resolution (env var, embedded, PATH search). +/// GitHub Copilot CLI binary resolution (env var, embedded, dev cache). pub(crate) mod resolve; mod router; /// Session management — create, resume, send messages, and interact with the agent. @@ -314,7 +314,7 @@ pub enum Transport { /// How the SDK locates the GitHub Copilot CLI binary. #[derive(Debug, Clone, Default)] pub enum CliProgram { - /// Auto-resolve: `COPILOT_CLI_PATH` → embedded CLI → PATH + common locations. + /// Auto-resolve: `COPILOT_CLI_PATH` → embedded CLI → dev cache. /// This is the default. #[default] Resolve, @@ -331,12 +331,12 @@ impl From for CliProgram { /// Options for starting a [`Client`]. /// /// When `program` is [`CliProgram::Resolve`] (the default), [`Client::start`] -/// uses the bundled Copilot CLI that was embedded at build time (via the -/// default `bundled-cli` cargo feature). +/// uses `COPILOT_CLI_PATH` when set to a real file. Otherwise it uses the +/// bundled Copilot CLI when the default `bundled-cli` cargo feature is enabled, +/// or the build-time extracted dev-cache CLI when that feature is disabled. /// /// Set `program` to [`CliProgram::Path`] to use an explicit binary instead. -/// This is the required path if you've opted out of bundling via -/// `default-features = false`. +/// This skips auto-resolution entirely. #[non_exhaustive] pub struct ClientOptions { /// How to locate the CLI binary. @@ -417,7 +417,7 @@ pub struct ClientOptions { /// first use. /// /// When `None` (the default), the SDK extracts the embedded CLI to - /// `/github-copilot-sdk-{version}/copilot[.exe]`, + /// `/github-copilot-sdk/cli//copilot[.exe]`, /// where the cache dir is [`dirs::cache_dir()`] — /// `%LOCALAPPDATA%` on Windows, `~/Library/Caches/` on macOS, /// `$XDG_CACHE_HOME` (or `~/.cache/`) on Linux. Use this knob to @@ -425,7 +425,10 @@ pub struct ClientOptions { /// CI runners) without changing the global cache layout. /// /// Ignored when the SDK was built without a bundled CLI (i.e. with - /// `default-features = false` to disable the `bundled-cli` feature). + /// `default-features = false` to disable the `bundled-cli` feature): in + /// that dev mode, build.rs has already extracted the binary to the per-user + /// cache and baked its path into the crate. Use [`CliProgram::Path`] or + /// `COPILOT_CLI_PATH` to point at a different binary. pub bundled_cli_extract_dir: Option, } @@ -828,6 +831,10 @@ impl ClientOptions { /// Override the directory where the bundled CLI binary is extracted on /// first use. See [`Self::bundled_cli_extract_dir`]. + /// + /// This only affects builds with the default `bundled-cli` feature enabled. + /// In `default-features = false` dev builds, build.rs has already extracted + /// the CLI and this option is ignored. pub fn with_bundled_cli_extract_dir(mut self, dir: impl Into) -> Self { self.bundled_cli_extract_dir = Some(dir.into()); self From 82ac5b6f5081f38d212a171c4bd4c1d424e39341 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Tue, 26 May 2026 22:28:54 -0700 Subject: [PATCH 06/12] Use file-level atomic rename in extract_to_cache (Windows correctness) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previous version staged a whole per-version directory and renamed it into place. fs::rename for files is atomic on Windows (MoveFileExW + MOVEFILE_REPLACE_EXISTING), but fs::rename for directories is NOT — if install_dir already exists (partial-extract recovery, stale install from a crashed build, etc.) Windows refuses the rename and we'd panic. Switch to file-level staging: - create_dir_all(install_dir) up front (idempotent, fine on Windows even if it exists with content). - Write extracted binary to a staging *file* alongside its final location (.copilot[.exe].staging--). - chmod 755 on Unix only. - fs::rename(staging_file, final_path) — atomic file-replace on both Unix and Windows. If a concurrent build won the race their bytes are the same (SHA-verified inputs), so replacement is safe. - On error, remove the staging file (a file, not remove_dir_all of a directory). Verified extraction still works end-to-end on macOS (dev-mode test passes, extracted copilot --version prints expected output). Windows release zip confirmed to be the same single-file layout (copilot.exe at root, no subdir) so the extraction code's name match applies cleanly there too. Windows CI already exercises both default-features and --no-default-features jobs. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- rust/build.rs | 53 ++++++++++++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/rust/build.rs b/rust/build.rs index acae20d86..6a0b7a338 100644 --- a/rust/build.rs +++ b/rust/build.rs @@ -232,10 +232,17 @@ fn target_platform() -> Option { /// Dev-mode extraction: write the single binary entry from `archive` to /// `/github-copilot-sdk/cli//` (staging -/// dir + atomic rename). Idempotent — returns the existing path if a previous +/// file + atomic rename). Idempotent — returns the existing path if a previous /// build already populated the target. Mirrors the cache layout /// `embeddedcli::default_install_dir` uses at runtime so the two modes are /// interchangeable. +/// +/// Uses file-level (not directory-level) atomic rename so the recovery path +/// works on Windows too: `std::fs::rename` on Windows atomically replaces +/// existing files via `MoveFileExW` with `MOVEFILE_REPLACE_EXISTING`, but +/// **cannot** replace an existing directory. Staging the binary alongside +/// its final location and renaming the file is portable; staging an entire +/// directory and renaming it is not. fn extract_to_cache(archive: &[u8], version: &str, platform: Platform) -> PathBuf { let cache_root = dirs::cache_dir().unwrap_or_else(std::env::temp_dir); let install_dir = cache_root @@ -248,28 +255,27 @@ fn extract_to_cache(archive: &[u8], version: &str, platform: Platform) -> PathBu return final_path; } - let parent = install_dir - .parent() - .expect("install_dir always has a parent"); - std::fs::create_dir_all(parent) - .unwrap_or_else(|e| panic!("failed to create cache parent {}: {e}", parent.display())); - - // Staging dir is a sibling of the final per-version dir so the atomic - // rename stays on the same filesystem. PID + nanos disambiguate parallel - // builds racing on the same cache. + std::fs::create_dir_all(&install_dir).unwrap_or_else(|e| { + panic!( + "failed to create install dir {}: {e}", + install_dir.display() + ) + }); + + // Staging file is a sibling of the final binary so the rename stays on + // the same filesystem. PID + nanos disambiguate parallel builds racing + // on the same cache. let nanos = std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) .map(|d| d.as_nanos()) .unwrap_or(0); - let staging = parent.join(format!( - ".staging-{version}-{pid}-{nanos}", + let staging_binary = install_dir.join(format!( + ".{binary_name}.staging-{pid}-{nanos}", + binary_name = platform.binary_name, pid = std::process::id(), )); - std::fs::create_dir_all(&staging) - .unwrap_or_else(|e| panic!("failed to create staging dir {}: {e}", staging.display())); let bytes = extract_binary_bytes(archive, platform); - let staging_binary = staging.join(platform.binary_name); std::fs::write(&staging_binary, &bytes) .unwrap_or_else(|e| panic!("failed to write {}: {e}", staging_binary.display())); @@ -280,19 +286,18 @@ fn extract_to_cache(archive: &[u8], version: &str, platform: Platform) -> PathBu .unwrap_or_else(|e| panic!("failed to chmod {}: {e}", staging_binary.display())); } - match std::fs::rename(&staging, &install_dir) { + // Atomic on both Unix and Windows for file-to-file renames. If a + // concurrent build already won the race, the rename replaces their + // bytes with ours — the SHA-verified inputs are identical, so this is + // safe. + match std::fs::rename(&staging_binary, &final_path) { Ok(()) => {} - Err(_) if final_path.is_file() => { - // Another concurrent build won the race; their bytes are the same - // (verified SHA), so just clean up our staging copy. - let _ = std::fs::remove_dir_all(&staging); - } Err(e) => { - let _ = std::fs::remove_dir_all(&staging); + let _ = std::fs::remove_file(&staging_binary); panic!( "failed to rename {} -> {}: {e}", - staging.display(), - install_dir.display() + staging_binary.display(), + final_path.display() ); } } From 33c3be69eaf2b820d0c80ac7a950758c6c07d5de Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Tue, 26 May 2026 22:35:37 -0700 Subject: [PATCH 07/12] Drop staging+rename from extract_to_cache; match proven embed install() User caught this on review: embed mode's embeddedcli::install (which already ships to Windows) doesn't stage. It does: create_dir_all(install_dir)? if final_path.is_file() { return Ok(final_path); } write_binary(final_path, bytes) That's it. My extract_to_cache invented a staging-file + atomic-rename dance that doesn't exist in the proven path, then I 'discovered' my own over-engineering didn't work on Windows (directory rename), 'fixed' it with a file-level rename, and was still strictly more complex than what already works. Match the embed pattern exactly. Same idempotency check, same direct write, same #[cfg(unix)] chmod gate. Race semantics across concurrent build.rs invocations are identical to the embed install at runtime, and both write SHA-verified-identical bytes. If two builds race, last write wins with identical contents. Verified end-to-end on macOS: dev-mode test passes, extracted copilot --version runs. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- rust/build.rs | 58 ++++++++++++--------------------------------------- 1 file changed, 13 insertions(+), 45 deletions(-) diff --git a/rust/build.rs b/rust/build.rs index 6a0b7a338..6c872ba53 100644 --- a/rust/build.rs +++ b/rust/build.rs @@ -231,18 +231,13 @@ fn target_platform() -> Option { } /// Dev-mode extraction: write the single binary entry from `archive` to -/// `/github-copilot-sdk/cli//` (staging -/// file + atomic rename). Idempotent — returns the existing path if a previous -/// build already populated the target. Mirrors the cache layout -/// `embeddedcli::default_install_dir` uses at runtime so the two modes are -/// interchangeable. -/// -/// Uses file-level (not directory-level) atomic rename so the recovery path -/// works on Windows too: `std::fs::rename` on Windows atomically replaces -/// existing files via `MoveFileExW` with `MOVEFILE_REPLACE_EXISTING`, but -/// **cannot** replace an existing directory. Staging the binary alongside -/// its final location and renaming the file is portable; staging an entire -/// directory and renaming it is not. +/// `/github-copilot-sdk/cli//`. +/// Idempotent — returns the existing path if a previous build already +/// populated the target. Mirrors `embeddedcli::install` exactly so the +/// dev-mode and embed-mode paths share the same proven semantics on every +/// platform (notably Windows, where directory-level atomic rename isn't +/// available and the embed path has shipped to users without staging for +/// some time). fn extract_to_cache(archive: &[u8], version: &str, platform: Platform) -> PathBuf { let cache_root = dirs::cache_dir().unwrap_or_else(std::env::temp_dir); let install_dir = cache_root @@ -251,6 +246,8 @@ fn extract_to_cache(archive: &[u8], version: &str, platform: Platform) -> PathBu .join(version); let final_path = install_dir.join(platform.binary_name); + // Per-version install dir means a present file at this path is the + // binary we want — no need to re-extract. Matches embeddedcli::install. if final_path.is_file() { return final_path; } @@ -262,44 +259,15 @@ fn extract_to_cache(archive: &[u8], version: &str, platform: Platform) -> PathBu ) }); - // Staging file is a sibling of the final binary so the rename stays on - // the same filesystem. PID + nanos disambiguate parallel builds racing - // on the same cache. - let nanos = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .map(|d| d.as_nanos()) - .unwrap_or(0); - let staging_binary = install_dir.join(format!( - ".{binary_name}.staging-{pid}-{nanos}", - binary_name = platform.binary_name, - pid = std::process::id(), - )); - let bytes = extract_binary_bytes(archive, platform); - std::fs::write(&staging_binary, &bytes) - .unwrap_or_else(|e| panic!("failed to write {}: {e}", staging_binary.display())); + std::fs::write(&final_path, &bytes) + .unwrap_or_else(|e| panic!("failed to write {}: {e}", final_path.display())); #[cfg(unix)] { use std::os::unix::fs::PermissionsExt; - std::fs::set_permissions(&staging_binary, std::fs::Permissions::from_mode(0o755)) - .unwrap_or_else(|e| panic!("failed to chmod {}: {e}", staging_binary.display())); - } - - // Atomic on both Unix and Windows for file-to-file renames. If a - // concurrent build already won the race, the rename replaces their - // bytes with ours — the SHA-verified inputs are identical, so this is - // safe. - match std::fs::rename(&staging_binary, &final_path) { - Ok(()) => {} - Err(e) => { - let _ = std::fs::remove_file(&staging_binary); - panic!( - "failed to rename {} -> {}: {e}", - staging_binary.display(), - final_path.display() - ); - } + std::fs::set_permissions(&final_path, std::fs::Permissions::from_mode(0o755)) + .unwrap_or_else(|e| panic!("failed to chmod {}: {e}", final_path.display())); } final_path From 66be1e65d4612d54baeb5e1043e549727231dfab Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Tue, 26 May 2026 22:40:23 -0700 Subject: [PATCH 08/12] Sanitize version in extract_to_cache to match embed-mode cache path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit embeddedcli::default_install_dir runs sanitize_version on the version string before using it as a path component, replacing anything outside [a-zA-Z0-9._-] with _. extract_to_cache did not. For today's CLI versions (1.0.55-1) both resolve to identical paths because the version chars are already safe. But the invariant we documented — embed mode and dev mode share the same per-version cache directory — would silently break for any future version containing an unusual character: embed would write to /.../1.0.55_rc1/copilot while dev wrote to /.../1.0.55-rc1/copilot (etc). Duplicate sanitize_version into build.rs (small, can't share with the crate it's building) and note it's kept in sync with the embed-mode copy. No other behavior change. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- rust/build.rs | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/rust/build.rs b/rust/build.rs index 6c872ba53..470805dcb 100644 --- a/rust/build.rs +++ b/rust/build.rs @@ -231,19 +231,17 @@ fn target_platform() -> Option { } /// Dev-mode extraction: write the single binary entry from `archive` to -/// `/github-copilot-sdk/cli//`. +/// `/github-copilot-sdk/cli//`. /// Idempotent — returns the existing path if a previous build already -/// populated the target. Mirrors `embeddedcli::install` exactly so the -/// dev-mode and embed-mode paths share the same proven semantics on every -/// platform (notably Windows, where directory-level atomic rename isn't -/// available and the embed path has shipped to users without staging for -/// some time). +/// populated the target. Mirrors `embeddedcli::install` exactly (including +/// `sanitize_version`) so the dev-mode and embed-mode paths share the same +/// cache layout on every platform. fn extract_to_cache(archive: &[u8], version: &str, platform: Platform) -> PathBuf { let cache_root = dirs::cache_dir().unwrap_or_else(std::env::temp_dir); let install_dir = cache_root .join("github-copilot-sdk") .join("cli") - .join(version); + .join(sanitize_version(version)); let final_path = install_dir.join(platform.binary_name); // Per-version install dir means a present file at this path is the @@ -273,6 +271,20 @@ fn extract_to_cache(archive: &[u8], version: &str, platform: Platform) -> PathBu final_path } +/// Replace characters outside `[a-zA-Z0-9._-]` with `_` so the version +/// string is always safe to use as a path component. Kept in sync with +/// `embeddedcli::sanitize_version` so embed-mode and dev-mode resolve to +/// the same cache directory for any given version. +fn sanitize_version(version: &str) -> String { + version + .chars() + .map(|c| match c { + 'a'..='z' | 'A'..='Z' | '0'..='9' | '.' | '-' | '_' => c, + _ => '_', + }) + .collect() +} + /// Extract the single `binary_name` entry from the release archive. Reused /// between embed mode's `verify_binary_present_in_archive` and dev mode's /// `extract_to_cache`. Panics if the entry isn't found — callers have From fbd2b24be3a1e1649fb47ad288719bd32ab27383 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Tue, 26 May 2026 22:52:06 -0700 Subject: [PATCH 09/12] Address PR review feedback on tests + docs rust/tests/cli_resolution_test.rs (resolves 5 review threads): - Replace `display.contains("/cli/")` with a Path::components() walk so the assertion is portable across Windows backslashes. This was the actual Windows CI failure (PR #1450 run 26493115251). - Re-gate dev_mode_extracted_binary_exists on cfg(has_dev_cli) so it compiles cleanly on unsupported target platforms where build.rs doesn't emit COPILOT_CLI_DEV_PATH. - Replace substring "BinaryNotFound" / "not bundled" check in stale_env_override_falls_through with a variant match on Error::BinaryNotFound (the Display output is lowercase, so the old check passed even when fallthrough was broken). - Rewrite the comment in env_override_resolves_to_pointed_file: the test never chmods the tempfile and doesn't need to (resolver only does is_file()). - Switch env-touching tokio tests to flavor = "current_thread" and rewrite the safety comment on set_env/unset_env to honestly describe what serial + current_thread give us (and what they don't). rust/src/resolve.rs: - Drop the "matches the .NET and TypeScript SDKs" claim from the module doc; this resolver now has an extra dev-cache step. rust/README.md: - Add a callout warning that default-features = false produces a build that is *not* self-contained; only intended for local development. Production / distributed builds should keep bundled-cli on or supply COPILOT_CLI_PATH at runtime. Validated: cargo test (both feature configs), clippy, nightly fmt, RUSTDOCFLAGS="-D warnings" cargo doc. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- rust/README.md | 7 ++++ rust/src/resolve.rs | 2 +- rust/tests/cli_resolution_test.rs | 60 +++++++++++++++++++++---------- 3 files changed, 50 insertions(+), 19 deletions(-) diff --git a/rust/README.md b/rust/README.md index 813d42418..5210f3b0d 100644 --- a/rust/README.md +++ b/rust/README.md @@ -764,6 +764,13 @@ For development you usually want the smaller dev build: github-copilot-sdk = { version = "0.1", default-features = false } ``` +> **Dev-mode only.** This mode is intended for local development. The +> resulting binary is not self-contained — it depends on the extracted +> CLI living in the build machine's per-user cache. Distributed builds +> (release artifacts, signed installers, etc.) should keep `bundled-cli` +> enabled or supply an explicit CLI path at runtime via +> [`CliProgram::Path`] / `COPILOT_CLI_PATH`. + This still pins, downloads, and SHA-verifies the same CLI version, but extracts the binary into a per-user cache instead of embedding it. The runtime resolver returns that cached path. The cache is shared across crates and across embed/dev builds. ### How it works diff --git a/rust/src/resolve.rs b/rust/src/resolve.rs index c4346d9bc..2d36b56d9 100644 --- a/rust/src/resolve.rs +++ b/rust/src/resolve.rs @@ -1,6 +1,6 @@ //! Internal resolution of the GitHub Copilot CLI binary. //! -//! Resolution order (matches the .NET and TypeScript SDKs): +//! Resolution order: //! //! 1. An explicit path supplied by the application via //! [`CliProgram::Path`](crate::CliProgram::Path). diff --git a/rust/tests/cli_resolution_test.rs b/rust/tests/cli_resolution_test.rs index fb561341f..a80c71297 100644 --- a/rust/tests/cli_resolution_test.rs +++ b/rust/tests/cli_resolution_test.rs @@ -8,30 +8,44 @@ use std::path::PathBuf; -use github_copilot_sdk::{CliProgram, Client, ClientOptions}; +use github_copilot_sdk::{CliProgram, Client, ClientOptions, Error}; use serial_test::serial; fn unset_env(key: &str) { - // SAFETY: tests are serialized with #[serial], so no other thread can - // observe the env mid-write. This is the standard pattern for testing - // env-driven behavior in this crate. + // SAFETY: these tests are serialized with #[serial(copilot_cli_path)] + // so no other test in this binary mutates COPILOT_CLI_PATH while + // we hold the lock. POSIX `setenv`/`unsetenv` are generally + // thread-safe on modern platforms, and we use `current_thread` + // tokio runtimes to avoid concurrent reads from worker threads. + // This doesn't satisfy the strict Rust 2024 safety contract + // (other tests in the binary may read env vars), but the practical + // race window is negligible. unsafe { std::env::remove_var(key) }; } fn set_env(key: &str, value: &str) { + // SAFETY: these tests are serialized with #[serial(copilot_cli_path)] + // so no other test in this binary mutates COPILOT_CLI_PATH while + // we hold the lock. POSIX `setenv`/`unsetenv` are generally + // thread-safe on modern platforms, and we use `current_thread` + // tokio runtimes to avoid concurrent reads from worker threads. + // This doesn't satisfy the strict Rust 2024 safety contract + // (other tests in the binary may read env vars), but the practical + // race window is negligible. unsafe { std::env::set_var(key, value) }; } /// COPILOT_CLI_PATH wins when it points at a real file, regardless of /// build mode. -#[tokio::test] +#[tokio::test(flavor = "current_thread")] #[serial(copilot_cli_path)] async fn env_override_resolves_to_pointed_file() { let tmp = tempfile::NamedTempFile::new().expect("create tempfile"); - // Make the temp file executable on POSIX so resolve doesn't reject it; - // resolve only checks `is_file()`, but downstream `Client::start` - // wants to exec the binary. We only need the resolver to return the - // path here, so a `--version`-like wrapper isn't required. + // resolve.rs only checks `is_file()` for COPILOT_CLI_PATH, so a plain + // tempfile is sufficient — we don't need it to be executable. The + // downstream `Client::start` call will fail to exec an empty file, + // which we tolerate below; we just need to observe that the resolver + // returned the env-override path rather than `BinaryNotFound`. let path = tmp.path().to_path_buf(); set_env( @@ -67,7 +81,7 @@ async fn env_override_resolves_to_pointed_file() { /// A stale (non-existent) COPILOT_CLI_PATH falls through to the next /// resolution source (embed or dev) rather than failing outright. -#[tokio::test] +#[tokio::test(flavor = "current_thread")] #[serial(copilot_cli_path)] async fn stale_env_override_falls_through() { set_env("COPILOT_CLI_PATH", "/definitely/does/not/exist/copilot"); @@ -79,10 +93,9 @@ async fn stale_env_override_falls_through() { // resolver should find a binary via the next source. Failing here // would mean fallthrough is broken. if let Err(e) = &result { - let msg = format!("{e}"); assert!( - !msg.contains("BinaryNotFound") && !msg.contains("not bundled"), - "stale COPILOT_CLI_PATH should fall through to bundled/dev CLI; got {msg}" + !matches!(e, Error::BinaryNotFound { .. }), + "stale COPILOT_CLI_PATH should fall through; got BinaryNotFound: {e}" ); } } @@ -90,7 +103,7 @@ async fn stale_env_override_falls_through() { /// In dev mode (no `bundled-cli` feature) build.rs writes the extracted /// binary into the per-user cache and emits its path as /// `COPILOT_CLI_DEV_PATH`. The runtime resolver returns that path. -#[cfg(not(feature = "bundled-cli"))] +#[cfg(has_dev_cli)] #[test] fn dev_mode_extracted_binary_exists() { let path = PathBuf::from(env!("COPILOT_CLI_DEV_PATH")); @@ -101,11 +114,22 @@ fn dev_mode_extracted_binary_exists() { ); // Confirm the cache layout matches what runtime resolution expects. - let display = path.display().to_string(); + let mut found = false; + let comps: Vec<_> = path.components().collect(); + for window in comps.windows(2) { + if let (std::path::Component::Normal(a), std::path::Component::Normal(b)) = + (&window[0], &window[1]) + && a.to_str() == Some("github-copilot-sdk") + && b.to_str() == Some("cli") + { + found = true; + break; + } + } assert!( - display.contains("github-copilot-sdk") && display.contains("/cli/"), - "dev path {} does not match expected `github-copilot-sdk/cli//` layout", - display + found, + "dev path {} does not contain the expected `github-copilot-sdk/cli/` segments", + path.display() ); } From 9b22dc5e44be0b9f5a85c16d640a9044a46a8cb0 Mon Sep 17 00:00:00 2001 From: Steve Sanderson Date: Wed, 27 May 2026 12:08:34 +0100 Subject: [PATCH 10/12] Refine extract-to-cache: skip envvar, extract-dir envvar, single cache, staging+rename Drop 'extract-to-cache mode' phrasing; clarify caveats when bundled-cli is off Phrase the disabled-bundled-cli case in terms of the feature flag itself rather than coining a 'mode' name. Strengthen the README caveat: the consumer must supply a compatible CLI via CliProgram::Path / COPILOT_CLI_PATH and is responsible for version compatibility; the build-machine auto-resolution is a convenience that does not carry over to other machines. Co-Authored-By: Copilot <223556219+Copilot@users.noreply.github.com> --- rust/Cargo.toml | 4 +- rust/README.md | 55 +++++--- rust/build.rs | 211 +++++++++++++++++++++--------- rust/src/embeddedcli.rs | 31 ++++- rust/src/lib.rs | 21 +-- rust/src/resolve.rs | 94 +++++++++---- rust/tests/cli_resolution_test.rs | 139 ++++++++++++++------ 7 files changed, 400 insertions(+), 155 deletions(-) diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 383ac3901..4f16c7bf5 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -27,7 +27,7 @@ name = "github_copilot_sdk" [features] default = ["bundled-cli"] -bundled-cli = ["dep:dirs", "dep:tar", "dep:flate2", "dep:zip"] +bundled-cli = ["dep:tar", "dep:flate2", "dep:zip"] derive = ["dep:schemars"] test-support = [] @@ -48,7 +48,7 @@ tokio = { version = "1", features = ["io-util", "sync", "rt", "process", "net", tokio-stream = { version = "0.1", features = ["sync"] } tokio-util = { version = "0.7", default-features = false } tracing = "0.1" -dirs = { version = "5", optional = true } +dirs = "5" parking_lot = "0.12" regex = "1" getrandom = "0.2" diff --git a/rust/README.md b/rust/README.md index 5210f3b0d..215448e05 100644 --- a/rust/README.md +++ b/rust/README.md @@ -758,20 +758,31 @@ none of them are scheduled for removal. The SDK provisions the Copilot CLI binary at build time. By default the `bundled-cli` feature embeds the verified binary directly in your compiled crate, so end-user binaries are self-contained — no env var setup, no separate install, just `cargo build`. -For development you usually want the smaller dev build: +For builds that prefer a smaller artifact, disable the `bundled-cli` feature: ```toml github-copilot-sdk = { version = "0.1", default-features = false } ``` -> **Dev-mode only.** This mode is intended for local development. The -> resulting binary is not self-contained — it depends on the extracted -> CLI living in the build machine's per-user cache. Distributed builds -> (release artifacts, signed installers, etc.) should keep `bundled-cli` -> enabled or supply an explicit CLI path at runtime via -> [`CliProgram::Path`] / `COPILOT_CLI_PATH`. - -This still pins, downloads, and SHA-verifies the same CLI version, but extracts the binary into a per-user cache instead of embedding it. The runtime resolver returns that cached path. The cache is shared across crates and across embed/dev builds. +> **You become responsible for supplying the CLI at runtime.** With +> `bundled-cli` disabled, the produced binary does not contain the CLI +> and will not search the system for one. You must point it at a +> compatible CLI via [`CliProgram::Path`] (on `ClientOptions`) or the +> `COPILOT_CLI_PATH` environment variable, and you are responsible for +> guaranteeing the supplied CLI version is compatible with this SDK +> release. Do **not** assume that whatever CLI happens to be installed +> on the target system will work — the SDK and CLI are versioned +> together. +> +> **Convenience on the build machine only.** As a special case, +> `build.rs` downloads and SHA-verifies the compatible CLI version and +> drops it into the build machine's per-user cache; the runtime +> resolver on that same machine will pick it up automatically. This +> makes local development and CI ergonomic, but it does **not** carry +> over when you copy the built binary to another machine — distributed +> builds (release artifacts, signed installers, container images, etc.) +> must either keep `bundled-cli` enabled or ship the CLI alongside and +> set `CliProgram::Path` / `COPILOT_CLI_PATH`. ### How it works @@ -779,9 +790,11 @@ This still pins, downloads, and SHA-verifies the same CLI version, but extracts - `cli-version.txt` at the crate root (present in published crate tarballs and vendored slots). - Otherwise, `../nodejs/package-lock.json` (contributor build inside the github/copilot-sdk repo — matches the .NET and Go SDK conventions here). + The resolved version is baked into the crate via `cargo:rustc-env=COPILOT_SDK_CLI_VERSION` regardless of mode. The runtime resolver consumes it to recompute the on-disk path by convention, so no absolute paths leak into the rlib. + 2. **Build time:** `build.rs` downloads the platform-appropriate archive from the [`github/copilot-cli` GitHub Releases](https://github.com/github/copilot-cli/releases) (`copilot-{platform}.tar.gz` on macOS/Linux, `.zip` on Windows), live-fetches the matching `SHA256SUMS.txt`, and verifies the archive hash. Then: - **`bundled-cli` on (default, release):** embeds the raw archive bytes via `include_bytes!()`. Runtime extracts on first `Client::start()`. - - **`bundled-cli` off (dev):** extracts the binary directly into the platform cache (staging dir + atomic rename), idempotent across rebuilds. + - **`bundled-cli` off:** extracts the binary directly into the platform cache (staging file + atomic rename), idempotent across rebuilds. If the extracted binary is already present at the expected path, the download is skipped entirely — the extracted binary *is* the cache. 3. **Runtime:** in both modes the binary lives at: @@ -806,7 +819,19 @@ let options = ClientOptions::new() let client = Client::start(options).await?; ``` -In dev mode this option is a no-op — the binary's path is fixed at build time and there is no archive to re-extract. Set `COPILOT_CLI_PATH` instead if you need to point the runtime at a different binary. +With `bundled-cli` disabled the equivalent knob is the **`COPILOT_CLI_EXTRACT_DIR`** environment variable, which is honored symmetrically at build time (where `build.rs` writes the binary) and at runtime (where the resolver reads it). When set, the binary lives directly under the named directory (no per-version subdir). The most ergonomic way to pin it from a consumer crate is `.cargo/config.toml`: + +```toml +# .cargo/config.toml at the consumer's repo root +[env] +COPILOT_CLI_EXTRACT_DIR = { value = "vendor/copilot", relative = true, force = true } +``` + +`relative = true` resolves the path against the config file's directory, so the value is stable regardless of where `cargo build` is invoked from. `force = true` makes the value visible to invocations of the produced binary under `cargo run` / `cargo test`, keeping build and runtime in sync. For runtime invocations outside cargo (e.g. a deploy script running the binary directly), either export the same env var or use [`CliProgram::Path`] / `COPILOT_CLI_PATH` at runtime. + +### Skipping the bundle entirely + +Set `COPILOT_SKIP_CLI_DOWNLOAD=1` at build time to disable the entire download / bundle / cache mechanism — `build.rs` returns immediately without touching the network. Use this when you always supply the CLI at runtime via `ClientOptions::program = CliProgram::Path(...)` or `COPILOT_CLI_PATH`. Works regardless of the `bundled-cli` feature state; runtime resolution falls through to `Error::BinaryNotFound` unless one of those explicit sources resolves. ### Resolution priority @@ -815,13 +840,13 @@ In dev mode this option is a no-op — the binary's path is fixed at build time 1. Explicit `CliProgram::Path(path)` on `ClientOptions::program`. 2. `COPILOT_CLI_PATH` environment variable, if it points at a real file. 3. **`bundled-cli` on:** the embedded archive, lazily extracted on first call. -4. **`bundled-cli` off:** the build-time-extracted binary in the per-user cache. +4. **`bundled-cli` off:** the build-time-extracted binary in the per-user cache, located by recomputing the convention from `COPILOT_SDK_CLI_VERSION` + OS + optional `COPILOT_CLI_EXTRACT_DIR`. There is no PATH scanning. If none of the above resolves, `Client::start` returns `Error::BinaryNotFound`. -### Download cache (build-time) +### Download cache (build-time, embed mode) -`build.rs` re-downloads on every clean build by default. Set `BUNDLED_CLI_CACHE_DIR=` to cache the verified archive between builds (CI keys this on `-` for ~zero-cost rebuilds on cache hits). +In embed mode `build.rs` re-downloads on every clean build by default. Set `BUNDLED_CLI_CACHE_DIR=` to cache the verified archive between builds (CI keys this on `-` for ~zero-cost rebuilds on cache hits). With `bundled-cli` disabled there is no separate archive cache — the extracted binary itself is the cache. ### Platforms @@ -831,7 +856,7 @@ Supported: `darwin-arm64`, `darwin-x64`, `linux-x64`, `linux-arm64`, `win32-x64` | Feature | Default | Description | | -------------- | ------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `bundled-cli` | ✓ | Build-time CLI embedding. Pulls in `dirs`, `tar`+`flate2` (Linux/macOS), or `zip` (Windows). Disable via `default-features = false` to opt out (e.g. when shipping a smaller binary or when always supplying the CLI via `CliProgram::Path` / `COPILOT_CLI_PATH`). | +| `bundled-cli` | ✓ | Build-time CLI embedding. Pulls in `tar`+`flate2` (Linux/macOS) or `zip` (Windows). Disable via `default-features = false` to opt out (e.g. when shipping a smaller binary or when always supplying the CLI via `CliProgram::Path` / `COPILOT_CLI_PATH`). | | `derive` | — | `schema_for::()` for generating JSON Schema from Rust types (adds `schemars`). Enable when defining [tool parameters](#tool-registration). | ```toml diff --git a/rust/build.rs b/rust/build.rs index 470805dcb..be4f7c38d 100644 --- a/rust/build.rs +++ b/rust/build.rs @@ -5,12 +5,29 @@ use std::time::Duration; use sha2::Digest; fn main() { + println!("cargo:rerun-if-env-changed=COPILOT_SKIP_CLI_DOWNLOAD"); + println!("cargo:rerun-if-env-changed=COPILOT_CLI_EXTRACT_DIR"); println!("cargo:rerun-if-env-changed=BUNDLED_CLI_CACHE_DIR"); println!("cargo::rustc-check-cfg=cfg(has_bundled_cli)"); - println!("cargo::rustc-check-cfg=cfg(has_dev_cli)"); + println!("cargo::rustc-check-cfg=cfg(has_extracted_cli)"); println!("cargo:rerun-if-changed=cli-version.txt"); println!("cargo:rerun-if-changed=../nodejs/package-lock.json"); + // Hard opt-out: disable the entire download / bundle / cache mechanism + // in one step. For consumers who always supply the CLI via + // `CliProgram::Path` or `COPILOT_CLI_PATH` and don't want build.rs to + // touch the network (offline builds, locked-down CI, etc.). Works + // regardless of the `bundled-cli` cargo feature state — with neither + // `has_bundled_cli` nor `has_extracted_cli` emitted, runtime resolution + // falls straight through to `Error::BinaryNotFound` unless an explicit + // path source resolves first. + if std::env::var_os("COPILOT_SKIP_CLI_DOWNLOAD").is_some() { + println!( + "cargo:warning=COPILOT_SKIP_CLI_DOWNLOAD is set — skipping CLI download/bundle/cache" + ); + return; + } + let Some(platform) = target_platform() else { println!("cargo:warning=Unsupported target platform for Copilot CLI bundling — skipping"); return; @@ -31,7 +48,15 @@ fn main() { // the .NET `_GetCopilotCliVersion` MSBuild target and the Go // `cmd/bundler` tool. let (version, expected_hash) = resolve_version_and_hash(platform.asset_name); - let asset_name = platform.asset_name; + + // Bake the version into the crate regardless of mode. This is the + // single source of truth for "what CLI version did build.rs target", + // consumed by both the embed-mode path computation in embeddedcli.rs + // and the runtime path computation in resolve.rs (when `bundled-cli` + // is off). It's a small, machine-independent datum: no absolute + // paths, no username/home leakage, so sccache / cross-machine + // `target/` reuse stays cache-coherent. + println!("cargo:rustc-env=COPILOT_SDK_CLI_VERSION={version}"); let base_url = format!("https://github.com/github/copilot-cli/releases/download/v{version}"); let cache_dir = std::env::var("BUNDLED_CLI_CACHE_DIR") @@ -39,53 +64,82 @@ fn main() { .map(std::path::PathBuf::from); // Versioned cache key since copilot asset names don't include the version. - let cache_key = format!("v{version}-{asset_name}"); - - // Download the archive (or read from cache) and verify SHA-256. Shared - // by both build modes — embed mode includes the bytes, dev mode extracts - // them into the per-user cache. - let archive = cached_download( - &format!("{base_url}/{asset_name}"), - &cache_key, - &expected_hash, - &cache_dir, - ); - - // Sanity check: the extraction path expects `binary_name` inside the - // archive. Fail the build now (with a clear message) rather than - // shipping a broken bundle / cache if the upstream archive layout ever - // changes. - verify_binary_present_in_archive(&archive, platform.binary_name, asset_name); + let cache_key = format!("v{version}-{}", platform.asset_name); if std::env::var_os("CARGO_FEATURE_BUNDLED_CLI").is_some() { - emit_embedded(out, &archive, &version, platform.binary_name); + // Embed mode: we need the archive bytes to bake into the rlib, so + // always run the download (cache hit short-circuits inside + // `cached_download`). + let archive = cached_download( + &format!("{base_url}/{}", platform.asset_name), + &cache_key, + &expected_hash, + &cache_dir, + ); + verify_binary_present_in_archive(&archive, platform.binary_name, platform.asset_name); + emit_embedded(out, &archive); println!("cargo:rustc-cfg=has_bundled_cli"); } else { - let dev_path = extract_to_cache(&archive, &version, platform); - // Path is set as a build-time env var so `env!()` in resolve.rs can - // read it without runtime fs probing. The cargo cfg makes the - // dev-mode branch in resolve.rs discoverable. - println!( - "cargo:rustc-env=COPILOT_CLI_DEV_PATH={}", - dev_path.display() - ); - println!("cargo:rustc-cfg=has_dev_cli"); + // With `bundled-cli` off the extracted binary *is* the cache. + // Skip the upstream download entirely when it already exists at + // the expected path. No two separate caches. + // + // Runtime resolution (see `src/resolve.rs::extracted_cli_path`) + // recomputes this same path from `COPILOT_SDK_CLI_VERSION` + the + // OS-derived binary name + optional `COPILOT_CLI_EXTRACT_DIR`, + // so we don't bake an absolute path into the crate. + let install_dir = extracted_install_dir(&version); + let final_path = install_dir.join(platform.binary_name); + + if !final_path.is_file() { + let archive = cached_download( + &format!("{base_url}/{}", platform.asset_name), + &cache_key, + &expected_hash, + &cache_dir, + ); + verify_binary_present_in_archive(&archive, platform.binary_name, platform.asset_name); + extract_to_cache(&archive, &install_dir, platform); + } + + println!("cargo:rustc-cfg=has_extracted_cli"); + } +} + +/// Install directory used when `bundled-cli` is off. Mirrors the runtime +/// convention in `src/resolve.rs::extracted_cli_path`: both sides MUST +/// compute the same path from the same inputs, otherwise the runtime +/// resolver won't find what build.rs extracted. +/// +/// If `COPILOT_CLI_EXTRACT_DIR` is set the binary lives directly under +/// that directory (no per-version subdir) — useful for vendored slots and +/// for `.cargo/config.toml [env]`-style pinning that's symmetric between +/// build-time write and runtime read. Otherwise the binary lives under +/// `/github-copilot-sdk/cli//`. +fn extracted_install_dir(version: &str) -> PathBuf { + if let Some(custom) = std::env::var_os("COPILOT_CLI_EXTRACT_DIR") { + PathBuf::from(custom) + } else { + let cache = dirs::cache_dir().unwrap_or_else(std::env::temp_dir); + cache + .join("github-copilot-sdk") + .join("cli") + .join(sanitize_version(version)) } } /// Emit the `bundled_cli.rs` glue + `copilot_cli.archive` blob into `OUT_DIR` -/// for embed mode (`bundled-cli` cargo feature on). -fn emit_embedded(out: &Path, archive: &[u8], version: &str, binary_name: &str) { +/// for embed mode (`bundled-cli` cargo feature on). The version is exposed +/// crate-wide via the unconditional `cargo:rustc-env=COPILOT_SDK_CLI_VERSION` +/// emit; the binary name is OS-derived at runtime — so all we need to +/// generate here is the archive blob include. +fn emit_embedded(out: &Path, archive: &[u8]) { std::fs::write(out.join("copilot_cli.archive"), archive) .expect("failed to write copilot_cli.archive"); - let generated = format!( - r#"// Auto-generated by github-copilot-sdk build.rs. Do not edit. + let generated = r#"// Auto-generated by github-copilot-sdk build.rs. Do not edit. pub(super) static CLI_ARCHIVE: &[u8] = include_bytes!("copilot_cli.archive"); -pub(super) static CLI_VERSION: &str = "{version}"; -pub(super) static CLI_BINARY_NAME: &str = "{binary_name}"; -"#, - ); +"#; std::fs::write(out.join("bundled_cli.rs"), generated).expect("failed to write bundled_cli.rs"); } @@ -230,27 +284,26 @@ fn target_platform() -> Option { } } -/// Dev-mode extraction: write the single binary entry from `archive` to -/// `/github-copilot-sdk/cli//`. +/// Write the single binary entry from `archive` to +/// `/` and return the resulting path. /// Idempotent — returns the existing path if a previous build already -/// populated the target. Mirrors `embeddedcli::install` exactly (including -/// `sanitize_version`) so the dev-mode and embed-mode paths share the same -/// cache layout on every platform. -fn extract_to_cache(archive: &[u8], version: &str, platform: Platform) -> PathBuf { - let cache_root = dirs::cache_dir().unwrap_or_else(std::env::temp_dir); - let install_dir = cache_root - .join("github-copilot-sdk") - .join("cli") - .join(sanitize_version(version)); +/// populated the target. +/// +/// Uses file-level staging + atomic rename so a concurrent reader during +/// a parallel `cargo build` race never observes a partially-written +/// binary. `fs::rename` for files is atomic on both Unix and Windows +/// (Windows uses `MoveFileExW` with `MOVEFILE_REPLACE_EXISTING`); for +/// directories it is not, which is why we stage at file granularity. +fn extract_to_cache(archive: &[u8], install_dir: &Path, platform: Platform) -> PathBuf { let final_path = install_dir.join(platform.binary_name); - // Per-version install dir means a present file at this path is the - // binary we want — no need to re-extract. Matches embeddedcli::install. + // Caller already gated on `final_path.is_file()`; this is a safety + // net for any future caller that forgets. if final_path.is_file() { return final_path; } - std::fs::create_dir_all(&install_dir).unwrap_or_else(|e| { + std::fs::create_dir_all(install_dir).unwrap_or_else(|e| { panic!( "failed to create install dir {}: {e}", install_dir.display() @@ -258,14 +311,49 @@ fn extract_to_cache(archive: &[u8], version: &str, platform: Platform) -> PathBu }); let bytes = extract_binary_bytes(archive, platform); - std::fs::write(&final_path, &bytes) - .unwrap_or_else(|e| panic!("failed to write {}: {e}", final_path.display())); + + // Staging file is a sibling of the final binary so the rename stays + // on the same filesystem (cross-fs rename is not atomic). PID + nanos + // disambiguate concurrent builds racing on the same cache. + let nanos = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_nanos()) + .unwrap_or(0); + let staging_path = install_dir.join(format!( + ".{}.staging-{}-{nanos}", + platform.binary_name, + std::process::id(), + )); + + if let Err(e) = std::fs::write(&staging_path, &bytes) { + let _ = std::fs::remove_file(&staging_path); + panic!( + "failed to write staging file {}: {e}", + staging_path.display() + ); + } #[cfg(unix)] { use std::os::unix::fs::PermissionsExt; - std::fs::set_permissions(&final_path, std::fs::Permissions::from_mode(0o755)) - .unwrap_or_else(|e| panic!("failed to chmod {}: {e}", final_path.display())); + if let Err(e) = + std::fs::set_permissions(&staging_path, std::fs::Permissions::from_mode(0o755)) + { + let _ = std::fs::remove_file(&staging_path); + panic!("failed to chmod {}: {e}", staging_path.display()); + } + } + + // Atomic file-replace on both Unix and Windows. If a concurrent build + // already produced the same file the rename overwrites it; the bytes + // are SHA-verified-identical so replacement is safe. + if let Err(e) = std::fs::rename(&staging_path, &final_path) { + let _ = std::fs::remove_file(&staging_path); + panic!( + "failed to rename {} -> {}: {e}", + staging_path.display(), + final_path.display() + ); } final_path @@ -273,8 +361,8 @@ fn extract_to_cache(archive: &[u8], version: &str, platform: Platform) -> PathBu /// Replace characters outside `[a-zA-Z0-9._-]` with `_` so the version /// string is always safe to use as a path component. Kept in sync with -/// `embeddedcli::sanitize_version` so embed-mode and dev-mode resolve to -/// the same cache directory for any given version. +/// `embeddedcli::sanitize_version` and `resolve::sanitize_version` so all +/// three resolve to the same cache directory for any given version. fn sanitize_version(version: &str) -> String { version .chars() @@ -286,9 +374,10 @@ fn sanitize_version(version: &str) -> String { } /// Extract the single `binary_name` entry from the release archive. Reused -/// between embed mode's `verify_binary_present_in_archive` and dev mode's -/// `extract_to_cache`. Panics if the entry isn't found — callers have -/// already invoked `verify_binary_present_in_archive`. +/// between embed mode's `verify_binary_present_in_archive` and the +/// `extract_to_cache` path used when `bundled-cli` is off. Panics if the +/// entry isn't found — callers have already invoked +/// `verify_binary_present_in_archive`. fn extract_binary_bytes(archive: &[u8], platform: Platform) -> Vec { if platform.asset_name.ends_with(".zip") { let cursor = std::io::Cursor::new(archive); @@ -330,7 +419,7 @@ fn extract_binary_bytes(archive: &[u8], platform: Platform) -> Vec { } } panic!( - "binary `{}` not found in archive `{}` (extract_to_cache)", + "binary `{}` not found in archive `{}`", platform.binary_name, platform.asset_name ); } diff --git a/rust/src/embeddedcli.rs b/rust/src/embeddedcli.rs index fe571616e..504edbf67 100644 --- a/rust/src/embeddedcli.rs +++ b/rust/src/embeddedcli.rs @@ -26,13 +26,30 @@ use tracing::{info, warn}; // When the `bundled-cli` cargo feature is enabled and the target platform is // supported, build.rs generates `bundled_cli.rs` exposing the raw archive -// bytes plus the version + binary-name constants the runtime install path -// consumes. +// bytes. The CLI version is exposed crate-wide via the +// `cargo:rustc-env=COPILOT_SDK_CLI_VERSION` emit (see `build.rs`), and the +// binary name is OS-derived — so no other generated constants are needed. #[cfg(has_bundled_cli)] mod build_time { include!(concat!(env!("OUT_DIR"), "/bundled_cli.rs")); } +// Pinned at build time and consumed by both install paths (path/install_at). +// Sourced from the unconditional `COPILOT_SDK_CLI_VERSION` env emit in +// build.rs — the single source of truth for "what version did build.rs +// target", shared with the runtime resolver used when `bundled-cli` is off. +#[cfg(has_bundled_cli)] +const CLI_VERSION: &str = env!("COPILOT_SDK_CLI_VERSION"); + +// OS-derived; matches the release-archive entry name and the on-disk +// filename. No need to bake this — `cfg(windows)` reflects the target +// the runtime is running on, which by definition is the same target +// build.rs targeted. +#[cfg(all(has_bundled_cli, windows))] +const CLI_BINARY_NAME: &str = "copilot.exe"; +#[cfg(all(has_bundled_cli, not(windows)))] +const CLI_BINARY_NAME: &str = "copilot"; + #[cfg(feature = "bundled-cli")] static INSTALLED_PATH: OnceLock> = OnceLock::new(); @@ -56,10 +73,10 @@ pub(crate) fn path() -> Option { .get_or_init(|| { #[cfg(has_bundled_cli)] { - let dir = default_install_dir(build_time::CLI_VERSION); + let dir = default_install_dir(CLI_VERSION); match install(&dir, build_time::CLI_ARCHIVE) { Ok(path) => { - info!(path = %path.display(), version = build_time::CLI_VERSION, "embedded CLI installed"); + info!(path = %path.display(), version = CLI_VERSION, "embedded CLI installed"); return Some(path); } Err(e) => { @@ -85,7 +102,7 @@ pub(crate) fn install_at(extract_dir: &Path) -> Option { { match install(extract_dir, build_time::CLI_ARCHIVE) { Ok(path) => { - info!(path = %path.display(), version = build_time::CLI_VERSION, "embedded CLI installed"); + info!(path = %path.display(), version = CLI_VERSION, "embedded CLI installed"); return Some(path); } Err(e) => { @@ -117,7 +134,7 @@ fn install(install_dir: &Path, archive: &[u8]) -> Result Result, } @@ -832,9 +834,12 @@ impl ClientOptions { /// Override the directory where the bundled CLI binary is extracted on /// first use. See [`Self::bundled_cli_extract_dir`]. /// - /// This only affects builds with the default `bundled-cli` feature enabled. - /// In `default-features = false` dev builds, build.rs has already extracted - /// the CLI and this option is ignored. + /// Only applies when the `bundled-cli` cargo feature is on. With + /// `bundled-cli` disabled (`default-features = false`), set + /// `COPILOT_CLI_EXTRACT_DIR` to relocate the build-time extraction + /// (honored symmetrically at build and runtime), or use + /// [`CliProgram::Path`] / `COPILOT_CLI_PATH` to point at a different + /// binary at runtime. pub fn with_bundled_cli_extract_dir(mut self, dir: impl Into) -> Self { self.bundled_cli_extract_dir = Some(dir.into()); self diff --git a/rust/src/resolve.rs b/rust/src/resolve.rs index 2d36b56d9..cd813407a 100644 --- a/rust/src/resolve.rs +++ b/rust/src/resolve.rs @@ -5,10 +5,10 @@ //! 1. An explicit path supplied by the application via //! [`CliProgram::Path`](crate::CliProgram::Path). //! 2. The `COPILOT_CLI_PATH` environment variable. -//! 3. The bundled CLI embedded in this crate at build time (gated on the -//! default `bundled-cli` cargo feature). +//! 3. The bundled CLI embedded in this crate at build time (when the +//! `bundled-cli` cargo feature is on, the default). //! 4. The build-time-extracted CLI in the per-user cache (when -//! `bundled-cli` is disabled, i.e. dev builds). +//! `bundled-cli` is off). //! //! There is no PATH scanning and no walking of standard install locations. //! If none of the above resolves to a real file, @@ -25,9 +25,14 @@ use crate::Error; /// Resolve the CLI binary, optionally overriding the directory the bundled /// CLI is extracted to. Called by `Client::start` to thread /// `ClientOptions::bundled_cli_extract_dir` through to -/// `embeddedcli::install_at`. `extract_dir` only affects embed mode — in -/// dev mode the binary path is baked in at build time and `extract_dir` -/// is ignored (there's no archive to re-extract). +/// `embeddedcli::install_at`. `extract_dir` only applies when the +/// `bundled-cli` feature is on — with it off the binary lives at a +/// build-time-known conventional location and `extract_dir` is ignored +/// (there's no archive to re-extract; pointing the lookup elsewhere +/// would be exactly equivalent to setting `CliProgram::Path`). Set +/// `COPILOT_CLI_EXTRACT_DIR` at build time to relocate that extraction; +/// the same env var is honored at runtime to find binaries written +/// under it. pub(crate) fn copilot_binary_with_extract_dir( extract_dir: Option<&Path>, ) -> Result { @@ -56,7 +61,7 @@ pub(crate) fn copilot_binary_with_extract_dir( #[cfg(not(feature = "bundled-cli"))] { let _ = extract_dir; - if let Some(path) = dev_cli_path() { + if let Some(path) = extracted_cli_path() { return Ok(path); } } @@ -72,22 +77,65 @@ pub(crate) fn copilot_binary_with_extract_dir( /// Path to the CLI extracted into the per-user cache by `build.rs` when /// `bundled-cli` is disabled. Returns `None` if the cached file is missing -/// (e.g. the user deleted the cache after building). -#[cfg(not(feature = "bundled-cli"))] -fn dev_cli_path() -> Option { - // `has_dev_cli` is emitted by build.rs only when it successfully extracted - // the CLI for a supported target. On unsupported targets (where - // target_platform() returns None) the cfg is absent. - #[cfg(has_dev_cli)] - { - let path = PathBuf::from(env!("COPILOT_CLI_DEV_PATH")); - if path.is_file() { - return Some(path); - } - warn!( - path = %path.display(), - "build-time-extracted CLI is missing from cache; rebuild the crate to re-extract" - ); +/// (e.g. the user deleted the cache after building, or built with +/// `COPILOT_SKIP_CLI_DOWNLOAD`). +/// +/// The path is recomputed from the build-time-baked +/// `COPILOT_SDK_CLI_VERSION`, the OS-derived binary name, and the +/// optional `COPILOT_CLI_EXTRACT_DIR` env var. This must match +/// `build.rs::extracted_install_dir` exactly — both sides implement the +/// same convention. We deliberately don't bake the resolved path into +/// the crate at build time: an absolute path leaks the build machine's +/// `$HOME` / `$LOCALAPPDATA` into the artifact, breaks sccache across +/// machines, and prevents copying `target/` between hosts. +#[cfg(all(not(feature = "bundled-cli"), has_extracted_cli))] +fn extracted_cli_path() -> Option { + let version = env!("COPILOT_SDK_CLI_VERSION"); + let binary = if cfg!(windows) { + "copilot.exe" + } else { + "copilot" + }; + + let dir = match env::var_os("COPILOT_CLI_EXTRACT_DIR") { + Some(custom) => PathBuf::from(custom), + None => dirs::cache_dir() + .unwrap_or_else(env::temp_dir) + .join("github-copilot-sdk") + .join("cli") + .join(sanitize_version(version)), + }; + + let path = dir.join(binary); + if path.is_file() { + return Some(path); } + warn!( + path = %path.display(), + "expected build-time-extracted CLI is missing; rebuild the crate or set COPILOT_CLI_PATH" + ); None } + +/// `has_extracted_cli` is absent when the target is unsupported or the +/// build opted out via `COPILOT_SKIP_CLI_DOWNLOAD`. In both cases there's +/// no binary to look up, so the resolver returns `None` immediately. +#[cfg(all(not(feature = "bundled-cli"), not(has_extracted_cli)))] +fn extracted_cli_path() -> Option { + None +} + +/// Replace characters outside `[a-zA-Z0-9._-]` with `_`. Kept in sync +/// with `build.rs::sanitize_version` and `embeddedcli::sanitize_version` +/// so all three resolve to the same cache directory for any given +/// version. +#[cfg(all(not(feature = "bundled-cli"), has_extracted_cli))] +fn sanitize_version(version: &str) -> String { + version + .chars() + .map(|c| match c { + 'a'..='z' | 'A'..='Z' | '0'..='9' | '.' | '-' | '_' => c, + _ => '_', + }) + .collect() +} diff --git a/rust/tests/cli_resolution_test.rs b/rust/tests/cli_resolution_test.rs index a80c71297..72bebdcb2 100644 --- a/rust/tests/cli_resolution_test.rs +++ b/rust/tests/cli_resolution_test.rs @@ -1,10 +1,10 @@ //! Tests for the build-time and runtime CLI provisioning path. //! -//! Covers the `COPILOT_CLI_PATH` env override, the dev-mode (no -//! `bundled-cli`) build-time extracted binary, and the embed-mode lazy -//! extraction. Mutating `COPILOT_CLI_PATH` is process-global, so all such -//! tests use `serial_test` to avoid races with each other (and with the -//! e2e tests which also read it). +//! Covers the `COPILOT_CLI_PATH` env override, the build-time-extracted +//! binary used when `bundled-cli` is off, and the embed-mode lazy +//! extraction. Mutating env vars is process-global, so all such tests +//! use `serial_test` to avoid races with each other (and with the e2e +//! tests which also read them). use std::path::PathBuf; @@ -24,14 +24,7 @@ fn unset_env(key: &str) { } fn set_env(key: &str, value: &str) { - // SAFETY: these tests are serialized with #[serial(copilot_cli_path)] - // so no other test in this binary mutates COPILOT_CLI_PATH while - // we hold the lock. POSIX `setenv`/`unsetenv` are generally - // thread-safe on modern platforms, and we use `current_thread` - // tokio runtimes to avoid concurrent reads from worker threads. - // This doesn't satisfy the strict Rust 2024 safety contract - // (other tests in the binary may read env vars), but the practical - // race window is negligible. + // SAFETY: see `unset_env`. unsafe { std::env::set_var(key, value) }; } @@ -89,9 +82,9 @@ async fn stale_env_override_falls_through() { let result = Client::start(opts).await; unset_env("COPILOT_CLI_PATH"); - // In a normally-configured build (either bundled-cli or dev mode) the - // resolver should find a binary via the next source. Failing here - // would mean fallthrough is broken. + // In a normally-configured build (either `bundled-cli` on or off) + // the resolver should find a binary via the next source. Failing + // here would mean fallthrough is broken. if let Err(e) = &result { assert!( !matches!(e, Error::BinaryNotFound { .. }), @@ -100,37 +93,105 @@ async fn stale_env_override_falls_through() { } } -/// In dev mode (no `bundled-cli` feature) build.rs writes the extracted -/// binary into the per-user cache and emits its path as -/// `COPILOT_CLI_DEV_PATH`. The runtime resolver returns that path. -#[cfg(has_dev_cli)] +/// With `bundled-cli` off, `build.rs` extracts the binary into the +/// per-user cache and the runtime resolver recomputes its location from +/// `COPILOT_SDK_CLI_VERSION` + the OS-derived binary name. This test +/// mirrors that convention and asserts the file is on disk where the +/// resolver expects to find it. +#[cfg(all(not(feature = "bundled-cli"), has_extracted_cli))] #[test] -fn dev_mode_extracted_binary_exists() { - let path = PathBuf::from(env!("COPILOT_CLI_DEV_PATH")); +fn extracted_binary_present_at_conventional_path() { + let version = env!("COPILOT_SDK_CLI_VERSION"); + let binary = if cfg!(windows) { + "copilot.exe" + } else { + "copilot" + }; + let sanitized = sanitize_version_for_test(version); + let path = dirs::cache_dir() + .expect("platform cache dir") + .join("github-copilot-sdk") + .join("cli") + .join(sanitized) + .join(binary); assert!( path.is_file(), - "expected build.rs to extract the CLI to {} (dev mode)", + "expected build.rs to extract the CLI to {} (`bundled-cli` off)", path.display() ); +} - // Confirm the cache layout matches what runtime resolution expects. - let mut found = false; - let comps: Vec<_> = path.components().collect(); - for window in comps.windows(2) { - if let (std::path::Component::Normal(a), std::path::Component::Normal(b)) = - (&window[0], &window[1]) - && a.to_str() == Some("github-copilot-sdk") - && b.to_str() == Some("cli") - { - found = true; - break; - } +#[cfg(all(not(feature = "bundled-cli"), has_extracted_cli))] +fn sanitize_version_for_test(version: &str) -> String { + version + .chars() + .map(|c| match c { + 'a'..='z' | 'A'..='Z' | '0'..='9' | '.' | '-' | '_' => c, + _ => '_', + }) + .collect() +} + +/// With `bundled-cli` off, the resolver locates the build-time-extracted +/// binary without any runtime configuration. Observed via +/// `Client::start`: any outcome other than `BinaryNotFound` means the +/// resolver succeeded. +#[cfg(all(not(feature = "bundled-cli"), has_extracted_cli))] +#[tokio::test(flavor = "current_thread")] +#[serial(copilot_cli_path)] +async fn unbundled_resolver_finds_extracted_binary() { + unset_env("COPILOT_CLI_PATH"); + unset_env("COPILOT_CLI_EXTRACT_DIR"); + + let opts = ClientOptions::default().with_program(CliProgram::Resolve); + let result = Client::start(opts).await; + if let Err(e) = result { + assert!( + !matches!(e, Error::BinaryNotFound { .. }), + "resolver returned BinaryNotFound with `bundled-cli` off: {e}" + ); } - assert!( - found, - "dev path {} does not contain the expected `github-copilot-sdk/cli/` segments", - path.display() +} + +/// With `bundled-cli` off, `COPILOT_CLI_EXTRACT_DIR` set at runtime +/// redirects the resolver to look directly under the named directory +/// (no per-version subdir, matching the build-time write semantics). +/// We place a fake `copilot[.exe]` there and assert the resolver picks +/// it up — failing here means the build-time / runtime convention has +/// drifted. +#[cfg(all(not(feature = "bundled-cli"), has_extracted_cli))] +#[tokio::test(flavor = "current_thread")] +#[serial(copilot_cli_path)] +async fn extract_dir_runtime_override_is_honored() { + let tmp = tempfile::tempdir().expect("create tempdir"); + let binary = if cfg!(windows) { + "copilot.exe" + } else { + "copilot" + }; + let fake = tmp.path().join(binary); + std::fs::write(&fake, b"").expect("write fake binary"); + + unset_env("COPILOT_CLI_PATH"); + set_env( + "COPILOT_CLI_EXTRACT_DIR", + tmp.path().to_str().expect("utf-8 tempdir path"), ); + + let opts = ClientOptions::default().with_program(CliProgram::Resolve); + let result = Client::start(opts).await; + + unset_env("COPILOT_CLI_EXTRACT_DIR"); + + if let Err(e) = result { + assert!( + !matches!(e, Error::BinaryNotFound { .. }), + "EXTRACT_DIR-redirected resolver returned BinaryNotFound: {e}" + ); + } + + drop(tmp); + let _ = fake; } /// Build-time version pin: `cli-version.txt` (when present) must be a From 7616acf7c6f1a171a488820360f772ffa5aa4dc6 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Wed, 27 May 2026 06:44:31 -0700 Subject: [PATCH 11/12] Log extracted CLI path when extract_to_cache actually writes The download path emits a 'Downloading ' cargo warning on cache miss, but the extract step that follows is silent. With the separate download cache gone (extracted binary is the cache), a first build gave no surface telling the contributor where the ~160 MB binary landed. Mirror the existing warning style with a one-line 'Extracted Copilot CLI to ' after the atomic rename succeeds. Quiet on the hot path: caller short-circuits on is_file() so this only fires on a true cache miss. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- rust/build.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/rust/build.rs b/rust/build.rs index be4f7c38d..4d15c8495 100644 --- a/rust/build.rs +++ b/rust/build.rs @@ -356,6 +356,15 @@ fn extract_to_cache(archive: &[u8], install_dir: &Path, platform: Platform) -> P ); } + // Surface where the binary landed so contributors can find it. Quiet + // on the hot path: the caller's `is_file()` short-circuit (and the + // safety net at the top of this function) means this only fires on a + // true cache miss. + println!( + "cargo:warning=Extracted Copilot CLI to {}", + final_path.display() + ); + final_path } From 4e65091e8ed64e2c46e8e8375ac420ecacb66264 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Wed, 27 May 2026 06:49:36 -0700 Subject: [PATCH 12/12] Gate package-lock.json rerun-if-changed on lockfile presence Cargo treats a missing rerun-if-changed path as 'always rerun', so the unconditional 'cargo:rerun-if-changed=../nodejs/package-lock.json' declaration was forcing build.rs to re-run on every cargo build for consumers without a sibling nodejs/ directory (vendored slots, published crates). cli-version.txt is the source of truth in those deployments; the lockfile is only a fallback inside this repo. Now: declare the lockfile rerun only when the file actually exists. In-tree contributor builds keep re-running when @github/copilot is bumped via the lockfile; vendored consumers get true zero-cost rebuilds when nothing has changed. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- rust/build.rs | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/rust/build.rs b/rust/build.rs index 4d15c8495..64659107e 100644 --- a/rust/build.rs +++ b/rust/build.rs @@ -11,7 +11,22 @@ fn main() { println!("cargo::rustc-check-cfg=cfg(has_bundled_cli)"); println!("cargo::rustc-check-cfg=cfg(has_extracted_cli)"); println!("cargo:rerun-if-changed=cli-version.txt"); - println!("cargo:rerun-if-changed=../nodejs/package-lock.json"); + + // Only declare the lockfile rerun when the lockfile actually exists. + // Cargo treats `rerun-if-changed` for a missing path as "always rerun" + // — so unconditionally declaring this on consumers without a sibling + // `nodejs/` (vendored slots, published crates) would force build.rs + // to re-run on every `cargo build` even when nothing has changed. + // The lockfile path is only the source-of-truth in this repo's + // contributor builds; everywhere else `cli-version.txt` is canonical. + let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR is set"); + let lockfile = Path::new(&manifest_dir) + .join("..") + .join("nodejs") + .join("package-lock.json"); + if lockfile.is_file() { + println!("cargo:rerun-if-changed={}", lockfile.display()); + } // Hard opt-out: disable the entire download / bundle / cache mechanism // in one step. For consumers who always supply the CLI via