From d742fff11c63e8248317174154dea93ba76d62ae Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 19 Jun 2026 17:05:19 +0000 Subject: [PATCH 1/6] Make kubectl postCreateCommand non-fatal to fix devcontainer install --- .devcontainer/devcontainer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index ee38034..3d2e85c 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -38,7 +38,7 @@ // "forwardPorts": [], // Use 'postCreateCommand' to run commands after the container is created. - "postCreateCommand": "kubectl version --client --short 2>/dev/null || kubectl version --client 2>/dev/null", + "postCreateCommand": "kubectl version --client 2>/dev/null || true", // Configure tool-specific properties. "customizations": { From 2b784a6fda1b5a827ca1d1868c16f52578f717ed Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 19 Jun 2026 17:15:15 +0000 Subject: [PATCH 2/6] CI: run devcontainer up to exercise container startup and lifecycle --- .github/workflows/build-containers.yml | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-containers.yml b/.github/workflows/build-containers.yml index 24ac6ca..d46f048 100644 --- a/.github/workflows/build-containers.yml +++ b/.github/workflows/build-containers.yml @@ -39,8 +39,26 @@ jobs: run: | npm install -g @devcontainers/cli - - name: Build devcontainer + # `devcontainer build` only builds the image (Dockerfile + features); it does + # NOT create the container or run lifecycle commands such as postCreateCommand. + # Use `devcontainer up` so the container is actually started and the lifecycle + # commands run, which catches startup failures the plain build step misses. + - name: Start devcontainer run: | - echo "Building devcontainer..." - devcontainer build --workspace-folder . - echo "Devcontainer build completed successfully" \ No newline at end of file + echo "Starting devcontainer (build + create + lifecycle commands)..." + devcontainer up --workspace-folder . + echo "Devcontainer started successfully" + + - name: Smoke test devcontainer + run: | + echo "Running smoke test inside the devcontainer..." + devcontainer exec --workspace-folder . bash -lc 'echo "Devcontainer is up and exec works"' + + - name: Tear down devcontainer + if: always() + run: | + container_id=$(docker ps -aq --filter "label=devcontainer.local_folder=${GITHUB_WORKSPACE}") + if [ -n "$container_id" ]; then + echo "Removing devcontainer ($container_id)..." + docker rm -f $container_id + fi \ No newline at end of file From 0ad6ecaaaa50019dedf1b8347a03aee9b8148aae Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 19 Jun 2026 17:27:36 +0000 Subject: [PATCH 3/6] Disable updateRemoteUserUID to fix devcontainer up GID 1001 collision --- .devcontainer/devcontainer.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 3d2e85c..cf4b08a 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -31,6 +31,12 @@ }, "overrideFeatureInstallOrder": ["ghcr.io/devcontainers/features/rust"], + // Don't remap the container user's UID/GID to match the host user. The sshd + // feature creates an `ssh` group at GID 1001, which collides with the host + // UID/GID (1001) used by CI runners. The CLI's automatic remap then fails with + // `groupmod: GID '1001' already exists`, breaking `devcontainer up`. + "updateRemoteUserUID": false, + // Features to add to the dev container. More info: https://containers.dev/features. // "features": {}, From d7cad68d35ab253cbc0a3b517593951828b318f1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 19 Jun 2026 17:45:08 +0000 Subject: [PATCH 4/6] Add envbuilder CI job + verification, make node user explicit --- .devcontainer/Dockerfile | 6 ++ .devcontainer/devcontainer.json | 12 +++- .github/workflows/build-containers.yml | 29 +++++++-- scripts/test-envbuilder.sh | 56 +++++++++++++++++ scripts/verify-devcontainer.sh | 86 ++++++++++++++++++++++++++ 5 files changed, 184 insertions(+), 5 deletions(-) create mode 100755 scripts/test-envbuilder.sh create mode 100755 scripts/verify-devcontainer.sh diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 5d2f340..0482768 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -19,3 +19,9 @@ ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright RUN npm install -g playwright \ && playwright install --with-deps chromium \ && chmod -R a+rx /ms-playwright + +# Default to the non-root `node` user. envbuilder (used by Coder) picks its target +# user from the last `USER` directive when `containerUser`/`remoteUser` are not +# applied from image metadata, so make the intended user explicit here to avoid +# dropping into a root shell. +USER node diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index cf4b08a..92b0684 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -31,10 +31,18 @@ }, "overrideFeatureInstallOrder": ["ghcr.io/devcontainers/features/rust"], + // Connect as the `node` user. Set this explicitly rather than relying on the + // base image's `remoteUser` metadata: envbuilder (used by Coder) does not honor + // image-metadata `remoteUser` and falls back to the image's `USER` (root), + // which would otherwise drop Coder workspaces into a root shell. + "remoteUser": "node", + "containerUser": "node", + // Don't remap the container user's UID/GID to match the host user. The sshd // feature creates an `ssh` group at GID 1001, which collides with the host // UID/GID (1001) used by CI runners. The CLI's automatic remap then fails with // `groupmod: GID '1001' already exists`, breaking `devcontainer up`. + // (envbuilder ignores this key; it only affects the devcontainer CLI.) "updateRemoteUserUID": false, // Features to add to the dev container. More info: https://containers.dev/features. @@ -44,7 +52,9 @@ // "forwardPorts": [], // Use 'postCreateCommand' to run commands after the container is created. - "postCreateCommand": "kubectl version --client 2>/dev/null || true", + // The sentinel file lets CI verify that the lifecycle actually ran to + // completion (both the devcontainer CLI and envbuilder honor this hook). + "postCreateCommand": "kubectl version --client 2>/dev/null || true; touch \"$HOME/.devcontainer-postcreate-done\"", // Configure tool-specific properties. "customizations": { diff --git a/.github/workflows/build-containers.yml b/.github/workflows/build-containers.yml index d46f048..b858960 100644 --- a/.github/workflows/build-containers.yml +++ b/.github/workflows/build-containers.yml @@ -9,7 +9,7 @@ jobs: name: Build Dockerfile runs-on: namespace-profile-devcontainer timeout-minutes: 20 - + steps: - name: Checkout code uses: actions/checkout@v6 @@ -23,11 +23,32 @@ jobs: docker build -t devcontainer-dockerfile .devcontainer/ echo "Dockerfile build completed successfully" + build-envbuilder: + name: Build Envbuilder + runs-on: namespace-profile-devcontainer + timeout-minutes: 30 + + steps: + - name: Checkout code + uses: actions/checkout@v6 + + # Coder builds workspaces with envbuilder, NOT the devcontainer CLI. The two + # diverge on user selection, UID/GID remapping and feature install order, so + # the CLI job below can pass while a Coder workspace fails to start. This job + # exercises the envbuilder path and runs scripts/verify-devcontainer.sh as the + # post-build init script to assert: user is `node` (not root), tmux works, and + # the lifecycle (postCreateCommand) ran to completion. A non-zero exit from the + # init script fails the job. + - name: Build and verify with envbuilder + run: | + echo "Building dev container with envbuilder (Coder's builder)..." + scripts/test-envbuilder.sh + build-devcontainer: name: Build Devcontainer runs-on: namespace-profile-devcontainer timeout-minutes: 30 - + steps: - name: Checkout code uses: actions/checkout@v6 @@ -60,5 +81,5 @@ jobs: container_id=$(docker ps -aq --filter "label=devcontainer.local_folder=${GITHUB_WORKSPACE}") if [ -n "$container_id" ]; then echo "Removing devcontainer ($container_id)..." - docker rm -f $container_id - fi \ No newline at end of file + docker rm -f "$container_id" + fi diff --git a/scripts/test-envbuilder.sh b/scripts/test-envbuilder.sh new file mode 100755 index 0000000..6662818 --- /dev/null +++ b/scripts/test-envbuilder.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +# +# Build this dev container the same way Coder does (with envbuilder) and run the +# verification checks, so you can reproduce and debug Coder-only startup issues +# (e.g. "drops to root", "tmux won't run", "something isn't completing") +# locally and read the full envbuilder build log. +# +# Coder builds workspaces with envbuilder, NOT the `@devcontainers/cli`. The two +# diverge on user selection, UID/GID remapping and feature install order, so the +# CLI can succeed while Coder fails. This script exercises the envbuilder path. +# +# Usage: +# scripts/test-envbuilder.sh +# +# Requirements: +# - docker +# +# Optional environment variables: +# ENVBUILDER_IMAGE envbuilder image to use (default: ghcr.io/coder/envbuilder:latest) +# CACHE_DIR host directory for the envbuilder layer cache (speeds up reruns) + +set -euo pipefail + +repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +envbuilder_image="${ENVBUILDER_IMAGE:-ghcr.io/coder/envbuilder:latest}" + +if ! command -v docker >/dev/null 2>&1; then + echo "error: docker is required but not found on PATH" >&2 + exit 1 +fi + +# The repo is mounted into the envbuilder workspace. envbuilder discovers +# `.devcontainer/devcontainer.json` automatically. After the build + lifecycle +# commands complete, it runs ENVBUILDER_INIT_SCRIPT as the target user; we point +# that at the verification script (resolved from the mounted workspace). +workspace_folder="/workspaces/devcontainer" +init_script="bash ${workspace_folder}/scripts/verify-devcontainer.sh" + +docker_args=( + run --rm + -e "ENVBUILDER_WORKSPACE_FOLDER=${workspace_folder}" + -e "ENVBUILDER_INIT_SCRIPT=${init_script}" + -v "${repo_root}:${workspace_folder}" +) + +# Optionally persist the layer cache between runs for faster iteration. +if [ -n "${CACHE_DIR:-}" ]; then + mkdir -p "${CACHE_DIR}" + docker_args+=(-v "${CACHE_DIR}:/cache" -e "ENVBUILDER_LAYER_CACHE_DIR=/cache") +fi + +docker_args+=("${envbuilder_image}") + +echo "=== Running envbuilder ($envbuilder_image) on $repo_root ===" +echo "docker ${docker_args[*]}" +exec docker "${docker_args[@]}" diff --git a/scripts/verify-devcontainer.sh b/scripts/verify-devcontainer.sh new file mode 100755 index 0000000..9595a1b --- /dev/null +++ b/scripts/verify-devcontainer.sh @@ -0,0 +1,86 @@ +#!/usr/bin/env bash +# +# Verify that the built dev container started up correctly. +# +# This runs *inside* the container after the build and lifecycle commands have +# completed. It is used both as the envbuilder `ENVBUILDER_INIT_SCRIPT` (the +# source of truth for how Coder builds this workspace) and can be run by hand +# inside any container started from this dev container. +# +# It asserts the things that have previously broken on Coder/envbuilder: +# 1. the effective user is `node`, not root, +# 2. tmux is on PATH and can start a session that reads ~/.tmux.conf, +# 3. the lifecycle ran to completion (postCreateCommand sentinel exists). +# +# Any failed check exits non-zero so CI fails loudly instead of silently +# dropping you into a broken shell. + +set -euo pipefail + +failures=0 + +fail() { + echo "FAIL: $*" >&2 + failures=$((failures + 1)) +} + +pass() { + echo "PASS: $*" +} + +echo "=== Verifying dev container startup ===" +echo "whoami: $(whoami)" +echo "id: $(id)" +echo "HOME: ${HOME:-}" + +# 1. Effective user must be `node`, not root. +if [ "$(id -un)" = "node" ]; then + pass "running as expected user 'node'" +else + fail "expected to run as 'node' but running as '$(id -un)' (uid=$(id -u))" +fi + +# 2. tmux must be on PATH and able to start a session that reads ~/.tmux.conf. +if command -v tmux >/dev/null 2>&1; then + pass "tmux is on PATH ($(command -v tmux))" + + session="verify-$$" + tmux_conf="${HOME}/.tmux.conf" + + if [ -f "$tmux_conf" ]; then + pass "tmux config present at $tmux_conf" + else + fail "tmux config missing at $tmux_conf" + fi + + # Start a detached session, explicitly loading the user's config so a broken + # config (or a missing HOME) fails the check rather than silently starting a + # default session. + if tmux -f "$tmux_conf" new-session -d -s "$session" 'sleep 5' 2>/tmp/tmux-verify.err; then + if tmux has-session -t "$session" 2>/dev/null; then + pass "tmux started a session using $tmux_conf" + else + fail "tmux session '$session' did not stay alive" + fi + tmux kill-session -t "$session" 2>/dev/null || true + else + fail "tmux could not start a session: $(cat /tmp/tmux-verify.err 2>/dev/null)" + fi +else + fail "tmux is not on PATH" +fi + +# 3. The lifecycle (postCreateCommand) must have completed. +sentinel="${HOME}/.devcontainer-postcreate-done" +if [ -f "$sentinel" ]; then + pass "lifecycle sentinel present ($sentinel)" +else + fail "lifecycle sentinel missing ($sentinel); postCreateCommand did not complete" +fi + +echo "=== Verification complete ===" +if [ "$failures" -ne 0 ]; then + echo "$failures check(s) failed." >&2 + exit 1 +fi +echo "All checks passed." From 453204d9df35306031dfd7cc614c9890e796148c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 19 Jun 2026 17:46:29 +0000 Subject: [PATCH 5/6] Address review: scope workflow token permissions, use mktemp in verify --- .github/workflows/build-containers.yml | 3 +++ scripts/verify-devcontainer.sh | 6 ++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-containers.yml b/.github/workflows/build-containers.yml index b858960..7e3afd2 100644 --- a/.github/workflows/build-containers.yml +++ b/.github/workflows/build-containers.yml @@ -4,6 +4,9 @@ on: pull_request: branches: [ main ] +permissions: + contents: read + jobs: build-dockerfile: name: Build Dockerfile diff --git a/scripts/verify-devcontainer.sh b/scripts/verify-devcontainer.sh index 9595a1b..f2148c4 100755 --- a/scripts/verify-devcontainer.sh +++ b/scripts/verify-devcontainer.sh @@ -46,6 +46,7 @@ if command -v tmux >/dev/null 2>&1; then session="verify-$$" tmux_conf="${HOME}/.tmux.conf" + tmux_err="$(mktemp)" if [ -f "$tmux_conf" ]; then pass "tmux config present at $tmux_conf" @@ -56,7 +57,7 @@ if command -v tmux >/dev/null 2>&1; then # Start a detached session, explicitly loading the user's config so a broken # config (or a missing HOME) fails the check rather than silently starting a # default session. - if tmux -f "$tmux_conf" new-session -d -s "$session" 'sleep 5' 2>/tmp/tmux-verify.err; then + if tmux -f "$tmux_conf" new-session -d -s "$session" 'sleep 5' 2>"$tmux_err"; then if tmux has-session -t "$session" 2>/dev/null; then pass "tmux started a session using $tmux_conf" else @@ -64,8 +65,9 @@ if command -v tmux >/dev/null 2>&1; then fi tmux kill-session -t "$session" 2>/dev/null || true else - fail "tmux could not start a session: $(cat /tmp/tmux-verify.err 2>/dev/null)" + fail "tmux could not start a session: $(cat "$tmux_err" 2>/dev/null)" fi + rm -f "$tmux_err" else fail "tmux is not on PATH" fi From 718d35103d190ae086e2574b53badaa963a5708f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 19 Jun 2026 18:04:45 +0000 Subject: [PATCH 6/6] Set _REMOTE_USER_HOME/_CONTAINER_USER_HOME so envbuilder feature installs work --- .devcontainer/Dockerfile | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 0482768..072ae42 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -20,6 +20,21 @@ RUN npm install -g playwright \ && playwright install --with-deps chromium \ && chmod -R a+rx /ms-playwright +# Tell dev container features where the target user's home directory is. +# The devcontainer CLI passes `_REMOTE_USER_HOME`/`_CONTAINER_USER_HOME` to every +# feature's install.sh, but envbuilder (used by Coder) only sets `_REMOTE_USER` +# and `_CONTAINER_USER` -- it leaves the *_HOME variables empty. Features that +# rely on them then break: e.g. the claude-code feature runs +# `cp "$_REMOTE_USER_HOME/.local/bin/claude" /usr/local/bin/claude`, which under +# envbuilder expands to `cp /.local/bin/claude ...` and fails the whole build. +# Supplying the values here (as build ARGs, so they don't leak into the running +# container's environment) keeps feature installs working on the envbuilder path. +# ARGs declared here remain in scope for the feature install steps envbuilder +# appends after this Dockerfile, but reset for the devcontainer CLI (which sets +# these variables itself), so this is a no-op there. +ARG _REMOTE_USER_HOME=/home/node +ARG _CONTAINER_USER_HOME=/home/node + # Default to the non-root `node` user. envbuilder (used by Coder) picks its target # user from the last `USER` directive when `containerUser`/`remoteUser` are not # applied from image metadata, so make the intended user explicit here to avoid