Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,24 @@ ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
RUN npm install -g playwright \
&& playwright install --with-deps chromium \
&& chmod -R a+rx /ms-playwright

# Tell dev container features where the target user's home directory is.
# The devcontainer CLI passes `_REMOTE_USER_HOME`/`_CONTAINER_USER_HOME` to every
# feature's install.sh, but envbuilder (used by Coder) only sets `_REMOTE_USER`
# and `_CONTAINER_USER` -- it leaves the *_HOME variables empty. Features that
# rely on them then break: e.g. the claude-code feature runs
# `cp "$_REMOTE_USER_HOME/.local/bin/claude" /usr/local/bin/claude`, which under
# envbuilder expands to `cp /.local/bin/claude ...` and fails the whole build.
# Supplying the values here (as build ARGs, so they don't leak into the running
# container's environment) keeps feature installs working on the envbuilder path.
# ARGs declared here remain in scope for the feature install steps envbuilder
# appends after this Dockerfile, but are reset under the devcontainer CLI (which
# sets these variables itself), so this is a no-op there.
ARG _REMOTE_USER_HOME=/home/node
ARG _CONTAINER_USER_HOME=/home/node

# Default to the non-root `node` user. envbuilder (used by Coder) picks its target
# user from the last `USER` directive when `containerUser`/`remoteUser` are not
# applied from image metadata, so make the intended user explicit here to avoid
# dropping into a root shell.
USER node
18 changes: 17 additions & 1 deletion .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,30 @@
},
"overrideFeatureInstallOrder": ["ghcr.io/devcontainers/features/rust"],

// Connect as the `node` user. Set this explicitly rather than relying on the
// base image's `remoteUser` metadata: envbuilder (used by Coder) does not honor
// image-metadata `remoteUser` and falls back to the image's `USER` (root),
// which would otherwise drop Coder workspaces into a root shell.
"remoteUser": "node",
"containerUser": "node",

// Don't remap the container user's UID/GID to match the host user. The sshd
// feature creates an `ssh` group at GID 1001, which collides with the host
// UID/GID (1001) used by CI runners. The CLI's automatic remap then fails with
// `groupmod: GID '1001' already exists`, breaking `devcontainer up`.
// (envbuilder ignores this key; it only affects the devcontainer CLI.)
"updateRemoteUserUID": false,

// Features to add to the dev container. More info: https://containers.dev/features.
// "features": {},

// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],

// Use 'postCreateCommand' to run commands after the container is created.
"postCreateCommand": "kubectl version --client --short 2>/dev/null || kubectl version --client 2>/dev/null",
// The sentinel file lets CI verify that the lifecycle actually ran to
// completion (both the devcontainer CLI and envbuilder honor this hook).
"postCreateCommand": "kubectl version --client 2>/dev/null || true; touch \"$HOME/.devcontainer-postcreate-done\"",

// Configure tool-specific properties.
"customizations": {
Expand Down
54 changes: 48 additions & 6 deletions .github/workflows/build-containers.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@ on:
pull_request:
branches: [ main ]

permissions:
contents: read

jobs:
build-dockerfile:
name: Build Dockerfile
runs-on: namespace-profile-devcontainer
timeout-minutes: 20

steps:
- name: Checkout code
uses: actions/checkout@v6
Expand All @@ -23,11 +26,32 @@ jobs:
docker build -t devcontainer-dockerfile .devcontainer/
echo "Dockerfile build completed successfully"

build-envbuilder:
name: Build Envbuilder
runs-on: namespace-profile-devcontainer
timeout-minutes: 30

steps:
- name: Checkout code
uses: actions/checkout@v6

# Coder builds workspaces with envbuilder, NOT the devcontainer CLI. The two
# diverge on user selection, UID/GID remapping and feature install order, so
# the CLI job below can pass while a Coder workspace fails to start. This job
# exercises the envbuilder path and runs scripts/verify-devcontainer.sh as the
# post-build init script to assert: user is `node` (not root), tmux works, and
# the lifecycle (postCreateCommand) ran to completion. A non-zero exit from the
# init script fails the job.
- name: Build and verify with envbuilder
run: |
echo "Building dev container with envbuilder (Coder's builder)..."
scripts/test-envbuilder.sh

build-devcontainer:
name: Build Devcontainer
runs-on: namespace-profile-devcontainer
timeout-minutes: 30

steps:
- name: Checkout code
uses: actions/checkout@v6
Expand All @@ -39,8 +63,26 @@ jobs:
run: |
npm install -g @devcontainers/cli

- name: Build devcontainer
# `devcontainer build` only builds the image (Dockerfile + features); it does
# NOT create the container or run lifecycle commands such as postCreateCommand.
# Use `devcontainer up` so the container is actually started and the lifecycle
# commands run, which catches startup failures the plain build step misses.
- name: Start devcontainer
run: |
echo "Starting devcontainer (build + create + lifecycle commands)..."
devcontainer up --workspace-folder .
echo "Devcontainer started successfully"

- name: Smoke test devcontainer
run: |
echo "Running smoke test inside the devcontainer..."
devcontainer exec --workspace-folder . bash -lc 'echo "Devcontainer is up and exec works"'

- name: Tear down devcontainer
if: always()
run: |
echo "Building devcontainer..."
devcontainer build --workspace-folder .
echo "Devcontainer build completed successfully"
container_id=$(docker ps -aq --filter "label=devcontainer.local_folder=${GITHUB_WORKSPACE}")
if [ -n "$container_id" ]; then
echo "Removing devcontainer ($container_id)..."
docker rm -f "$container_id"
fi
56 changes: 56 additions & 0 deletions scripts/test-envbuilder.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/usr/bin/env bash
#
# Build this dev container the same way Coder does (with envbuilder) and run the
# verification checks, so you can reproduce and debug Coder-only startup issues
# (e.g. "drops to root", "tmux won't run", "something isn't completing")
# locally and read the full envbuilder build log.
#
# Coder builds workspaces with envbuilder, NOT the `@devcontainers/cli`. The two
# diverge on user selection, UID/GID remapping and feature install order, so the
# CLI can succeed while Coder fails. This script exercises the envbuilder path.
#
# Usage:
# scripts/test-envbuilder.sh
#
# Requirements:
# - docker
#
# Optional environment variables:
# ENVBUILDER_IMAGE envbuilder image to use (default: ghcr.io/coder/envbuilder:latest)
# CACHE_DIR host directory for the envbuilder layer cache (speeds up reruns)

set -euo pipefail

# Repository root: the parent of the directory that contains this script.
repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
# Image to run; overridable through ENVBUILDER_IMAGE.
envbuilder_image="${ENVBUILDER_IMAGE:-ghcr.io/coder/envbuilder:latest}"

if ! command -v docker >/dev/null 2>&1; then
  echo "error: docker is required but not found on PATH" >&2
  exit 1
fi

# The repo is mounted into the envbuilder workspace. envbuilder discovers
# `.devcontainer/devcontainer.json` automatically. After the build + lifecycle
# commands complete, it runs ENVBUILDER_INIT_SCRIPT as the target user; we point
# that at the verification script (resolved from the mounted workspace).
workspace_folder="/workspaces/devcontainer"
init_script="bash ${workspace_folder}/scripts/verify-devcontainer.sh"

# Build the docker command line as an array so arguments that contain spaces
# (such as the init script) stay single words.
docker_args=(
  run --rm
  -e "ENVBUILDER_WORKSPACE_FOLDER=${workspace_folder}"
  -e "ENVBUILDER_INIT_SCRIPT=${init_script}"
  -v "${repo_root}:${workspace_folder}"
)

# Optionally persist the layer cache between runs for faster iteration.
if [ -n "${CACHE_DIR:-}" ]; then
  mkdir -p -- "${CACHE_DIR}"
  # Resolve to an absolute path before handing it to `docker run -v`: a source
  # given as a bare name (e.g. `CACHE_DIR=cache`) is taken by docker as a named
  # volume, so the directory created above would never be used. CDPATH is
  # cleared so `cd` neither searches elsewhere nor prints to stdout.
  cache_dir="$(CDPATH='' cd -- "${CACHE_DIR}" && pwd)"
  docker_args+=(-v "${cache_dir}:/cache" -e "ENVBUILDER_LAYER_CACHE_DIR=/cache")
fi

docker_args+=("${envbuilder_image}")

echo "=== Running envbuilder ($envbuilder_image) on $repo_root ==="
# Log the exact command with shell quoting (%q) so it can be copy-pasted to
# reproduce the run; joining with "${docker_args[*]}" would drop the quoting
# around arguments that contain spaces.
printf 'docker'
printf ' %q' "${docker_args[@]}"
printf '\n'
# Replace this shell with docker so its exit status becomes the script's.
exec docker "${docker_args[@]}"
88 changes: 88 additions & 0 deletions scripts/verify-devcontainer.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#!/usr/bin/env bash
#
# Verify that the built dev container started up correctly.
#
# This runs *inside* the container after the build and lifecycle commands have
# completed. It serves as the envbuilder `ENVBUILDER_INIT_SCRIPT` (the source of
# truth for how Coder builds this workspace) and can also be run by hand inside
# any container started from this dev container.
#
# It asserts the things that have previously broken on Coder/envbuilder:
# 1. the effective user is `node`, not root,
# 2. tmux is on PATH and can start a session that reads ~/.tmux.conf,
# 3. the lifecycle ran to completion (postCreateCommand sentinel exists).
#
# Any failed check exits non-zero so CI fails loudly instead of silently
# dropping you into a broken shell.

# Strict mode: exit on unhandled errors, on unset variables, and on a failure
# in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Running count of failed checks; incremented by fail() and inspected at the end.
failures=0

# Report a failed check and count it.
# Globals:   failures (incremented by one)
# Arguments: message words, joined with spaces
# Outputs:   "FAIL: <message>" on stderr
fail() {
  printf 'FAIL: %s\n' "$*" >&2
  : "$((failures += 1))"
}

# Report a successful check.
# Arguments: message words, joined with spaces
# Outputs:   "PASS: <message>" on stdout
pass() {
  printf 'PASS: %s\n' "$*"
}

# Print identity and environment details up front so a failing run's log shows
# who the script ran as and which HOME it saw.
printf '%s\n' "=== Verifying dev container startup ==="
printf 'whoami: %s\n' "$(whoami)"
printf 'id: %s\n' "$(id)"
printf 'HOME: %s\n' "${HOME:-<unset>}"

# 1. The effective user name must be `node`; anything else (including root) is
#    recorded as a failed check.
if [[ "$(id -un)" != "node" ]]; then
  fail "expected to run as 'node' but running as '$(id -un)' (uid=$(id -u))"
else
  pass "running as expected user 'node'"
fi

# 2. tmux must be on PATH and able to start a session that reads ~/.tmux.conf.
if command -v tmux >/dev/null 2>&1; then
  pass "tmux is on PATH ($(command -v tmux))"

  # Probe session name, suffixed with this shell's PID.
  session="verify-$$"
  # `${HOME:-}` rather than `${HOME}`: under `set -u` an unset HOME would abort
  # the whole script right here, skipping the remaining checks and the summary.
  # With the empty default the path becomes `/.tmux.conf`, so a missing HOME is
  # reported as a failed check below instead.
  tmux_conf="${HOME:-}/.tmux.conf"
  # Scratch file that captures tmux's stderr for the failure message.
  tmux_err="$(mktemp)"

  if [ -f "$tmux_conf" ]; then
    pass "tmux config present at $tmux_conf"
  else
    fail "tmux config missing at $tmux_conf"
  fi

  # Start a detached session, explicitly pointing tmux at the user's config
  # rather than letting it fall back to its defaults.
  # NOTE(review): tmux may surface config-file errors inside the session rather
  # than through the exit status of `new-session` -- confirm that a broken
  # config really makes this check fail.
  if tmux -f "$tmux_conf" new-session -d -s "$session" 'sleep 5' 2>"$tmux_err"; then
    if tmux has-session -t "$session" 2>/dev/null; then
      pass "tmux started a session using $tmux_conf"
    else
      fail "tmux session '$session' did not stay alive"
    fi
    # Best-effort cleanup; the session may already be gone.
    tmux kill-session -t "$session" 2>/dev/null || true
  else
    fail "tmux could not start a session: $(cat "$tmux_err" 2>/dev/null)"
  fi
  rm -f -- "$tmux_err"
else
  fail "tmux is not on PATH"
fi

# 3. The lifecycle (postCreateCommand) must have completed.
# `${HOME:-}` rather than `${HOME}`: under `set -u` an unset HOME would abort the
# script here before the summary is printed. With the empty default the sentinel
# is simply not found and the problem is reported as a failed check.
sentinel="${HOME:-}/.devcontainer-postcreate-done"
if [ -f "$sentinel" ]; then
  pass "lifecycle sentinel present ($sentinel)"
else
  fail "lifecycle sentinel missing ($sentinel); postCreateCommand did not complete"
fi

# Summarise the run: exit 1 if any check was recorded as failed, otherwise
# report success and fall through to a zero exit status.
printf '%s\n' "=== Verification complete ==="
if [ "$failures" -ne 0 ]; then
  printf '%s\n' "$failures check(s) failed." >&2
  exit 1
fi
printf '%s\n' "All checks passed."