From f77f102b70031c735e5cc7518e1ab247e09abc58 Mon Sep 17 00:00:00 2001 From: Luther Monson Date: Mon, 8 Jun 2026 19:35:00 -0700 Subject: [PATCH] feat(vm): plumb dind.allow_privileged from host config into the VM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The host's `[dind] allow_privileged = true` setting was silently dropped on the way into the embedded Linux VM. The in-VM ephemerd reads its own (default) config inside /var/lib/ephemerd and falls back to the Linux default of false, rejecting `docker run --privileged` siblings even when the host operator explicitly opted in. ephpm-style workloads that need KIND (privileged containers) couldn't run. Plumb the host's `cfg.Dind.ResolvedAllowPrivileged()` through: 1. main.go → startContainerRuntime → LinuxVMConfig.DindAllowPrivileged 2. linuxvm_windows.go appends `ephemerd.dind_allow_privileged=1` to the kernel cmdline when it is set and dind itself is enabled (the param is only emitted alongside `ephemerd.dind=1`); runtime_darwin.go also populates the field, but this patch includes no Darwin-side consumer 3. The in-initrd init script parses the new param and, when dind is enabled, adds `--dind-allow-privileged` next to `--dind` on the in-VM `ephemerd-linux serve` call 4. A new `--dind-allow-privileged` CLI flag on `ephemerd serve` forces `cfg.Dind.AllowPrivileged = true`, overriding the in-VM config file Also fixes a latent bug in mage/download/download.go: Initrdx86's `outOfDate` input list didn't include download.go itself, so edits to the embedded init script body were silently skipped by `mage build:windows` (we burned ~30 minutes on this today). Adding the file as an input makes init-script edits invalidate the cached initrd correctly. Verified end-to-end on the live rig: kernel cmdline carries ephemerd.dind_allow_privileged=1, init banner shows dind_allow_privileged=1, serve invocation logs `(dind=1 allow_privileged=1)`, and the dind rejection warnings ("rejecting elevated container request") stop firing. Note: the Darwin Initrd() function has a similar cache pattern using fileExists rather than outOfDate — same class of bug, deferred to a follow-up. 
--- cmd/ephemerd/main.go | 18 +++++++++++++++--- cmd/ephemerd/runtime_darwin.go | 13 +++++++------ cmd/ephemerd/runtime_default.go | 2 +- cmd/ephemerd/runtime_windows.go | 15 ++++++++------- cmd/ephemerd/svc_windows.go | 2 +- mage/download/download.go | 24 +++++++++++++++++------- pkg/vm/linuxvm_windows.go | 3 +++ pkg/vm/vm.go | 7 +++++++ 8 files changed, 59 insertions(+), 25 deletions(-) diff --git a/cmd/ephemerd/main.go b/cmd/ephemerd/main.go index 3bddeaae..e13895b7 100644 --- a/cmd/ephemerd/main.go +++ b/cmd/ephemerd/main.go @@ -126,14 +126,18 @@ func serveCmd() *cli.Command { Name: "dind", Usage: "mount a fake Docker socket into each container (passed to WSL worker)", }, + &cli.BoolFlag{ + Name: "dind-allow-privileged", + Usage: "allow privileged sibling containers (overrides config). Set on the in-VM ephemerd from the host's dind.allow_privileged.", + }, }, Action: func(ctx context.Context, cmd *cli.Command) error { - return serve(ctx, cmd.String("config"), cmd.String("images-dir"), uint32(cmd.Uint("containerd-tcp-port")), cmd.String("containerd-tcp-addr"), cmd.Bool("containerd-only"), cmd.Bool("dind")) + return serve(ctx, cmd.String("config"), cmd.String("images-dir"), uint32(cmd.Uint("containerd-tcp-port")), cmd.String("containerd-tcp-addr"), cmd.Bool("containerd-only"), cmd.Bool("dind"), cmd.Bool("dind-allow-privileged")) }, } } -func serve(ctx context.Context, configFile, imagesDirFlag string, containerdTCPPort uint32, containerdTCPAddr string, containerdOnly bool, dindFlag bool) error { +func serve(ctx context.Context, configFile, imagesDirFlag string, containerdTCPPort uint32, containerdTCPAddr string, containerdOnly bool, dindFlag, dindAllowPrivilegedFlag bool) error { // Check if another instance is already running. 
if cc, err := dialControl(ctx); err == nil { if resp, err := cc.Status(ctx, &apiv1.StatusRequest{}); err == nil { @@ -165,6 +169,14 @@ func serve(ctx context.Context, configFile, imagesDirFlag string, containerdTCPP if dindFlag { cfg.Dind.Enabled = true } + // CLI --dind-allow-privileged flag overrides config file. Used by the + // in-VM ephemerd: the host plumbs its own dind.allow_privileged across + // the VM boundary via this flag because the in-VM daemon has its own + // (defaulted) config file. + if dindAllowPrivilegedFlag { + t := true + cfg.Dind.AllowPrivileged = &t + } // When running as a Windows Service, route log output to the Event Log. if w := getServiceLogWriter(); w != nil { @@ -197,7 +209,7 @@ func serve(ctx context.Context, configFile, imagesDirFlag string, containerdTCPP // Start container runtime. // On Linux/Windows: embedded containerd runs in-process. // On macOS: boot a Linux VM via Virtualization.framework, containerd runs inside it. - ctrdClient, waitDispatch, cleanup, err := startContainerRuntime(configDir, log, cfg.VM.Linux.Enabled, containerdTCPPort, containerdTCPAddr, cfg.Dind.Enabled, cfg.VM.Linux.CPUs, cfg.VM.Linux.MemoryMB, cfg.VM.Linux.DiskSizeGB) + ctrdClient, waitDispatch, cleanup, err := startContainerRuntime(configDir, log, cfg.VM.Linux.Enabled, containerdTCPPort, containerdTCPAddr, cfg.Dind.Enabled, cfg.Dind.ResolvedAllowPrivileged(), cfg.VM.Linux.CPUs, cfg.VM.Linux.MemoryMB, cfg.VM.Linux.DiskSizeGB) if err != nil { return fmt.Errorf("starting container runtime: %w", err) } diff --git a/cmd/ephemerd/runtime_darwin.go b/cmd/ephemerd/runtime_darwin.go index 9d72bcdc..13ee4cc2 100644 --- a/cmd/ephemerd/runtime_darwin.go +++ b/cmd/ephemerd/runtime_darwin.go @@ -15,15 +15,16 @@ import ( // a dispatch client to ephemerd-linux running inside the VM. Linux jobs run as // containers inside the VM, dispatched through the gRPC dispatch server so // they get full CNI networking (the raw containerd API skips CRI/CNI). 
-func startContainerRuntime(dataDir string, log *slog.Logger, _ bool, _ uint32, _ string, _ bool, linuxVMCPUs uint, linuxVMMemoryMB uint64, linuxVMDiskSizeGB uint64) (*client.Client, func() (*scheduler.DispatchClient, *client.Client), func(), error) { +func startContainerRuntime(dataDir string, log *slog.Logger, _ bool, _ uint32, _ string, _, dindAllowPrivileged bool, linuxVMCPUs uint, linuxVMMemoryMB uint64, linuxVMDiskSizeGB uint64) (*client.Client, func() (*scheduler.DispatchClient, *client.Client), func(), error) { log.Info("macOS detected — booting Linux VM for container runtime") linuxVM, err := vm.StartLinuxVM(vm.LinuxVMConfig{ - DataDir: dataDir, - CPUs: linuxVMCPUs, - MemoryMB: linuxVMMemoryMB, - DiskSizeGB: linuxVMDiskSizeGB, - Log: log, + DataDir: dataDir, + CPUs: linuxVMCPUs, + MemoryMB: linuxVMMemoryMB, + DiskSizeGB: linuxVMDiskSizeGB, + DindAllowPrivileged: dindAllowPrivileged, + Log: log, }) if err != nil { return nil, nil, nil, err diff --git a/cmd/ephemerd/runtime_default.go b/cmd/ephemerd/runtime_default.go index 52be3d5c..9e119290 100644 --- a/cmd/ephemerd/runtime_default.go +++ b/cmd/ephemerd/runtime_default.go @@ -11,7 +11,7 @@ import ( ) // startContainerRuntime starts an in-process containerd server on Linux. 
-func startContainerRuntime(dataDir string, log *slog.Logger, _ bool, tcpPort uint32, tcpAddr string, _ bool, _ uint, _ uint64, _ uint64) (*client.Client, func() (*scheduler.DispatchClient, *client.Client), func(), error) { +func startContainerRuntime(dataDir string, log *slog.Logger, _ bool, tcpPort uint32, tcpAddr string, _, _ bool, _ uint, _ uint64, _ uint64) (*client.Client, func() (*scheduler.DispatchClient, *client.Client), func(), error) { ctrd, err := containerd.New(containerd.Config{ DataDir: dataDir, TCPPort: tcpPort, diff --git a/cmd/ephemerd/runtime_windows.go b/cmd/ephemerd/runtime_windows.go index 8b0f0659..6002f231 100644 --- a/cmd/ephemerd/runtime_windows.go +++ b/cmd/ephemerd/runtime_windows.go @@ -18,7 +18,7 @@ import ( // Returns the native containerd client for Windows jobs and a function that // blocks until the Linux dispatch client is ready (nil if Linux VM is disabled // or failed to start). -func startContainerRuntime(dataDir string, log *slog.Logger, linuxVMEnabled bool, _ uint32, _ string, dindEnabled bool, linuxVMCPUs uint, linuxVMMemoryMB uint64, linuxVMDiskSizeGB uint64) (*client.Client, func() (*scheduler.DispatchClient, *client.Client), func(), error) { +func startContainerRuntime(dataDir string, log *slog.Logger, linuxVMEnabled bool, _ uint32, _ string, dindEnabled, dindAllowPrivileged bool, linuxVMCPUs uint, linuxVMMemoryMB uint64, linuxVMDiskSizeGB uint64) (*client.Client, func() (*scheduler.DispatchClient, *client.Client), func(), error) { // Start native containerd for Windows container jobs ctrd, err := containerd.New(containerd.Config{ DataDir: dataDir, @@ -45,12 +45,13 @@ func startContainerRuntime(dataDir string, log *slog.Logger, linuxVMEnabled bool log.Info("starting Linux VM in background (Hyper-V)") lvm, err := vm.StartLinuxVM(vm.LinuxVMConfig{ - DataDir: dataDir, - CPUs: linuxVMCPUs, - MemoryMB: linuxVMMemoryMB, - DiskSizeGB: linuxVMDiskSizeGB, - DindEnabled: dindEnabled, - Log: log, + DataDir: dataDir, + CPUs: 
linuxVMCPUs, + MemoryMB: linuxVMMemoryMB, + DiskSizeGB: linuxVMDiskSizeGB, + DindEnabled: dindEnabled, + DindAllowPrivileged: dindAllowPrivileged, + Log: log, }) if err != nil { log.Warn("Linux VM not started — Linux jobs will not be available on this host", "error", err) diff --git a/cmd/ephemerd/svc_windows.go b/cmd/ephemerd/svc_windows.go index 878afa40..0b068337 100644 --- a/cmd/ephemerd/svc_windows.go +++ b/cmd/ephemerd/svc_windows.go @@ -55,7 +55,7 @@ func (s *ephemerdService) Execute(_ []string, r <-chan svc.ChangeRequest, status errCh := make(chan error, 1) go func() { - errCh <- serve(ctx, s.configFile, "", s.ctrdTCPPort, s.ctrdTCPAddr, s.containerdOnly, s.dind) + errCh <- serve(ctx, s.configFile, "", s.ctrdTCPPort, s.ctrdTCPAddr, s.containerdOnly, s.dind, false) }() status <- svc.Status{State: svc.Running, Accepts: accepted} diff --git a/mage/download/download.go b/mage/download/download.go index baa24e88..a980e59b 100644 --- a/mage/download/download.go +++ b/mage/download/download.go @@ -696,13 +696,18 @@ func extractVmlinuxFromBzImage(bzImagePath, dest string) error { // exec's ephemerd-linux as PID 1. func Initrdx86() error { dest := filepath.Join(vmEmbedDir, "initrd") - // Initrd embeds the rootfs tarball and kernel modules from linux-virt. - // ephemerd-linux is appended to the boot initrd at runtime by - // pkg/vm.buildBootInitrd, so changes to Linux code do not require an - // initrd rebuild — only changes to busybox, the rootfs, or kernel - // modules invalidate this output. + // Initrd embeds the rootfs tarball, kernel modules from linux-virt, and + // the init script which is templated in-line below from this very file. + // ephemerd-linux itself is appended to the boot initrd at runtime by + // pkg/vm.buildBootInitrd, so changes to *Linux ephemerd source* do not + // require an initrd rebuild — but changes to busybox, the rootfs, + // kernel modules, OR the init script (i.e. download.go itself) + // invalidate this output. 
The download.go input is what catches edits + // to the init script body; without it `mage build:windows` happily + // embeds a stale init script and the kernel boots with old behavior. inputs := []string{ filepath.Join(vmEmbedDir, "ephemerd-rootfs-"+AlpineVersion+"-x86_64.tar.gz"), + filepath.Join("mage", "download", "download.go"), } if !outOfDate(dest, inputs...) { fmt.Printf(" %s already up to date, skipping\n", dest) @@ -1416,14 +1421,16 @@ sleep 1 CONTAINERD_PORT="10000" ROOT_DISK="" DIND="0" +DIND_ALLOW_PRIV="0" for param in $(cat /proc/cmdline); do case "$param" in ephemerd.containerd_port=*) CONTAINERD_PORT="${param#*=}" ;; ephemerd.root_disk=*) ROOT_DISK="${param#*=}" ;; ephemerd.dind=1) DIND="1" ;; + ephemerd.dind_allow_privileged=1) DIND_ALLOW_PRIV="1" ;; esac done -echo "ephemerd-init: containerd_port=$CONTAINERD_PORT root_disk=$ROOT_DISK" +echo "ephemerd-init: containerd_port=$CONTAINERD_PORT root_disk=$ROOT_DISK dind=$DIND dind_allow_privileged=$DIND_ALLOW_PRIV" # Network: eth0 via hv_netvsc (built-in), DHCP from Default Switch NET_IF="" @@ -1607,8 +1614,11 @@ export HOME=/root DIND_FLAG="" if [ "$DIND" = "1" ]; then DIND_FLAG="--dind" + if [ "$DIND_ALLOW_PRIV" = "1" ]; then + DIND_FLAG="$DIND_FLAG --dind-allow-privileged" + fi fi -echo "ephemerd-init: launching ephemerd-linux (dind=$DIND)" +echo "ephemerd-init: launching ephemerd-linux (dind=$DIND allow_privileged=$DIND_ALLOW_PRIV)" exec switch_root /newroot /usr/local/bin/ephemerd-linux serve \ --data-dir /var/lib/ephemerd \ --containerd-tcp-port "$CONTAINERD_PORT" \ diff --git a/pkg/vm/linuxvm_windows.go b/pkg/vm/linuxvm_windows.go index fd52c1d4..e2e985bc 100644 --- a/pkg/vm/linuxvm_windows.go +++ b/pkg/vm/linuxvm_windows.go @@ -520,6 +520,9 @@ func (l *hypervLinuxVM) createAndBootVM() error { dindFlag := "" if l.cfg.DindEnabled { dindFlag = " ephemerd.dind=1" + if l.cfg.DindAllowPrivileged { + dindFlag += " ephemerd.dind_allow_privileged=1" + } } cmdline := fmt.Sprintf( "rdinit=/init 
ephemerd.containerd_port=%d ephemerd.root_disk=/dev/sda%s "+ diff --git a/pkg/vm/vm.go b/pkg/vm/vm.go index dce1f19d..adfb93fa 100644 --- a/pkg/vm/vm.go +++ b/pkg/vm/vm.go @@ -46,6 +46,13 @@ type LinuxVMConfig struct { // Docker socket into each container. DindEnabled bool + // DindAllowPrivileged forwards the host's dind.allow_privileged setting + // to the in-VM ephemerd via the kernel cmdline. Without this, the in-VM + // daemon reads its own (minimal) config and Linux defaults to false, + // rejecting `docker run --privileged` siblings even when the host + // operator explicitly opted in. + DindAllowPrivileged bool + Log *slog.Logger }