From ec0767a07230cbdbca522aa472556e027c483bc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Chaves?= Date: Thu, 28 May 2026 10:31:08 +0200 Subject: [PATCH 1/9] fix(daemon): send self-compact nudge straight away, not queued for Stop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The auto-compact guard queued the 500k/600k/700k self-compact nudges and relied on auto-send when the agent next emits a Stop. A freshly-resumed or idle session never emits a Stop, so those gentle nudges never fired and context coasted up to the 750k hard /compact — which then collided with whatever prompt landed next (e.g. the morning nudge), losing it. Paste the nudge straight into the session instead. Claude queues pasted input and runs it after the current turn, so a busy agent still finishes gracefully, while a resumed/idle session self-compacts immediately and is well below the limit by the time its next prompt arrives. --- cli/src/agents/daemon.ts | 19 ++++++++----------- cli/src/daemon.test.ts | 9 ++++++--- specs/system/headless-runtime.feature | 5 +++-- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/cli/src/agents/daemon.ts b/cli/src/agents/daemon.ts index 88c0b270..c5cb1f8f 100644 --- a/cli/src/agents/daemon.ts +++ b/cli/src/agents/daemon.ts @@ -1,5 +1,4 @@ import { existsSync, statSync, openSync, readSync, closeSync } from "node:fs"; -import { randomUUID } from "node:crypto"; import { hookEventsPath } from "../paths.js"; import { readLinks, readSessionContext, pasteTmuxPrompt } from "../data.js"; import { upsertCard, isoNow } from "../cards.js"; @@ -286,7 +285,14 @@ export class Daemon { if (rule.thresholdTokens <= (this.lastTriggered.get(sessionId) ?? 0)) continue; if (rule.action === "queuePrompt") { - this.enqueueOnce(card.id, rule.message); + // Send the self-compact nudge straight away rather than queueing it for + // the next Stop. 
A resumed/idle session never hits Stop, so a queued + // nudge would never fire and context would coast up to the hard + // /compact (which then collides with whatever prompt lands next, e.g. + // the morning nudge). Claude queues pasted input and runs it after the + // current turn, so a busy agent still finishes gracefully while an idle + // one self-compacts immediately. + this.paste(sessionName, rule.message); } else { this.paste(sessionName, "/compact"); this.announce(card.name ?? "", `🧹 context over ${Math.round(rule.thresholdTokens / 1000)}k - sending /compact`); @@ -317,13 +323,4 @@ export class Daemon { }; upsertCard(next); } - - private enqueueOnce(cardId: string, body: string): void { - const card = readLinks().find((c) => c.id === cardId); - if (!card) return; - const queue = card.queuedPrompts ?? []; - if (queue.some((p) => p.body.trim() === body.trim())) return; // already queued - const prompt: QueuedPrompt = { id: randomUUID(), body, sendAutomatically: true }; - upsertCard({ ...card, queuedPrompts: [...queue, prompt], updatedAt: isoNow() }); - } } diff --git a/cli/src/daemon.test.ts b/cli/src/daemon.test.ts index 159da4ca..68214e55 100644 --- a/cli/src/daemon.test.ts +++ b/cli/src/daemon.test.ts @@ -101,17 +101,20 @@ describe("daemon (sandboxed, injected paste)", () => { assert.equal(readLinks()[0].queuedPrompts?.length, 0, "stale warning dropped from queue"); }); - test("auto-compact queues the crossed warning once (no re-queue)", () => { + test("auto-compact sends the crossed warning straight away, once (no re-send)", () => { writeLinks([card()]); writeContextPct(55); // 550k -> crosses 500k queuePrompt rule const d = newDaemon(); const acted = d.evaluateAutoCompact(); assert.deepEqual(acted, [{ sessionId: SID, action: "queuePrompt", thresholdTokens: 500_000 }]); - assert.equal(readLinks()[0].queuedPrompts?.[0].body, DEFAULT_SELF_COMPACT_RULES[0].message); + // Pasted straight into the session, not parked in the queue (a resumed/idle + // session 
never hits Stop, so a queued nudge would never fire). + assert.deepEqual(pastes, [["daemon-agent", DEFAULT_SELF_COMPACT_RULES[0].message]]); + assert.equal(readLinks()[0].queuedPrompts?.length ?? 0, 0, "not queued"); const again = d.evaluateAutoCompact(); assert.equal(again.length, 0, "must not re-trigger the same threshold"); - assert.equal(readLinks()[0].queuedPrompts?.length, 1, "no duplicate queued warning"); + assert.equal(pastes.length, 1, "no duplicate nudge"); }); test("auto-compact sends /compact at the hard threshold", () => { diff --git a/specs/system/headless-runtime.feature b/specs/system/headless-runtime.feature index 18de5a79..cfe12670 100644 --- a/specs/system/headless-runtime.feature +++ b/specs/system/headless-runtime.feature @@ -28,10 +28,11 @@ Feature: Headless runtime engine (no macOS app) Scenario: Auto-compaction protects long-running sessions Given an agent has been running for a long time When its current context usage crosses 500k tokens - Then a prompt instructing it to self-compact is queued + Then a prompt instructing it to self-compact is sent to it straight away (not parked in the queue waiting for a Stop) + And so a resumed or idle session, which never emits a Stop, still gets the nudge and self-compacts before context grows further + And the same threshold is not nudged twice When usage crosses the hard threshold (750k) Then "/compact" is sent to the agent automatically - And a stale self-compact warning is dropped if context already dropped back below its threshold Scenario: The session runs forever across compactions Given the agent has compacted multiple times From 2983a1250c3dc6a8b5408c0799cae01599885163 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Chaves?= Date: Thu, 28 May 2026 12:36:51 +0200 Subject: [PATCH 2/9] feat(headless): add a Codex runtime alongside Claude The headless engine was Claude-only. 
Add a runtime abstraction so an agent can declare runtime: codex and be driven by the Codex CLI: - runtime.ts: descriptor table (bin, arg building, canResume, selfCompact) mirroring the macOS CodingAssistant entity, scoped to the headless path - config: optional runtime field (defaults to claude), validated - identity/launch/reconcile: thread runtime through; build the codex command (--no-alt-screen + full-auto bypass flags), always launch fresh (Codex mints its own session id and the reviewer is per-PR), tag the card assistant=codex - launcher exports KANBAN_SESSION_ID so the shared hook.sh correlates events to the agent's card regardless of the id the runtime mints internally - hooks: install Codex hooks (~/.codex/hooks.json) pointing at the same hook.sh; self-compact is naturally skipped for codex (no statusline context json) - tests + headless-runtime spec scenario --- cli/src/agents/config.ts | 8 ++ cli/src/agents/identity.ts | 14 ++- cli/src/agents/launch.ts | 54 ++++++----- cli/src/agents/reconcile.ts | 8 +- cli/src/agents/runtime.ts | 82 ++++++++++++++++ cli/src/codex-runtime.test.ts | 131 ++++++++++++++++++++++++++ cli/src/hooks.ts | 59 +++++++++++- cli/src/launch.test.ts | 8 +- cli/src/reconcile.test.ts | 14 +-- specs/system/headless-runtime.feature | 9 ++ 10 files changed, 342 insertions(+), 45 deletions(-) create mode 100644 cli/src/agents/runtime.ts create mode 100644 cli/src/codex-runtime.test.ts diff --git a/cli/src/agents/config.ts b/cli/src/agents/config.ts index bc3a7ad1..0fd6ccc0 100644 --- a/cli/src/agents/config.ts +++ b/cli/src/agents/config.ts @@ -3,12 +3,15 @@ import { homedir } from "node:os"; import { join } from "node:path"; import { parse as parseYaml } from "yaml"; import { isValidSlug } from "./identity.js"; +import { Runtime, isRuntime } from "./runtime.js"; /// One long-lived agent, defined declaratively. Used by the reconciler (slug, /// repos, model), the scheduler (schedule, dailyPrompt) and the Slack bridge /// (slackChannel). 
Prompts live here so the whole agent is one config object. export interface AgentConfig { slug: string; + /// Which agent CLI drives this agent. Optional; defaults to "claude". + runtime?: Runtime; /// GitHub repos the agent works on, as "owner/name". repos: string[]; /// Model alias or full name (claude --model). Optional. @@ -49,6 +52,10 @@ export function parseAgentsConfig(text: string): AgentsFile { if (!isValidSlug(a.slug)) throw new Error(`agents[${i}].slug invalid: ${JSON.stringify(a.slug)}`); if (seen.has(a.slug)) throw new Error(`duplicate agent slug: ${a.slug}`); seen.add(a.slug); + const runtime = a.runtime ?? "claude"; + if (!isRuntime(runtime)) { + throw new Error(`agents[${i}] (${a.slug}) has invalid runtime ${JSON.stringify(a.runtime)} (expected "claude" or "codex")`); + } const repos = Array.isArray(a.repos) ? a.repos : []; for (const r of repos) { if (typeof r !== "string" || !REPO_RE.test(r)) { @@ -57,6 +64,7 @@ export function parseAgentsConfig(text: string): AgentsFile { } return { slug: a.slug, + runtime, repos, model: a.model, slackChannel: a.slackChannel, diff --git a/cli/src/agents/identity.ts b/cli/src/agents/identity.ts index 86206739..f51f80d7 100644 --- a/cli/src/agents/identity.ts +++ b/cli/src/agents/identity.ts @@ -1,12 +1,17 @@ import { uuidv5 } from "../uuid.js"; +import { Runtime } from "./runtime.js"; /// A stable, readable identity for a long-lived agent. Everything humans see or -/// type is the readable slug; only the Claude session id is a (deterministic) -/// UUID, because `claude --session-id` requires a valid UUID. +/// type is the readable slug; the session id is a deterministic UUID. For Claude +/// it is the --session-id / --resume key; for Codex (which mints its own id) it +/// is still the stable hook-events correlation key, passed to the hook via env. export interface AgentIdentity { /// Readable slug, e.g. "dependabot-scout". Source of truth for the identity. 
slug: string; - /// Deterministic UUIDv5 of the slug — the Claude --session-id / --resume key. + /// Which agent CLI drives this agent. + runtime: Runtime; + /// Deterministic UUIDv5 of the slug. Claude --session-id/--resume key, and the + /// hook-events correlation key for both runtimes. sessionId: string; /// tmux session name (== slug). tmuxName: string; @@ -22,7 +27,7 @@ export function isValidSlug(slug: string): boolean { return SLUG_RE.test(slug) && slug.length <= 60; } -export function agentIdentity(slug: string): AgentIdentity { +export function agentIdentity(slug: string, runtime: Runtime = "claude"): AgentIdentity { if (!isValidSlug(slug)) { throw new Error( `Invalid agent slug "${slug}" (use lowercase letters, digits and hyphens; max 60 chars)` @@ -30,6 +35,7 @@ export function agentIdentity(slug: string): AgentIdentity { } return { slug, + runtime, sessionId: uuidv5(slug), tmuxName: slug, cardName: slug, diff --git a/cli/src/agents/launch.ts b/cli/src/agents/launch.ts index 2a0ebb13..9eaba93f 100644 --- a/cli/src/agents/launch.ts +++ b/cli/src/agents/launch.ts @@ -8,20 +8,21 @@ import { import { upsertCard, isoNow } from "../cards.js"; import { generateKsuid } from "../ksuid.js"; import { Link, ManualOverrides } from "../types.js"; +import { runtimeSpec } from "./runtime.js"; export interface LaunchOptions { /// Working directory for the session (the agent's workspace / worktree root). cwd: string; - /// Extra args appended to the claude invocation. + /// Extra args appended to the agent invocation. extraArgs?: string[]; /// Environment variables exported into the tmux session. env?: Record<string, string>; - /// Model alias or full name (claude --model). + /// Model alias or full name. model?: string; /// Autonomous agents skip permission prompts by default. skipPermissions?: boolean; - /// Override the claude binary (tests). - claudeBin?: string; + /// Override the agent binary (tests).
+ bin?: string; } export type LaunchAction = "noop-running" | "launched" | "resumed"; @@ -44,20 +45,23 @@ const DEFAULT_OVERRIDES: ManualOverrides = { issueLink: false, }; -/// Idempotently ensure an agent's Claude session is running in tmux and its -/// kanban card reflects reality. Decides launch vs resume vs no-op: -/// - tmux session already alive -> no-op (never restart a live agent) -/// - a transcript exists for the session -> resume (--resume ) -/// - neither -> fresh launch (--session-id ) +/// Idempotently ensure an agent's session is running in tmux and its kanban card +/// reflects reality. Decides launch vs resume vs no-op: +/// - tmux session already alive -> no-op (never restart a live agent) +/// - runtime can resume + transcript exists -> resume +/// - otherwise -> fresh launch +/// Codex mints its own session id and the reviewer is per-PR, so it always +/// launches fresh (canResume=false); tmux keeps it alive between prompts. export function ensureAgentSession( identity: AgentIdentity, opts: LaunchOptions ): LaunchResult { - const claudeBin = opts.claudeBin ?? "claude"; + const spec = runtimeSpec(identity.runtime); + const bin = opts.bin ?? spec.bin; const skipPerms = opts.skipPermissions ?? 
true; const tmuxAlive = hasTmuxSession(identity.tmuxName); - const sessionExists = !!findSessionJsonl(identity.sessionId); + const sessionExists = spec.canResume && !!findSessionJsonl(identity.sessionId); let action: LaunchAction; let command: string | undefined; @@ -65,20 +69,22 @@ export function ensureAgentSession( if (tmuxAlive) { action = "noop-running"; } else { - const args: string[] = []; - if (sessionExists) { - action = "resumed"; - args.push("--resume", identity.sessionId); - } else { - action = "launched"; - args.push("--session-id", identity.sessionId, "--name", identity.slug); - } - if (skipPerms) args.push("--dangerously-skip-permissions"); - if (opts.model) args.push("--model", opts.model); + const args = spec.buildArgs({ + sessionId: identity.sessionId, + slug: identity.slug, + resume: sessionExists, + skipPermissions: skipPerms, + model: opts.model, + }); + action = sessionExists ? "resumed" : "launched"; if (opts.extraArgs?.length) args.push(...opts.extraArgs); - command = [claudeBin, ...args].join(" "); + command = [bin, ...args].join(" "); - const res = createTmuxSession(identity.tmuxName, opts.cwd, command, opts.env ?? {}); + // Both runtimes' hooks correlate events to this agent via this env var, so + // the daemon/bridge key on our stable session id regardless of the id the + // runtime mints internally. + const env = { ...(opts.env ?? 
{}), KANBAN_SESSION_ID: identity.sessionId, KANBAN_SLUG: identity.slug }; + const res = createTmuxSession(identity.tmuxName, opts.cwd, command, env); if (!res.ok) { throw new Error(`Failed to create tmux session "${identity.tmuxName}": ${res.error}`); } @@ -124,7 +130,7 @@ function upsertAgentCard(identity: AgentIdentity, cwd: string): Link { sessionLink: { sessionId: identity.sessionId, sessionPath }, tmuxLink: { sessionName: identity.tmuxName }, worktreeLink: { path: cwd }, - assistant: "claude", + assistant: identity.runtime, isRemote: false, }; upsertCard(card); diff --git a/cli/src/agents/reconcile.ts b/cli/src/agents/reconcile.ts index b2d14a7d..35de39c5 100644 --- a/cli/src/agents/reconcile.ts +++ b/cli/src/agents/reconcile.ts @@ -8,8 +8,8 @@ import { readLinks, killTmuxSession } from "../data.js"; import { upsertCard, isoNow } from "../cards.js"; export interface ReconcileOptions { - /// Override the claude binary (tests). - claudeBin?: string; + /// Override the agent binary (tests). + bin?: string; /// Tear down agent-managed sessions/cards/worktrees no longer in config. prune?: boolean; } @@ -61,10 +61,10 @@ export function reconcileAgent( repos.push({ name, worktreeCreated: created, worktree }); } - const launch = ensureAgentSession(agentIdentity(agent.slug), { + const launch = ensureAgentSession(agentIdentity(agent.slug, agent.runtime), { cwd: workspace, model: agent.model, - claudeBin: opts.claudeBin, + bin: opts.bin, }); return { slug: agent.slug, workspace, repos, launch }; diff --git a/cli/src/agents/runtime.ts b/cli/src/agents/runtime.ts new file mode 100644 index 00000000..2f732554 --- /dev/null +++ b/cli/src/agents/runtime.ts @@ -0,0 +1,82 @@ +/// Runtime abstraction for the headless agent engine. The engine was built for +/// Claude Code; this table lets a single agent be driven by a different CLI +/// (currently Codex) without branching all over launch/daemon/bridge. 
It mirrors +/// the macOS app's CodingAssistant entity, scoped to what the headless path needs. + +export type Runtime = "claude" | "codex"; + +export const RUNTIMES: readonly Runtime[] = ["claude", "codex"] as const; + +export function isRuntime(v: unknown): v is Runtime { + return v === "claude" || v === "codex"; +} + +export interface BuildArgsInput { + /// The stable session id (uuidv5 of the slug). Used as Claude's --session-id + /// and, for both runtimes, as the hook-events correlation key via env. + sessionId: string; + /// Readable agent slug. + slug: string; + /// Resume an existing session rather than launching fresh. + resume: boolean; + /// Skip permission/approval prompts (autonomous agents). + skipPermissions: boolean; + /// Model alias/name, if pinned. + model?: string; +} + +export interface RuntimeSpec { + /// The CLI binary. + bin: string; + /// Build the argv after the binary. + buildArgs(input: BuildArgsInput): string[]; + /// Whether this runtime can resume a prior session by our session id. Claude + /// can (--resume ); Codex generates its own id and the headless reviewer + /// is per-PR, so we always launch it fresh and rely on tmux to keep it alive. + canResume: boolean; + /// Whether the daemon's context-threshold self-compaction applies. Codex + /// auto-compacts on its own and exposes no context introspection, so off. + selfCompact: boolean; + /// Config dir under $HOME (for hooks/skills install). 
+ configDirName: string; +} + +const claude: RuntimeSpec = { + bin: "claude", + canResume: true, + selfCompact: true, + configDirName: ".claude", + buildArgs({ sessionId, slug, resume, skipPermissions, model }) { + const args: string[] = []; + if (resume) args.push("--resume", sessionId); + else args.push("--session-id", sessionId, "--name", slug); + if (skipPermissions) args.push("--dangerously-skip-permissions"); + if (model) args.push("--model", model); + return args; + }, +}; + +const codex: RuntimeSpec = { + bin: "codex", + canResume: false, + selfCompact: false, + configDirName: ".codex", + buildArgs({ skipPermissions, model }) { + // --no-alt-screen keeps Codex inline so tmux send-keys paste works (no TUI + // alt-screen). The bypass flags are Codex's equivalent of Claude's + // --dangerously-skip-permissions; --dangerously-bypass-hook-trust skips the + // interactive hook-trust gate so our hooks run unattended. + const args = ["--no-alt-screen"]; + if (skipPermissions) { + args.push("--dangerously-bypass-approvals-and-sandbox", "--dangerously-bypass-hook-trust"); + } + if (model) args.push("-m", model); + return args; + }, +}; + +const SPECS: Record<Runtime, RuntimeSpec> = { claude, codex }; + +export function runtimeSpec(runtime: Runtime): RuntimeSpec { + return SPECS[runtime]; +} diff --git a/cli/src/codex-runtime.test.ts b/cli/src/codex-runtime.test.ts new file mode 100644 index 00000000..d016ff70 --- /dev/null +++ b/cli/src/codex-runtime.test.ts @@ -0,0 +1,131 @@ +/** + * Unit tests for the codex runtime: the runtime descriptor (arg building), + * config parsing of the `runtime` field, codex hook installation, and that a + * codex agent launches fresh (never tries Claude --resume) and tags its card.
+ */ +import { test, describe, beforeEach, afterEach } from "node:test"; +import { strict as assert } from "node:assert"; +import { execSync } from "node:child_process"; +import { mkdtempSync, rmSync, mkdirSync, readFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import { runtimeSpec, isRuntime } from "./agents/runtime.js"; +import { parseAgentsConfig } from "./agents/config.js"; +import { agentIdentity } from "./agents/identity.js"; +import { ensureAgentSession } from "./agents/launch.js"; +import { installCodexHooks } from "./hooks.js"; +import { readLinks } from "./data.js"; + +describe("runtime descriptor", () => { + test("claude builds --session-id / --resume args", () => { + const c = runtimeSpec("claude"); + assert.equal(c.bin, "claude"); + assert.equal(c.canResume, true); + assert.equal(c.selfCompact, true); + assert.deepEqual( + c.buildArgs({ sessionId: "sid", slug: "agent", resume: false, skipPermissions: true, model: "opus" }), + ["--session-id", "sid", "--name", "agent", "--dangerously-skip-permissions", "--model", "opus"] + ); + assert.deepEqual( + c.buildArgs({ sessionId: "sid", slug: "agent", resume: true, skipPermissions: true }), + ["--resume", "sid", "--dangerously-skip-permissions"] + ); + }); + + test("codex builds inline + full-auto bypass args and never uses session-id", () => { + const x = runtimeSpec("codex"); + assert.equal(x.bin, "codex"); + assert.equal(x.canResume, false); + assert.equal(x.selfCompact, false); + const args = x.buildArgs({ sessionId: "sid", slug: "agent", resume: false, skipPermissions: true, model: "gpt-5.5" }); + assert.deepEqual(args, [ + "--no-alt-screen", + "--dangerously-bypass-approvals-and-sandbox", + "--dangerously-bypass-hook-trust", + "-m", + "gpt-5.5", + ]); + assert.ok(!args.includes("--session-id")); + assert.ok(!args.includes("--resume")); + }); + + test("isRuntime guards the union", () => { + assert.ok(isRuntime("claude")); + 
assert.ok(isRuntime("codex")); + assert.ok(!isRuntime("gemini")); + assert.ok(!isRuntime(undefined)); + }); +}); + +describe("agents config runtime field", () => { + test("defaults to claude and accepts codex", () => { + const f = parseAgentsConfig(`agents:\n - slug: a\n repos: ["acme/x"]\n - slug: b\n runtime: codex\n repos: ["acme/y"]\n`); + assert.equal(f.agents[0].runtime, "claude"); + assert.equal(f.agents[1].runtime, "codex"); + }); + + test("rejects an unknown runtime", () => { + assert.throws( + () => parseAgentsConfig(`agents:\n - slug: a\n runtime: gemini\n repos: []\n`), + /invalid runtime/ + ); + }); +}); + +describe("installCodexHooks", () => { + let codexHome: string; + beforeEach(() => { + codexHome = mkdtempSync(join(tmpdir(), "kanban-codex-")); + }); + afterEach(() => rmSync(codexHome, { recursive: true, force: true })); + + test("writes hooks.json pointing at the shared hook.sh, idempotently", () => { + const hooksPath = join(codexHome, "hooks.json"); + const hookScriptPath = join(codexHome, "hook.sh"); + const r = installCodexHooks({ hooksPath, hookScriptPath }); + assert.deepEqual(r.events, ["SessionStart", "UserPromptSubmit", "Stop"]); + const json = JSON.parse(readFileSync(hooksPath, "utf-8")); + for (const ev of r.events) { + assert.equal(json[ev][0].hooks[0].command, hookScriptPath); + } + // Re-install does not duplicate the entry. 
+ installCodexHooks({ hooksPath, hookScriptPath }); + const json2 = JSON.parse(readFileSync(hooksPath, "utf-8")); + assert.equal(json2.Stop[0].hooks.length, 1); + }); +}); + +function hasTmux(): boolean { + try { execSync("tmux -V", { stdio: "ignore" }); return true; } catch { return false; } +} + +describe("codex agent launch (real tmux)", { skip: !hasTmux() }, () => { + let home: string; + let workspace: string; + const slug = `kanban-codex-test-${Date.now()}`; + const identity = agentIdentity(slug, "codex"); + + beforeEach(() => { + home = mkdtempSync(join(tmpdir(), "kanban-codex-home-")); + workspace = mkdtempSync(join(tmpdir(), "kanban-codex-ws-")); + process.env.KANBAN_CODE_HOME = home; + }); + afterEach(() => { + try { execSync(`tmux kill-session -t ${identity.tmuxName}`, { stdio: "ignore" }); } catch {} + delete process.env.KANBAN_CODE_HOME; + rmSync(home, { recursive: true, force: true }); + rmSync(workspace, { recursive: true, force: true }); + }); + + test("launches codex fresh (no resume) and tags the card assistant=codex", () => { + const result = ensureAgentSession(identity, { cwd: workspace, bin: "true" }); + assert.equal(result.action, "launched"); + assert.match(result.command!, /true --no-alt-screen --dangerously-bypass-approvals-and-sandbox/); + const card = readLinks().find((l) => l.name === slug); + assert.equal(card?.assistant, "codex"); + // A second reconcile is a no-op while the session is alive. 
+ const again = ensureAgentSession(identity, { cwd: workspace, bin: "true" }); + assert.equal(again.action, "noop-running"); + }); +}); diff --git a/cli/src/hooks.ts b/cli/src/hooks.ts index 1d027be7..5a536422 100644 --- a/cli/src/hooks.ts +++ b/cli/src/hooks.ts @@ -1,4 +1,5 @@ import { readFileSync, writeFileSync, mkdirSync, existsSync, chmodSync } from "node:fs"; +import { homedir } from "node:os"; import { dirname, join } from "node:path"; import { kanbanHome, claudeSettingsPath } from "./paths.js"; import { sortedStringify } from "./cards.js"; @@ -8,6 +9,19 @@ import { sortedStringify } from "./cards.js"; /// activity tracking. Mirrors the Swift HookManager. export const HOOK_EVENTS = ["Stop", "Notification", "SessionStart", "SessionEnd", "UserPromptSubmit"]; +/// Codex hook events we register (Codex exposes the same names; we only need the +/// subset that drives the daemon: Stop for auto-send, UserPromptSubmit for the +/// Slack receipt mirror, SessionStart/Stop for activity). +export const CODEX_HOOK_EVENTS = ["SessionStart", "UserPromptSubmit", "Stop"]; + +function codexConfigDir(): string { + return process.env.CODEX_HOME ?? join(homedir(), ".codex"); +} + +function codexHooksPath(): string { + return join(codexConfigDir(), "hooks.json"); +} + function defaultHookScriptPath(): string { return join(kanbanHome(), "hook.sh"); } @@ -16,8 +30,8 @@ function defaultHookScriptPath(): string { /// dependency (lightweight grep parsing). Honors KANBAN_CODE_HOME so the same /// script works in tests and in alternate deployments. const HOOK_SCRIPT = `#!/usr/bin/env bash -# Kanban hook handler. Receives JSON on stdin from Claude Code hooks and appends -# a timestamped event line to /hook-events.jsonl. +# Kanban hook handler. Receives JSON on stdin from Claude Code or Codex hooks and +# appends a timestamped event line to /hook-events.jsonl. 
set -euo pipefail EVENTS_DIR="\${KANBAN_CODE_HOME:-\$HOME/.kanban-code}" @@ -33,6 +47,13 @@ transcript=$(echo "$input" | grep -o '"transcript_path":"[^"]*"' | head -1 | cut if [ -z "$session_id" ]; then session_id=$(echo "$input" | grep -o '"sessionId":"[^"]*"' | head -1 | cut -d'"' -f4) fi +# The launcher exports KANBAN_SESSION_ID (the stable uuidv5 of the slug) into the +# session, so events correlate to the agent's card regardless of the id the +# runtime mints internally. This is what lets Codex (which generates its own +# session id) share the daemon/bridge correlation path with Claude. +if [ -n "\${KANBAN_SESSION_ID:-}" ]; then + session_id="\$KANBAN_SESSION_ID" +fi [ -z "$session_id" ] && exit 0 # For UserPromptSubmit, capture the whole payload (base64, so the prompt's @@ -145,9 +166,43 @@ export function installHooks(opts: InstallHooksOptions = {}): InstallHooksResult mkdirSync(dirname(settingsPath), { recursive: true }); writeFileSync(settingsPath, sortedStringify(root)); + // Install the Codex equivalent too (harmless if no Codex agent runs): Codex + // mirrors Claude's hook structure but reads ~/.codex/hooks.json and runs the + // same hook.sh, so its events land in the same hook-events.jsonl. + installCodexHooks({ hookScriptPath }); + return { settingsPath, hookScriptPath, statuslinePath, events: HOOK_EVENTS }; } +export interface InstallCodexHooksOptions { + hooksPath?: string; + hookScriptPath?: string; +} + +/// Install Codex hooks (~/.codex/hooks.json) pointing at the shared hook.sh. +/// Idempotent and additive: preserves any other registered hooks for each event. +export function installCodexHooks(opts: InstallCodexHooksOptions = {}): { hooksPath: string; events: string[] } { + const hooksPath = opts.hooksPath ?? codexHooksPath(); + const hookScriptPath = opts.hookScriptPath ?? 
defaultHookScriptPath(); + deployScript(hookScriptPath, HOOK_SCRIPT); + + const root = readJson(hooksPath); + const hookEntry = { type: "command", command: hookScriptPath }; + for (const event of CODEX_HOOK_EVENTS) { + const groups: any[] = Array.isArray(root[event]) ? root[event] : []; + const present = groups.some((g) => (g?.hooks ?? []).some((h: any) => h?.command === hookScriptPath)); + if (!present) { + if (groups.length === 0) groups.push({ hooks: [hookEntry] }); + else groups[0].hooks = [...(groups[0].hooks ?? []), hookEntry]; + } + root[event] = groups; + } + + mkdirSync(dirname(hooksPath), { recursive: true }); + writeFileSync(hooksPath, sortedStringify(root)); + return { hooksPath, events: CODEX_HOOK_EVENTS }; +} + /// True if our hook script is registered for every required event. export function areHooksInstalled(settingsPath?: string, hookScriptPath?: string): boolean { const root = readJson(settingsPath ?? claudeSettingsPath()); diff --git a/cli/src/launch.test.ts b/cli/src/launch.test.ts index b0e77d6d..6f3a4c27 100644 --- a/cli/src/launch.test.ts +++ b/cli/src/launch.test.ts @@ -53,7 +53,7 @@ describe("headless agent launch/resume (real tmux)", skipIfNoTmux, () => { }); test("fresh launch creates a tmux session and a stable card", () => { - const result = ensureAgentSession(identity, { cwd: workspace, claudeBin: "true" }); + const result = ensureAgentSession(identity, { cwd: workspace, bin: "true" }); assert.equal(result.action, "launched"); assert.match(result.command!, /true --session-id .* --name /); @@ -72,10 +72,10 @@ describe("headless agent launch/resume (real tmux)", skipIfNoTmux, () => { }); test("re-running while alive is a no-op: no restart, no duplicate card", () => { - const first = ensureAgentSession(identity, { cwd: workspace, claudeBin: "true" }); + const first = ensureAgentSession(identity, { cwd: workspace, bin: "true" }); const firstCardId = first.card.id; - const second = ensureAgentSession(identity, { cwd: workspace, claudeBin: 
"true" }); + const second = ensureAgentSession(identity, { cwd: workspace, bin: "true" }); assert.equal(second.action, "noop-running"); assert.equal(second.command, undefined, "must not build a launch command when already running"); @@ -90,7 +90,7 @@ describe("headless agent launch/resume (real tmux)", skipIfNoTmux, () => { mkdirSync(projDir, { recursive: true }); writeFileSync(join(projDir, `${identity.sessionId}.jsonl`), '{"type":"user"}\n'); - const result = ensureAgentSession(identity, { cwd: workspace, claudeBin: "true" }); + const result = ensureAgentSession(identity, { cwd: workspace, bin: "true" }); assert.equal(result.action, "resumed"); assert.match(result.command!, new RegExp(`true --resume ${identity.sessionId}`)); execSync(`tmux has-session -t ${identity.tmuxName}`); diff --git a/cli/src/reconcile.test.ts b/cli/src/reconcile.test.ts index 6fd8bf94..93d2b132 100644 --- a/cli/src/reconcile.test.ts +++ b/cli/src/reconcile.test.ts @@ -87,7 +87,7 @@ describe("reconciler (real git + tmux)", skipIfNoTmux, () => { test("creates a per-agent worktree on agent/ and launches the session", () => { provisionRepo(root, reposDir, "myrepo"); const slug = trackSlug(`recon-a-${Date.now()}`); - const result = reconcileAll(makeFile([{ slug, repos: ["acme/myrepo"] }]), { claudeBin: "true" }); + const result = reconcileAll(makeFile([{ slug, repos: ["acme/myrepo"] }]), { bin: "true" }); const agent = result.agents[0]; assert.equal(agent.launch.action, "launched"); @@ -107,7 +107,7 @@ describe("reconciler (real git + tmux)", skipIfNoTmux, () => { test("the agent worktree structurally cannot check out main", () => { provisionRepo(root, reposDir, "myrepo"); const slug = trackSlug(`recon-main-${Date.now()}`); - reconcileAll(makeFile([{ slug, repos: ["acme/myrepo"] }]), { claudeBin: "true" }); + reconcileAll(makeFile([{ slug, repos: ["acme/myrepo"] }]), { bin: "true" }); const worktree = join(workspacesDir, slug, "myrepo"); // main is checked out in the canonical clone, so git 
refuses it here. assert.throws(() => g(["-C", worktree, "checkout", "main"]), /already (used|checked out)/i); @@ -118,8 +118,8 @@ describe("reconciler (real git + tmux)", skipIfNoTmux, () => { const slug = trackSlug(`recon-idem-${Date.now()}`); const file = makeFile([{ slug, repos: ["acme/myrepo"] }]); - const first = reconcileAll(file, { claudeBin: "true" }); - const second = reconcileAll(file, { claudeBin: "true" }); + const first = reconcileAll(file, { bin: "true" }); + const second = reconcileAll(file, { bin: "true" }); assert.equal(second.agents[0].launch.action, "noop-running"); assert.equal(second.agents[0].repos[0].worktreeCreated, false); @@ -130,7 +130,7 @@ describe("reconciler (real git + tmux)", skipIfNoTmux, () => { test("a missing canonical clone is a loud error (IaC owns provisioning)", () => { const slug = trackSlug(`recon-missing-${Date.now()}`); assert.throws( - () => reconcileAgent({ slug, repos: ["acme/nope"] }, makeFile([]), { claudeBin: "true" }), + () => reconcileAgent({ slug, repos: ["acme/nope"] }, makeFile([]), { bin: "true" }), /Canonical clone .* is missing/ ); }); @@ -138,10 +138,10 @@ describe("reconciler (real git + tmux)", skipIfNoTmux, () => { test("prune tears down an agent removed from config", () => { provisionRepo(root, reposDir, "myrepo"); const slug = trackSlug(`recon-prune-${Date.now()}`); - reconcileAll(makeFile([{ slug, repos: ["acme/myrepo"] }]), { claudeBin: "true" }); + reconcileAll(makeFile([{ slug, repos: ["acme/myrepo"] }]), { bin: "true" }); execSync(`tmux has-session -t ${slug}`); - const pruneResult = reconcileAll(makeFile([]), { claudeBin: "true", prune: true }); + const pruneResult = reconcileAll(makeFile([]), { bin: "true", prune: true }); assert.deepEqual(pruneResult.pruned, [slug]); assert.throws(() => execSync(`tmux has-session -t ${slug} 2>/dev/null`), "tmux session should be gone"); diff --git a/specs/system/headless-runtime.feature b/specs/system/headless-runtime.feature index cfe12670..29cc0a46 100644 --- 
a/specs/system/headless-runtime.feature +++ b/specs/system/headless-runtime.feature @@ -44,3 +44,12 @@ Feature: Headless runtime engine (no macOS app) When the box finishes booting Then reconcile-on-boot resumes every configured agent with "--resume " And the daemon is restarted by the service manager + + Scenario: An agent can be driven by the Codex runtime instead of Claude + Given an agent declares "runtime: codex" in the agents config + When the reconciler launches it + Then it runs "codex --no-alt-screen --dangerously-bypass-approvals-and-sandbox --dangerously-bypass-hook-trust" in tmux (no Claude --session-id/--resume) + And the launcher exports KANBAN_SESSION_ID so the shared hook.sh correlates Codex events to the agent's card + And Codex hooks (~/.codex/hooks.json) feed the same hook-events.jsonl, so Slack steering and auto-send still work + And the daemon's context-threshold self-compaction is skipped for it (Codex auto-compacts and exposes no context usage) + And sending and Slack-inbound steering work unchanged because they paste into the tmux session by slug From 966521027452010c10141f5d8d74e3a0bb2fa7ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Chaves?= Date: Thu, 28 May 2026 13:30:24 +0200 Subject: [PATCH 3/9] fix(hooks): wrap codex hooks.json events under top-level "hooks" key Codex's hooks.json nests events under a "hooks" key (mirroring the config.toml [hooks] table); without the wrapper Codex silently ignores the file, so no codex hook events fired. Caught dogfooding on the box (empty Slack channel). 
--- cli/src/codex-runtime.test.ts | 6 ++++-- cli/src/hooks.ts | 8 ++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/cli/src/codex-runtime.test.ts b/cli/src/codex-runtime.test.ts index d016ff70..103d078e 100644 --- a/cli/src/codex-runtime.test.ts +++ b/cli/src/codex-runtime.test.ts @@ -86,13 +86,15 @@ describe("installCodexHooks", () => { const r = installCodexHooks({ hooksPath, hookScriptPath }); assert.deepEqual(r.events, ["SessionStart", "UserPromptSubmit", "Stop"]); const json = JSON.parse(readFileSync(hooksPath, "utf-8")); + // Codex requires the top-level "hooks" wrapper. + assert.ok(json.hooks, "events must be nested under a top-level hooks key"); for (const ev of r.events) { - assert.equal(json[ev][0].hooks[0].command, hookScriptPath); + assert.equal(json.hooks[ev][0].hooks[0].command, hookScriptPath); } // Re-install does not duplicate the entry. installCodexHooks({ hooksPath, hookScriptPath }); const json2 = JSON.parse(readFileSync(hooksPath, "utf-8")); - assert.equal(json2.Stop[0].hooks.length, 1); + assert.equal(json2.hooks.Stop[0].hooks.length, 1); }); }); diff --git a/cli/src/hooks.ts b/cli/src/hooks.ts index 5a536422..81c9ef1a 100644 --- a/cli/src/hooks.ts +++ b/cli/src/hooks.ts @@ -186,17 +186,21 @@ export function installCodexHooks(opts: InstallCodexHooksOptions = {}): { hooksP const hookScriptPath = opts.hookScriptPath ?? defaultHookScriptPath(); deployScript(hookScriptPath, HOOK_SCRIPT); + // Codex's hooks.json nests events under a top-level "hooks" key (mirrors the + // config.toml [hooks] table); without the wrapper Codex ignores the file. const root = readJson(hooksPath); + const hooks = (root.hooks ?? {}) as Record; const hookEntry = { type: "command", command: hookScriptPath }; for (const event of CODEX_HOOK_EVENTS) { - const groups: any[] = Array.isArray(root[event]) ? root[event] : []; + const groups: any[] = Array.isArray(hooks[event]) ? hooks[event] : []; const present = groups.some((g) => (g?.hooks ?? 
[]).some((h: any) => h?.command === hookScriptPath)); if (!present) { if (groups.length === 0) groups.push({ hooks: [hookEntry] }); else groups[0].hooks = [...(groups[0].hooks ?? []), hookEntry]; } - root[event] = groups; + hooks[event] = groups; } + root.hooks = hooks; mkdirSync(dirname(hooksPath), { recursive: true }); writeFileSync(hooksPath, sortedStringify(root)); From c330f3587dff393d2f4204e99155f25d6d611c09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Chaves?= Date: Thu, 28 May 2026 13:49:37 +0200 Subject: [PATCH 4/9] feat(bridge): mirror Codex agent movement to Slack via rollout transcript Codex agents now stream to Slack like Claude agents: the bridge discovers the agent's Codex rollout by workspace cwd (findCodexRollout) and posts its agent_message + exec_command events (formatCodexRolloutLines). Inbound steering already worked via tmux paste. Also stop auto-installing Codex hooks: codex 0.134.0 gates command hooks behind an interactive trust prompt that --dangerously-bypass-hook-trust does not suppress in the inline TUI, which hung the headless session on a modal. The rollout-transcript mirror replaces the hook-based announce for Codex. 
--- cli/src/codex-runtime.test.ts | 20 ++++++++++++++++ cli/src/data.ts | 43 +++++++++++++++++++++++++++++++++++ cli/src/hooks.ts | 11 +++++---- cli/src/slack/bridge.ts | 22 ++++++++++++------ cli/src/slack/format.ts | 20 ++++++++++++++++ 5 files changed, 105 insertions(+), 11 deletions(-) diff --git a/cli/src/codex-runtime.test.ts b/cli/src/codex-runtime.test.ts index 103d078e..96bd3312 100644 --- a/cli/src/codex-runtime.test.ts +++ b/cli/src/codex-runtime.test.ts @@ -11,6 +11,7 @@ import { tmpdir } from "node:os"; import { join } from "node:path"; import { runtimeSpec, isRuntime } from "./agents/runtime.js"; +import { formatCodexRolloutLines } from "./slack/format.js"; import { parseAgentsConfig } from "./agents/config.js"; import { agentIdentity } from "./agents/identity.js"; import { ensureAgentSession } from "./agents/launch.js"; @@ -58,6 +59,25 @@ describe("runtime descriptor", () => { }); }); +describe("formatCodexRolloutLines", () => { + test("mirrors agent messages and exec commands, skips reasoning/system noise", () => { + const objs = [ + { type: "session_meta", payload: { cwd: "/x" } }, + { type: "event_msg", payload: { type: "task_started" } }, + { type: "event_msg", payload: { type: "agent_message", message: "I'll review the PR now." } }, + { type: "response_item", payload: { type: "reasoning", encrypted_content: "..." } }, + { type: "event_msg", payload: { type: "exec_command_begin", command: ["gh", "pr", "view", "519"] } }, + { type: "event_msg", payload: { type: "agent_message", message: "No blockers; 2 nits." 
} }, + ]; + const posts = formatCodexRolloutLines(objs); + assert.equal(posts.length, 3); + assert.equal(posts[0].text, "I'll review the PR now."); + assert.match(posts[1].text, /gh pr view 519/); + assert.equal(posts[2].text, "No blockers; 2 nits."); + assert.ok(posts.every((p) => p.role === "assistant")); + }); +}); + describe("agents config runtime field", () => { test("defaults to claude and accepts codex", () => { const f = parseAgentsConfig(`agents:\n - slug: a\n repos: ["acme/x"]\n - slug: b\n runtime: codex\n repos: ["acme/y"]\n`); diff --git a/cli/src/data.ts b/cli/src/data.ts index 2e441c5f..57cfec79 100644 --- a/cli/src/data.ts +++ b/cli/src/data.ts @@ -361,6 +361,49 @@ export function killTmuxSession(name: string): { ok: boolean; error?: string } { } } +/// Locate the newest Codex rollout (.jsonl) for a session by working directory. +/// Codex mints its own session id, so we can't address the file by our session +/// id like Claude; instead each rollout's first line (session_meta) carries the +/// cwd, and a per-agent workspace is unique, so we match on that and take the +/// most recently modified. Only the first line is read, so this stays cheap even +/// as rollouts grow. +export function findCodexRollout(cwd: string): string | undefined { + const base = join(process.env.CODEX_HOME ?? 
join(homedir(), ".codex"), "sessions"); + if (!existsSync(base)) return undefined; + let best: { path: string; mtime: number } | undefined; + const walk = (dir: string): void => { + let entries: import("node:fs").Dirent[]; + try { + entries = readdirSync(dir, { withFileTypes: true }); + } catch { + return; + } + for (const e of entries) { + const full = join(dir, e.name); + if (e.isDirectory()) { + walk(full); + } else if (e.name.startsWith("rollout-") && e.name.endsWith(".jsonl")) { + try { + const fd = openSync(full, "r"); + const buf = Buffer.alloc(8192); + const n = readSync(fd, buf, 0, buf.length, 0); + closeSync(fd); + const firstLine = buf.toString("utf-8", 0, n).split("\n", 1)[0]; + const meta = JSON.parse(firstLine); + if (meta?.payload?.cwd === cwd) { + const mtime = statSync(full).mtimeMs; + if (!best || mtime > best.mtime) best = { path: full, mtime }; + } + } catch { + /* skip unreadable/partial */ + } + } + } + }; + walk(base); + return best?.path; +} + /// Locate a Claude session transcript by scanning ~/.claude/projects//. /// Encoding-independent: finds .jsonl wherever Claude placed it. export function findSessionJsonl(sessionId: string): string | undefined { diff --git a/cli/src/hooks.ts b/cli/src/hooks.ts index 81c9ef1a..eaa23787 100644 --- a/cli/src/hooks.ts +++ b/cli/src/hooks.ts @@ -166,10 +166,13 @@ export function installHooks(opts: InstallHooksOptions = {}): InstallHooksResult mkdirSync(dirname(settingsPath), { recursive: true }); writeFileSync(settingsPath, sortedStringify(root)); - // Install the Codex equivalent too (harmless if no Codex agent runs): Codex - // mirrors Claude's hook structure but reads ~/.codex/hooks.json and runs the - // same hook.sh, so its events land in the same hook-events.jsonl. - installCodexHooks({ hookScriptPath }); + // NOTE: Codex hooks are intentionally NOT installed here. 
Codex 0.134.0 gates + // command hooks behind an interactive trust prompt that --dangerously-bypass- + // hook-trust does not reliably suppress in the inline TUI, which hangs the + // headless session on a modal. Codex agents are instead mirrored to Slack via + // their rollout transcript (see findCodexRollout / formatCodexRolloutLines in + // the bridge), and steered via tmux paste, so no Codex hooks are needed. + // installCodexHooks() remains available for when Codex honors trust bypass. return { settingsPath, hookScriptPath, statuslinePath, events: HOOK_EVENTS }; } diff --git a/cli/src/slack/bridge.ts b/cli/src/slack/bridge.ts index 89395a79..0f74bed3 100644 --- a/cli/src/slack/bridge.ts +++ b/cli/src/slack/bridge.ts @@ -1,12 +1,14 @@ import { existsSync, statSync, openSync, readSync, closeSync } from "node:fs"; +import { join } from "node:path"; import { SocketModeClient } from "@slack/socket-mode"; import { SlackClient } from "./client.js"; import { routeSlackMessage, ChannelMapping } from "./inbound.js"; -import { formatTranscriptLines } from "./format.js"; +import { formatTranscriptLines, formatCodexRolloutLines } from "./format.js"; import { loadAgentsConfig } from "../agents/config.js"; import { agentIdentity } from "../agents/identity.js"; +import { Runtime } from "../agents/runtime.js"; import { recordAnnounceSuppress } from "./announce-suppress.js"; -import { findSessionJsonl, pasteTmuxPrompt } from "../data.js"; +import { findSessionJsonl, findCodexRollout, pasteTmuxPrompt } from "../data.js"; export interface BridgeOptions { botToken: string; @@ -18,7 +20,10 @@ export interface BridgeOptions { interface TailState { slug: string; + runtime: Runtime; sessionId: string; + /// For Codex, the per-agent workspace used to locate its rollout by cwd. 
+ cwd?: string; channelId: string; path?: string; offset: number; @@ -71,24 +76,27 @@ export async function runSlackBridge(opts: BridgeOptions): Promise { continue; } mapping[channelId] = a.slug; - const sessionId = agentIdentity(a.slug).sessionId; - const path = findSessionJsonl(sessionId); + const runtime = (a.runtime ?? "claude") as Runtime; + const sessionId = agentIdentity(a.slug, runtime).sessionId; + const cwd = join(file.workspacesDir, a.slug); + const path = runtime === "codex" ? findCodexRollout(cwd) : findSessionJsonl(sessionId); // Start at EOF so we mirror only new activity, not the whole backlog. - tails.push({ slug: a.slug, sessionId, channelId, path, offset: path ? statSync(path).size : 0 }); + tails.push({ slug: a.slug, runtime, sessionId, cwd, channelId, path, offset: path ? statSync(path).size : 0 }); } // agent -> slack setInterval(async () => { for (const t of tails) { if (!t.path) { - t.path = findSessionJsonl(t.sessionId); + t.path = t.runtime === "codex" ? findCodexRollout(t.cwd!) : findSessionJsonl(t.sessionId); if (t.path) t.offset = statSync(t.path).size; // skip backlog on first discovery continue; } if (!existsSync(t.path)) continue; const { objs, newOffset } = readAppendedLines(t.path, t.offset); t.offset = newOffset; - for (const post of formatTranscriptLines(objs)) { + const posts = t.runtime === "codex" ? formatCodexRolloutLines(objs) : formatTranscriptLines(objs); + for (const post of posts) { try { await client.post(t.channelId, post.text); } catch (e) { diff --git a/cli/src/slack/format.ts b/cli/src/slack/format.ts index 974c0694..163ea02c 100644 --- a/cli/src/slack/format.ts +++ b/cli/src/slack/format.ts @@ -127,3 +127,23 @@ export function formatTranscriptLines(objs: any[]): SlackPost[] { flush(); return posts; } + +/// Format Codex rollout (.jsonl) records into Slack posts. 
Codex logs a stream +/// of records; we mirror the agent's own messages (its "movement") and the +/// commands it runs, skipping reasoning/system noise so the channel reads like +/// the Claude mirror. Event shapes: event_msg{payload:{type:"agent_message", +/// message}} for assistant text, and exec_command_begin{command} for shell runs. +export function formatCodexRolloutLines(objs: any[]): SlackPost[] { + const posts: SlackPost[] = []; + for (const o of objs) { + if (o?.type !== "event_msg") continue; + const p = o.payload ?? {}; + if (p.type === "agent_message" && typeof p.message === "string" && p.message.trim()) { + posts.push({ role: "assistant", text: truncate(p.message) }); + } else if (p.type === "exec_command_begin") { + const cmd = Array.isArray(p.command) ? p.command.join(" ") : String(p.command ?? ""); + if (cmd.trim()) posts.push({ role: "assistant", text: fenceBlock(`$ ${truncate(cmd, 300)}`) }); + } + } + return posts; +} From a9176ecd9cb9ebc6f84b76f3bd4617ce6534b11f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Chaves?= Date: Thu, 28 May 2026 13:54:38 +0200 Subject: [PATCH 5/9] fix(bridge): match codex rollout cwd via regex (session_meta line is huge) Codex embeds full base_instructions in the session_meta first line (~20KB), so the bounded read truncated it and JSON.parse failed, so findCodexRollout never matched and no agent movement was mirrored. Extract cwd with a regex from a larger bounded read instead. 
--- cli/src/data.ts | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/cli/src/data.ts b/cli/src/data.ts index 57cfec79..6851ff87 100644 --- a/cli/src/data.ts +++ b/cli/src/data.ts @@ -384,13 +384,17 @@ export function findCodexRollout(cwd: string): string | undefined { walk(full); } else if (e.name.startsWith("rollout-") && e.name.endsWith(".jsonl")) { try { + // The session_meta first line can be very large (Codex embeds its full + // base_instructions), so we don't JSON.parse it; cwd appears early, so + // a bounded read + regex is robust regardless of the line length. const fd = openSync(full, "r"); - const buf = Buffer.alloc(8192); + const buf = Buffer.alloc(65536); const n = readSync(fd, buf, 0, buf.length, 0); closeSync(fd); - const firstLine = buf.toString("utf-8", 0, n).split("\n", 1)[0]; - const meta = JSON.parse(firstLine); - if (meta?.payload?.cwd === cwd) { + const head = buf.toString("utf-8", 0, n); + const m = head.match(/"cwd"\s*:\s*"((?:[^"\\]|\\.)*)"/); + const foundCwd = m ? 
JSON.parse(`"${m[1]}"`) : undefined; + if (foundCwd === cwd) { const mtime = statSync(full).mtimeMs; if (!best || mtime > best.mtime) best = { path: full, mtime }; } From 2cb8583337359c2650deacec559dfca51e65b0fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Chaves?= Date: Thu, 28 May 2026 13:58:13 +0200 Subject: [PATCH 6/9] docs(spec): codex mirrors via rollout transcript, not hooks --- specs/system/headless-runtime.feature | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/specs/system/headless-runtime.feature b/specs/system/headless-runtime.feature index 29cc0a46..b9dd5129 100644 --- a/specs/system/headless-runtime.feature +++ b/specs/system/headless-runtime.feature @@ -49,7 +49,6 @@ Feature: Headless runtime engine (no macOS app) Given an agent declares "runtime: codex" in the agents config When the reconciler launches it Then it runs "codex --no-alt-screen --dangerously-bypass-approvals-and-sandbox --dangerously-bypass-hook-trust" in tmux (no Claude --session-id/--resume) - And the launcher exports KANBAN_SESSION_ID so the shared hook.sh correlates Codex events to the agent's card - And Codex hooks (~/.codex/hooks.json) feed the same hook-events.jsonl, so Slack steering and auto-send still work And the daemon's context-threshold self-compaction is skipped for it (Codex auto-compacts and exposes no context usage) And sending and Slack-inbound steering work unchanged because they paste into the tmux session by slug + And its movement is mirrored to Slack by tailing its Codex rollout transcript (located by the agent's workspace cwd), since Codex 0.134.0 gates command hooks behind a trust prompt that --dangerously-bypass-hook-trust does not suppress in the inline TUI From c4429d954bf73db6dc5ad7e0de4155e1859f7acc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Chaves?= Date: Thu, 28 May 2026 14:54:28 +0200 Subject: [PATCH 7/9] feat(slack): mirror codex received prompts as '>>> Received user message' Codex has no UserPromptSubmit 
hook (trust gate), so injected prompts were not announced to the channel the way Claude's are. Mirror user_message events from the rollout tail using the same received-message format, and guard against echoing a prompt that was relayed from a Slack human. --- cli/src/codex-runtime.test.ts | 17 +++++++++++------ cli/src/slack/announce.ts | 14 ++++---------- cli/src/slack/bridge.ts | 25 +++++++++++++++++++++++++ cli/src/slack/format.ts | 27 ++++++++++++++++++++++----- 4 files changed, 62 insertions(+), 21 deletions(-) diff --git a/cli/src/codex-runtime.test.ts b/cli/src/codex-runtime.test.ts index 96bd3312..e0591b78 100644 --- a/cli/src/codex-runtime.test.ts +++ b/cli/src/codex-runtime.test.ts @@ -60,9 +60,10 @@ describe("runtime descriptor", () => { }); describe("formatCodexRolloutLines", () => { - test("mirrors agent messages and exec commands, skips reasoning/system noise", () => { + test("mirrors received prompts, agent messages and exec commands, skips reasoning/system noise", () => { const objs = [ { type: "session_meta", payload: { cwd: "/x" } }, + { type: "event_msg", payload: { type: "user_message", message: "Please review PR 519.", images: [] } }, { type: "event_msg", payload: { type: "task_started" } }, { type: "event_msg", payload: { type: "agent_message", message: "I'll review the PR now." } }, { type: "response_item", payload: { type: "reasoning", encrypted_content: "..." } }, @@ -70,11 +71,15 @@ describe("formatCodexRolloutLines", () => { { type: "event_msg", payload: { type: "agent_message", message: "No blockers; 2 nits." } }, ]; const posts = formatCodexRolloutLines(objs); - assert.equal(posts.length, 3); - assert.equal(posts[0].text, "I'll review the PR now."); - assert.match(posts[1].text, /gh pr view 519/); - assert.equal(posts[2].text, "No blockers; 2 nits."); - assert.ok(posts.every((p) => p.role === "assistant")); + assert.equal(posts.length, 4); + // The injected prompt is mirrored like the Claude UserPromptSubmit announce. 
+ assert.equal(posts[0].role, "user"); + assert.match(posts[0].text, /^>>> Received user message/); + assert.match(posts[0].text, /Please review PR 519\./); + assert.equal(posts[1].text, "I'll review the PR now."); + assert.match(posts[2].text, /gh pr view 519/); + assert.equal(posts[3].text, "No blockers; 2 nits."); + assert.ok(posts.slice(1).every((p) => p.role === "assistant")); }); }); diff --git a/cli/src/slack/announce.ts b/cli/src/slack/announce.ts index 4d6f8a08..b4039369 100644 --- a/cli/src/slack/announce.ts +++ b/cli/src/slack/announce.ts @@ -8,16 +8,10 @@ import { homedir } from "node:os"; /// agent's Slack channel. Messages relayed *from* a Slack human must NOT go /// through here, they already appear in Slack as that person's message. -/// Header shown above each mirrored prompt. It marks the text as the input the -/// agent received (not the agent's own reply), so a reader can tell the two -/// apart in the channel. -export const RECEIVED_MESSAGE_HEADER = ">>> Received user message"; - -/// Format an injected prompt for the channel: the header, then the body in -/// italics (Slack mrkdwn uses _underscores_ for italic; * and ** do not work). -export function formatReceivedMessage(text: string): string { - return `${RECEIVED_MESSAGE_HEADER}\n\n_${text}_`; -} +// The received-message formatting lives in format.ts (all Slack rendering does); +// re-exported here so the Claude announce path and existing imports stay stable. 
+export { RECEIVED_MESSAGE_HEADER, formatReceivedMessage } from "./format.js"; +import { formatReceivedMessage } from "./format.js"; function defaultConfigPath(): string { return process.env.KANBAN_AGENTS_CONFIG || join(homedir(), ".kanban-code", "agents.yaml"); diff --git a/cli/src/slack/bridge.ts b/cli/src/slack/bridge.ts index 0f74bed3..f7c14ff7 100644 --- a/cli/src/slack/bridge.ts +++ b/cli/src/slack/bridge.ts @@ -69,6 +69,24 @@ export async function runSlackBridge(opts: BridgeOptions): Promise { const mapping: ChannelMapping = {}; const tails: TailState[] = []; + + // Slack-human messages relayed into a Codex agent reappear in that agent's + // rollout as user_message events, which the poll loop would otherwise echo + // back to the channel as ">>> Received user message" (a duplicate of the + // human's own message). Claude suppresses this via the daemon's announce- + // suppress markers, but the Codex rollout mirror runs in this process, so we + // track recent relays here and drop the matching echo. Keyed by slug. + const recentRelays = new Map(); + const RELAY_ECHO_TTL_MS = 90_000; + const consumeRelayEcho = (slug: string, mirrored: string): boolean => { + const list = recentRelays.get(slug); + if (!list?.length) return false; + const now = Date.now(); + const i = list.findIndex((r) => now - r.ts <= RELAY_ECHO_TTL_MS && mirrored.includes(r.text.trim())); + if (i < 0) return false; + list.splice(i, 1); // consume so a genuine resend later is not swallowed + return true; + }; for (const a of agents) { const channelId = await client.resolveChannelId(a.slackChannel!); if (!channelId) { @@ -97,6 +115,9 @@ export async function runSlackBridge(opts: BridgeOptions): Promise { t.offset = newOffset; const posts = t.runtime === "codex" ? formatCodexRolloutLines(objs) : formatTranscriptLines(objs); for (const post of posts) { + // Don't echo a prompt we just relayed from a Slack human (it's already + // in the channel as their message). 
+ if (t.runtime === "codex" && post.role === "user" && consumeRelayEcho(t.slug, post.text)) continue; try { await client.post(t.channelId, post.text); } catch (e) { @@ -116,6 +137,10 @@ export async function runSlackBridge(opts: BridgeOptions): Promise { // (it already appears there as that person's Slack message). Recorded // before the paste so the marker is in place before UserPromptSubmit. recordAnnounceSuppress(agentIdentity(decision.slug).sessionId); + // Also remember it for the in-process Codex rollout-echo guard above. + const relays = recentRelays.get(decision.slug) ?? []; + relays.push({ text: decision.text, ts: Date.now() }); + recentRelays.set(decision.slug, relays); pasteTmuxPrompt(decision.slug, decision.text); // tmux session name == slug } }); diff --git a/cli/src/slack/format.ts b/cli/src/slack/format.ts index 163ea02c..fc77bc5e 100644 --- a/cli/src/slack/format.ts +++ b/cli/src/slack/format.ts @@ -14,6 +14,17 @@ function truncate(s: string, max = MAX_TEXT): string { return t.length > max ? t.slice(0, max) + "…" : t; } +/// Header shown above each mirrored prompt. It marks the text as the input the +/// agent received (not the agent's own reply), so a reader can tell the two +/// apart in the channel. +export const RECEIVED_MESSAGE_HEADER = ">>> Received user message"; + +/// Format an injected prompt for the channel: the header, then the body in +/// italics (Slack mrkdwn uses _underscores_ for italic; * and ** do not work). +export function formatReceivedMessage(text: string): string { + return `${RECEIVED_MESSAGE_HEADER}\n\n_${text}_`; +} + /// Keep the last 3 path components (mirrors TranscriptReader.shortenPath). export function shortenPath(path: string): string { const parts = path.split("/").filter(Boolean); @@ -129,16 +140,22 @@ export function formatTranscriptLines(objs: any[]): SlackPost[] { } /// Format Codex rollout (.jsonl) records into Slack posts. 
Codex logs a stream -/// of records; we mirror the agent's own messages (its "movement") and the -/// commands it runs, skipping reasoning/system noise so the channel reads like -/// the Claude mirror. Event shapes: event_msg{payload:{type:"agent_message", -/// message}} for assistant text, and exec_command_begin{command} for shell runs. +/// of records; we mirror the prompts the agent receives (user_message), the +/// agent's own messages (its "movement"), and the commands it runs, skipping +/// reasoning/system noise so the channel reads like the Claude mirror. Codex +/// gates command hooks behind a trust prompt, so this rollout tail is also how +/// a received prompt is announced (the Claude path uses the UserPromptSubmit +/// hook). Event shapes: event_msg{payload:{type:"user_message",message}} for an +/// injected prompt, {type:"agent_message",message} for assistant text, and +/// {type:"exec_command_begin",command} for shell runs. export function formatCodexRolloutLines(objs: any[]): SlackPost[] { const posts: SlackPost[] = []; for (const o of objs) { if (o?.type !== "event_msg") continue; const p = o.payload ?? {}; - if (p.type === "agent_message" && typeof p.message === "string" && p.message.trim()) { + if (p.type === "user_message" && typeof p.message === "string" && p.message.trim()) { + posts.push({ role: "user", text: formatReceivedMessage(truncate(p.message)) }); + } else if (p.type === "agent_message" && typeof p.message === "string" && p.message.trim()) { posts.push({ role: "assistant", text: truncate(p.message) }); } else if (p.type === "exec_command_begin") { const cmd = Array.isArray(p.command) ? p.command.join(" ") : String(p.command ?? 
""); From 552f0b5e359f051bb9762bbd539c8a59ce7ad333 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Chaves?= Date: Thu, 28 May 2026 15:05:04 +0200 Subject: [PATCH 8/9] feat(slack): follow codex rollout rotation so mirroring survives restarts/compaction Codex writes a new rollout file per session, so a relaunched agent (or its own auto-compaction) rotated the file out from under the bridge tail. The poll loop now switches to the newest rollout for the agent's cwd and mirrors it from the start, so the channel keeps showing the agent's conversation without restarting the bridge. --- cli/src/slack/bridge.ts | 10 ++++++++++ specs/system/headless-runtime.feature | 5 ++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/cli/src/slack/bridge.ts b/cli/src/slack/bridge.ts index f7c14ff7..d3a9d37b 100644 --- a/cli/src/slack/bridge.ts +++ b/cli/src/slack/bridge.ts @@ -110,6 +110,16 @@ export async function runSlackBridge(opts: BridgeOptions): Promise { if (t.path) t.offset = statSync(t.path).size; // skip backlog on first discovery continue; } + // Codex writes a fresh rollout file per session, so a restart (or its own + // auto-compaction) rotates the file. Follow the newest one from its start + // so a relaunched agent keeps mirroring without restarting the bridge. 
+ if (t.runtime === "codex") { + const latest = findCodexRollout(t.cwd!); + if (latest && latest !== t.path) { + t.path = latest; + t.offset = 0; + } + } if (!existsSync(t.path)) continue; const { objs, newOffset } = readAppendedLines(t.path, t.offset); t.offset = newOffset; diff --git a/specs/system/headless-runtime.feature b/specs/system/headless-runtime.feature index b9dd5129..6dcf341e 100644 --- a/specs/system/headless-runtime.feature +++ b/specs/system/headless-runtime.feature @@ -51,4 +51,7 @@ Feature: Headless runtime engine (no macOS app) Then it runs "codex --no-alt-screen --dangerously-bypass-approvals-and-sandbox --dangerously-bypass-hook-trust" in tmux (no Claude --session-id/--resume) And the daemon's context-threshold self-compaction is skipped for it (Codex auto-compacts and exposes no context usage) And sending and Slack-inbound steering work unchanged because they paste into the tmux session by slug - And its movement is mirrored to Slack by tailing its Codex rollout transcript (located by the agent's workspace cwd), since Codex 0.134.0 gates command hooks behind a trust prompt that --dangerously-bypass-hook-trust does not suppress in the inline TUI + And its whole conversation is mirrored to Slack automatically by tailing its Codex rollout transcript (located by the agent's workspace cwd): received prompts as ">>> Received user message", the agent's own messages, and the commands it runs, just like the Claude hook mirror, since Codex 0.134.0 gates command hooks behind a trust prompt that --dangerously-bypass-hook-trust does not suppress in the inline TUI + And the agent is never instructed to post to Slack itself; it communicates normally and the bridge does the mirroring + And a prompt relayed from a Slack human is not echoed back to the channel (it is already there as that person's message) + And when Codex rotates its rollout file (a relaunched session or its own auto-compaction) the bridge follows the newest rollout so mirroring continues 
without a bridge restart From 67c5d250a09c27c4919cd8071fba2899932938b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Chaves?= Date: Thu, 28 May 2026 18:43:43 +0200 Subject: [PATCH 9/9] revert: keep self-compact nudge queued for Stop (drop #73) The 500k/600k/700k soft nudges stay queued and auto-send on the next Stop, as designed: a soft nudge is meant to land at a stop point, and an idle session simply rides up to the 750k hard /compact, after which the next prompt arrives post-compaction. Reverts the daemon paste-straight-away change so the queuePrompt path is preserved. --- cli/src/agents/daemon.ts | 19 +++++++++++-------- cli/src/daemon.test.ts | 9 +++------ specs/system/headless-runtime.feature | 5 ++--- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/cli/src/agents/daemon.ts b/cli/src/agents/daemon.ts index c5cb1f8f..88c0b270 100644 --- a/cli/src/agents/daemon.ts +++ b/cli/src/agents/daemon.ts @@ -1,4 +1,5 @@ import { existsSync, statSync, openSync, readSync, closeSync } from "node:fs"; +import { randomUUID } from "node:crypto"; import { hookEventsPath } from "../paths.js"; import { readLinks, readSessionContext, pasteTmuxPrompt } from "../data.js"; import { upsertCard, isoNow } from "../cards.js"; @@ -285,14 +286,7 @@ export class Daemon { if (rule.thresholdTokens <= (this.lastTriggered.get(sessionId) ?? 0)) continue; if (rule.action === "queuePrompt") { - // Send the self-compact nudge straight away rather than queueing it for - // the next Stop. A resumed/idle session never hits Stop, so a queued - // nudge would never fire and context would coast up to the hard - // /compact (which then collides with whatever prompt lands next, e.g. - // the morning nudge). Claude queues pasted input and runs it after the - // current turn, so a busy agent still finishes gracefully while an idle - // one self-compacts immediately. 
- this.paste(sessionName, rule.message); + this.enqueueOnce(card.id, rule.message); } else { this.paste(sessionName, "/compact"); this.announce(card.name ?? "", `🧹 context over ${Math.round(rule.thresholdTokens / 1000)}k - sending /compact`); @@ -323,4 +317,13 @@ export class Daemon { }; upsertCard(next); } + + private enqueueOnce(cardId: string, body: string): void { + const card = readLinks().find((c) => c.id === cardId); + if (!card) return; + const queue = card.queuedPrompts ?? []; + if (queue.some((p) => p.body.trim() === body.trim())) return; // already queued + const prompt: QueuedPrompt = { id: randomUUID(), body, sendAutomatically: true }; + upsertCard({ ...card, queuedPrompts: [...queue, prompt], updatedAt: isoNow() }); + } } diff --git a/cli/src/daemon.test.ts b/cli/src/daemon.test.ts index 68214e55..159da4ca 100644 --- a/cli/src/daemon.test.ts +++ b/cli/src/daemon.test.ts @@ -101,20 +101,17 @@ describe("daemon (sandboxed, injected paste)", () => { assert.equal(readLinks()[0].queuedPrompts?.length, 0, "stale warning dropped from queue"); }); - test("auto-compact sends the crossed warning straight away, once (no re-send)", () => { + test("auto-compact queues the crossed warning once (no re-queue)", () => { writeLinks([card()]); writeContextPct(55); // 550k -> crosses 500k queuePrompt rule const d = newDaemon(); const acted = d.evaluateAutoCompact(); assert.deepEqual(acted, [{ sessionId: SID, action: "queuePrompt", thresholdTokens: 500_000 }]); - // Pasted straight into the session, not parked in the queue (a resumed/idle - // session never hits Stop, so a queued nudge would never fire). - assert.deepEqual(pastes, [["daemon-agent", DEFAULT_SELF_COMPACT_RULES[0].message]]); - assert.equal(readLinks()[0].queuedPrompts?.length ?? 
0, 0, "not queued"); + assert.equal(readLinks()[0].queuedPrompts?.[0].body, DEFAULT_SELF_COMPACT_RULES[0].message); const again = d.evaluateAutoCompact(); assert.equal(again.length, 0, "must not re-trigger the same threshold"); - assert.equal(pastes.length, 1, "no duplicate nudge"); + assert.equal(readLinks()[0].queuedPrompts?.length, 1, "no duplicate queued warning"); }); test("auto-compact sends /compact at the hard threshold", () => { diff --git a/specs/system/headless-runtime.feature b/specs/system/headless-runtime.feature index 6dcf341e..46742cac 100644 --- a/specs/system/headless-runtime.feature +++ b/specs/system/headless-runtime.feature @@ -28,11 +28,10 @@ Feature: Headless runtime engine (no macOS app) Scenario: Auto-compaction protects long-running sessions Given an agent has been running for a long time When its current context usage crosses 500k tokens - Then a prompt instructing it to self-compact is sent to it straight away (not parked in the queue waiting for a Stop) - And so a resumed or idle session, which never emits a Stop, still gets the nudge and self-compacts before context grows further - And the same threshold is not nudged twice + Then a prompt instructing it to self-compact is queued When usage crosses the hard threshold (750k) Then "/compact" is sent to the agent automatically + And a stale self-compact warning is dropped if context already dropped back below its threshold Scenario: The session runs forever across compactions Given the agent has compacted multiple times