diff --git a/cli/src/agents/config.ts b/cli/src/agents/config.ts index bc3a7ad1..0fd6ccc0 100644 --- a/cli/src/agents/config.ts +++ b/cli/src/agents/config.ts @@ -3,12 +3,15 @@ import { homedir } from "node:os"; import { join } from "node:path"; import { parse as parseYaml } from "yaml"; import { isValidSlug } from "./identity.js"; +import { Runtime, isRuntime } from "./runtime.js"; /// One long-lived agent, defined declaratively. Used by the reconciler (slug, /// repos, model), the scheduler (schedule, dailyPrompt) and the Slack bridge /// (slackChannel). Prompts live here so the whole agent is one config object. export interface AgentConfig { slug: string; + /// Which agent CLI drives this agent. Optional; defaults to "claude". + runtime?: Runtime; /// GitHub repos the agent works on, as "owner/name". repos: string[]; /// Model alias or full name (claude --model). Optional. @@ -49,6 +52,10 @@ export function parseAgentsConfig(text: string): AgentsFile { if (!isValidSlug(a.slug)) throw new Error(`agents[${i}].slug invalid: ${JSON.stringify(a.slug)}`); if (seen.has(a.slug)) throw new Error(`duplicate agent slug: ${a.slug}`); seen.add(a.slug); + const runtime = a.runtime ?? "claude"; + if (!isRuntime(runtime)) { + throw new Error(`agents[${i}] (${a.slug}) has invalid runtime ${JSON.stringify(a.runtime)} (expected "claude" or "codex")`); + } const repos = Array.isArray(a.repos) ? a.repos : []; for (const r of repos) { if (typeof r !== "string" || !REPO_RE.test(r)) { @@ -57,6 +64,7 @@ export function parseAgentsConfig(text: string): AgentsFile { } return { slug: a.slug, + runtime, repos, model: a.model, slackChannel: a.slackChannel, diff --git a/cli/src/agents/identity.ts b/cli/src/agents/identity.ts index 86206739..f51f80d7 100644 --- a/cli/src/agents/identity.ts +++ b/cli/src/agents/identity.ts @@ -1,12 +1,17 @@ import { uuidv5 } from "../uuid.js"; +import { Runtime } from "./runtime.js"; /// A stable, readable identity for a long-lived agent. 
Everything humans see or -/// type is the readable slug; only the Claude session id is a (deterministic) -/// UUID, because `claude --session-id` requires a valid UUID. +/// type is the readable slug; the session id is a deterministic UUID. For Claude +/// it is the --session-id / --resume key; for Codex (which mints its own id) it +/// is still the stable hook-events correlation key, passed to the hook via env. export interface AgentIdentity { /// Readable slug, e.g. "dependabot-scout". Source of truth for the identity. slug: string; - /// Deterministic UUIDv5 of the slug — the Claude --session-id / --resume key. + /// Which agent CLI drives this agent. + runtime: Runtime; + /// Deterministic UUIDv5 of the slug. Claude --session-id/--resume key, and the + /// hook-events correlation key for both runtimes. sessionId: string; /// tmux session name (== slug). tmuxName: string; @@ -22,7 +27,7 @@ export function isValidSlug(slug: string): boolean { return SLUG_RE.test(slug) && slug.length <= 60; } -export function agentIdentity(slug: string): AgentIdentity { +export function agentIdentity(slug: string, runtime: Runtime = "claude"): AgentIdentity { if (!isValidSlug(slug)) { throw new Error( `Invalid agent slug "${slug}" (use lowercase letters, digits and hyphens; max 60 chars)` @@ -30,6 +35,7 @@ export function agentIdentity(slug: string): AgentIdentity { } return { slug, + runtime, sessionId: uuidv5(slug), tmuxName: slug, cardName: slug, diff --git a/cli/src/agents/launch.ts b/cli/src/agents/launch.ts index 2a0ebb13..9eaba93f 100644 --- a/cli/src/agents/launch.ts +++ b/cli/src/agents/launch.ts @@ -8,20 +8,21 @@ import { import { upsertCard, isoNow } from "../cards.js"; import { generateKsuid } from "../ksuid.js"; import { Link, ManualOverrides } from "../types.js"; +import { runtimeSpec } from "./runtime.js"; export interface LaunchOptions { /// Working directory for the session (the agent's workspace / worktree root). 
cwd: string; - /// Extra args appended to the claude invocation. + /// Extra args appended to the agent invocation. extraArgs?: string[]; /// Environment variables exported into the tmux session. env?: Record; - /// Model alias or full name (claude --model). + /// Model alias or full name. model?: string; /// Autonomous agents skip permission prompts by default. skipPermissions?: boolean; - /// Override the claude binary (tests). - claudeBin?: string; + /// Override the agent binary (tests). + bin?: string; } export type LaunchAction = "noop-running" | "launched" | "resumed"; @@ -44,20 +45,23 @@ const DEFAULT_OVERRIDES: ManualOverrides = { issueLink: false, }; -/// Idempotently ensure an agent's Claude session is running in tmux and its -/// kanban card reflects reality. Decides launch vs resume vs no-op: -/// - tmux session already alive -> no-op (never restart a live agent) -/// - a transcript exists for the session -> resume (--resume ) -/// - neither -> fresh launch (--session-id ) +/// Idempotently ensure an agent's session is running in tmux and its kanban card +/// reflects reality. Decides launch vs resume vs no-op: +/// - tmux session already alive -> no-op (never restart a live agent) +/// - runtime can resume + transcript exists -> resume +/// - otherwise -> fresh launch +/// Codex mints its own session id and the reviewer is per-PR, so it always +/// launches fresh (canResume=false); tmux keeps it alive between prompts. export function ensureAgentSession( identity: AgentIdentity, opts: LaunchOptions ): LaunchResult { - const claudeBin = opts.claudeBin ?? "claude"; + const spec = runtimeSpec(identity.runtime); + const bin = opts.bin ?? spec.bin; const skipPerms = opts.skipPermissions ?? 
true; const tmuxAlive = hasTmuxSession(identity.tmuxName); - const sessionExists = !!findSessionJsonl(identity.sessionId); + const sessionExists = spec.canResume && !!findSessionJsonl(identity.sessionId); let action: LaunchAction; let command: string | undefined; @@ -65,20 +69,22 @@ export function ensureAgentSession( if (tmuxAlive) { action = "noop-running"; } else { - const args: string[] = []; - if (sessionExists) { - action = "resumed"; - args.push("--resume", identity.sessionId); - } else { - action = "launched"; - args.push("--session-id", identity.sessionId, "--name", identity.slug); - } - if (skipPerms) args.push("--dangerously-skip-permissions"); - if (opts.model) args.push("--model", opts.model); + const args = spec.buildArgs({ + sessionId: identity.sessionId, + slug: identity.slug, + resume: sessionExists, + skipPermissions: skipPerms, + model: opts.model, + }); + action = sessionExists ? "resumed" : "launched"; if (opts.extraArgs?.length) args.push(...opts.extraArgs); - command = [claudeBin, ...args].join(" "); + command = [bin, ...args].join(" "); - const res = createTmuxSession(identity.tmuxName, opts.cwd, command, opts.env ?? {}); + // Both runtimes' hooks correlate events to this agent via this env var, so + // the daemon/bridge key on our stable session id regardless of the id the + // runtime mints internally. + const env = { ...(opts.env ?? 
{}), KANBAN_SESSION_ID: identity.sessionId, KANBAN_SLUG: identity.slug }; + const res = createTmuxSession(identity.tmuxName, opts.cwd, command, env); if (!res.ok) { throw new Error(`Failed to create tmux session "${identity.tmuxName}": ${res.error}`); } @@ -124,7 +130,7 @@ function upsertAgentCard(identity: AgentIdentity, cwd: string): Link { sessionLink: { sessionId: identity.sessionId, sessionPath }, tmuxLink: { sessionName: identity.tmuxName }, worktreeLink: { path: cwd }, - assistant: "claude", + assistant: identity.runtime, isRemote: false, }; upsertCard(card); diff --git a/cli/src/agents/reconcile.ts b/cli/src/agents/reconcile.ts index b2d14a7d..35de39c5 100644 --- a/cli/src/agents/reconcile.ts +++ b/cli/src/agents/reconcile.ts @@ -8,8 +8,8 @@ import { readLinks, killTmuxSession } from "../data.js"; import { upsertCard, isoNow } from "../cards.js"; export interface ReconcileOptions { - /// Override the claude binary (tests). - claudeBin?: string; + /// Override the agent binary (tests). + bin?: string; /// Tear down agent-managed sessions/cards/worktrees no longer in config. prune?: boolean; } @@ -61,10 +61,10 @@ export function reconcileAgent( repos.push({ name, worktreeCreated: created, worktree }); } - const launch = ensureAgentSession(agentIdentity(agent.slug), { + const launch = ensureAgentSession(agentIdentity(agent.slug, agent.runtime), { cwd: workspace, model: agent.model, - claudeBin: opts.claudeBin, + bin: opts.bin, }); return { slug: agent.slug, workspace, repos, launch }; diff --git a/cli/src/agents/runtime.ts b/cli/src/agents/runtime.ts new file mode 100644 index 00000000..2f732554 --- /dev/null +++ b/cli/src/agents/runtime.ts @@ -0,0 +1,82 @@ +/// Runtime abstraction for the headless agent engine. The engine was built for +/// Claude Code; this table lets a single agent be driven by a different CLI +/// (currently Codex) without branching all over launch/daemon/bridge. 
It mirrors +/// the macOS app's CodingAssistant entity, scoped to what the headless path needs. + +export type Runtime = "claude" | "codex"; + +export const RUNTIMES: readonly Runtime[] = ["claude", "codex"] as const; + +export function isRuntime(v: unknown): v is Runtime { + return v === "claude" || v === "codex"; +} + +export interface BuildArgsInput { + /// The stable session id (uuidv5 of the slug). Used as Claude's --session-id + /// and, for both runtimes, as the hook-events correlation key via env. + sessionId: string; + /// Readable agent slug. + slug: string; + /// Resume an existing session rather than launching fresh. + resume: boolean; + /// Skip permission/approval prompts (autonomous agents). + skipPermissions: boolean; + /// Model alias/name, if pinned. + model?: string; +} + +export interface RuntimeSpec { + /// The CLI binary. + bin: string; + /// Build the argv after the binary. + buildArgs(input: BuildArgsInput): string[]; + /// Whether this runtime can resume a prior session by our session id. Claude + /// can (--resume ); Codex generates its own id and the headless reviewer + /// is per-PR, so we always launch it fresh and rely on tmux to keep it alive. + canResume: boolean; + /// Whether the daemon's context-threshold self-compaction applies. Codex + /// auto-compacts on its own and exposes no context introspection, so off. + selfCompact: boolean; + /// Config dir under $HOME (for hooks/skills install). 
+ configDirName: string; +} + +const claude: RuntimeSpec = { + bin: "claude", + canResume: true, + selfCompact: true, + configDirName: ".claude", + buildArgs({ sessionId, slug, resume, skipPermissions, model }) { + const args: string[] = []; + if (resume) args.push("--resume", sessionId); + else args.push("--session-id", sessionId, "--name", slug); + if (skipPermissions) args.push("--dangerously-skip-permissions"); + if (model) args.push("--model", model); + return args; + }, +}; + +const codex: RuntimeSpec = { + bin: "codex", + canResume: false, + selfCompact: false, + configDirName: ".codex", + buildArgs({ skipPermissions, model }) { + // --no-alt-screen keeps Codex inline so tmux send-keys paste works (no TUI + // alt-screen). The bypass flags are Codex's equivalent of Claude's + // --dangerously-skip-permissions; --dangerously-bypass-hook-trust skips the + // interactive hook-trust gate so our hooks run unattended. + const args = ["--no-alt-screen"]; + if (skipPermissions) { + args.push("--dangerously-bypass-approvals-and-sandbox", "--dangerously-bypass-hook-trust"); + } + if (model) args.push("-m", model); + return args; + }, +}; + +const SPECS: Record = { claude, codex }; + +export function runtimeSpec(runtime: Runtime): RuntimeSpec { + return SPECS[runtime]; +} diff --git a/cli/src/codex-runtime.test.ts b/cli/src/codex-runtime.test.ts new file mode 100644 index 00000000..e0591b78 --- /dev/null +++ b/cli/src/codex-runtime.test.ts @@ -0,0 +1,158 @@ +/** + * Unit tests for the codex runtime: the runtime descriptor (arg building), + * config parsing of the `runtime` field, codex hook installation, and that a + * codex agent launches fresh (never tries Claude --resume) and tags its card. 
+ */ +import { test, describe, beforeEach, afterEach } from "node:test"; +import { strict as assert } from "node:assert"; +import { execSync } from "node:child_process"; +import { mkdtempSync, rmSync, mkdirSync, readFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import { runtimeSpec, isRuntime } from "./agents/runtime.js"; +import { formatCodexRolloutLines } from "./slack/format.js"; +import { parseAgentsConfig } from "./agents/config.js"; +import { agentIdentity } from "./agents/identity.js"; +import { ensureAgentSession } from "./agents/launch.js"; +import { installCodexHooks } from "./hooks.js"; +import { readLinks } from "./data.js"; + +describe("runtime descriptor", () => { + test("claude builds --session-id / --resume args", () => { + const c = runtimeSpec("claude"); + assert.equal(c.bin, "claude"); + assert.equal(c.canResume, true); + assert.equal(c.selfCompact, true); + assert.deepEqual( + c.buildArgs({ sessionId: "sid", slug: "agent", resume: false, skipPermissions: true, model: "opus" }), + ["--session-id", "sid", "--name", "agent", "--dangerously-skip-permissions", "--model", "opus"] + ); + assert.deepEqual( + c.buildArgs({ sessionId: "sid", slug: "agent", resume: true, skipPermissions: true }), + ["--resume", "sid", "--dangerously-skip-permissions"] + ); + }); + + test("codex builds inline + full-auto bypass args and never uses session-id", () => { + const x = runtimeSpec("codex"); + assert.equal(x.bin, "codex"); + assert.equal(x.canResume, false); + assert.equal(x.selfCompact, false); + const args = x.buildArgs({ sessionId: "sid", slug: "agent", resume: false, skipPermissions: true, model: "gpt-5.5" }); + assert.deepEqual(args, [ + "--no-alt-screen", + "--dangerously-bypass-approvals-and-sandbox", + "--dangerously-bypass-hook-trust", + "-m", + "gpt-5.5", + ]); + assert.ok(!args.includes("--session-id")); + assert.ok(!args.includes("--resume")); + }); + + test("isRuntime guards the union", () => { + 
assert.ok(isRuntime("claude")); + assert.ok(isRuntime("codex")); + assert.ok(!isRuntime("gemini")); + assert.ok(!isRuntime(undefined)); + }); +}); + +describe("formatCodexRolloutLines", () => { + test("mirrors received prompts, agent messages and exec commands, skips reasoning/system noise", () => { + const objs = [ + { type: "session_meta", payload: { cwd: "/x" } }, + { type: "event_msg", payload: { type: "user_message", message: "Please review PR 519.", images: [] } }, + { type: "event_msg", payload: { type: "task_started" } }, + { type: "event_msg", payload: { type: "agent_message", message: "I'll review the PR now." } }, + { type: "response_item", payload: { type: "reasoning", encrypted_content: "..." } }, + { type: "event_msg", payload: { type: "exec_command_begin", command: ["gh", "pr", "view", "519"] } }, + { type: "event_msg", payload: { type: "agent_message", message: "No blockers; 2 nits." } }, + ]; + const posts = formatCodexRolloutLines(objs); + assert.equal(posts.length, 4); + // The injected prompt is mirrored like the Claude UserPromptSubmit announce. 
+ assert.equal(posts[0].role, "user"); + assert.match(posts[0].text, /^>>> Received user message/); + assert.match(posts[0].text, /Please review PR 519\./); + assert.equal(posts[1].text, "I'll review the PR now."); + assert.match(posts[2].text, /gh pr view 519/); + assert.equal(posts[3].text, "No blockers; 2 nits."); + assert.ok(posts.slice(1).every((p) => p.role === "assistant")); + }); +}); + +describe("agents config runtime field", () => { + test("defaults to claude and accepts codex", () => { + const f = parseAgentsConfig(`agents:\n - slug: a\n repos: ["acme/x"]\n - slug: b\n runtime: codex\n repos: ["acme/y"]\n`); + assert.equal(f.agents[0].runtime, "claude"); + assert.equal(f.agents[1].runtime, "codex"); + }); + + test("rejects an unknown runtime", () => { + assert.throws( + () => parseAgentsConfig(`agents:\n - slug: a\n runtime: gemini\n repos: []\n`), + /invalid runtime/ + ); + }); +}); + +describe("installCodexHooks", () => { + let codexHome: string; + beforeEach(() => { + codexHome = mkdtempSync(join(tmpdir(), "kanban-codex-")); + }); + afterEach(() => rmSync(codexHome, { recursive: true, force: true })); + + test("writes hooks.json pointing at the shared hook.sh, idempotently", () => { + const hooksPath = join(codexHome, "hooks.json"); + const hookScriptPath = join(codexHome, "hook.sh"); + const r = installCodexHooks({ hooksPath, hookScriptPath }); + assert.deepEqual(r.events, ["SessionStart", "UserPromptSubmit", "Stop"]); + const json = JSON.parse(readFileSync(hooksPath, "utf-8")); + // Codex requires the top-level "hooks" wrapper. + assert.ok(json.hooks, "events must be nested under a top-level hooks key"); + for (const ev of r.events) { + assert.equal(json.hooks[ev][0].hooks[0].command, hookScriptPath); + } + // Re-install does not duplicate the entry. 
+ installCodexHooks({ hooksPath, hookScriptPath }); + const json2 = JSON.parse(readFileSync(hooksPath, "utf-8")); + assert.equal(json2.hooks.Stop[0].hooks.length, 1); + }); +}); + +function hasTmux(): boolean { + try { execSync("tmux -V", { stdio: "ignore" }); return true; } catch { return false; } +} + +describe("codex agent launch (real tmux)", { skip: !hasTmux() }, () => { + let home: string; + let workspace: string; + const slug = `kanban-codex-test-${Date.now()}`; + const identity = agentIdentity(slug, "codex"); + + beforeEach(() => { + home = mkdtempSync(join(tmpdir(), "kanban-codex-home-")); + workspace = mkdtempSync(join(tmpdir(), "kanban-codex-ws-")); + process.env.KANBAN_CODE_HOME = home; + }); + afterEach(() => { + try { execSync(`tmux kill-session -t ${identity.tmuxName}`, { stdio: "ignore" }); } catch {} + delete process.env.KANBAN_CODE_HOME; + rmSync(home, { recursive: true, force: true }); + rmSync(workspace, { recursive: true, force: true }); + }); + + test("launches codex fresh (no resume) and tags the card assistant=codex", () => { + const result = ensureAgentSession(identity, { cwd: workspace, bin: "true" }); + assert.equal(result.action, "launched"); + assert.match(result.command!, /true --no-alt-screen --dangerously-bypass-approvals-and-sandbox/); + const card = readLinks().find((l) => l.name === slug); + assert.equal(card?.assistant, "codex"); + // A second reconcile is a no-op while the session is alive. + const again = ensureAgentSession(identity, { cwd: workspace, bin: "true" }); + assert.equal(again.action, "noop-running"); + }); +}); diff --git a/cli/src/data.ts b/cli/src/data.ts index 2e441c5f..6851ff87 100644 --- a/cli/src/data.ts +++ b/cli/src/data.ts @@ -361,6 +361,53 @@ export function killTmuxSession(name: string): { ok: boolean; error?: string } { } } +/// Locate the newest Codex rollout (.jsonl) for a session by working directory. 
/// Codex mints its own session id, so the rollout cannot be addressed by our
/// session id the way a Claude transcript is. Instead, each rollout's leading
/// session_meta record carries the cwd, and a per-agent workspace is unique, so
/// we match on that and return the most recently modified match.
///
/// Only the first 64 KiB of each rollout is read (not the whole file), so the
/// scan stays cheap even as rollouts grow. Files that cannot be read, or whose
/// head holds no parseable "cwd" string, are skipped.
///
/// @param cwd Working directory to match, compared by exact string equality
///   against the cwd recorded in the rollout.
/// @returns Path of the newest matching `rollout-*.jsonl` under
///   `$CODEX_HOME/sessions` (default `~/.codex/sessions`), or `undefined` when
///   the directory is missing or nothing matches.
export function findCodexRollout(cwd: string): string | undefined {
  const base = join(process.env.CODEX_HOME ?? join(homedir(), ".codex"), "sessions");
  if (!existsSync(base)) return undefined;

  // One scratch buffer shared by every file: each head is decoded to a string
  // before the next read overwrites the bytes.
  const scratch = Buffer.alloc(65536);
  // The session_meta line can be very large (Codex embeds its full
  // base_instructions), so it is not JSON.parsed as a whole; cwd appears early,
  // so a bounded read + regex works regardless of the line length.
  const cwdField = /"cwd"\s*:\s*"((?:[^"\\]|\\.)*)"/;

  // Read the first chunk of a file, always releasing the descriptor, even when
  // the read itself throws.
  const readHead = (file: string): string => {
    const fd = openSync(file, "r");
    try {
      const bytesRead = readSync(fd, scratch, 0, scratch.length, 0);
      return scratch.toString("utf-8", 0, bytesRead);
    } finally {
      closeSync(fd);
    }
  };

  // List a directory; an unreadable directory contributes no entries.
  const listDir = (dir: string) => {
    try {
      return readdirSync(dir, { withFileTypes: true });
    } catch {
      return [];
    }
  };

  let best: { path: string; mtime: number } | undefined;
  const walk = (dir: string): void => {
    for (const entry of listDir(dir)) {
      const full = join(dir, entry.name);
      if (entry.isDirectory()) {
        walk(full);
        continue;
      }
      if (!entry.name.startsWith("rollout-") || !entry.name.endsWith(".jsonl")) continue;
      try {
        const match = cwdField.exec(readHead(full));
        if (!match) continue;
        // Re-wrap the captured text in quotes so JSON.parse undoes the string
        // escapes (e.g. "\\" in Windows paths) before comparing.
        const foundCwd: unknown = JSON.parse(`"${match[1]}"`);
        if (foundCwd !== cwd) continue;
        const mtime = statSync(full).mtimeMs;
        if (!best || mtime > best.mtime) best = { path: full, mtime };
      } catch {
        /* skip unreadable/partial */
      }
    }
  };
  walk(base);
  return best?.path;
}

/// Locate a Claude session transcript by scanning ~/.claude/projects//.
/// Encoding-independent: finds .jsonl wherever Claude placed it.
export function findSessionJsonl(sessionId: string): string | undefined { diff --git a/cli/src/hooks.ts b/cli/src/hooks.ts index 1d027be7..eaa23787 100644 --- a/cli/src/hooks.ts +++ b/cli/src/hooks.ts @@ -1,4 +1,5 @@ import { readFileSync, writeFileSync, mkdirSync, existsSync, chmodSync } from "node:fs"; +import { homedir } from "node:os"; import { dirname, join } from "node:path"; import { kanbanHome, claudeSettingsPath } from "./paths.js"; import { sortedStringify } from "./cards.js"; @@ -8,6 +9,19 @@ import { sortedStringify } from "./cards.js"; /// activity tracking. Mirrors the Swift HookManager. export const HOOK_EVENTS = ["Stop", "Notification", "SessionStart", "SessionEnd", "UserPromptSubmit"]; +/// Codex hook events we register (Codex exposes the same names; we only need the +/// subset that drives the daemon: Stop for auto-send, UserPromptSubmit for the +/// Slack receipt mirror, SessionStart/Stop for activity). +export const CODEX_HOOK_EVENTS = ["SessionStart", "UserPromptSubmit", "Stop"]; + +function codexConfigDir(): string { + return process.env.CODEX_HOME ?? join(homedir(), ".codex"); +} + +function codexHooksPath(): string { + return join(codexConfigDir(), "hooks.json"); +} + function defaultHookScriptPath(): string { return join(kanbanHome(), "hook.sh"); } @@ -16,8 +30,8 @@ function defaultHookScriptPath(): string { /// dependency (lightweight grep parsing). Honors KANBAN_CODE_HOME so the same /// script works in tests and in alternate deployments. const HOOK_SCRIPT = `#!/usr/bin/env bash -# Kanban hook handler. Receives JSON on stdin from Claude Code hooks and appends -# a timestamped event line to /hook-events.jsonl. +# Kanban hook handler. Receives JSON on stdin from Claude Code or Codex hooks and +# appends a timestamped event line to /hook-events.jsonl. 
set -euo pipefail EVENTS_DIR="\${KANBAN_CODE_HOME:-\$HOME/.kanban-code}" @@ -33,6 +47,13 @@ transcript=$(echo "$input" | grep -o '"transcript_path":"[^"]*"' | head -1 | cut if [ -z "$session_id" ]; then session_id=$(echo "$input" | grep -o '"sessionId":"[^"]*"' | head -1 | cut -d'"' -f4) fi +# The launcher exports KANBAN_SESSION_ID (the stable uuidv5 of the slug) into the +# session, so events correlate to the agent's card regardless of the id the +# runtime mints internally. This is what lets Codex (which generates its own +# session id) share the daemon/bridge correlation path with Claude. +if [ -n "\${KANBAN_SESSION_ID:-}" ]; then + session_id="\$KANBAN_SESSION_ID" +fi [ -z "$session_id" ] && exit 0 # For UserPromptSubmit, capture the whole payload (base64, so the prompt's @@ -145,9 +166,50 @@ export function installHooks(opts: InstallHooksOptions = {}): InstallHooksResult mkdirSync(dirname(settingsPath), { recursive: true }); writeFileSync(settingsPath, sortedStringify(root)); + // NOTE: Codex hooks are intentionally NOT installed here. Codex 0.134.0 gates + // command hooks behind an interactive trust prompt that --dangerously-bypass- + // hook-trust does not reliably suppress in the inline TUI, which hangs the + // headless session on a modal. Codex agents are instead mirrored to Slack via + // their rollout transcript (see findCodexRollout / formatCodexRolloutLines in + // the bridge), and steered via tmux paste, so no Codex hooks are needed. + // installCodexHooks() remains available for when Codex honors trust bypass. + return { settingsPath, hookScriptPath, statuslinePath, events: HOOK_EVENTS }; } +export interface InstallCodexHooksOptions { + hooksPath?: string; + hookScriptPath?: string; +} + +/// Install Codex hooks (~/.codex/hooks.json) pointing at the shared hook.sh. +/// Idempotent and additive: preserves any other registered hooks for each event. 
+export function installCodexHooks(opts: InstallCodexHooksOptions = {}): { hooksPath: string; events: string[] } { + const hooksPath = opts.hooksPath ?? codexHooksPath(); + const hookScriptPath = opts.hookScriptPath ?? defaultHookScriptPath(); + deployScript(hookScriptPath, HOOK_SCRIPT); + + // Codex's hooks.json nests events under a top-level "hooks" key (mirrors the + // config.toml [hooks] table); without the wrapper Codex ignores the file. + const root = readJson(hooksPath); + const hooks = (root.hooks ?? {}) as Record; + const hookEntry = { type: "command", command: hookScriptPath }; + for (const event of CODEX_HOOK_EVENTS) { + const groups: any[] = Array.isArray(hooks[event]) ? hooks[event] : []; + const present = groups.some((g) => (g?.hooks ?? []).some((h: any) => h?.command === hookScriptPath)); + if (!present) { + if (groups.length === 0) groups.push({ hooks: [hookEntry] }); + else groups[0].hooks = [...(groups[0].hooks ?? []), hookEntry]; + } + hooks[event] = groups; + } + root.hooks = hooks; + + mkdirSync(dirname(hooksPath), { recursive: true }); + writeFileSync(hooksPath, sortedStringify(root)); + return { hooksPath, events: CODEX_HOOK_EVENTS }; +} + /// True if our hook script is registered for every required event. export function areHooksInstalled(settingsPath?: string, hookScriptPath?: string): boolean { const root = readJson(settingsPath ?? 
claudeSettingsPath()); diff --git a/cli/src/launch.test.ts b/cli/src/launch.test.ts index b0e77d6d..6f3a4c27 100644 --- a/cli/src/launch.test.ts +++ b/cli/src/launch.test.ts @@ -53,7 +53,7 @@ describe("headless agent launch/resume (real tmux)", skipIfNoTmux, () => { }); test("fresh launch creates a tmux session and a stable card", () => { - const result = ensureAgentSession(identity, { cwd: workspace, claudeBin: "true" }); + const result = ensureAgentSession(identity, { cwd: workspace, bin: "true" }); assert.equal(result.action, "launched"); assert.match(result.command!, /true --session-id .* --name /); @@ -72,10 +72,10 @@ describe("headless agent launch/resume (real tmux)", skipIfNoTmux, () => { }); test("re-running while alive is a no-op: no restart, no duplicate card", () => { - const first = ensureAgentSession(identity, { cwd: workspace, claudeBin: "true" }); + const first = ensureAgentSession(identity, { cwd: workspace, bin: "true" }); const firstCardId = first.card.id; - const second = ensureAgentSession(identity, { cwd: workspace, claudeBin: "true" }); + const second = ensureAgentSession(identity, { cwd: workspace, bin: "true" }); assert.equal(second.action, "noop-running"); assert.equal(second.command, undefined, "must not build a launch command when already running"); @@ -90,7 +90,7 @@ describe("headless agent launch/resume (real tmux)", skipIfNoTmux, () => { mkdirSync(projDir, { recursive: true }); writeFileSync(join(projDir, `${identity.sessionId}.jsonl`), '{"type":"user"}\n'); - const result = ensureAgentSession(identity, { cwd: workspace, claudeBin: "true" }); + const result = ensureAgentSession(identity, { cwd: workspace, bin: "true" }); assert.equal(result.action, "resumed"); assert.match(result.command!, new RegExp(`true --resume ${identity.sessionId}`)); execSync(`tmux has-session -t ${identity.tmuxName}`); diff --git a/cli/src/reconcile.test.ts b/cli/src/reconcile.test.ts index 6fd8bf94..93d2b132 100644 --- a/cli/src/reconcile.test.ts +++ 
b/cli/src/reconcile.test.ts @@ -87,7 +87,7 @@ describe("reconciler (real git + tmux)", skipIfNoTmux, () => { test("creates a per-agent worktree on agent/ and launches the session", () => { provisionRepo(root, reposDir, "myrepo"); const slug = trackSlug(`recon-a-${Date.now()}`); - const result = reconcileAll(makeFile([{ slug, repos: ["acme/myrepo"] }]), { claudeBin: "true" }); + const result = reconcileAll(makeFile([{ slug, repos: ["acme/myrepo"] }]), { bin: "true" }); const agent = result.agents[0]; assert.equal(agent.launch.action, "launched"); @@ -107,7 +107,7 @@ describe("reconciler (real git + tmux)", skipIfNoTmux, () => { test("the agent worktree structurally cannot check out main", () => { provisionRepo(root, reposDir, "myrepo"); const slug = trackSlug(`recon-main-${Date.now()}`); - reconcileAll(makeFile([{ slug, repos: ["acme/myrepo"] }]), { claudeBin: "true" }); + reconcileAll(makeFile([{ slug, repos: ["acme/myrepo"] }]), { bin: "true" }); const worktree = join(workspacesDir, slug, "myrepo"); // main is checked out in the canonical clone, so git refuses it here. 
assert.throws(() => g(["-C", worktree, "checkout", "main"]), /already (used|checked out)/i); @@ -118,8 +118,8 @@ describe("reconciler (real git + tmux)", skipIfNoTmux, () => { const slug = trackSlug(`recon-idem-${Date.now()}`); const file = makeFile([{ slug, repos: ["acme/myrepo"] }]); - const first = reconcileAll(file, { claudeBin: "true" }); - const second = reconcileAll(file, { claudeBin: "true" }); + const first = reconcileAll(file, { bin: "true" }); + const second = reconcileAll(file, { bin: "true" }); assert.equal(second.agents[0].launch.action, "noop-running"); assert.equal(second.agents[0].repos[0].worktreeCreated, false); @@ -130,7 +130,7 @@ describe("reconciler (real git + tmux)", skipIfNoTmux, () => { test("a missing canonical clone is a loud error (IaC owns provisioning)", () => { const slug = trackSlug(`recon-missing-${Date.now()}`); assert.throws( - () => reconcileAgent({ slug, repos: ["acme/nope"] }, makeFile([]), { claudeBin: "true" }), + () => reconcileAgent({ slug, repos: ["acme/nope"] }, makeFile([]), { bin: "true" }), /Canonical clone .* is missing/ ); }); @@ -138,10 +138,10 @@ describe("reconciler (real git + tmux)", skipIfNoTmux, () => { test("prune tears down an agent removed from config", () => { provisionRepo(root, reposDir, "myrepo"); const slug = trackSlug(`recon-prune-${Date.now()}`); - reconcileAll(makeFile([{ slug, repos: ["acme/myrepo"] }]), { claudeBin: "true" }); + reconcileAll(makeFile([{ slug, repos: ["acme/myrepo"] }]), { bin: "true" }); execSync(`tmux has-session -t ${slug}`); - const pruneResult = reconcileAll(makeFile([]), { claudeBin: "true", prune: true }); + const pruneResult = reconcileAll(makeFile([]), { bin: "true", prune: true }); assert.deepEqual(pruneResult.pruned, [slug]); assert.throws(() => execSync(`tmux has-session -t ${slug} 2>/dev/null`), "tmux session should be gone"); diff --git a/cli/src/slack/announce.ts b/cli/src/slack/announce.ts index 4d6f8a08..b4039369 100644 --- a/cli/src/slack/announce.ts +++ 
b/cli/src/slack/announce.ts @@ -8,16 +8,10 @@ import { homedir } from "node:os"; /// agent's Slack channel. Messages relayed *from* a Slack human must NOT go /// through here, they already appear in Slack as that person's message. -/// Header shown above each mirrored prompt. It marks the text as the input the -/// agent received (not the agent's own reply), so a reader can tell the two -/// apart in the channel. -export const RECEIVED_MESSAGE_HEADER = ">>> Received user message"; - -/// Format an injected prompt for the channel: the header, then the body in -/// italics (Slack mrkdwn uses _underscores_ for italic; * and ** do not work). -export function formatReceivedMessage(text: string): string { - return `${RECEIVED_MESSAGE_HEADER}\n\n_${text}_`; -} +// The received-message formatting lives in format.ts (all Slack rendering does); +// re-exported here so the Claude announce path and existing imports stay stable. +export { RECEIVED_MESSAGE_HEADER, formatReceivedMessage } from "./format.js"; +import { formatReceivedMessage } from "./format.js"; function defaultConfigPath(): string { return process.env.KANBAN_AGENTS_CONFIG || join(homedir(), ".kanban-code", "agents.yaml"); diff --git a/cli/src/slack/bridge.ts b/cli/src/slack/bridge.ts index 89395a79..d3a9d37b 100644 --- a/cli/src/slack/bridge.ts +++ b/cli/src/slack/bridge.ts @@ -1,12 +1,14 @@ import { existsSync, statSync, openSync, readSync, closeSync } from "node:fs"; +import { join } from "node:path"; import { SocketModeClient } from "@slack/socket-mode"; import { SlackClient } from "./client.js"; import { routeSlackMessage, ChannelMapping } from "./inbound.js"; -import { formatTranscriptLines } from "./format.js"; +import { formatTranscriptLines, formatCodexRolloutLines } from "./format.js"; import { loadAgentsConfig } from "../agents/config.js"; import { agentIdentity } from "../agents/identity.js"; +import { Runtime } from "../agents/runtime.js"; import { recordAnnounceSuppress } from 
"./announce-suppress.js"; -import { findSessionJsonl, pasteTmuxPrompt } from "../data.js"; +import { findSessionJsonl, findCodexRollout, pasteTmuxPrompt } from "../data.js"; export interface BridgeOptions { botToken: string; @@ -18,7 +20,10 @@ export interface BridgeOptions { interface TailState { slug: string; + runtime: Runtime; sessionId: string; + /// For Codex, the per-agent workspace used to locate its rollout by cwd. + cwd?: string; channelId: string; path?: string; offset: number; @@ -64,6 +69,24 @@ export async function runSlackBridge(opts: BridgeOptions): Promise { const mapping: ChannelMapping = {}; const tails: TailState[] = []; + + // Slack-human messages relayed into a Codex agent reappear in that agent's + // rollout as user_message events, which the poll loop would otherwise echo + // back to the channel as ">>> Received user message" (a duplicate of the + // human's own message). Claude suppresses this via the daemon's announce- + // suppress markers, but the Codex rollout mirror runs in this process, so we + // track recent relays here and drop the matching echo. Keyed by slug. + const recentRelays = new Map(); + const RELAY_ECHO_TTL_MS = 90_000; + const consumeRelayEcho = (slug: string, mirrored: string): boolean => { + const list = recentRelays.get(slug); + if (!list?.length) return false; + const now = Date.now(); + const i = list.findIndex((r) => now - r.ts <= RELAY_ECHO_TTL_MS && mirrored.includes(r.text.trim())); + if (i < 0) return false; + list.splice(i, 1); // consume so a genuine resend later is not swallowed + return true; + }; for (const a of agents) { const channelId = await client.resolveChannelId(a.slackChannel!); if (!channelId) { @@ -71,24 +94,40 @@ export async function runSlackBridge(opts: BridgeOptions): Promise { continue; } mapping[channelId] = a.slug; - const sessionId = agentIdentity(a.slug).sessionId; - const path = findSessionJsonl(sessionId); + const runtime = (a.runtime ?? 
"claude") as Runtime; + const sessionId = agentIdentity(a.slug, runtime).sessionId; + const cwd = join(file.workspacesDir, a.slug); + const path = runtime === "codex" ? findCodexRollout(cwd) : findSessionJsonl(sessionId); // Start at EOF so we mirror only new activity, not the whole backlog. - tails.push({ slug: a.slug, sessionId, channelId, path, offset: path ? statSync(path).size : 0 }); + tails.push({ slug: a.slug, runtime, sessionId, cwd, channelId, path, offset: path ? statSync(path).size : 0 }); } // agent -> slack setInterval(async () => { for (const t of tails) { if (!t.path) { - t.path = findSessionJsonl(t.sessionId); + t.path = t.runtime === "codex" ? findCodexRollout(t.cwd!) : findSessionJsonl(t.sessionId); if (t.path) t.offset = statSync(t.path).size; // skip backlog on first discovery continue; } + // Codex writes a fresh rollout file per session, so a restart (or its own + // auto-compaction) rotates the file. Follow the newest one from its start + // so a relaunched agent keeps mirroring without restarting the bridge. + if (t.runtime === "codex") { + const latest = findCodexRollout(t.cwd!); + if (latest && latest !== t.path) { + t.path = latest; + t.offset = 0; + } + } if (!existsSync(t.path)) continue; const { objs, newOffset } = readAppendedLines(t.path, t.offset); t.offset = newOffset; - for (const post of formatTranscriptLines(objs)) { + const posts = t.runtime === "codex" ? formatCodexRolloutLines(objs) : formatTranscriptLines(objs); + for (const post of posts) { + // Don't echo a prompt we just relayed from a Slack human (it's already + // in the channel as their message). + if (t.runtime === "codex" && post.role === "user" && consumeRelayEcho(t.slug, post.text)) continue; try { await client.post(t.channelId, post.text); } catch (e) { @@ -108,6 +147,10 @@ export async function runSlackBridge(opts: BridgeOptions): Promise { // (it already appears there as that person's Slack message). 
Recorded // before the paste so the marker is in place before UserPromptSubmit. recordAnnounceSuppress(agentIdentity(decision.slug).sessionId); + // Also remember it for the in-process Codex rollout-echo guard above. + const relays = recentRelays.get(decision.slug) ?? []; + relays.push({ text: decision.text, ts: Date.now() }); + recentRelays.set(decision.slug, relays); pasteTmuxPrompt(decision.slug, decision.text); // tmux session name == slug } }); diff --git a/cli/src/slack/format.ts b/cli/src/slack/format.ts index 974c0694..fc77bc5e 100644 --- a/cli/src/slack/format.ts +++ b/cli/src/slack/format.ts @@ -14,6 +14,17 @@ function truncate(s: string, max = MAX_TEXT): string { return t.length > max ? t.slice(0, max) + "…" : t; } +/// Header shown above each mirrored prompt. It marks the text as the input the +/// agent received (not the agent's own reply), so a reader can tell the two +/// apart in the channel. +export const RECEIVED_MESSAGE_HEADER = ">>> Received user message"; + +/// Format an injected prompt for the channel: the header, then the body in +/// italics (Slack mrkdwn uses _underscores_ for italic; * and ** do not work). +export function formatReceivedMessage(text: string): string { + return `${RECEIVED_MESSAGE_HEADER}\n\n_${text}_`; +} + /// Keep the last 3 path components (mirrors TranscriptReader.shortenPath). export function shortenPath(path: string): string { const parts = path.split("/").filter(Boolean); @@ -127,3 +138,29 @@ export function formatTranscriptLines(objs: any[]): SlackPost[] { flush(); return posts; } + +/// Format Codex rollout (.jsonl) records into Slack posts. Codex logs a stream +/// of records; we mirror the prompts the agent receives (user_message), the +/// agent's own messages (its "movement"), and the commands it runs, skipping +/// reasoning/system noise so the channel reads like the Claude mirror. 
Codex +/// gates command hooks behind a trust prompt, so this rollout tail is also how +/// a received prompt is announced (the Claude path uses the UserPromptSubmit +/// hook). Event shapes: event_msg{payload:{type:"user_message",message}} for an +/// injected prompt, {type:"agent_message",message} for assistant text, and +/// {type:"exec_command_begin",command} for shell runs. +export function formatCodexRolloutLines(objs: any[]): SlackPost[] { + const posts: SlackPost[] = []; + for (const o of objs) { + if (o?.type !== "event_msg") continue; + const p = o.payload ?? {}; + if (p.type === "user_message" && typeof p.message === "string" && p.message.trim()) { + posts.push({ role: "user", text: formatReceivedMessage(truncate(p.message)) }); + } else if (p.type === "agent_message" && typeof p.message === "string" && p.message.trim()) { + posts.push({ role: "assistant", text: truncate(p.message) }); + } else if (p.type === "exec_command_begin") { + const cmd = Array.isArray(p.command) ? p.command.join(" ") : String(p.command ?? 
""); + if (cmd.trim()) posts.push({ role: "assistant", text: fenceBlock(`$ ${truncate(cmd, 300)}`) }); + } + } + return posts; +} diff --git a/specs/system/headless-runtime.feature b/specs/system/headless-runtime.feature index 18de5a79..46742cac 100644 --- a/specs/system/headless-runtime.feature +++ b/specs/system/headless-runtime.feature @@ -43,3 +43,14 @@ Feature: Headless runtime engine (no macOS app) When the box finishes booting Then reconcile-on-boot resumes every configured agent with "--resume " And the daemon is restarted by the service manager + + Scenario: An agent can be driven by the Codex runtime instead of Claude + Given an agent declares "runtime: codex" in the agents config + When the reconciler launches it + Then it runs "codex --no-alt-screen --dangerously-bypass-approvals-and-sandbox --dangerously-bypass-hook-trust" in tmux (no Claude --session-id/--resume) + And the daemon's context-threshold self-compaction is skipped for it (Codex auto-compacts and exposes no context usage) + And sending and Slack-inbound steering work unchanged because they paste into the tmux session by slug + And its whole conversation is mirrored to Slack automatically by tailing its Codex rollout transcript (located by the agent's workspace cwd): received prompts as ">>> Received user message", the agent's own messages, and the commands it runs, just like the Claude hook mirror, since Codex 0.134.0 gates command hooks behind a trust prompt that --dangerously-bypass-hook-trust does not suppress in the inline TUI + And the agent is never instructed to post to Slack itself; it communicates normally and the bridge does the mirroring + And a prompt relayed from a Slack human is not echoed back to the channel (it is already there as that person's message) + And when Codex rotates its rollout file (a relaunched session or its own auto-compaction) the bridge follows the newest rollout so mirroring continues without a bridge restart