diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts index bbdce9fd7472..d9ed628ef771 100644 --- a/packages/opencode/src/session/compaction.ts +++ b/packages/opencode/src/session/compaction.ts @@ -19,6 +19,7 @@ import { Effect, Layer, ServiceMap } from "effect" import { makeRuntime } from "@/effect/run-service" import { InstanceState } from "@/effect/instance-state" import { isOverflow as overflow } from "./overflow" +import { Evidence } from "./evidence" export namespace SessionCompaction { const log = Log.create({ service: "session.compaction" }) @@ -130,7 +131,10 @@ export namespace SessionCompaction { if (pruned > PRUNE_MINIMUM) { for (const part of toPrune) { if (part.state.status === "completed") { - part.state.time.compacted = Date.now() + const state = part.state + const evidence = Evidence.tool({ tool: part.tool, state }) + state.time.compacted = Date.now() + state.metadata = { ...state.metadata, evidence } yield* session.updatePart(part) } } diff --git a/packages/opencode/src/session/evidence.ts b/packages/opencode/src/session/evidence.ts new file mode 100644 index 000000000000..326e7a95e856 --- /dev/null +++ b/packages/opencode/src/session/evidence.ts @@ -0,0 +1,90 @@ +import { Hash } from "@/util/hash" +import { Locale } from "@/util/locale" +import type { MessageV2 } from "./message-v2" + +export namespace Evidence { + const INPUT_MAX = 240 + const OUTPUT_MAX = 600 + const OUTPUT_LINES = 12 + const HASH_MAX = 12 + const FILE_MAX = 3 + + export interface Tool { + tool: string + title: string + input: string + excerpt: string + hash: string + bytes: number + lines: number + path?: string + files?: string[] + } + + function clip(input: string) { + return Locale.truncate(input.split("\n").slice(0, OUTPUT_LINES).join("\n"), OUTPUT_MAX) + } + + function files(input?: MessageV2.ToolStateCompleted["attachments"]) { + if (!input?.length) return undefined + const list = input.map((item) => item.filename ?? item.mime) + if (list.length <= FILE_MAX) return list + return [...list.slice(0, FILE_MAX), `+${list.length - FILE_MAX} more`] + } + + function path(input: MessageV2.ToolStateCompleted["metadata"]) { + return typeof input.outputPath === "string" ? input.outputPath : undefined + } + + export function tool(input: { + tool: string + state: Pick + }): Tool { + const data = JSON.stringify(input.state.input) + return { + tool: input.tool, + title: input.state.title, + input: Locale.truncate(data === undefined ? "{}" : data, INPUT_MAX), + excerpt: clip(input.state.output), + hash: Hash.fast(input.state.output).slice(0, HASH_MAX), + bytes: Buffer.byteLength(input.state.output, "utf-8"), + lines: input.state.output.split("\n").length, + path: path(input.state.metadata), + files: files(input.state.attachments), + } + } + + export function isTool(input: unknown): input is Tool { + if (!input || typeof input !== "object") return false + return ( + "tool" in input && + typeof input.tool === "string" && + "title" in input && + typeof input.title === "string" && + "input" in input && + typeof input.input === "string" && + "excerpt" in input && + typeof input.excerpt === "string" && + "hash" in input && + typeof input.hash === "string" && + "bytes" in input && + typeof input.bytes === "number" && + "lines" in input && + typeof input.lines === "number" + ) + } + + export function text(input: Tool) { + return [ + "[Compacted tool result]", + `tool: ${input.tool}`, + `title: ${input.title}`, + `input: ${input.input}`, + `proof: sha1=${input.hash}, bytes=${input.bytes}, lines=${input.lines}`, + ...(input.path ? [`path: ${input.path}`] : []), + ...(input.files?.length ? [`attachments: ${input.files.join(", ")}`] : []), + "excerpt:", + input.excerpt, + ].join("\n") + } +} diff --git a/packages/opencode/src/session/llm.ts b/packages/opencode/src/session/llm.ts index c9a62c8645e0..0048d5617fe1 100644 --- a/packages/opencode/src/session/llm.ts +++ b/packages/opencode/src/session/llm.ts @@ -35,6 +35,7 @@ export namespace LLM { tools: Record retries?: number toolChoice?: "auto" | "required" | "none" + opts?: Record } export type StreamRequest = StreamInput & { @@ -142,6 +143,7 @@ export namespace LLM { mergeDeep(input.model.options), mergeDeep(input.agent.options), mergeDeep(variant), + mergeDeep(input.opts ?? {}), ) if (isOpenaiOauth) { options.instructions = system.join("\n") @@ -255,7 +257,7 @@ export namespace LLM { } } - return streamText({ + const result = streamText({ onError(error) { l.error("stream error", { error, @@ -332,6 +334,8 @@ export namespace LLM { }, }, }) + + return result } function resolveTools(input: Pick) { diff --git a/packages/opencode/src/session/message-v2.ts b/packages/opencode/src/session/message-v2.ts index e8aab62d8423..328ff7782438 100644 --- a/packages/opencode/src/session/message-v2.ts +++ b/packages/opencode/src/session/message-v2.ts @@ -15,6 +15,7 @@ import type { SystemError } from "bun" import type { Provider } from "@/provider/provider" import { ModelID, ProviderID } from "@/provider/schema" import { Effect } from "effect" +import { Evidence } from "./evidence" /** Error shape thrown by Bun's fetch() when gzip/br decompression fails mid-stream */ interface FetchDecompressionError extends Error { @@ -255,6 +256,7 @@ export namespace MessageV2 { reason: z.string(), snapshot: z.string().optional(), cost: z.number(), + metadata: z.record(z.string(), z.any()).optional(), tokens: z.object({ total: z.number().optional(), input: z.number(), @@ -715,8 +717,15 @@ export namespace MessageV2 { if (part.type === "tool") { toolNames.add(part.tool) if (part.state.status === "completed") { - const outputText = part.state.time.compacted ? "[Old tool result content cleared]" : part.state.output - const attachments = part.state.time.compacted || options?.stripMedia ? [] : (part.state.attachments ?? []) + const state = part.state + const outputText = state.time.compacted + ? Evidence.text( + Evidence.isTool(state.metadata.evidence) + ? state.metadata.evidence + : Evidence.tool({ tool: part.tool, state }), + ) + : state.output + const attachments = state.time.compacted || options?.stripMedia ? [] : (state.attachments ?? []) // For providers that don't support media in tool results, extract media files // (images, PDFs) to be sent as a separate user message @@ -739,7 +748,7 @@ export namespace MessageV2 { type: ("tool-" + part.tool) as `tool-${string}`, state: "output-available", toolCallId: part.callID, - input: part.state.input, + input: state.input, output, ...(differentModel ? {} : { callProviderMetadata: part.metadata }), }) diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts index 146c73f27712..bcdbf1caafa7 100644 --- a/packages/opencode/src/session/processor.ts +++ b/packages/opencode/src/session/processor.ts @@ -277,6 +277,7 @@ export namespace SessionProcessor { id: PartID.ascending(), reason: value.finishReason, snapshot: yield* snapshot.track(), + metadata: value.providerMetadata, messageID: ctx.assistantMessage.id, sessionID: ctx.assistantMessage.sessionID, type: "step-finish", diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts index b91dfded5e6b..abf8ee47285c 100644 --- a/packages/opencode/src/session/prompt.ts +++ b/packages/opencode/src/session/prompt.ts @@ -67,6 +67,28 @@ const STRUCTURED_OUTPUT_SYSTEM_PROMPT = `IMPORTANT: The user has requested struc export namespace SessionPrompt { const log = Log.create({ service: "session.prompt" }) + function threaded(model: Provider.Model) { + return model.providerID === "openai" + } + + function chain(input: { model: Provider.Model; user: MessageV2.User; assistant?: MessageV2.WithParts }) { + const msg = input.assistant + if (!threaded(input.model) || !msg || msg.info.role !== "assistant") return + if ((input.model.options as { store?: boolean } | undefined)?.store !== true) return + if (`${input.model.providerID}/${input.model.id}` !== `${msg.info.providerID}/${msg.info.modelID}`) return + if (input.user.id > msg.info.id) return + if ( + !msg.parts.some( + (part) => part.type === "tool" && part.state.status !== "pending" && part.state.status !== "running", + ) + ) + return + const part = msg.parts.findLast((part): part is MessageV2.StepFinishPart => part.type === "step-finish") + const id = part?.metadata?.openai?.responseId + if (typeof id !== "string" || !id) return + return { id, msgs: [msg] } + } + export interface Interface { readonly assertNotBusy: (sessionID: SessionID) => Effect.Effect readonly cancel: (sessionID: SessionID) => Effect.Effect @@ -1343,6 +1365,23 @@ NOTE: At any point in time through this workflow you should feel free to ask the let structured: unknown | undefined let step = 0 const session = yield* sessions.get(sessionID) + const skills = new Map() + const env = new Map() + const systemPrompt = Effect.fnUntraced(function* (agent: Agent.Info, model: Provider.Model) { + const key = `${model.providerID}/${model.id}` + const skill = skills.has(agent.name) + ? skills.get(agent.name) + : yield* Effect.promise(() => SystemPrompt.skills(agent)).pipe( + Effect.tap((value) => Effect.sync(() => skills.set(agent.name, value))), + ) + const vars = env.has(key) + ? env.get(key)! + : yield* Effect.promise(() => SystemPrompt.environment(model)).pipe( + Effect.tap((value) => Effect.sync(() => env.set(key, value))), + ) + const instructions = yield* instruction.system().pipe(Effect.orDie) + return [...vars, ...(skill ? [skill] : []), ...instructions] + }) while (true) { yield* status.set(sessionID, { type: "busy" }) @@ -1501,15 +1540,16 @@ NOTE: At any point in time through this workflow you should feel free to ask the yield* plugin.trigger("experimental.chat.messages.transform", {}, { messages: msgs }) - const [skills, env, instructions, modelMsgs] = yield* Effect.all([ - Effect.promise(() => SystemPrompt.skills(agent)), - Effect.promise(() => SystemPrompt.environment(model)), - instruction.system().pipe(Effect.orDie), - Effect.promise(() => MessageV2.toModelMessages(msgs, model)), + const reuse = chain({ model, user: lastUser, assistant: lastAssistantMsg }) + const src = reuse ? reuse.msgs : msgs + + const [system, modelMsgs] = yield* Effect.all([ + systemPrompt(agent, model), + Effect.promise(() => MessageV2.toModelMessages(src, model)), ]) - const system = [...env, ...(skills ? [skills] : []), ...instructions] const format = lastUser.format ?? { type: "text" as const } if (format.type === "json_schema") system.push(STRUCTURED_OUTPUT_SYSTEM_PROMPT) + const send = reuse ? modelMsgs.filter((msg) => msg.role === "tool") : modelMsgs const result = yield* handle.process({ user: lastUser, agent, @@ -1517,10 +1557,11 @@ NOTE: At any point in time through this workflow you should feel free to ask the sessionID, parentSessionID: session.parentID, system, - messages: [...modelMsgs, ...(isLastStep ? [{ role: "assistant" as const, content: MAX_STEPS }] : [])], + messages: [...send, ...(isLastStep ? [{ role: "assistant" as const, content: MAX_STEPS }] : [])], tools, model, toolChoice: format.type === "json_schema" ? "required" : undefined, + opts: reuse ? { previousResponseId: reuse.id, store: true } : undefined, }) if (structured !== undefined) { diff --git a/packages/opencode/test/session/compaction.test.ts b/packages/opencode/test/session/compaction.test.ts index 799bb3e2aeb1..848bd4eb8dc0 100644 --- a/packages/opencode/test/session/compaction.test.ts +++ b/packages/opencode/test/session/compaction.test.ts @@ -482,6 +482,13 @@ describe("session.compaction.prune", () => { expect(part?.state.status).toBe("completed") if (part?.type === "tool" && part.state.status === "completed") { expect(part.state.time.compacted).toBeNumber() + expect(part.state.metadata.evidence).toMatchObject({ + tool: "bash", + title: "done", + lines: 1, + }) + expect(part.state.metadata.evidence).toHaveProperty("hash") + expect(part.state.metadata.evidence).toHaveProperty("excerpt") } }, }) diff --git a/packages/opencode/test/session/message-v2.test.ts b/packages/opencode/test/session/message-v2.test.ts index 3634d6fb7ec8..d19edd1cc659 100644 --- a/packages/opencode/test/session/message-v2.test.ts +++ b/packages/opencode/test/session/message-v2.test.ts @@ -434,7 +434,7 @@ describe("session.message-v2.toModelMessage", () => { ]) }) - test("replaces compacted tool output with placeholder", async () => { + test("replaces compacted tool output with an evidence digest", async () => { const userID = "m-user" const assistantID = "m-assistant" @@ -470,35 +470,53 @@ describe("session.message-v2.toModelMessage", () => { }, ] - expect(await MessageV2.toModelMessages(input, model)).toStrictEqual([ - { - role: "user", - content: [{ type: "text", text: "run tool" }], - }, - { - role: "assistant", - content: [ - { - type: "tool-call", - toolCallId: "call-1", - toolName: "bash", - input: { cmd: "ls" }, - providerExecuted: undefined, - }, - ], - }, - { - role: "tool", - content: [ - { - type: "tool-result", - toolCallId: "call-1", - toolName: "bash", - output: { type: "text", value: "[Old tool result content cleared]" }, + const result = await MessageV2.toModelMessages(input, model) + + expect(result).toHaveLength(3) + expect(result[0]).toStrictEqual({ + role: "user", + content: [{ type: "text", text: "run tool" }], + }) + expect(result[1]).toStrictEqual({ + role: "assistant", + content: [ + { + type: "tool-call", + toolCallId: "call-1", + toolName: "bash", + input: { cmd: "ls" }, + providerExecuted: undefined, + }, + ], + }) + const tool = result[2] as { + role: string + content: Array<{ + type: string + toolCallId: string + toolName: string + output: { type: string; value: string } + }> + } + expect(tool).toMatchObject({ + role: "tool", + content: [ + { + type: "tool-result", + toolCallId: "call-1", + toolName: "bash", + output: { + type: "text", }, - ], - }, - ]) + }, + ], + }) + + const text = tool.content[0]!.output.value + expect(text).toContain("tool: bash") + expect(text).toContain('input: {"cmd":"ls"}') + expect(text).toContain("excerpt:\nthis should be cleared") + expect(text).not.toContain("[Old tool result content cleared]") }) test("converts assistant tool error into error-text tool result", async () => { diff --git a/packages/opencode/test/session/prompt-effect.test.ts b/packages/opencode/test/session/prompt-effect.test.ts index 17689cf274ec..6efe554749d9 100644 --- a/packages/opencode/test/session/prompt-effect.test.ts +++ b/packages/opencode/test/session/prompt-effect.test.ts @@ -27,6 +27,7 @@ import { SessionProcessor } from "../../src/session/processor" import { SessionPrompt } from "../../src/session/prompt" import { MessageID, PartID, SessionID } from "../../src/session/schema" import { SessionStatus } from "../../src/session/status" +import { SystemPrompt } from "../../src/session/system" import { Shell } from "../../src/shell/shell" import { Snapshot } from "../../src/snapshot" import { TaskTool } from "../../src/tool/task" @@ -235,6 +236,43 @@ function providerCfg(url: string) { } } +function openaiCfg(url: string, store = true) { + return { + enabled_providers: ["openai"], + agent: { + build: { + model: "openai/gpt-5.2", + }, + }, + provider: { + openai: { + name: "OpenAI", + env: ["OPENAI_API_KEY"], + npm: "@ai-sdk/openai", + api: "https://api.openai.com/v1", + models: { + "gpt-5.2": { + id: "gpt-5.2", + name: "GPT 5.2", + attachment: false, + reasoning: true, + temperature: false, + tool_call: true, + release_date: "2026-01-01", + limit: { context: 200000, output: 32000 }, + cost: { input: 0, output: 0, cache: { read: 0, write: 0 } }, + options: store ? { store: true } : {}, + }, + }, + options: { + apiKey: "test-key", + baseURL: url, + }, + }, + }, + } +} + const user = Effect.fn("test.user")(function* (sessionID: SessionID, text: string) { const session = yield* Session.Service const msg = yield* session.updateMessage({ @@ -460,6 +498,77 @@ it.live("loop continues when finish is tool-calls", () => ), ) +it.live("openai tool continuation threads the previous response", () => + provideTmpdirServer( + Effect.fnUntraced(function* ({ llm }) { + const prompt = yield* SessionPrompt.Service + const sessions = yield* Session.Service + const session = yield* sessions.create({ + title: "Pinned", + permission: [{ permission: "*", pattern: "*", action: "allow" }], + }) + + yield* prompt.prompt({ + sessionID: session.id, + agent: "build", + noReply: true, + parts: [{ type: "text", text: "hello" }], + }) + yield* llm.tool("bash", { command: "pwd" }) + yield* llm.text("done") + + const result = yield* prompt.loop({ sessionID: session.id }) + expect(result.info.role).toBe("assistant") + expect(yield* llm.calls).toBe(2) + + const input = yield* llm.inputs + expect(input).toHaveLength(2) + expect(input[0]?.previous_response_id).toBeUndefined() + expect(input[0]?.store).toBe(true) + expect(input[1]?.previous_response_id).toBe("resp_test") + expect(input[1]?.store).toBe(true) + expect(Array.isArray(input[1]?.input)).toBe(true) + if (!Array.isArray(input[1]?.input)) return + expect(input[1].input.some((item) => item?.type === "function_call_output")).toBe(true) + expect(input[1].input.some((item) => item?.type === "function_call")).toBe(false) + expect(input[1].input.some((item) => item?.role === "user")).toBe(false) + }), + { git: true, config: (url) => openaiCfg(url, true) }, + ), +) + +it.live("openai tool continuation skips threading when response storage is disabled", () => + provideTmpdirServer( + Effect.fnUntraced(function* ({ llm }) { + const prompt = yield* SessionPrompt.Service + const sessions = yield* Session.Service + const session = yield* sessions.create({ + title: "Pinned", + permission: [{ permission: "*", pattern: "*", action: "allow" }], + }) + + yield* prompt.prompt({ + sessionID: session.id, + agent: "build", + noReply: true, + parts: [{ type: "text", text: "hello" }], + }) + yield* llm.tool("bash", { command: "pwd" }) + yield* llm.text("done") + + const result = yield* prompt.loop({ sessionID: session.id }) + expect(result.info.role).toBe("assistant") + expect(yield* llm.calls).toBe(2) + + const input = yield* llm.inputs + expect(input).toHaveLength(2) + expect(input[0]?.store).toBe(false) + expect(input[1]?.previous_response_id).toBeUndefined() + }), + { git: true, config: (url) => openaiCfg(url, false) }, + ), +) + it.live("loop continues when finish is stop but assistant has tool parts", () => provideTmpdirServer( Effect.fnUntraced(function* ({ llm }) { @@ -490,6 +599,81 @@ it.live("loop continues when finish is stop but assistant has tool parts", () => ), ) +it.live("loop reloads system instructions on the next iteration", () => + provideTmpdirServer( + Effect.fnUntraced(function* ({ dir, llm }) { + const prompt = yield* SessionPrompt.Service + const sessions = yield* Session.Service + const root = path.join(dir, "AGENTS.md") + yield* Effect.promise(() => Bun.write(root, "# First Instructions")) + + const session = yield* sessions.create({ + title: "Pinned", + permission: [{ permission: "*", pattern: "*", action: "allow" }], + }) + yield* prompt.prompt({ + sessionID: session.id, + agent: "build", + noReply: true, + parts: [{ type: "text", text: "hello" }], + }) + yield* llm.tool("bash", { command: "sleep 0.2; pwd" }) + yield* llm.text("done") + + const run = yield* prompt.loop({ sessionID: session.id }).pipe(Effect.forkChild) + yield* llm.wait(1) + yield* Effect.promise(() => Bun.write(root, "# Second Instructions")) + + const exit = yield* Fiber.await(run) + expect(Exit.isSuccess(exit)).toBe(true) + if (Exit.isFailure(exit)) return + + const inputs = yield* llm.inputs + expect(inputs).toHaveLength(2) + expect(JSON.stringify(inputs[0]?.messages)).toContain("First Instructions") + expect(JSON.stringify(inputs[1]?.messages)).toContain("Second Instructions") + }), + { git: true, config: providerCfg }, + ), +) + +it.live("loop memoizes skills and environment across iterations", () => + provideTmpdirServer( + Effect.fnUntraced(function* ({ llm }) { + const prompt = yield* SessionPrompt.Service + const sessions = yield* Session.Service + const skills = spyOn(SystemPrompt, "skills") + const env = spyOn(SystemPrompt, "environment") + yield* Effect.addFinalizer(() => + Effect.sync(() => { + skills.mockRestore() + env.mockRestore() + }), + ) + + const session = yield* sessions.create({ + title: "Pinned", + permission: [{ permission: "*", pattern: "*", action: "allow" }], + }) + yield* prompt.prompt({ + sessionID: session.id, + agent: "build", + noReply: true, + parts: [{ type: "text", text: "hello" }], + }) + yield* llm.tool("bash", { command: "pwd" }) + yield* llm.text("done") + + const result = yield* prompt.loop({ sessionID: session.id }) + expect(result.info.role).toBe("assistant") + expect(yield* llm.calls).toBe(2) + expect(skills).toHaveBeenCalledTimes(1) + expect(env).toHaveBeenCalledTimes(1) + }), + { git: true, config: providerCfg }, + ), +) + it.live("failed subtask preserves metadata on error tool state", () => provideTmpdirServer( Effect.fnUntraced(function* ({ llm }) {