diff --git a/packages/app/src/context/global-sync.tsx b/packages/app/src/context/global-sync.tsx index 0cf3570a8b3d..0709725c2c21 100644 --- a/packages/app/src/context/global-sync.tsx +++ b/packages/app/src/context/global-sync.tsx @@ -24,7 +24,7 @@ import { estimateRootSessionTotal, loadRootSessionsWithFallback } from "./global import { trimSessions } from "./global-sync/session-trim" import type { ProjectMeta } from "./global-sync/types" import { SESSION_RECENT_LIMIT } from "./global-sync/types" -import { sanitizeProject } from "./global-sync/utils" +import { cloneProject, sanitizeProject } from "./global-sync/utils" import { formatServerError } from "@/utils/server-errors" type GlobalStore = { @@ -97,8 +97,9 @@ function createGlobalSync() { cacheProjects() return } - setGlobalStore("project", next) - cacheProjects() + const list = next.map(cloneProject) + setGlobalStore("project", list) + setProjectCache("value", list.map(sanitizeProject)) } const setBootStore = ((...input: unknown[]) => { diff --git a/packages/app/src/context/global-sync/utils.test.ts b/packages/app/src/context/global-sync/utils.test.ts index 6d44ac9a8928..f845fe9b7882 100644 --- a/packages/app/src/context/global-sync/utils.test.ts +++ b/packages/app/src/context/global-sync/utils.test.ts @@ -1,6 +1,7 @@ import { describe, expect, test } from "bun:test" -import type { Agent } from "@opencode-ai/sdk/v2/client" -import { normalizeAgentList } from "./utils" +import type { Agent, Project } from "@opencode-ai/sdk/v2/client" +import { createStore } from "solid-js/store" +import { cloneProject, normalizeAgentList, sanitizeProject } from "./utils" const agent = (name = "build") => ({ @@ -33,3 +34,89 @@ describe("normalizeAgentList", () => { expect(normalizeAgentList([{ name: "build" }, agent("docs")])).toEqual([agent("docs")]) }) }) + +describe("sanitizeProject", () => { + test("cloneProject detaches nested project data without stripping icon fields", () => { + const [store] = createStore({ + value: { + id: "proj_clone", + worktree: "/tmp/project-clone", + icon: { + url: "https://example.com/icon.png", + override: "data:image/png;base64,abc", + color: "blue", + }, + commands: { + start: "bun dev", + }, + time: { + created: 1, + updated: 2, + }, + sandboxes: ["/tmp/project-a"], + } satisfies Project, + }) + + const next = cloneProject(store.value) + + expect(next).not.toBe(store.value) + expect(next.time).not.toBe(store.value.time) + expect(next.sandboxes).not.toBe(store.value.sandboxes) + expect(next.commands).not.toBe(store.value.commands) + expect(next.icon).not.toBe(store.value.icon) + expect(next.icon?.url).toBe("https://example.com/icon.png") + expect(next.icon?.override).toBe("data:image/png;base64,abc") + }) + + test("clones nested project data and strips cached icon urls", () => { + const [store] = createStore({ + value: { + id: "proj_1", + worktree: "/tmp/project", + name: "Project", + icon: { + url: "https://example.com/icon.png", + override: "data:image/png;base64,abc", + color: "pink", + }, + commands: { + start: "bun dev", + }, + time: { + created: 1, + updated: 2, + }, + sandboxes: ["/tmp/project-a"], + } satisfies Project, + }) + + const next = sanitizeProject(store.value) + + expect(next).not.toBe(store.value) + expect(next.time).not.toBe(store.value.time) + expect(next.sandboxes).not.toBe(store.value.sandboxes) + expect(next.commands).not.toBe(store.value.commands) + expect(next.icon).not.toBe(store.value.icon) + expect(next.icon?.url).toBeUndefined() + expect(next.icon?.override).toBeUndefined() + expect(next.icon?.color).toBe("pink") + + next.sandboxes.push("/tmp/project-b") + expect(store.value.sandboxes).toEqual(["/tmp/project-a"]) + }) + + test("returns a detached copy even without icon overrides", () => { + const project = { + id: "proj_2", + worktree: "/tmp/project-2", + time: { created: 1, updated: 1 }, + sandboxes: [], + } satisfies Project + + const next = sanitizeProject(project) + + expect(next).not.toBe(project) + expect(next.time).not.toBe(project.time) + expect(next.sandboxes).not.toBe(project.sandboxes) + }) +}) diff --git a/packages/app/src/context/global-sync/utils.ts b/packages/app/src/context/global-sync/utils.ts index cac58f3174e1..b07a1721fbf9 100644 --- a/packages/app/src/context/global-sync/utils.ts +++ b/packages/app/src/context/global-sync/utils.ts @@ -26,14 +26,29 @@ export function normalizeProviderList(input: ProviderListResponse): ProviderList } } -export function sanitizeProject(project: Project) { - if (!project.icon?.url && !project.icon?.override) return project +export function cloneProject(project: Project) { return { ...project, - icon: { - ...project.icon, - url: undefined, - override: undefined, - }, + time: { ...project.time }, + sandboxes: [...project.sandboxes], + ...(project.commands ? { commands: { ...project.commands } } : {}), + ...(project.icon ? { icon: { ...project.icon } } : {}), + } +} + +export function sanitizeProject(project: Project) { + const next = cloneProject(project) + if (!next.icon) return next + return { + ...next, + ...(project.icon + ? { + icon: { + ...next.icon, + url: undefined, + override: undefined, + }, + } + : {}), } } diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts index bbdce9fd7472..d9ed628ef771 100644 --- a/packages/opencode/src/session/compaction.ts +++ b/packages/opencode/src/session/compaction.ts @@ -19,6 +19,7 @@ import { Effect, Layer, ServiceMap } from "effect" import { makeRuntime } from "@/effect/run-service" import { InstanceState } from "@/effect/instance-state" import { isOverflow as overflow } from "./overflow" +import { Evidence } from "./evidence" export namespace SessionCompaction { const log = Log.create({ service: "session.compaction" }) @@ -130,7 +131,10 @@ export namespace SessionCompaction { if (pruned > PRUNE_MINIMUM) { for (const part of toPrune) { if (part.state.status === "completed") { - part.state.time.compacted = Date.now() + const state = part.state + const evidence = Evidence.tool({ tool: part.tool, state }) + state.time.compacted = Date.now() + state.metadata = { ...state.metadata, evidence } yield* session.updatePart(part) } } diff --git a/packages/opencode/src/session/evidence.ts b/packages/opencode/src/session/evidence.ts new file mode 100644 index 000000000000..b3324d01fa11 --- /dev/null +++ b/packages/opencode/src/session/evidence.ts @@ -0,0 +1,90 @@ +import { Hash } from "@/util/hash" +import { Locale } from "@/util/locale" +import type { MessageV2 } from "./message-v2" + +export namespace Evidence { + const INPUT_MAX = 240 + const OUTPUT_MAX = 600 + const OUTPUT_LINES = 12 + const HASH_MAX = 12 + const FILE_MAX = 3 + + export interface Tool { + tool: string + title: string + input: string + excerpt: string + hash: string + bytes: number + lines: number + path?: string + files?: string[] + } + + function clip(input: string) { + return Locale.truncate(input.split("\n").slice(0, OUTPUT_LINES).join("\n"), OUTPUT_MAX) + } + + function files(input?: MessageV2.ToolStateCompleted["attachments"]) { + if (!input?.length) return undefined + const list = input.map((item) => item.filename ?? item.mime) + if (list.length <= FILE_MAX) return list + return [...list.slice(0, FILE_MAX), `+${list.length - FILE_MAX} more`] + } + + function path(input?: MessageV2.ToolStateCompleted["metadata"]) { + return typeof input?.outputPath === "string" ? input.outputPath : undefined + } + + export function tool(input: { + tool: string + state: Pick + }): Tool { + const data = JSON.stringify(input.state.input) + return { + tool: input.tool, + title: input.state.title, + input: Locale.truncate(data === undefined ? "{}" : data, INPUT_MAX), + excerpt: clip(input.state.output), + hash: Hash.fast(input.state.output).slice(0, HASH_MAX), + bytes: Buffer.byteLength(input.state.output, "utf-8"), + lines: input.state.output.split("\n").length, + path: path(input.state.metadata), + files: files(input.state.attachments), + } + } + + export function isTool(input: unknown): input is Tool { + if (!input || typeof input !== "object") return false + return ( + "tool" in input && + typeof input.tool === "string" && + "title" in input && + typeof input.title === "string" && + "input" in input && + typeof input.input === "string" && + "excerpt" in input && + typeof input.excerpt === "string" && + "hash" in input && + typeof input.hash === "string" && + "bytes" in input && + typeof input.bytes === "number" && + "lines" in input && + typeof input.lines === "number" + ) + } + + export function text(input: Tool) { + return [ + "[Compacted tool result]", + `tool: ${input.tool}`, + `title: ${input.title}`, + `input: ${input.input}`, + `proof: sha1=${input.hash}, bytes=${input.bytes}, lines=${input.lines}`, + ...(input.path ? [`path: ${input.path}`] : []), + ...(input.files?.length ? [`attachments: ${input.files.join(", ")}`] : []), + "excerpt:", + input.excerpt, + ].join("\n") + } +} diff --git a/packages/opencode/src/session/llm.ts b/packages/opencode/src/session/llm.ts index c9a62c8645e0..0048d5617fe1 100644 --- a/packages/opencode/src/session/llm.ts +++ b/packages/opencode/src/session/llm.ts @@ -35,6 +35,7 @@ export namespace LLM { tools: Record retries?: number toolChoice?: "auto" | "required" | "none" + opts?: Record } export type StreamRequest = StreamInput & { @@ -142,6 +143,7 @@ export namespace LLM { mergeDeep(input.model.options), mergeDeep(input.agent.options), mergeDeep(variant), + mergeDeep(input.opts ?? {}), ) if (isOpenaiOauth) { options.instructions = system.join("\n") @@ -255,7 +257,7 @@ export namespace LLM { } } - return streamText({ + const result = streamText({ onError(error) { l.error("stream error", { error, @@ -332,6 +334,8 @@ export namespace LLM { }, }, }) + + return result } function resolveTools(input: Pick) { diff --git a/packages/opencode/src/session/message-v2.ts b/packages/opencode/src/session/message-v2.ts index e8aab62d8423..d88c142a25aa 100644 --- a/packages/opencode/src/session/message-v2.ts +++ b/packages/opencode/src/session/message-v2.ts @@ -15,6 +15,7 @@ import type { SystemError } from "bun" import type { Provider } from "@/provider/provider" import { ModelID, ProviderID } from "@/provider/schema" import { Effect } from "effect" +import { Evidence } from "./evidence" /** Error shape thrown by Bun's fetch() when gzip/br decompression fails mid-stream */ interface FetchDecompressionError extends Error { @@ -255,6 +256,7 @@ export namespace MessageV2 { reason: z.string(), snapshot: z.string().optional(), cost: z.number(), + metadata: z.record(z.string(), z.any()).optional(), tokens: z.object({ total: z.number().optional(), input: z.number(), @@ -715,8 +717,12 @@ export namespace MessageV2 { if (part.type === "tool") { toolNames.add(part.tool) if (part.state.status === "completed") { - const outputText = part.state.time.compacted ? "[Old tool result content cleared]" : part.state.output - const attachments = part.state.time.compacted || options?.stripMedia ? [] : (part.state.attachments ?? []) + const state = part.state + const proof = Evidence.isTool(state.metadata?.evidence) + ? state.metadata.evidence + : Evidence.tool({ tool: part.tool, state }) + const outputText = state.time.compacted ? Evidence.text(proof) : state.output + const attachments = state.time.compacted || options?.stripMedia ? [] : (state.attachments ?? []) // For providers that don't support media in tool results, extract media files // (images, PDFs) to be sent as a separate user message @@ -739,7 +745,7 @@ export namespace MessageV2 { type: ("tool-" + part.tool) as `tool-${string}`, state: "output-available", toolCallId: part.callID, - input: part.state.input, + input: state.input, output, ...(differentModel ? {} : { callProviderMetadata: part.metadata }), }) diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts index 146c73f27712..b48ece780fd5 100644 --- a/packages/opencode/src/session/processor.ts +++ b/packages/opencode/src/session/processor.ts @@ -23,6 +23,12 @@ export namespace SessionProcessor { const DOOM_LOOP_THRESHOLD = 3 const log = Log.create({ service: "session.processor" }) + function finishMetadata(input: Record | undefined) { + const id = typeof input?.openai?.responseId === "string" ? input.openai.responseId : undefined + if (!id) return + return { openai: { responseId: id } } + } + export type Result = "compact" | "stop" | "continue" export type Event = LLM.Event @@ -277,6 +283,7 @@ export namespace SessionProcessor { id: PartID.ascending(), reason: value.finishReason, snapshot: yield* snapshot.track(), + metadata: finishMetadata(value.providerMetadata), messageID: ctx.assistantMessage.id, sessionID: ctx.assistantMessage.sessionID, type: "step-finish", diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts index c29733999214..6f8368e09a7f 100644 --- a/packages/opencode/src/session/prompt.ts +++ b/packages/opencode/src/session/prompt.ts @@ -65,6 +65,28 @@ const STRUCTURED_OUTPUT_SYSTEM_PROMPT = `IMPORTANT: The user has requested struc export namespace SessionPrompt { const log = Log.create({ service: "session.prompt" }) + function threaded(model: Provider.Model) { + return model.providerID === "openai" + } + + function chain(input: { model: Provider.Model; user: MessageV2.User; assistant?: MessageV2.WithParts }) { + const msg = input.assistant + if (!threaded(input.model) || !msg || msg.info.role !== "assistant") return + if ((input.model.options as { store?: boolean } | undefined)?.store !== true) return + if (`${input.model.providerID}/${input.model.id}` !== `${msg.info.providerID}/${msg.info.modelID}`) return + if (input.user.id > msg.info.id) return + if ( + !msg.parts.some( + (part) => part.type === "tool" && part.state.status !== "pending" && part.state.status !== "running", + ) + ) + return + const part = msg.parts.findLast((part): part is MessageV2.StepFinishPart => part.type === "step-finish") + const id = part?.metadata?.openai?.responseId + if (typeof id !== "string" || !id) return + return { id, msgs: [msg] } + } + export interface Interface { readonly assertNotBusy: (sessionID: SessionID) => Effect.Effect readonly cancel: (sessionID: SessionID) => Effect.Effect @@ -1342,6 +1364,23 @@ NOTE: At any point in time through this workflow you should feel free to ask the let structured: unknown | undefined let step = 0 const session = yield* sessions.get(sessionID) + const skills = new Map() + const env = new Map() + const systemPrompt = Effect.fnUntraced(function* (agent: Agent.Info, model: Provider.Model) { + const key = `${model.providerID}/${model.id}` + const skill = skills.has(agent.name) + ? skills.get(agent.name) + : yield* Effect.promise(() => SystemPrompt.skills(agent)).pipe( + Effect.tap((value) => Effect.sync(() => skills.set(agent.name, value))), + ) + const vars = env.has(key) + ? env.get(key)! + : yield* Effect.promise(() => SystemPrompt.environment(model)).pipe( + Effect.tap((value) => Effect.sync(() => env.set(key, value))), + ) + const instructions = yield* instruction.system().pipe(Effect.orDie) + return [...vars, ...(skill ? [skill] : []), ...instructions] + }) while (true) { yield* status.set(sessionID, { type: "busy" }) @@ -1500,15 +1539,16 @@ NOTE: At any point in time through this workflow you should feel free to ask the yield* plugin.trigger("experimental.chat.messages.transform", {}, { messages: msgs }) - const [skills, env, instructions, modelMsgs] = yield* Effect.all([ - Effect.promise(() => SystemPrompt.skills(agent)), - Effect.promise(() => SystemPrompt.environment(model)), - instruction.system().pipe(Effect.orDie), - Effect.promise(() => MessageV2.toModelMessages(msgs, model)), + const reuse = chain({ model, user: lastUser, assistant: lastAssistantMsg }) + const src = reuse ? reuse.msgs : msgs + + const [system, modelMsgs] = yield* Effect.all([ + systemPrompt(agent, model), + Effect.promise(() => MessageV2.toModelMessages(src, model)), ]) - const system = [...env, ...(skills ? [skills] : []), ...instructions] const format = lastUser.format ?? { type: "text" as const } if (format.type === "json_schema") system.push(STRUCTURED_OUTPUT_SYSTEM_PROMPT) + const send = reuse ? modelMsgs.filter((msg) => msg.role === "tool") : modelMsgs const result = yield* handle.process({ user: lastUser, agent, @@ -1516,10 +1556,11 @@ NOTE: At any point in time through this workflow you should feel free to ask the sessionID, parentSessionID: session.parentID, system, - messages: [...modelMsgs, ...(isLastStep ? [{ role: "assistant" as const, content: MAX_STEPS }] : [])], + messages: [...send, ...(isLastStep ? [{ role: "assistant" as const, content: MAX_STEPS }] : [])], tools, model, toolChoice: format.type === "json_schema" ? "required" : undefined, + opts: reuse ? { previousResponseId: reuse.id, store: true } : undefined, }) if (structured !== undefined) { diff --git a/packages/opencode/test/session/compaction.test.ts b/packages/opencode/test/session/compaction.test.ts index 799bb3e2aeb1..848bd4eb8dc0 100644 --- a/packages/opencode/test/session/compaction.test.ts +++ b/packages/opencode/test/session/compaction.test.ts @@ -482,6 +482,13 @@ describe("session.compaction.prune", () => { expect(part?.state.status).toBe("completed") if (part?.type === "tool" && part.state.status === "completed") { expect(part.state.time.compacted).toBeNumber() + expect(part.state.metadata.evidence).toMatchObject({ + tool: "bash", + title: "done", + lines: 1, + }) + expect(part.state.metadata.evidence).toHaveProperty("hash") + expect(part.state.metadata.evidence).toHaveProperty("excerpt") } }, }) diff --git a/packages/opencode/test/session/message-v2.test.ts b/packages/opencode/test/session/message-v2.test.ts index 3634d6fb7ec8..5d7613650811 100644 --- a/packages/opencode/test/session/message-v2.test.ts +++ b/packages/opencode/test/session/message-v2.test.ts @@ -434,7 +434,7 @@ describe("session.message-v2.toModelMessage", () => { ]) }) - test("replaces compacted tool output with placeholder", async () => { + test("replaces compacted tool output with an evidence digest", async () => { const userID = "m-user" const assistantID = "m-assistant" @@ -470,35 +470,100 @@ describe("session.message-v2.toModelMessage", () => { }, ] - expect(await MessageV2.toModelMessages(input, model)).toStrictEqual([ - { - role: "user", - content: [{ type: "text", text: "run tool" }], - }, + const result = await MessageV2.toModelMessages(input, model) + + expect(result).toHaveLength(3) + expect(result[0]).toStrictEqual({ + role: "user", + content: [{ type: "text", text: "run tool" }], + }) + expect(result[1]).toStrictEqual({ + role: "assistant", + content: [ + { + type: "tool-call", + toolCallId: "call-1", + toolName: "bash", + input: { cmd: "ls" }, + providerExecuted: undefined, + }, + ], + }) + const tool = result[2] as { + role: string + content: Array<{ + type: string + toolCallId: string + toolName: string + output: { type: string; value: string } + }> + } + expect(tool).toMatchObject({ + role: "tool", + content: [ + { + type: "tool-result", + toolCallId: "call-1", + toolName: "bash", + output: { + type: "text", + }, + }, + ], + }) + + const text = tool.content[0]!.output.value + expect(text).toContain("tool: bash") + expect(text).toContain('input: {"cmd":"ls"}') + expect(text).toContain("excerpt:\nthis should be cleared") + expect(text).not.toContain("[Old tool result content cleared]") + }) + + test("replaces compacted tool output when legacy evidence metadata is missing", async () => { + const userID = "m-user-legacy" + const assistantID = "m-assistant-legacy" + + const input = [ { - role: "assistant", - content: [ + info: userInfo(userID), + parts: [ { - type: "tool-call", - toolCallId: "call-1", - toolName: "bash", - input: { cmd: "ls" }, - providerExecuted: undefined, + ...basePart(userID, "u1"), + type: "text", + text: "run tool", }, - ], + ] as MessageV2.Part[], }, { - role: "tool", - content: [ + info: assistantInfo(assistantID, userID), + parts: [ { - type: "tool-result", - toolCallId: "call-1", - toolName: "bash", - output: { type: "text", value: "[Old tool result content cleared]" }, + ...basePart(assistantID, "a1"), + type: "tool", + callID: "call-legacy", + tool: "bash", + state: { + status: "completed", + input: { cmd: "pwd" }, + output: "legacy compacted output", + title: "Bash", + metadata: undefined, + time: { start: 0, end: 1, compacted: 1 }, + }, }, - ], + ] as unknown as MessageV2.Part[], }, - ]) + ] satisfies MessageV2.WithParts[] + + const result = await MessageV2.toModelMessages(input, model) + const tool = result[2] as { + content: Array<{ + output: { value: string } + }> + } + + expect(tool.content[0]?.output.value).toContain("tool: bash") + expect(tool.content[0]?.output.value).toContain("excerpt:\nlegacy compacted output") }) test("converts assistant tool error into error-text tool result", async () => { diff --git a/packages/opencode/test/session/prompt-effect.test.ts b/packages/opencode/test/session/prompt-effect.test.ts index 17689cf274ec..6efe554749d9 100644 --- a/packages/opencode/test/session/prompt-effect.test.ts +++ b/packages/opencode/test/session/prompt-effect.test.ts @@ -27,6 +27,7 @@ import { SessionProcessor } from "../../src/session/processor" import { SessionPrompt } from "../../src/session/prompt" import { MessageID, PartID, SessionID } from "../../src/session/schema" import { SessionStatus } from "../../src/session/status" +import { SystemPrompt } from "../../src/session/system" import { Shell } from "../../src/shell/shell" import { Snapshot } from "../../src/snapshot" import { TaskTool } from "../../src/tool/task" @@ -235,6 +236,43 @@ function providerCfg(url: string) { } } +function openaiCfg(url: string, store = true) { + return { + enabled_providers: ["openai"], + agent: { + build: { + model: "openai/gpt-5.2", + }, + }, + provider: { + openai: { + name: "OpenAI", + env: ["OPENAI_API_KEY"], + npm: "@ai-sdk/openai", + api: "https://api.openai.com/v1", + models: { + "gpt-5.2": { + id: "gpt-5.2", + name: "GPT 5.2", + attachment: false, + reasoning: true, + temperature: false, + tool_call: true, + release_date: "2026-01-01", + limit: { context: 200000, output: 32000 }, + cost: { input: 0, output: 0, cache: { read: 0, write: 0 } }, + options: store ? { store: true } : {}, + }, + }, + options: { + apiKey: "test-key", + baseURL: url, + }, + }, + }, + } +} + const user = Effect.fn("test.user")(function* (sessionID: SessionID, text: string) { const session = yield* Session.Service const msg = yield* session.updateMessage({ @@ -460,6 +498,77 @@ it.live("loop continues when finish is tool-calls", () => ), ) +it.live("openai tool continuation threads the previous response", () => + provideTmpdirServer( + Effect.fnUntraced(function* ({ llm }) { + const prompt = yield* SessionPrompt.Service + const sessions = yield* Session.Service + const session = yield* sessions.create({ + title: "Pinned", + permission: [{ permission: "*", pattern: "*", action: "allow" }], + }) + + yield* prompt.prompt({ + sessionID: session.id, + agent: "build", + noReply: true, + parts: [{ type: "text", text: "hello" }], + }) + yield* llm.tool("bash", { command: "pwd" }) + yield* llm.text("done") + + const result = yield* prompt.loop({ sessionID: session.id }) + expect(result.info.role).toBe("assistant") + expect(yield* llm.calls).toBe(2) + + const input = yield* llm.inputs + expect(input).toHaveLength(2) + expect(input[0]?.previous_response_id).toBeUndefined() + expect(input[0]?.store).toBe(true) + expect(input[1]?.previous_response_id).toBe("resp_test") + expect(input[1]?.store).toBe(true) + expect(Array.isArray(input[1]?.input)).toBe(true) + if (!Array.isArray(input[1]?.input)) return + expect(input[1].input.some((item) => item?.type === "function_call_output")).toBe(true) + expect(input[1].input.some((item) => item?.type === "function_call")).toBe(false) + expect(input[1].input.some((item) => item?.role === "user")).toBe(false) + }), + { git: true, config: (url) => openaiCfg(url, true) }, + ), +) + +it.live("openai tool continuation skips threading when response storage is disabled", () => + provideTmpdirServer( + Effect.fnUntraced(function* ({ llm }) { + const prompt = yield* SessionPrompt.Service + const sessions = yield* Session.Service + const session = yield* sessions.create({ + title: "Pinned", + permission: [{ permission: "*", pattern: "*", action: "allow" }], + }) + + yield* prompt.prompt({ + sessionID: session.id, + agent: "build", + noReply: true, + parts: [{ type: "text", text: "hello" }], + }) + yield* llm.tool("bash", { command: "pwd" }) + yield* llm.text("done") + + const result = yield* prompt.loop({ sessionID: session.id }) + expect(result.info.role).toBe("assistant") + expect(yield* llm.calls).toBe(2) + + const input = yield* llm.inputs + expect(input).toHaveLength(2) + expect(input[0]?.store).toBe(false) + expect(input[1]?.previous_response_id).toBeUndefined() + }), + { git: true, config: (url) => openaiCfg(url, false) }, + ), +) + it.live("loop continues when finish is stop but assistant has tool parts", () => provideTmpdirServer( Effect.fnUntraced(function* ({ llm }) { @@ -490,6 +599,81 @@ it.live("loop continues when finish is stop but assistant has tool parts", () => ), ) +it.live("loop reloads system instructions on the next iteration", () => + provideTmpdirServer( + Effect.fnUntraced(function* ({ dir, llm }) { + const prompt = yield* SessionPrompt.Service + const sessions = yield* Session.Service + const root = path.join(dir, "AGENTS.md") + yield* Effect.promise(() => Bun.write(root, "# First Instructions")) + + const session = yield* sessions.create({ + title: "Pinned", + permission: [{ permission: "*", pattern: "*", action: "allow" }], + }) + yield* prompt.prompt({ + sessionID: session.id, + agent: "build", + noReply: true, + parts: [{ type: "text", text: "hello" }], + }) + yield* llm.tool("bash", { command: "sleep 0.2; pwd" }) + yield* llm.text("done") + + const run = yield* prompt.loop({ sessionID: session.id }).pipe(Effect.forkChild) + yield* llm.wait(1) + yield* Effect.promise(() => Bun.write(root, "# Second Instructions")) + + const exit = yield* Fiber.await(run) + expect(Exit.isSuccess(exit)).toBe(true) + if (Exit.isFailure(exit)) return + + const inputs = yield* llm.inputs + expect(inputs).toHaveLength(2) + expect(JSON.stringify(inputs[0]?.messages)).toContain("First Instructions") + expect(JSON.stringify(inputs[1]?.messages)).toContain("Second Instructions") + }), + { git: true, config: providerCfg }, + ), +) + +it.live("loop memoizes skills and environment across iterations", () => + provideTmpdirServer( + Effect.fnUntraced(function* ({ llm }) { + const prompt = yield* SessionPrompt.Service + const sessions = yield* Session.Service + const skills = spyOn(SystemPrompt, "skills") + const env = spyOn(SystemPrompt, "environment") + yield* Effect.addFinalizer(() => + Effect.sync(() => { + skills.mockRestore() + env.mockRestore() + }), + ) + + const session = yield* sessions.create({ + title: "Pinned", + permission: [{ permission: "*", pattern: "*", action: "allow" }], + }) + yield* prompt.prompt({ + sessionID: session.id, + agent: "build", + noReply: true, + parts: [{ type: "text", text: "hello" }], + }) + yield* llm.tool("bash", { command: "pwd" }) + yield* llm.text("done") + + const result = yield* prompt.loop({ sessionID: session.id }) + expect(result.info.role).toBe("assistant") + expect(yield* llm.calls).toBe(2) + expect(skills).toHaveBeenCalledTimes(1) + expect(env).toHaveBeenCalledTimes(1) + }), + { git: true, config: providerCfg }, + ), +) + it.live("failed subtask preserves metadata on error tool state", () => provideTmpdirServer( Effect.fnUntraced(function* ({ llm }) {