Commit a49278b

fix: preserve thinking block signatures and fix compaction headroom asymmetry
Two compounding bugs caused sessions to crash with 'thinking blocks cannot be modified' when compaction fired for models with extended thinking:

1. toModelMessages() stripped providerMetadata (including cryptographic signatures) from message parts when the current model differed from the original. Anthropic's API requires signatures to be byte-identical. Fix: always pass providerMetadata through — the API handles filtering.

2. isOverflow() used an asymmetric buffer when limit.input was set (capped at 20K via COMPACTION_BUFFER) vs the full maxOutputTokens on the non-input path. This caused compaction to trigger too late. Fix: use maxOutputTokens (capped at 32K) for both paths. Also fixed the non-input path to respect config.compaction.reserved.
1 parent 59c530c commit a49278b

4 files changed

Lines changed: 201 additions & 28 deletions
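
For readers unfamiliar with the first failure mode, here is a minimal TypeScript sketch of it at the API level. The payload shape follows Anthropic's documented extended-thinking message format, but the signature value, tool id, and turn contents are hypothetical illustrations, not anything from this repository.

// Sketch: replaying a prior assistant turn that contains a thinking block.
// Anthropic signs each thinking block; the signature must round-trip
// byte-identical on the next request or the API rejects the conversation.
const assistantTurn = {
  role: "assistant",
  content: [
    // hypothetical signature value
    { type: "thinking", thinking: "Inspecting the repo first...", signature: "EqQBCgIYAhgB..." },
    { type: "tool_use", id: "toolu_01", name: "bash", input: { cmd: "ls" } },
  ],
}
// Bug 1: the old toModelMessages() dropped providerMetadata (and with it the
// signature) whenever the session had switched models, so the replayed turn no
// longer matched what Anthropic signed -> "thinking blocks cannot be modified".
console.log(assistantTurn.content[0])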

packages/opencode/src/session/compaction.ts

Lines changed: 6 additions & 7 deletions
@@ -28,8 +28,6 @@ export namespace SessionCompaction {
     ),
   }
 
-  const COMPACTION_BUFFER = 20_000
-
   export async function isOverflow(input: { tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) {
     const config = await Config.get()
     if (config.compaction?.auto === false) return false
@@ -40,11 +38,12 @@ export namespace SessionCompaction {
       input.tokens.total ||
       input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write
 
-    const reserved =
-      config.compaction?.reserved ?? Math.min(COMPACTION_BUFFER, ProviderTransform.maxOutputTokens(input.model))
-    const usable = input.model.limit.input
-      ? input.model.limit.input - reserved
-      : context - ProviderTransform.maxOutputTokens(input.model)
+    // Reserve headroom so compaction triggers before the next turn overflows.
+    // maxOutputTokens() is capped at 32K (OUTPUT_TOKEN_MAX) regardless of the
+    // model's raw output limit, so this is never excessively aggressive.
+    // Users can override via config.compaction.reserved if needed (#12924).
+    const reserved = config.compaction?.reserved ?? ProviderTransform.maxOutputTokens(input.model)
+    const usable = input.model.limit.input ? input.model.limit.input - reserved : context - reserved
     return count >= usable
   }
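
To make the new symmetry concrete, a small standalone sketch of the arithmetic. This is illustrative only: it approximates ProviderTransform.maxOutputTokens() as min(model output, 32K), per the comment in the diff, and `usable` is a hypothetical helper rather than project code.

// Standalone approximation of the fixed isOverflow() headroom math.
const OUTPUT_TOKEN_MAX = 32_000 // assumed cap applied by maxOutputTokens()

function usable(limit: { context: number; input?: number; output: number }): number {
  const reserved = Math.min(limit.output, OUTPUT_TOKEN_MAX) // config.compaction.reserved would override
  // Both branches now subtract the same reserve, so the trigger point is symmetric.
  return (limit.input ?? limit.context) - reserved
}

console.log(usable({ context: 200_000, output: 32_000 }))                  // 168000
console.log(usable({ context: 200_000, input: 200_000, output: 32_000 })) // 168000
// Old behavior on the limit.input branch reserved min(20_000, maxOutput) = 20K,
// giving usable = 180000, i.e. compaction fired ~12K tokens later than intended.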

packages/opencode/src/session/message-v2.ts

Lines changed: 5 additions & 6 deletions
@@ -666,7 +666,6 @@ export namespace MessageV2 {
         }
 
         if (msg.info.role === "assistant") {
-          const differentModel = `${model.providerID}/${model.id}` !== `${msg.info.providerID}/${msg.info.modelID}`
           const media: Array<{ mime: string; url: string }> = []
 
           if (
@@ -688,7 +687,7 @@
             assistantMessage.parts.push({
               type: "text",
               text: part.text,
-              ...(differentModel ? {} : { providerMetadata: part.metadata }),
+              providerMetadata: part.metadata,
             })
           if (part.type === "step-start")
             assistantMessage.parts.push({
@@ -723,7 +722,7 @@
                 toolCallId: part.callID,
                 input: part.state.input,
                 output,
-                ...(differentModel ? {} : { callProviderMetadata: part.metadata }),
+                callProviderMetadata: part.metadata,
               })
             }
             if (part.state.status === "error")
@@ -733,7 +732,7 @@
                 toolCallId: part.callID,
                 input: part.state.input,
                 errorText: part.state.error,
-                ...(differentModel ? {} : { callProviderMetadata: part.metadata }),
+                callProviderMetadata: part.metadata,
              })
            // Handle pending/running tool calls to prevent dangling tool_use blocks
            // Anthropic/Claude APIs require every tool_use to have a corresponding tool_result
@@ -744,14 +743,14 @@
                 toolCallId: part.callID,
                 input: part.state.input,
                 errorText: "[Tool execution was interrupted]",
-                ...(differentModel ? {} : { callProviderMetadata: part.metadata }),
+                callProviderMetadata: part.metadata,
              })
           }
           if (part.type === "reasoning") {
             assistantMessage.parts.push({
               type: "reasoning",
               text: part.text,
-              ...(differentModel ? {} : { providerMetadata: part.metadata }),
+              providerMetadata: part.metadata,
             })
           }
         }

packages/opencode/test/session/compaction.test.ts

Lines changed: 11 additions & 11 deletions
@@ -113,19 +113,19 @@ describe("session.compaction.isOverflow", () => {
     })
   })
 
-  // ─── Bug reproduction tests ───────────────────────────────────────────
-  // These tests demonstrate that when limit.input is set, isOverflow()
-  // does not subtract any headroom for the next model response. This means
-  // compaction only triggers AFTER we've already consumed the full input
-  // budget, leaving zero room for the next API call's output tokens.
+  // ─── Headroom reservation tests ──────────────────────────────────────
+  // These tests verify that when limit.input is set, isOverflow()
+  // correctly reserves headroom (maxOutputTokens, capped at 32K) so
+  // compaction triggers before the next API call overflows.
   //
-  // Compare: without limit.input, usable = context - output (reserves space).
-  // With limit.input, usable = limit.input (reserves nothing).
+  // Previously (bug), the limit.input path only subtracted a 20K buffer
+  // while the non-input path subtracted the full maxOutputTokens — an
+  // asymmetry that let sessions grow ~12K tokens too large before compacting.
   //
   // Related issues: #10634, #8089, #11086, #12621
   // Open PRs: #6875, #12924
 
-  test("BUG: no headroom when limit.input is set — compaction should trigger near boundary but does not", async () => {
+  test("no headroom when limit.input is set — compaction should trigger near boundary", async () => {
     await using tmp = await tmpdir()
     await Instance.provide({
       directory: tmp.path,
@@ -151,7 +151,7 @@ describe("session.compaction.isOverflow", () => {
     })
   })
 
-  test("BUG: without limit.input, same token count correctly triggers compaction", async () => {
+  test("without limit.input, same token count correctly triggers compaction", async () => {
     await using tmp = await tmpdir()
     await Instance.provide({
       directory: tmp.path,
@@ -171,7 +171,7 @@ describe("session.compaction.isOverflow", () => {
     })
   })
 
-  test("BUG: asymmetry — limit.input model allows 30K more usage before compaction than equivalent model without it", async () => {
+  test("asymmetry — limit.input model does not allow more usage than equivalent model without it", async () => {
     await using tmp = await tmpdir()
     await Instance.provide({
       directory: tmp.path,
@@ -180,7 +180,7 @@ describe("session.compaction.isOverflow", () => {
       const withInputLimit = createModel({ context: 200_000, input: 200_000, output: 32_000 })
      const withoutInputLimit = createModel({ context: 200_000, output: 32_000 })
 
-      // 170K total tokens — well above context-output (168K) but below input limit (200K)
+      // 181K total tokens — above usable (context - maxOutput = 168K)
      const tokens = { input: 166_000, output: 10_000, reasoning: 0, cache: { read: 5_000, write: 0 } }
 
      const withLimit = await SessionCompaction.isOverflow({ tokens, model: withInputLimit })
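
Worked numbers behind the asymmetry test, using the fixture values from the diff (a standalone sketch, not part of the test file):

// Fixture total: 166_000 input + 10_000 output + 5_000 cache.read = 181_000.
// With the fix, both model shapes reserve min(output, 32K) = 32_000:
//   withInputLimit:    usable = 200_000 - 32_000 = 168_000 -> 181_000 overflows
//   withoutInputLimit: usable = 200_000 - 32_000 = 168_000 -> same result
// Before the fix the limit.input branch reserved only 20_000 (usable 180_000),
// so e.g. a 170_000-token session compacted on one branch but not the other.
const total = 166_000 + 10_000 + 5_000
console.log(total >= 200_000 - 32_000) // true: both model shapes now compact here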

packages/opencode/test/session/message-v2.test.ts

Lines changed: 179 additions & 4 deletions
@@ -56,6 +56,17 @@ const model: Provider.Model = {
   release_date: "2026-01-01",
 }
 
+const model2: Provider.Model = {
+  ...model,
+  id: "other-model",
+  providerID: "other",
+  api: {
+    ...model.api,
+    id: "other-model",
+  },
+  name: "Other Model",
+}
+
 function userInfo(id: string): MessageV2.User {
   return {
     id,
@@ -358,7 +369,90 @@ describe("session.message-v2.toModelMessage", () => {
     ])
   })
 
-  test("omits provider metadata when assistant model differs", () => {
+  test("preserves reasoning providerMetadata when model matches", () => {
+    const assistantID = "m-assistant"
+
+    const input: MessageV2.WithParts[] = [
+      {
+        info: assistantInfo(assistantID, "m-parent"),
+        parts: [
+          {
+            ...basePart(assistantID, "a1"),
+            type: "reasoning",
+            text: "thinking",
+            metadata: { openai: { signature: "sig-match" } },
+            time: { start: 0 },
+          },
+        ] as MessageV2.Part[],
+      },
+    ]
+
+    expect(MessageV2.toModelMessages(input, model)).toStrictEqual([
+      {
+        role: "assistant",
+        content: [{ type: "reasoning", text: "thinking", providerOptions: { openai: { signature: "sig-match" } } }],
+      },
+    ])
+  })
+
+  test("preserves reasoning providerMetadata when model differs", () => {
+    const assistantID = "m-assistant"
+
+    const input: MessageV2.WithParts[] = [
+      {
+        info: assistantInfo(assistantID, "m-parent", undefined, {
+          providerID: model2.providerID,
+          modelID: model2.api.id,
+        }),
+        parts: [
+          {
+            ...basePart(assistantID, "a1"),
+            type: "reasoning",
+            text: "thinking",
+            metadata: { openai: { signature: "sig-different" } },
+            time: { start: 0 },
+          },
+        ] as MessageV2.Part[],
+      },
+    ]
+
+    expect(MessageV2.toModelMessages(input, model)).toStrictEqual([
+      {
+        role: "assistant",
+        content: [{ type: "reasoning", text: "thinking", providerOptions: { openai: { signature: "sig-different" } } }],
+      },
+    ])
+  })
+
+  test("preserves text providerMetadata when model differs", () => {
+    const assistantID = "m-assistant"
+
+    const input: MessageV2.WithParts[] = [
+      {
+        info: assistantInfo(assistantID, "m-parent", undefined, {
+          providerID: model2.providerID,
+          modelID: model2.api.id,
+        }),
+        parts: [
+          {
+            ...basePart(assistantID, "a1"),
+            type: "text",
+            text: "done",
+            metadata: { openai: { assistant: "meta" } },
+          },
+        ] as MessageV2.Part[],
+      },
+    ]
+
+    expect(MessageV2.toModelMessages(input, model)).toStrictEqual([
+      {
+        role: "assistant",
+        content: [{ type: "text", text: "done", providerOptions: { openai: { assistant: "meta" } } }],
+      },
+    ])
+  })
+
+  test("preserves tool callProviderMetadata when model differs", () => {
     const userID = "m-user"
     const assistantID = "m-assistant"
 
@@ -374,16 +468,97 @@ describe("session.message-v2.toModelMessage", () => {
         ] as MessageV2.Part[],
       },
       {
-        info: assistantInfo(assistantID, userID, undefined, { providerID: "other", modelID: "other" }),
+        info: assistantInfo(assistantID, userID, undefined, {
+          providerID: model2.providerID,
+          modelID: model2.api.id,
+        }),
+        parts: [
+          {
+            ...basePart(assistantID, "a1"),
+            type: "tool",
+            callID: "call-1",
+            tool: "bash",
+            state: {
+              status: "completed",
+              input: { cmd: "ls" },
+              output: "ok",
+              title: "Bash",
+              metadata: {},
+              time: { start: 0, end: 1 },
+            },
+            metadata: { openai: { tool: "meta" } },
+          },
+        ] as MessageV2.Part[],
+      },
+    ]
+
+    expect(MessageV2.toModelMessages(input, model)).toStrictEqual([
+      {
+        role: "user",
+        content: [{ type: "text", text: "run tool" }],
+      },
+      {
+        role: "assistant",
+        content: [
+          {
+            type: "tool-call",
+            toolCallId: "call-1",
+            toolName: "bash",
+            input: { cmd: "ls" },
+            providerExecuted: undefined,
+            providerOptions: { openai: { tool: "meta" } },
+          },
+        ],
+      },
+      {
+        role: "tool",
+        content: [
+          {
+            type: "tool-result",
+            toolCallId: "call-1",
+            toolName: "bash",
+            output: { type: "text", value: "ok" },
+            providerOptions: { openai: { tool: "meta" } },
+          },
+        ],
+      },
+    ])
+  })
+
+  test("handles undefined metadata gracefully", () => {
+    const userID = "m-user"
+    const assistantID = "m-assistant"
+
+    const input: MessageV2.WithParts[] = [
+      {
+        info: userInfo(userID),
+        parts: [
+          {
+            ...basePart(userID, "u1"),
+            type: "text",
+            text: "run tool",
+          },
+        ] as MessageV2.Part[],
+      },
+      {
+        info: assistantInfo(assistantID, userID, undefined, {
+          providerID: model2.providerID,
+          modelID: model2.api.id,
+        }),
         parts: [
           {
             ...basePart(assistantID, "a1"),
             type: "text",
             text: "done",
-            metadata: { openai: { assistant: "meta" } },
           },
           {
             ...basePart(assistantID, "a2"),
+            type: "reasoning",
+            text: "thinking",
+            time: { start: 0 },
+          },
+          {
+            ...basePart(assistantID, "a3"),
             type: "tool",
             callID: "call-1",
             tool: "bash",
@@ -395,7 +570,6 @@ describe("session.message-v2.toModelMessage", () => {
               metadata: {},
              time: { start: 0, end: 1 },
            },
-            metadata: { openai: { tool: "meta" } },
          },
        ] as MessageV2.Part[],
      },
@@ -410,6 +584,7 @@ describe("session.message-v2.toModelMessage", () => {
         role: "assistant",
         content: [
           { type: "text", text: "done" },
+          { type: "reasoning", text: "thinking", providerOptions: undefined },
           {
             type: "tool-call",
             toolCallId: "call-1",
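
To exercise the updated suites locally, something like the following should work, assuming the repository's usual bun-based test setup:

bun test packages/opencode/test/session/compaction.test.ts
bun test packages/opencode/test/session/message-v2.test.ts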
