Skip to content

Commit bb23cf6

Browse files
committed
fix: preserve thinking block signatures and fix compaction headroom asymmetry
Two compounding bugs caused sessions to crash with 'thinking blocks cannot be modified' when compaction fired for models with extended thinking: 1. toModelMessages() stripped providerMetadata (including cryptographic signatures) from message parts when the current model differed from the original. Anthropic's API requires signatures to be byte-identical. Fix: always pass providerMetadata through — the API handles filtering. 2. isOverflow() used an asymmetric buffer when limit.input was set (capped at 20K via COMPACTION_BUFFER) vs the full maxOutputTokens on the non-input path. This caused compaction to trigger too late. Fix: use maxOutputTokens (capped at 32K) for both paths. Also fixed the non-input path to respect config.compaction.reserved.
1 parent cc063d4 commit bb23cf6

4 files changed

Lines changed: 202 additions & 29 deletions

File tree

packages/opencode/src/session/message-v2.ts

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -689,7 +689,6 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
689689
}
690690

691691
if (msg.info.role === "assistant") {
692-
const differentModel = `${model.providerID}/${model.id}` !== `${msg.info.providerID}/${msg.info.modelID}`
693692
const media: Array<{ mime: string; url: string }> = []
694693

695694
if (
@@ -711,7 +710,7 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
711710
assistantMessage.parts.push({
712711
type: "text",
713712
text: part.text,
714-
...(differentModel ? {} : { providerMetadata: part.metadata }),
713+
providerMetadata: part.metadata,
715714
})
716715
if (part.type === "step-start")
717716
assistantMessage.parts.push({
@@ -747,7 +746,7 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
747746
input: part.state.input,
748747
output,
749748
...(part.metadata?.providerExecuted ? { providerExecuted: true } : {}),
750-
...(differentModel ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
749+
callProviderMetadata: providerMeta(part.metadata),
751750
})
752751
}
753752
if (part.state.status === "error") {
@@ -760,7 +759,7 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
760759
input: part.state.input,
761760
output,
762761
...(part.metadata?.providerExecuted ? { providerExecuted: true } : {}),
763-
...(differentModel ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
762+
callProviderMetadata: providerMeta(part.metadata),
764763
})
765764
} else {
766765
assistantMessage.parts.push({
@@ -770,7 +769,7 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
770769
input: part.state.input,
771770
errorText: part.state.error,
772771
...(part.metadata?.providerExecuted ? { providerExecuted: true } : {}),
773-
...(differentModel ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
772+
callProviderMetadata: providerMeta(part.metadata),
774773
})
775774
}
776775
}
@@ -784,14 +783,14 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
784783
input: part.state.input,
785784
errorText: "[Tool execution was interrupted]",
786785
...(part.metadata?.providerExecuted ? { providerExecuted: true } : {}),
787-
...(differentModel ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
786+
callProviderMetadata: providerMeta(part.metadata),
788787
})
789788
}
790789
if (part.type === "reasoning") {
791790
assistantMessage.parts.push({
792791
type: "reasoning",
793792
text: part.text,
794-
...(differentModel ? {} : { providerMetadata: part.metadata }),
793+
providerMetadata: part.metadata,
795794
})
796795
}
797796
}

packages/opencode/src/session/overflow.ts

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@ import type { Provider } from "@/provider"
33
import { ProviderTransform } from "@/provider"
44
import type { MessageV2 } from "./message-v2"
55

6-
const COMPACTION_BUFFER = 20_000
7-
86
export function isOverflow(input: { cfg: Config.Info; tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) {
97
if (input.cfg.compaction?.auto === false) return false
108
const context = input.model.limit.context
@@ -13,10 +11,11 @@ export function isOverflow(input: { cfg: Config.Info; tokens: MessageV2.Assistan
1311
const count =
1412
input.tokens.total || input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write
1513

16-
const reserved =
17-
input.cfg.compaction?.reserved ?? Math.min(COMPACTION_BUFFER, ProviderTransform.maxOutputTokens(input.model))
18-
const usable = input.model.limit.input
19-
? input.model.limit.input - reserved
20-
: context - ProviderTransform.maxOutputTokens(input.model)
14+
// Reserve headroom so compaction triggers before the next turn overflows.
15+
// maxOutputTokens() is capped at 32K (OUTPUT_TOKEN_MAX) regardless of the
16+
// model's raw output limit, so this is never excessively aggressive.
17+
// Users can override via config.compaction.reserved if needed (#12924).
18+
const reserved = input.cfg.compaction?.reserved ?? ProviderTransform.maxOutputTokens(input.model)
19+
const usable = input.model.limit.input ? input.model.limit.input - reserved : context - reserved
2120
return count >= usable
2221
}

packages/opencode/test/session/compaction.test.ts

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -351,20 +351,20 @@ describe("session.compaction.isOverflow", () => {
351351
),
352352
)
353353

354-
// ─── Bug reproduction tests ───────────────────────────────────────────
355-
// These tests demonstrate that when limit.input is set, isOverflow()
356-
// does not subtract any headroom for the next model response. This means
357-
// compaction only triggers AFTER we've already consumed the full input
358-
// budget, leaving zero room for the next API call's output tokens.
354+
// ─── Headroom reservation tests ──────────────────────────────────────
355+
// These tests verify that when limit.input is set, isOverflow()
356+
// correctly reserves headroom (maxOutputTokens, capped at 32K) so
357+
// compaction triggers before the next API call overflows.
359358
//
360-
// Compare: without limit.input, usable = context - output (reserves space).
361-
// With limit.input, usable = limit.input (reserves nothing).
359+
// Previously (bug), the limit.input path only subtracted a 20K buffer
360+
// while the non-input path subtracted the full maxOutputTokens — an
361+
// asymmetry that let sessions grow ~12K tokens too large before compacting.
362362
//
363363
// Related issues: #10634, #8089, #11086, #12621
364364
// Open PRs: #6875, #12924
365365

366366
it.live(
367-
"BUG: no headroom when limit.input is set — compaction should trigger near boundary but does not",
367+
"headroom is reserved when limit.input is set — compaction triggers near the boundary",
368368
provideTmpdirInstance(() =>
369369
Effect.gen(function* () {
370370
const compact = yield* SessionCompaction.Service
@@ -390,7 +390,7 @@ describe("session.compaction.isOverflow", () => {
390390
)
391391

392392
it.live(
393-
"BUG: without limit.input, same token count correctly triggers compaction",
393+
"without limit.input, same token count correctly triggers compaction",
394394
provideTmpdirInstance(() =>
395395
Effect.gen(function* () {
396396
const compact = yield* SessionCompaction.Service
@@ -410,15 +410,15 @@ describe("session.compaction.isOverflow", () => {
410410
)
411411

412412
it.live(
413-
"BUG: asymmetry — limit.input model allows 30K more usage before compaction than equivalent model without it",
413+
"symmetry — limit.input model does not allow more usage than equivalent model without it",
414414
provideTmpdirInstance(() =>
415415
Effect.gen(function* () {
416416
const compact = yield* SessionCompaction.Service
417417
// Two models with identical context/output limits, differing only in limit.input
418418
const withInputLimit = createModel({ context: 200_000, input: 200_000, output: 32_000 })
419419
const withoutInputLimit = createModel({ context: 200_000, output: 32_000 })
420420

421-
// 170K total tokens — well above context-output (168K) but below input limit (200K)
421+
// 181K total tokens — above usable (context - maxOutput = 168K)
422422
const tokens = { input: 166_000, output: 10_000, reasoning: 0, cache: { read: 5_000, write: 0 } }
423423

424424
const withLimit = yield* compact.isOverflow({ tokens, model: withInputLimit })

packages/opencode/test/session/message-v2.test.ts

Lines changed: 179 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,17 @@ const model: Provider.Model = {
5757
release_date: "2026-01-01",
5858
}
5959

60+
const model2: Provider.Model = {
61+
...model,
62+
id: ModelID.make("other-model"),
63+
providerID: ProviderID.make("other"),
64+
api: {
65+
...model.api,
66+
id: "other-model",
67+
},
68+
name: "Other Model",
69+
}
70+
6071
function userInfo(id: string): MessageV2.User {
6172
return {
6273
id,
@@ -359,7 +370,90 @@ describe("session.message-v2.toModelMessage", () => {
359370
])
360371
})
361372

362-
test("omits provider metadata when assistant model differs", async () => {
373+
test("preserves reasoning providerMetadata when model matches", async () => {
374+
const assistantID = "m-assistant"
375+
376+
const input: MessageV2.WithParts[] = [
377+
{
378+
info: assistantInfo(assistantID, "m-parent"),
379+
parts: [
380+
{
381+
...basePart(assistantID, "a1"),
382+
type: "reasoning",
383+
text: "thinking",
384+
metadata: { openai: { signature: "sig-match" } },
385+
time: { start: 0 },
386+
},
387+
] as MessageV2.Part[],
388+
},
389+
]
390+
391+
expect(await MessageV2.toModelMessages(input, model)).toStrictEqual([
392+
{
393+
role: "assistant",
394+
content: [{ type: "reasoning", text: "thinking", providerOptions: { openai: { signature: "sig-match" } } }],
395+
},
396+
])
397+
})
398+
399+
test("preserves reasoning providerMetadata when model differs", async () => {
400+
const assistantID = "m-assistant"
401+
402+
const input: MessageV2.WithParts[] = [
403+
{
404+
info: assistantInfo(assistantID, "m-parent", undefined, {
405+
providerID: model2.providerID,
406+
modelID: model2.api.id,
407+
}),
408+
parts: [
409+
{
410+
...basePart(assistantID, "a1"),
411+
type: "reasoning",
412+
text: "thinking",
413+
metadata: { openai: { signature: "sig-different" } },
414+
time: { start: 0 },
415+
},
416+
] as MessageV2.Part[],
417+
},
418+
]
419+
420+
expect(await MessageV2.toModelMessages(input, model)).toStrictEqual([
421+
{
422+
role: "assistant",
423+
content: [{ type: "reasoning", text: "thinking", providerOptions: { openai: { signature: "sig-different" } } }],
424+
},
425+
])
426+
})
427+
428+
test("preserves text providerMetadata when model differs", async () => {
429+
const assistantID = "m-assistant"
430+
431+
const input: MessageV2.WithParts[] = [
432+
{
433+
info: assistantInfo(assistantID, "m-parent", undefined, {
434+
providerID: model2.providerID,
435+
modelID: model2.api.id,
436+
}),
437+
parts: [
438+
{
439+
...basePart(assistantID, "a1"),
440+
type: "text",
441+
text: "done",
442+
metadata: { openai: { assistant: "meta" } },
443+
},
444+
] as MessageV2.Part[],
445+
},
446+
]
447+
448+
expect(await MessageV2.toModelMessages(input, model)).toStrictEqual([
449+
{
450+
role: "assistant",
451+
content: [{ type: "text", text: "done", providerOptions: { openai: { assistant: "meta" } } }],
452+
},
453+
])
454+
})
455+
456+
test("preserves tool callProviderMetadata when model differs", async () => {
363457
const userID = "m-user"
364458
const assistantID = "m-assistant"
365459

@@ -375,16 +469,97 @@ describe("session.message-v2.toModelMessage", () => {
375469
] as MessageV2.Part[],
376470
},
377471
{
378-
info: assistantInfo(assistantID, userID, undefined, { providerID: "other", modelID: "other" }),
472+
info: assistantInfo(assistantID, userID, undefined, {
473+
providerID: model2.providerID,
474+
modelID: model2.api.id,
475+
}),
476+
parts: [
477+
{
478+
...basePart(assistantID, "a1"),
479+
type: "tool",
480+
callID: "call-1",
481+
tool: "bash",
482+
state: {
483+
status: "completed",
484+
input: { cmd: "ls" },
485+
output: "ok",
486+
title: "Bash",
487+
metadata: {},
488+
time: { start: 0, end: 1 },
489+
},
490+
metadata: { openai: { tool: "meta" } },
491+
},
492+
] as MessageV2.Part[],
493+
},
494+
]
495+
496+
expect(await MessageV2.toModelMessages(input, model)).toStrictEqual([
497+
{
498+
role: "user",
499+
content: [{ type: "text", text: "run tool" }],
500+
},
501+
{
502+
role: "assistant",
503+
content: [
504+
{
505+
type: "tool-call",
506+
toolCallId: "call-1",
507+
toolName: "bash",
508+
input: { cmd: "ls" },
509+
providerExecuted: undefined,
510+
providerOptions: { openai: { tool: "meta" } },
511+
},
512+
],
513+
},
514+
{
515+
role: "tool",
516+
content: [
517+
{
518+
type: "tool-result",
519+
toolCallId: "call-1",
520+
toolName: "bash",
521+
output: { type: "text", value: "ok" },
522+
providerOptions: { openai: { tool: "meta" } },
523+
},
524+
],
525+
},
526+
])
527+
})
528+
529+
test("handles undefined metadata gracefully", async () => {
530+
const userID = "m-user"
531+
const assistantID = "m-assistant"
532+
533+
const input: MessageV2.WithParts[] = [
534+
{
535+
info: userInfo(userID),
536+
parts: [
537+
{
538+
...basePart(userID, "u1"),
539+
type: "text",
540+
text: "run tool",
541+
},
542+
] as MessageV2.Part[],
543+
},
544+
{
545+
info: assistantInfo(assistantID, userID, undefined, {
546+
providerID: model2.providerID,
547+
modelID: model2.api.id,
548+
}),
379549
parts: [
380550
{
381551
...basePart(assistantID, "a1"),
382552
type: "text",
383553
text: "done",
384-
metadata: { openai: { assistant: "meta" } },
385554
},
386555
{
387556
...basePart(assistantID, "a2"),
557+
type: "reasoning",
558+
text: "thinking",
559+
time: { start: 0 },
560+
},
561+
{
562+
...basePart(assistantID, "a3"),
388563
type: "tool",
389564
callID: "call-1",
390565
tool: "bash",
@@ -396,7 +571,6 @@ describe("session.message-v2.toModelMessage", () => {
396571
metadata: {},
397572
time: { start: 0, end: 1 },
398573
},
399-
metadata: { openai: { tool: "meta" } },
400574
},
401575
] as MessageV2.Part[],
402576
},
@@ -411,6 +585,7 @@ describe("session.message-v2.toModelMessage", () => {
411585
role: "assistant",
412586
content: [
413587
{ type: "text", text: "done" },
588+
{ type: "reasoning", text: "thinking", providerOptions: undefined },
414589
{
415590
type: "tool-call",
416591
toolCallId: "call-1",

0 commit comments

Comments
 (0)