
Commit 0bf6a56

fix: preserve thinking block signatures and fix compaction headroom asymmetry
Two compounding bugs caused sessions to crash with "thinking blocks cannot be modified" when compaction fired for models with extended thinking:

1. toModelMessages() stripped providerMetadata (including cryptographic signatures) from message parts when the current model differed from the original. Anthropic's API requires signatures to be byte-identical. Fix: always pass providerMetadata through — the API handles filtering.

2. isOverflow() used an asymmetric buffer when limit.input was set (capped at 20K via COMPACTION_BUFFER) vs the full maxOutputTokens on the non-input path. This caused compaction to trigger too late. Fix: use maxOutputTokens (capped at 32K) for both paths. Also fixed the non-input path to respect config.compaction.reserved.
1 parent 9a1c9ae commit 0bf6a56

4 files changed

Lines changed: 201 additions & 28 deletions
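
Before the per-file diffs, a minimal sketch of the second fix's arithmetic. This is a standalone illustration with simplified stand-in types, not the project's real code: `Model`, `usableBefore`, and `usableAfter` are invented here, and `maxOutputTokens` is a stand-in for `ProviderTransform.maxOutputTokens()`; the 32K cap and the old 20K `COMPACTION_BUFFER` come from the diffs below.

```ts
// Sketch of the headroom asymmetry fixed by this commit (simplified, assumed types).
const OUTPUT_TOKEN_MAX = 32_000
const COMPACTION_BUFFER = 20_000 // old constant, deleted by this commit

interface Model {
  limit: { context: number; input?: number; output: number }
}

// Stand-in for ProviderTransform.maxOutputTokens(): capped at 32K.
const maxOutputTokens = (m: Model) => Math.min(m.limit.output, OUTPUT_TOKEN_MAX)

// Before: the limit.input path reserved at most 20K, while the context
// path reserved the full capped output budget.
function usableBefore(m: Model): number {
  const reserved = Math.min(COMPACTION_BUFFER, maxOutputTokens(m))
  return m.limit.input ? m.limit.input - reserved : m.limit.context - maxOutputTokens(m)
}

// After: both paths reserve the same capped output budget.
function usableAfter(m: Model): number {
  const reserved = maxOutputTokens(m)
  return m.limit.input ? m.limit.input - reserved : m.limit.context - reserved
}

const m: Model = { limit: { context: 200_000, input: 200_000, output: 32_000 } }
console.log(usableBefore(m)) // 180000: compaction fired 12K tokens too late
console.log(usableAfter(m))  // 168000: now matches the non-input path
```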


packages/opencode/src/session/message-v2.ts

Lines changed: 5 additions & 6 deletions
@@ -684,7 +684,6 @@ export namespace MessageV2 {
     }
 
     if (msg.info.role === "assistant") {
-      const differentModel = `${model.providerID}/${model.id}` !== `${msg.info.providerID}/${msg.info.modelID}`
       const media: Array<{ mime: string; url: string }> = []
 
       if (
@@ -706,7 +705,7 @@ export namespace MessageV2 {
           assistantMessage.parts.push({
             type: "text",
             text: part.text,
-            ...(differentModel ? {} : { providerMetadata: part.metadata }),
+            providerMetadata: part.metadata,
           })
         if (part.type === "step-start")
           assistantMessage.parts.push({
@@ -741,7 +740,7 @@ export namespace MessageV2 {
             toolCallId: part.callID,
             input: part.state.input,
             output,
-            ...(differentModel ? {} : { callProviderMetadata: part.metadata }),
+            callProviderMetadata: part.metadata,
           })
         }
         if (part.state.status === "error")
@@ -751,7 +750,7 @@ export namespace MessageV2 {
             toolCallId: part.callID,
             input: part.state.input,
             errorText: part.state.error,
-            ...(differentModel ? {} : { callProviderMetadata: part.metadata }),
+            callProviderMetadata: part.metadata,
           })
         // Handle pending/running tool calls to prevent dangling tool_use blocks
         // Anthropic/Claude APIs require every tool_use to have a corresponding tool_result
@@ -762,14 +761,14 @@ export namespace MessageV2 {
             toolCallId: part.callID,
             input: part.state.input,
             errorText: "[Tool execution was interrupted]",
-            ...(differentModel ? {} : { callProviderMetadata: part.metadata }),
+            callProviderMetadata: part.metadata,
           })
         }
         if (part.type === "reasoning") {
           assistantMessage.parts.push({
             type: "reasoning",
             text: part.text,
-            ...(differentModel ? {} : { providerMetadata: part.metadata }),
+            providerMetadata: part.metadata,
           })
         }
       }
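
The deleted `differentModel` guard was the crash's root cause: re-sending a thinking block to Anthropic with its signature stripped fails the API's validation. A hedged sketch of the pass-through behavior, using simplified local types rather than the real `MessageV2` definitions:

```ts
// Simplified stand-in for a MessageV2 reasoning part (not the real type).
type ReasoningPart = {
  type: "reasoning"
  text: string
  // For Anthropic this carries the thinking block's cryptographic signature,
  // e.g. { anthropic: { signature: "..." } }; it must round-trip byte-identical.
  metadata?: Record<string, unknown>
}

function toModelReasoning(part: ReasoningPart) {
  return {
    type: "reasoning" as const,
    text: part.text,
    // Forwarded unconditionally, even when the current model differs from the
    // one that produced the part; per the commit message, the provider API
    // filters out metadata it cannot use rather than rejecting the request.
    providerMetadata: part.metadata,
  }
}
```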

packages/opencode/src/session/overflow.ts

Lines changed: 6 additions & 7 deletions
@@ -3,8 +3,6 @@ import type { Provider } from "@/provider/provider"
 import { ProviderTransform } from "@/provider/transform"
 import type { MessageV2 } from "./message-v2"
 
-const COMPACTION_BUFFER = 20_000
-
 export function isOverflow(input: { cfg: Config.Info; tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) {
   if (input.cfg.compaction?.auto === false) return false
   const context = input.model.limit.context
@@ -13,10 +11,11 @@ export function isOverflow(input: { cfg: Config.Info; tokens: MessageV2.Assistan
   const count =
     input.tokens.total || input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write
 
-  const reserved =
-    input.cfg.compaction?.reserved ?? Math.min(COMPACTION_BUFFER, ProviderTransform.maxOutputTokens(input.model))
-  const usable = input.model.limit.input
-    ? input.model.limit.input - reserved
-    : context - ProviderTransform.maxOutputTokens(input.model)
+  // Reserve headroom so compaction triggers before the next turn overflows.
+  // maxOutputTokens() is capped at 32K (OUTPUT_TOKEN_MAX) regardless of the
+  // model's raw output limit, so this is never excessively aggressive.
+  // Users can override via config.compaction.reserved if needed (#12924).
+  const reserved = input.cfg.compaction?.reserved ?? ProviderTransform.maxOutputTokens(input.model)
+  const usable = input.model.limit.input ? input.model.limit.input - reserved : context - reserved
   return count >= usable
 }
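
The new comment's escape hatch is worth spelling out: `compaction.reserved` replaces the default reservation, and `compaction.auto: false` disables overflow detection entirely, since both fields are read at the top of `isOverflow()`. A hypothetical config fragment showing only the fields this function consumes (the surrounding `Config.Info` shape is assumed):

```ts
// Hypothetical Config.Info fragment; only the fields isOverflow() reads are shown.
const cfg = {
  compaction: {
    auto: true,       // false short-circuits isOverflow() to always return false
    reserved: 16_000, // overrides the maxOutputTokens-based default headroom
  },
}
```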

packages/opencode/test/session/compaction.test.ts

Lines changed: 11 additions & 11 deletions
@@ -317,19 +317,19 @@ describe("session.compaction.isOverflow", () => {
   })
 })
 
-// ─── Bug reproduction tests ───────────────────────────────────────────
-// These tests demonstrate that when limit.input is set, isOverflow()
-// does not subtract any headroom for the next model response. This means
-// compaction only triggers AFTER we've already consumed the full input
-// budget, leaving zero room for the next API call's output tokens.
+// ─── Headroom reservation tests ──────────────────────────────────────
+// These tests verify that when limit.input is set, isOverflow()
+// correctly reserves headroom (maxOutputTokens, capped at 32K) so
+// compaction triggers before the next API call overflows.
 //
-// Compare: without limit.input, usable = context - output (reserves space).
-// With limit.input, usable = limit.input (reserves nothing).
+// Previously (bug), the limit.input path only subtracted a 20K buffer
+// while the non-input path subtracted the full maxOutputTokens — an
+// asymmetry that let sessions grow ~12K tokens too large before compacting.
 //
 // Related issues: #10634, #8089, #11086, #12621
 // Open PRs: #6875, #12924
 
-test("BUG: no headroom when limit.input is set — compaction should trigger near boundary but does not", async () => {
+test("no headroom when limit.input is set — compaction should trigger near boundary", async () => {
   await using tmp = await tmpdir()
   await Instance.provide({
     directory: tmp.path,
@@ -355,7 +355,7 @@ describe("session.compaction.isOverflow", () => {
   })
 })
 
-test("BUG: without limit.input, same token count correctly triggers compaction", async () => {
+test("without limit.input, same token count correctly triggers compaction", async () => {
   await using tmp = await tmpdir()
   await Instance.provide({
     directory: tmp.path,
@@ -375,7 +375,7 @@ describe("session.compaction.isOverflow", () => {
   })
 })
 
-test("BUG: asymmetry — limit.input model allows 30K more usage before compaction than equivalent model without it", async () => {
+test("asymmetry — limit.input model does not allow more usage than equivalent model without it", async () => {
   await using tmp = await tmpdir()
   await Instance.provide({
     directory: tmp.path,
@@ -384,7 +384,7 @@ describe("session.compaction.isOverflow", () => {
   const withInputLimit = createModel({ context: 200_000, input: 200_000, output: 32_000 })
   const withoutInputLimit = createModel({ context: 200_000, output: 32_000 })
 
-  // 170K total tokens — well above context-output (168K) but below input limit (200K)
+  // 181K total tokens — above usable (context - maxOutput = 168K)
   const tokens = { input: 166_000, output: 10_000, reasoning: 0, cache: { read: 5_000, write: 0 } }
 
   const withLimit = await SessionCompaction.isOverflow({ tokens, model: withInputLimit })
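
The updated 181K figure in the comment checks out; a sketch of the accounting outside the real test harness, using the numbers from the test above:

```ts
// Token accounting from the asymmetry test; cache reads and writes count too.
const tokens = { input: 166_000, output: 10_000, cache: { read: 5_000, write: 0 } }
const total = tokens.input + tokens.output + tokens.cache.read + tokens.cache.write // 181_000

const reserved = 32_000 // maxOutputTokens, capped at OUTPUT_TOKEN_MAX
const usableWithInputLimit = 200_000 - reserved // limit.input path: 168_000
const usableWithoutLimit = 200_000 - reserved   // context path: 168_000

console.log(total >= usableWithInputLimit) // true: compaction triggers
console.log(total >= usableWithoutLimit)   // true: same verdict, no asymmetry
```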

packages/opencode/test/session/message-v2.test.ts

Lines changed: 179 additions & 4 deletions
@@ -57,6 +57,17 @@ const model: Provider.Model = {
   release_date: "2026-01-01",
 }
 
+const model2: Provider.Model = {
+  ...model,
+  id: "other-model",
+  providerID: "other",
+  api: {
+    ...model.api,
+    id: "other-model",
+  },
+  name: "Other Model",
+}
+
 function userInfo(id: string): MessageV2.User {
   return {
     id,
@@ -359,7 +370,90 @@ describe("session.message-v2.toModelMessage", () => {
   ])
 })
 
-test("omits provider metadata when assistant model differs", async () => {
+test("preserves reasoning providerMetadata when model matches", async () => {
+  const assistantID = "m-assistant"
+
+  const input: MessageV2.WithParts[] = [
+    {
+      info: assistantInfo(assistantID, "m-parent"),
+      parts: [
+        {
+          ...basePart(assistantID, "a1"),
+          type: "reasoning",
+          text: "thinking",
+          metadata: { openai: { signature: "sig-match" } },
+          time: { start: 0 },
+        },
+      ] as MessageV2.Part[],
+    },
+  ]
+
+  expect(await MessageV2.toModelMessages(input, model)).toStrictEqual([
+    {
+      role: "assistant",
+      content: [{ type: "reasoning", text: "thinking", providerOptions: { openai: { signature: "sig-match" } } }],
+    },
+  ])
+})
+
+test("preserves reasoning providerMetadata when model differs", async () => {
+  const assistantID = "m-assistant"
+
+  const input: MessageV2.WithParts[] = [
+    {
+      info: assistantInfo(assistantID, "m-parent", undefined, {
+        providerID: model2.providerID,
+        modelID: model2.api.id,
+      }),
+      parts: [
+        {
+          ...basePart(assistantID, "a1"),
+          type: "reasoning",
+          text: "thinking",
+          metadata: { openai: { signature: "sig-different" } },
+          time: { start: 0 },
+        },
+      ] as MessageV2.Part[],
+    },
+  ]
+
+  expect(await MessageV2.toModelMessages(input, model)).toStrictEqual([
+    {
+      role: "assistant",
+      content: [{ type: "reasoning", text: "thinking", providerOptions: { openai: { signature: "sig-different" } } }],
+    },
+  ])
+})
+
+test("preserves text providerMetadata when model differs", async () => {
+  const assistantID = "m-assistant"
+
+  const input: MessageV2.WithParts[] = [
+    {
+      info: assistantInfo(assistantID, "m-parent", undefined, {
+        providerID: model2.providerID,
+        modelID: model2.api.id,
+      }),
+      parts: [
+        {
+          ...basePart(assistantID, "a1"),
+          type: "text",
+          text: "done",
+          metadata: { openai: { assistant: "meta" } },
+        },
+      ] as MessageV2.Part[],
+    },
+  ]
+
+  expect(await MessageV2.toModelMessages(input, model)).toStrictEqual([
+    {
+      role: "assistant",
+      content: [{ type: "text", text: "done", providerOptions: { openai: { assistant: "meta" } } }],
+    },
+  ])
+})
+
+test("preserves tool callProviderMetadata when model differs", async () => {
   const userID = "m-user"
   const assistantID = "m-assistant"
 
@@ -375,16 +469,97 @@ describe("session.message-v2.toModelMessage", () => {
       ] as MessageV2.Part[],
     },
     {
-      info: assistantInfo(assistantID, userID, undefined, { providerID: "other", modelID: "other" }),
+      info: assistantInfo(assistantID, userID, undefined, {
+        providerID: model2.providerID,
+        modelID: model2.api.id,
+      }),
+      parts: [
+        {
+          ...basePart(assistantID, "a1"),
+          type: "tool",
+          callID: "call-1",
+          tool: "bash",
+          state: {
+            status: "completed",
+            input: { cmd: "ls" },
+            output: "ok",
+            title: "Bash",
+            metadata: {},
+            time: { start: 0, end: 1 },
+          },
+          metadata: { openai: { tool: "meta" } },
+        },
+      ] as MessageV2.Part[],
+    },
+  ]
+
+  expect(await MessageV2.toModelMessages(input, model)).toStrictEqual([
+    {
+      role: "user",
+      content: [{ type: "text", text: "run tool" }],
+    },
+    {
+      role: "assistant",
+      content: [
+        {
+          type: "tool-call",
+          toolCallId: "call-1",
+          toolName: "bash",
+          input: { cmd: "ls" },
+          providerExecuted: undefined,
+          providerOptions: { openai: { tool: "meta" } },
+        },
+      ],
+    },
+    {
+      role: "tool",
+      content: [
+        {
+          type: "tool-result",
+          toolCallId: "call-1",
+          toolName: "bash",
+          output: { type: "text", value: "ok" },
+          providerOptions: { openai: { tool: "meta" } },
+        },
+      ],
+    },
+  ])
+})
+
+test("handles undefined metadata gracefully", async () => {
+  const userID = "m-user"
+  const assistantID = "m-assistant"
+
+  const input: MessageV2.WithParts[] = [
+    {
+      info: userInfo(userID),
+      parts: [
+        {
+          ...basePart(userID, "u1"),
+          type: "text",
+          text: "run tool",
+        },
+      ] as MessageV2.Part[],
+    },
+    {
+      info: assistantInfo(assistantID, userID, undefined, {
+        providerID: model2.providerID,
+        modelID: model2.api.id,
+      }),
       parts: [
         {
           ...basePart(assistantID, "a1"),
           type: "text",
           text: "done",
-          metadata: { openai: { assistant: "meta" } },
         },
         {
           ...basePart(assistantID, "a2"),
+          type: "reasoning",
+          text: "thinking",
+          time: { start: 0 },
+        },
+        {
+          ...basePart(assistantID, "a3"),
           type: "tool",
           callID: "call-1",
           tool: "bash",
@@ -396,7 +571,6 @@ describe("session.message-v2.toModelMessage", () => {
             metadata: {},
             time: { start: 0, end: 1 },
           },
-          metadata: { openai: { tool: "meta" } },
         },
       ] as MessageV2.Part[],
     },
@@ -411,6 +585,7 @@ describe("session.message-v2.toModelMessage", () => {
       role: "assistant",
       content: [
        { type: "text", text: "done" },
+       { type: "reasoning", text: "thinking", providerOptions: undefined },
        {
          type: "tool-call",
          toolCallId: "call-1",
