Commit 626e8a5

fix: preserve thinking block signatures and fix compaction headroom asymmetry

Two compounding bugs caused sessions to crash with "thinking blocks cannot be modified" when compaction fired for models with extended thinking:

1. toModelMessages() stripped providerMetadata (including cryptographic signatures) from message parts whenever the current model differed from the original. Anthropic's API requires thinking-block signatures to be byte-identical on replay. Fix: strip metadata only when crossing provider boundaries; within the same provider (e.g. compaction summarizing with a different model variant) it is always preserved.

2. isOverflow() reserved an asymmetric buffer when limit.input was set (capped at 20K via COMPACTION_BUFFER) versus the full maxOutputTokens on the non-input path, which caused compaction to trigger too late. Fix: reserve maxOutputTokens (capped at 32K) on both paths. Also fixed the non-input path to respect config.compaction.reserved.

1 parent 91468fe commit 626e8a5
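For readers unfamiliar with the failure mode, here is a minimal sketch of the replayed history the API validates. This is not code from this repo; the field names follow Anthropic's extended-thinking message format, and the signature value is a placeholder:

// Sketch: why signatures must survive replay. With extended thinking enabled,
// Anthropic returns thinking blocks carrying an opaque signature, and any
// later request that replays the conversation must echo them byte-for-byte.
const history = [
  {
    role: "assistant",
    content: [
      {
        type: "thinking",
        thinking: "Consider the edge cases first...",
        // Verified server-side on replay; dropping or altering this field
        // is what produces the "thinking blocks cannot be modified" error.
        signature: "EqQBCkYIBBgCIkDk...", // placeholder value
      },
      { type: "text", text: "Here is the fix." },
    ],
  },
]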

4 files changed: 245 additions & 27 deletions

packages/opencode/src/session/message-v2.ts (12 additions, 7 deletions)
@@ -688,7 +688,12 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
     }

     if (msg.info.role === "assistant") {
-      const differentModel = `${model.providerID}/${model.id}` !== `${msg.info.providerID}/${msg.info.modelID}`
+      // Only strip provider metadata when crossing provider boundaries (e.g. Anthropic → OpenAI).
+      // Metadata is provider-namespaced so a different provider ignores unknown keys, but
+      // passing it is still unnecessary. Within the same provider (e.g. compaction using a
+      // different model variant), metadata MUST be preserved — Anthropic requires thinking
+      // block signatures to be byte-identical on replay.
+      const differentProvider = model.providerID !== msg.info.providerID
       const media: Array<{ mime: string; url: string }> = []

       if (
@@ -710,7 +715,7 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
           assistantMessage.parts.push({
             type: "text",
             text: part.text,
-            ...(differentModel ? {} : { providerMetadata: part.metadata }),
+            ...(differentProvider ? {} : { providerMetadata: part.metadata }),
           })
         if (part.type === "step-start")
           assistantMessage.parts.push({
@@ -746,7 +751,7 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
             input: part.state.input,
             output,
             ...(part.metadata?.providerExecuted ? { providerExecuted: true } : {}),
-            ...(differentModel ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
+            ...(differentProvider ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
           })
         }
         if (part.state.status === "error") {
@@ -759,7 +764,7 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
             input: part.state.input,
             output,
             ...(part.metadata?.providerExecuted ? { providerExecuted: true } : {}),
-            ...(differentModel ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
+            ...(differentProvider ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
           })
         } else {
           assistantMessage.parts.push({
@@ -769,7 +774,7 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
             input: part.state.input,
             errorText: part.state.error,
             ...(part.metadata?.providerExecuted ? { providerExecuted: true } : {}),
-            ...(differentModel ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
+            ...(differentProvider ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
           })
         }
       }
@@ -783,14 +788,14 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
             input: part.state.input,
             errorText: "[Tool execution was interrupted]",
             ...(part.metadata?.providerExecuted ? { providerExecuted: true } : {}),
-            ...(differentModel ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
+            ...(differentProvider ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
           })
         }
         if (part.type === "reasoning") {
           assistantMessage.parts.push({
             type: "reasoning",
             text: part.text,
-            ...(differentModel ? {} : { providerMetadata: part.metadata }),
+            ...(differentProvider ? {} : { providerMetadata: part.metadata }),
          })
        }
      }
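The effect of the predicate change in one picture. A sketch with hypothetical model IDs, assuming the common compaction case of two model variants from the same provider:

// Compaction summarizes with a different model variant of the same provider.
const msg = { providerID: "anthropic", modelID: "claude-opus-4" }
const model = { providerID: "anthropic", id: "claude-haiku-4" }

// Before: any model change stripped providerMetadata, signatures included.
const differentModel =
  `${model.providerID}/${model.id}` !== `${msg.providerID}/${msg.modelID}` // true: metadata stripped

// After: only a provider change strips it, so same-provider replays keep signatures.
const differentProvider = model.providerID !== msg.providerID // false: metadata preserved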

packages/opencode/src/session/overflow.ts (6 additions, 5 deletions)

@@ -3,17 +3,18 @@ import type { Provider } from "@/provider"
 import { ProviderTransform } from "@/provider"
 import type { MessageV2 } from "./message-v2"

-const COMPACTION_BUFFER = 20_000
-
 export function usable(input: { cfg: Config.Info; model: Provider.Model }) {
   const context = input.model.limit.context
   if (context === 0) return 0

-  const reserved =
-    input.cfg.compaction?.reserved ?? Math.min(COMPACTION_BUFFER, ProviderTransform.maxOutputTokens(input.model))
+  // Reserve headroom so compaction triggers before the next turn overflows.
+  // maxOutputTokens() is capped at 32K (OUTPUT_TOKEN_MAX) regardless of the
+  // model's raw output limit, so this is never excessively aggressive.
+  // Users can override via config.compaction.reserved if needed (#12924).
+  const reserved = input.cfg.compaction?.reserved ?? ProviderTransform.maxOutputTokens(input.model)
   return input.model.limit.input
     ? Math.max(0, input.model.limit.input - reserved)
-    : Math.max(0, context - ProviderTransform.maxOutputTokens(input.model))
+    : Math.max(0, context - reserved)
 }

 export function isOverflow(input: { cfg: Config.Info; tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) {
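A worked example of the headroom math, using the same hypothetical limits as the tests below (200K context, output capped at 32K):

// A 200K-context model whose maxOutputTokens is capped at 32K.
const context = 200_000
const maxOutput = 32_000 // ProviderTransform.maxOutputTokens(model), capped at 32K

// Old behavior: the limit.input path reserved only min(20_000, maxOutput).
const usableBefore = Math.max(0, 200_000 - Math.min(20_000, maxOutput)) // 180_000

// New behavior: both paths reserve the same maxOutput headroom.
const usableWithInputLimit = Math.max(0, 200_000 - maxOutput) // 168_000
const usableWithoutInputLimit = Math.max(0, context - maxOutput) // 168_000

// The 12K gap (180K vs 168K) is the asymmetry that let compaction fire too late.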

packages/opencode/test/session/compaction.test.ts (11 additions, 11 deletions)

@@ -418,20 +418,20 @@ describe("session.compaction.isOverflow", () => {
       ),
     )

-  // ─── Bug reproduction tests ───────────────────────────────────────────
-  // These tests demonstrate that when limit.input is set, isOverflow()
-  // does not subtract any headroom for the next model response. This means
-  // compaction only triggers AFTER we've already consumed the full input
-  // budget, leaving zero room for the next API call's output tokens.
+  // ─── Headroom reservation tests ──────────────────────────────────────
+  // These tests verify that when limit.input is set, isOverflow()
+  // correctly reserves headroom (maxOutputTokens, capped at 32K) so
+  // compaction triggers before the next API call overflows.
   //
-  // Compare: without limit.input, usable = context - output (reserves space).
-  // With limit.input, usable = limit.input (reserves nothing).
+  // Previously (bug), the limit.input path only subtracted a 20K buffer
+  // while the non-input path subtracted the full maxOutputTokens — an
+  // asymmetry that let sessions grow ~12K tokens too large before compacting.
   //
   // Related issues: #10634, #8089, #11086, #12621
   // Open PRs: #6875, #12924

   it.live(
-    "BUG: no headroom when limit.input is set — compaction should trigger near boundary but does not",
+    "no headroom when limit.input is set — compaction should trigger near boundary",
     provideTmpdirInstance(() =>
       Effect.gen(function* () {
         const compact = yield* SessionCompaction.Service
@@ -457,7 +457,7 @@ describe("session.compaction.isOverflow", () => {
     )

   it.live(
-    "BUG: without limit.input, same token count correctly triggers compaction",
+    "without limit.input, same token count correctly triggers compaction",
     provideTmpdirInstance(() =>
       Effect.gen(function* () {
         const compact = yield* SessionCompaction.Service
@@ -477,15 +477,15 @@ describe("session.compaction.isOverflow", () => {
     )

   it.live(
-    "BUG: asymmetry — limit.input model allows 30K more usage before compaction than equivalent model without it",
+    "asymmetry — limit.input model does not allow more usage than equivalent model without it",
     provideTmpdirInstance(() =>
       Effect.gen(function* () {
         const compact = yield* SessionCompaction.Service
         // Two models with identical context/output limits, differing only in limit.input
         const withInputLimit = createModel({ context: 200_000, input: 200_000, output: 32_000 })
         const withoutInputLimit = createModel({ context: 200_000, output: 32_000 })

-        // 170K total tokens — well above context-output (168K) but below input limit (200K)
+        // 181K total tokens — above usable (context - maxOutput = 168K)
         const tokens = { input: 166_000, output: 10_000, reasoning: 0, cache: { read: 5_000, write: 0 } }

         const withLimit = yield* compact.isOverflow({ tokens, model: withInputLimit })
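The arithmetic behind that 181K figure, as a sketch assuming isOverflow() counts input, output, reasoning, and cache-read tokens toward the turn total:

const total = 166_000 + 10_000 + 0 + 5_000 // 181_000 tokens consumed this turn
const usable = 200_000 - 32_000 // 168_000 with the symmetric reservation
const overflows = total > usable // true: both models now agree compaction is due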
