Skip to content

Commit 07ba2f7

Browse files
committed
fix: preserve thinking block signatures and fix compaction headroom asymmetry
Two compounding bugs caused sessions to crash with 'thinking blocks cannot be modified' when compaction fired for models with extended thinking: 1. toModelMessages() stripped providerMetadata (including cryptographic signatures) from message parts whenever the current model differed from the original — even for a different model variant of the same provider. Anthropic's API requires thinking block signatures to be byte-identical on replay. Fix: preserve providerMetadata whenever the provider is unchanged; strip it only when crossing provider boundaries (e.g. Anthropic → OpenAI), where it does not apply. 2. isOverflow() used an asymmetric buffer when limit.input was set (capped at 20K via COMPACTION_BUFFER) vs the full maxOutputTokens on the non-input path. This caused compaction to trigger too late. Fix: use maxOutputTokens (capped at 32K) for both paths. Also fixed the non-input path to respect config.compaction.reserved.
1 parent 88c5f6b commit 07ba2f7

4 files changed

Lines changed: 245 additions & 27 deletions

File tree

packages/opencode/src/session/message-v2.ts

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -808,7 +808,12 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
808808
}
809809

810810
if (msg.info.role === "assistant") {
811-
const differentModel = `${model.providerID}/${model.id}` !== `${msg.info.providerID}/${msg.info.modelID}`
811+
// Only strip provider metadata when crossing provider boundaries (e.g. Anthropic → OpenAI).
812+
// Metadata is provider-namespaced so a different provider ignores unknown keys, but
813+
// passing it is still unnecessary. Within the same provider (e.g. compaction using a
814+
// different model variant), metadata MUST be preserved — Anthropic requires thinking
815+
// block signatures to be byte-identical on replay.
816+
const differentProvider = model.providerID !== msg.info.providerID
812817
const media: Array<{ mime: string; url: string }> = []
813818

814819
if (
@@ -830,7 +835,7 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
830835
assistantMessage.parts.push({
831836
type: "text",
832837
text: part.text,
833-
...(differentModel ? {} : { providerMetadata: part.metadata }),
838+
...(differentProvider ? {} : { providerMetadata: part.metadata }),
834839
})
835840
if (part.type === "step-start")
836841
assistantMessage.parts.push({
@@ -866,7 +871,7 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
866871
input: part.state.input,
867872
output,
868873
...(part.metadata?.providerExecuted ? { providerExecuted: true } : {}),
869-
...(differentModel ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
874+
...(differentProvider ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
870875
})
871876
}
872877
if (part.state.status === "error") {
@@ -879,7 +884,7 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
879884
input: part.state.input,
880885
output,
881886
...(part.metadata?.providerExecuted ? { providerExecuted: true } : {}),
882-
...(differentModel ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
887+
...(differentProvider ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
883888
})
884889
} else {
885890
assistantMessage.parts.push({
@@ -889,7 +894,7 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
889894
input: part.state.input,
890895
errorText: part.state.error,
891896
...(part.metadata?.providerExecuted ? { providerExecuted: true } : {}),
892-
...(differentModel ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
897+
...(differentProvider ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
893898
})
894899
}
895900
}
@@ -903,14 +908,14 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
903908
input: part.state.input,
904909
errorText: "[Tool execution was interrupted]",
905910
...(part.metadata?.providerExecuted ? { providerExecuted: true } : {}),
906-
...(differentModel ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
911+
...(differentProvider ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
907912
})
908913
}
909914
if (part.type === "reasoning") {
910915
assistantMessage.parts.push({
911916
type: "reasoning",
912917
text: part.text,
913-
...(differentModel ? {} : { providerMetadata: part.metadata }),
918+
...(differentProvider ? {} : { providerMetadata: part.metadata }),
914919
})
915920
}
916921
}

packages/opencode/src/session/overflow.ts

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,18 @@ import type { Provider } from "@/provider"
33
import { ProviderTransform } from "@/provider"
44
import type { MessageV2 } from "./message-v2"
55

6-
const COMPACTION_BUFFER = 20_000
7-
86
export function usable(input: { cfg: Config.Info; model: Provider.Model }) {
97
const context = input.model.limit.context
108
if (context === 0) return 0
119

12-
const reserved =
13-
input.cfg.compaction?.reserved ?? Math.min(COMPACTION_BUFFER, ProviderTransform.maxOutputTokens(input.model))
10+
// Reserve headroom so compaction triggers before the next turn overflows.
11+
// maxOutputTokens() is capped at 32K (OUTPUT_TOKEN_MAX) regardless of the
12+
// model's raw output limit, so this is never excessively aggressive.
13+
// Users can override via config.compaction.reserved if needed (#12924).
14+
const reserved = input.cfg.compaction?.reserved ?? ProviderTransform.maxOutputTokens(input.model)
1415
return input.model.limit.input
1516
? Math.max(0, input.model.limit.input - reserved)
16-
: Math.max(0, context - ProviderTransform.maxOutputTokens(input.model))
17+
: Math.max(0, context - reserved)
1718
}
1819

1920
export function isOverflow(input: { cfg: Config.Info; tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) {

packages/opencode/test/session/compaction.test.ts

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -418,20 +418,20 @@ describe("session.compaction.isOverflow", () => {
418418
),
419419
)
420420

421-
// ─── Bug reproduction tests ───────────────────────────────────────────
422-
// These tests demonstrate that when limit.input is set, isOverflow()
423-
// does not subtract any headroom for the next model response. This means
424-
// compaction only triggers AFTER we've already consumed the full input
425-
// budget, leaving zero room for the next API call's output tokens.
421+
// ─── Headroom reservation tests ──────────────────────────────────────
422+
// These tests verify that when limit.input is set, isOverflow()
423+
// correctly reserves headroom (maxOutputTokens, capped at 32K) so
424+
// compaction triggers before the next API call overflows.
426425
//
427-
// Compare: without limit.input, usable = context - output (reserves space).
428-
// With limit.input, usable = limit.input (reserves nothing).
426+
// Previously (bug), the limit.input path only subtracted a 20K buffer
427+
// while the non-input path subtracted the full maxOutputTokens — an
428+
// asymmetry that let sessions grow ~12K tokens too large before compacting.
429429
//
430430
// Related issues: #10634, #8089, #11086, #12621
431431
// Open PRs: #6875, #12924
432432

433433
it.live(
434-
"BUG: no headroom when limit.input is set — compaction should trigger near boundary but does not",
434+
"headroom reserved when limit.input is set — compaction triggers near boundary",
435435
provideTmpdirInstance(() =>
436436
Effect.gen(function* () {
437437
const compact = yield* SessionCompaction.Service
@@ -457,7 +457,7 @@ describe("session.compaction.isOverflow", () => {
457457
)
458458

459459
it.live(
460-
"BUG: without limit.input, same token count correctly triggers compaction",
460+
"without limit.input, same token count correctly triggers compaction",
461461
provideTmpdirInstance(() =>
462462
Effect.gen(function* () {
463463
const compact = yield* SessionCompaction.Service
@@ -477,15 +477,15 @@ describe("session.compaction.isOverflow", () => {
477477
)
478478

479479
it.live(
480-
"BUG: asymmetry — limit.input model allows 30K more usage before compaction than equivalent model without it",
480+
"symmetry — limit.input model does not allow more usage than equivalent model without it",
481481
provideTmpdirInstance(() =>
482482
Effect.gen(function* () {
483483
const compact = yield* SessionCompaction.Service
484484
// Two models with identical context/output limits, differing only in limit.input
485485
const withInputLimit = createModel({ context: 200_000, input: 200_000, output: 32_000 })
486486
const withoutInputLimit = createModel({ context: 200_000, output: 32_000 })
487487

488-
// 170K total tokens — well above context-output (168K) but below input limit (200K)
488+
// 181K total tokens — above usable (context - maxOutput = 168K)
489489
const tokens = { input: 166_000, output: 10_000, reasoning: 0, cache: { read: 5_000, write: 0 } }
490490

491491
const withLimit = yield* compact.isOverflow({ tokens, model: withInputLimit })

0 commit comments

Comments
 (0)