Commit 611ebdb

fix: preserve thinking block signatures and fix compaction headroom asymmetry

Two compounding bugs caused sessions to crash with "thinking blocks cannot be modified" when compaction fired for models with extended thinking:

1. toModelMessages() stripped providerMetadata (including cryptographic signatures) from message parts whenever the current model differed from the original. Anthropic's API requires thinking-block signatures to be byte-identical on replay. Fix: preserve providerMetadata whenever the provider is unchanged; it is stripped only when crossing provider boundaries.

2. isOverflow() reserved asymmetric headroom: at most 20K (COMPACTION_BUFFER) when limit.input was set, versus the full maxOutputTokens on the non-input path. This caused compaction to trigger too late. Fix: reserve maxOutputTokens (capped at 32K) on both paths.

Also fixed the non-input path to respect config.compaction.reserved.
1 parent 745cc65 commit 611ebdb

4 files changed: 245 additions & 27 deletions
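Before the per-file diffs, the arithmetic behind the second fix is worth making concrete. The sketch below is illustrative only, not code from this commit; the 200K/32K figures come from the updated tests, and the 20K constant is the removed COMPACTION_BUFFER:

```ts
// Illustrative arithmetic for the headroom asymmetry, using the 200K-context
// model from the updated tests (assumed values, not the real module).
const context = 200_000
const inputLimit = 200_000
const maxOutput = 32_000 // ProviderTransform.maxOutputTokens(model), capped at 32K

// Before: the limit.input path reserved at most 20K (COMPACTION_BUFFER),
// while the non-input path reserved the full 32K-capped output budget.
const usableBefore = inputLimit - Math.min(20_000, maxOutput) // 180_000
const usableNonInputPath = context - maxOutput // 168_000

// After: both paths reserve the same headroom.
const usableAfter = inputLimit - maxOutput // 168_000

console.log(usableBefore - usableAfter) // 12_000 extra tokens the bug allowed
```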

packages/opencode/src/session/message-v2.ts

Lines changed: 12 additions & 7 deletions
```diff
@@ -814,7 +814,12 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
       }
 
       if (msg.info.role === "assistant") {
-        const differentModel = `${model.providerID}/${model.id}` !== `${msg.info.providerID}/${msg.info.modelID}`
+        // Only strip provider metadata when crossing provider boundaries (e.g. Anthropic → OpenAI).
+        // Metadata is provider-namespaced so a different provider ignores unknown keys, but
+        // passing it is still unnecessary. Within the same provider (e.g. compaction using a
+        // different model variant), metadata MUST be preserved — Anthropic requires thinking
+        // block signatures to be byte-identical on replay.
+        const differentProvider = model.providerID !== msg.info.providerID
         const media: Array<{ mime: string; url: string }> = []
 
         if (
@@ -836,7 +841,7 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
           assistantMessage.parts.push({
             type: "text",
             text: part.text,
-            ...(differentModel ? {} : { providerMetadata: part.metadata }),
+            ...(differentProvider ? {} : { providerMetadata: part.metadata }),
           })
         if (part.type === "step-start")
           assistantMessage.parts.push({
@@ -874,7 +879,7 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
               input: part.state.input,
               output,
               ...(part.metadata?.providerExecuted ? { providerExecuted: true } : {}),
-              ...(differentModel ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
+              ...(differentProvider ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
             })
           }
           if (part.state.status === "error") {
@@ -887,7 +892,7 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
               input: part.state.input,
               output,
               ...(part.metadata?.providerExecuted ? { providerExecuted: true } : {}),
-              ...(differentModel ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
+              ...(differentProvider ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
             })
           } else {
             assistantMessage.parts.push({
@@ -897,7 +902,7 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
               input: part.state.input,
               errorText: part.state.error,
               ...(part.metadata?.providerExecuted ? { providerExecuted: true } : {}),
-              ...(differentModel ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
+              ...(differentProvider ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
             })
           }
         }
@@ -911,14 +916,14 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
             input: part.state.input,
             errorText: "[Tool execution was interrupted]",
             ...(part.metadata?.providerExecuted ? { providerExecuted: true } : {}),
-            ...(differentModel ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
+            ...(differentProvider ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
           })
         }
         if (part.type === "reasoning") {
           assistantMessage.parts.push({
             type: "reasoning",
             text: part.text,
-            ...(differentModel ? {} : { providerMetadata: part.metadata }),
+            ...(differentProvider ? {} : { providerMetadata: part.metadata }),
           })
         }
       }
```
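Why the reasoning metadata matters on replay: Anthropic's extended-thinking blocks carry an opaque signature that the API verifies when earlier turns are sent back. The shape below is a rough sketch, not code from this repo; the anthropic.signature nesting is an assumption about how the provider namespaces its metadata, and the value is invented:

```ts
// Rough sketch of a replayed reasoning part (hypothetical values). If
// providerMetadata is stripped (as the old differentModel check did whenever
// the model ID changed, e.g. compaction running a different model variant),
// the signature is lost and Anthropic rejects the request with
// "thinking blocks cannot be modified".
const reasoningPart = {
  type: "reasoning" as const,
  text: "…the model's earlier chain of thought…",
  providerMetadata: {
    anthropic: {
      signature: "EqQBCgIYAh…", // opaque; must be replayed byte-identical
    },
  },
}
```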

packages/opencode/src/session/overflow.ts

Lines changed: 6 additions & 5 deletions
```diff
@@ -3,17 +3,18 @@ import type { Provider } from "@/provider"
 import { ProviderTransform } from "@/provider"
 import type { MessageV2 } from "./message-v2"
 
-const COMPACTION_BUFFER = 20_000
-
 export function usable(input: { cfg: Config.Info; model: Provider.Model }) {
   const context = input.model.limit.context
   if (context === 0) return 0
 
-  const reserved =
-    input.cfg.compaction?.reserved ?? Math.min(COMPACTION_BUFFER, ProviderTransform.maxOutputTokens(input.model))
+  // Reserve headroom so compaction triggers before the next turn overflows.
+  // maxOutputTokens() is capped at 32K (OUTPUT_TOKEN_MAX) regardless of the
+  // model's raw output limit, so this is never excessively aggressive.
+  // Users can override via config.compaction.reserved if needed (#12924).
+  const reserved = input.cfg.compaction?.reserved ?? ProviderTransform.maxOutputTokens(input.model)
   return input.model.limit.input
     ? Math.max(0, input.model.limit.input - reserved)
-    : Math.max(0, context - ProviderTransform.maxOutputTokens(input.model))
+    : Math.max(0, context - reserved)
 }
 
 export function isOverflow(input: { cfg: Config.Info; tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) {
```
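A hedged sketch of how isOverflow() plausibly spends this budget follows. The summation over input, output, reasoning, and cache-read tokens is inferred from the fixture in the test diff below (166K + 10K + 0 + 5K = 181K), not copied from the real implementation:

```ts
// Sketch only: compare total token pressure against the usable() budget.
// The field shape mirrors the tokens fixture used in the tests below.
function isOverflowSketch(
  tokens: { input: number; output: number; reasoning: number; cache: { read: number; write: number } },
  usableBudget: number,
): boolean {
  const total = tokens.input + tokens.output + tokens.reasoning + tokens.cache.read
  return total > usableBudget
}

// With the test fixture: 181_000 > 168_000, so overflow is reported.
```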

packages/opencode/test/session/compaction.test.ts

Lines changed: 11 additions & 11 deletions
```diff
@@ -457,20 +457,20 @@ describe("session.compaction.isOverflow", () => {
     ),
   )
 
-  // ─── Bug reproduction tests ───────────────────────────────────────────
-  // These tests demonstrate that when limit.input is set, isOverflow()
-  // does not subtract any headroom for the next model response. This means
-  // compaction only triggers AFTER we've already consumed the full input
-  // budget, leaving zero room for the next API call's output tokens.
+  // ─── Headroom reservation tests ──────────────────────────────────────
+  // These tests verify that when limit.input is set, isOverflow()
+  // correctly reserves headroom (maxOutputTokens, capped at 32K) so
+  // compaction triggers before the next API call overflows.
   //
-  // Compare: without limit.input, usable = context - output (reserves space).
-  // With limit.input, usable = limit.input (reserves nothing).
+  // Previously (bug), the limit.input path only subtracted a 20K buffer
+  // while the non-input path subtracted the full maxOutputTokens — an
+  // asymmetry that let sessions grow ~12K tokens too large before compacting.
   //
   // Related issues: #10634, #8089, #11086, #12621
   // Open PRs: #6875, #12924
 
   it.live(
-    "BUG: no headroom when limit.input is set — compaction should trigger near boundary but does not",
+    "no headroom when limit.input is set — compaction should trigger near boundary",
     provideTmpdirInstance(() =>
       Effect.gen(function* () {
         const compact = yield* SessionCompaction.Service
@@ -496,7 +496,7 @@ describe("session.compaction.isOverflow", () => {
   )
 
   it.live(
-    "BUG: without limit.input, same token count correctly triggers compaction",
+    "without limit.input, same token count correctly triggers compaction",
     provideTmpdirInstance(() =>
       Effect.gen(function* () {
         const compact = yield* SessionCompaction.Service
@@ -516,15 +516,15 @@ describe("session.compaction.isOverflow", () => {
   )
 
   it.live(
-    "BUG: asymmetry — limit.input model allows 30K more usage before compaction than equivalent model without it",
+    "asymmetry — limit.input model does not allow more usage than equivalent model without it",
     provideTmpdirInstance(() =>
       Effect.gen(function* () {
         const compact = yield* SessionCompaction.Service
         // Two models with identical context/output limits, differing only in limit.input
         const withInputLimit = createModel({ context: 200_000, input: 200_000, output: 32_000 })
         const withoutInputLimit = createModel({ context: 200_000, output: 32_000 })
 
-        // 170K total tokens — well above context-output (168K) but below input limit (200K)
+        // 181K total tokens — above usable (context - maxOutput = 168K)
         const tokens = { input: 166_000, output: 10_000, reasoning: 0, cache: { read: 5_000, write: 0 } }
 
         const withLimit = yield* compact.isOverflow({ tokens, model: withInputLimit })
```
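For reference, the fixture above sums to 166,000 input + 10,000 output + 0 reasoning + 5,000 cache reads = 181,000 tokens. After the fix, both model variants share the same usable budget of 200,000 − 32,000 = 168,000 tokens, so both are expected to report overflow, which is exactly the asymmetry the renamed test now rules out.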
