1 change: 1 addition & 0 deletions packages/opencode/src/plugin/codex.ts
@@ -374,6 +374,7 @@ export async function CodexAuthPlugin(input: PluginInput): Promise<Hooks> {
"gpt-5.3-codex",
"gpt-5.4",
"gpt-5.4-mini",
"gpt-5.5",
])
for (const [modelId, model] of Object.entries(provider.models)) {
if (modelId.includes("codex")) continue
19 changes: 12 additions & 7 deletions packages/opencode/src/session/message-v2.ts
@@ -814,7 +814,12 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
}

if (msg.info.role === "assistant") {
const differentModel = `${model.providerID}/${model.id}` !== `${msg.info.providerID}/${msg.info.modelID}`
Collaborator


Killing this logic will break switching models mid-conversation in a variety of cases; we cannot do this. Also, your PR says compaction uses a different model? That would only happen if you manually specify a different model, and even then this logic should prevent a separate model from causing issues.

Author


Good point, what do you think of the revised change?

// Only strip provider metadata when crossing provider boundaries (e.g. Anthropic → OpenAI).
// Metadata is provider-namespaced so a different provider ignores unknown keys, but
// passing it is still unnecessary. Within the same provider (e.g. compaction using a
// different model variant), metadata MUST be preserved — Anthropic requires thinking
// block signatures to be byte-identical on replay.
const differentProvider = model.providerID !== msg.info.providerID
const media: Array<{ mime: string; url: string }> = []

if (
@@ -836,7 +841,7 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
assistantMessage.parts.push({
type: "text",
text: part.text,
...(differentModel ? {} : { providerMetadata: part.metadata }),
...(differentProvider ? {} : { providerMetadata: part.metadata }),
})
if (part.type === "step-start")
assistantMessage.parts.push({
@@ -874,7 +879,7 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
input: part.state.input,
output,
...(part.metadata?.providerExecuted ? { providerExecuted: true } : {}),
...(differentModel ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
...(differentProvider ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
})
}
if (part.state.status === "error") {
@@ -887,7 +892,7 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
input: part.state.input,
output,
...(part.metadata?.providerExecuted ? { providerExecuted: true } : {}),
...(differentModel ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
...(differentProvider ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
})
} else {
assistantMessage.parts.push({
@@ -897,7 +902,7 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
input: part.state.input,
errorText: part.state.error,
...(part.metadata?.providerExecuted ? { providerExecuted: true } : {}),
...(differentModel ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
...(differentProvider ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
})
}
}
@@ -911,14 +916,14 @@ export const toModelMessagesEffect = Effect.fnUntraced(function* (
input: part.state.input,
errorText: "[Tool execution was interrupted]",
...(part.metadata?.providerExecuted ? { providerExecuted: true } : {}),
...(differentModel ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
...(differentProvider ? {} : { callProviderMetadata: providerMeta(part.metadata) }),
})
}
if (part.type === "reasoning") {
assistantMessage.parts.push({
type: "reasoning",
text: part.text,
...(differentModel ? {} : { providerMetadata: part.metadata }),
...(differentProvider ? {} : { providerMetadata: part.metadata }),
})
}
}
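Below is a minimal standalone TypeScript sketch of the distinction the revised check makes (the real code compares `model.providerID` against the provider recorded on the stored assistant message; the model IDs here are hypothetical examples):

```ts
// A standalone sketch of the revised check, not the actual MessageV2 types.
// Model IDs below are hypothetical examples.
interface ModelRef {
  providerID: string
  modelID: string
}

// Old behavior: any model change (even within one provider) dropped provider metadata.
// Revised behavior: only a provider change does.
function shouldStripMetadata(current: ModelRef, original: ModelRef): boolean {
  return current.providerID !== original.providerID
}

// Same provider, different model (e.g. a compaction variant): metadata is kept,
// so thinking-block signatures can replay byte-identical.
console.log(
  shouldStripMetadata(
    { providerID: "anthropic", modelID: "claude-sonnet-4" },
    { providerID: "anthropic", modelID: "claude-haiku-4" },
  ),
) // false

// Cross-provider switch: metadata is dropped.
console.log(
  shouldStripMetadata(
    { providerID: "anthropic", modelID: "claude-sonnet-4" },
    { providerID: "openai", modelID: "gpt-5.4" },
  ),
) // true
```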
11 changes: 6 additions & 5 deletions packages/opencode/src/session/overflow.ts
@@ -3,17 +3,18 @@ import type { Provider } from "@/provider"
import { ProviderTransform } from "@/provider"
import type { MessageV2 } from "./message-v2"

const COMPACTION_BUFFER = 20_000

export function usable(input: { cfg: Config.Info; model: Provider.Model }) {
const context = input.model.limit.context
if (context === 0) return 0

const reserved =
input.cfg.compaction?.reserved ?? Math.min(COMPACTION_BUFFER, ProviderTransform.maxOutputTokens(input.model))
// Reserve headroom so compaction triggers before the next turn overflows.
// maxOutputTokens() is capped at 32K (OUTPUT_TOKEN_MAX) regardless of the
// model's raw output limit, so this is never excessively aggressive.
// Users can override via config.compaction.reserved if needed (#12924).
const reserved = input.cfg.compaction?.reserved ?? ProviderTransform.maxOutputTokens(input.model)
return input.model.limit.input
? Math.max(0, input.model.limit.input - reserved)
: Math.max(0, context - ProviderTransform.maxOutputTokens(input.model))
: Math.max(0, context - reserved)
}

export function isOverflow(input: { cfg: Config.Info; tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) {
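A hedged sketch of the reservation arithmetic this change affects, using the 200K-context / 32K-output figures from the tests below; `maxOutputTokens` stands in for `ProviderTransform.maxOutputTokens`, which the new comment describes as capped at 32K (OUTPUT_TOKEN_MAX):

```ts
// Stand-in numbers, not the real ProviderTransform: before/after reservation
// math for a 200K-context, 32K-output model that also declares limit.input.
const inputLimit = 200_000
const maxOutputTokens = 32_000 // assumed capped at OUTPUT_TOKEN_MAX per the comment above

// Before: the limit.input path reserved only the fixed 20K COMPACTION_BUFFER.
const usableBefore = inputLimit - Math.min(20_000, maxOutputTokens) // 180_000

// After: both paths reserve the full (capped) max output.
const usableAfter = inputLimit - maxOutputTokens // 168_000

// The asymmetry the tests mention: ~12K extra tokens before compaction
// compared to an identical model without limit.input (200K - 32K = 168K).
console.log(usableBefore - usableAfter) // 12_000
```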
22 changes: 11 additions & 11 deletions packages/opencode/test/session/compaction.test.ts
@@ -457,20 +457,20 @@ describe("session.compaction.isOverflow", () => {
),
)

// ─── Bug reproduction tests ───────────────────────────────────────────
// These tests demonstrate that when limit.input is set, isOverflow()
// does not subtract any headroom for the next model response. This means
// compaction only triggers AFTER we've already consumed the full input
// budget, leaving zero room for the next API call's output tokens.
// ─── Headroom reservation tests ──────────────────────────────────────
// These tests verify that when limit.input is set, isOverflow()
// correctly reserves headroom (maxOutputTokens, capped at 32K) so
// compaction triggers before the next API call overflows.
//
// Compare: without limit.input, usable = context - output (reserves space).
// With limit.input, usable = limit.input (reserves nothing).
// Previously (bug), the limit.input path only subtracted a 20K buffer
// while the non-input path subtracted the full maxOutputTokens — an
// asymmetry that let sessions grow ~12K tokens too large before compacting.
//
// Related issues: #10634, #8089, #11086, #12621
// Open PRs: #6875, #12924

it.live(
"BUG: no headroom when limit.input is set — compaction should trigger near boundary but does not",
"no headroom when limit.input is set — compaction should trigger near boundary",
provideTmpdirInstance(() =>
Effect.gen(function* () {
const compact = yield* SessionCompaction.Service
@@ -496,7 +496,7 @@ describe("session.compaction.isOverflow", () => {
)

it.live(
"BUG: without limit.input, same token count correctly triggers compaction",
"without limit.input, same token count correctly triggers compaction",
provideTmpdirInstance(() =>
Effect.gen(function* () {
const compact = yield* SessionCompaction.Service
@@ -516,15 +516,15 @@ describe("session.compaction.isOverflow", () => {
)

it.live(
"BUG: asymmetry — limit.input model allows 30K more usage before compaction than equivalent model without it",
"asymmetry — limit.input model does not allow more usage than equivalent model without it",
provideTmpdirInstance(() =>
Effect.gen(function* () {
const compact = yield* SessionCompaction.Service
// Two models with identical context/output limits, differing only in limit.input
const withInputLimit = createModel({ context: 200_000, input: 200_000, output: 32_000 })
const withoutInputLimit = createModel({ context: 200_000, output: 32_000 })

// 170K total tokens — well above context-output (168K) but below input limit (200K)
// 181K total tokens — above usable (context - maxOutput = 168K)
const tokens = { input: 166_000, output: 10_000, reasoning: 0, cache: { read: 5_000, write: 0 } }

const withLimit = yield* compact.isOverflow({ tokens, model: withInputLimit })
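For reference, a small sketch reproducing the arithmetic behind the 181K figure in the last test (assuming cache reads count toward the total; the actual accounting lives in the compaction service and overflow.ts):

```ts
// Hypothetical recomputation of the figures in the last test above.
const tokens = { input: 166_000, output: 10_000, reasoning: 0, cache: { read: 5_000, write: 0 } }

// 166K + 10K + 0 + 5K = 181K total, assuming cache reads count toward usage.
const total = tokens.input + tokens.output + tokens.reasoning + tokens.cache.read

// usable = context - maxOutputTokens = 200K - 32K = 168K, so both the
// limit.input and no-limit.input models should now report overflow at 181K.
const usable = 200_000 - 32_000
console.log(total > usable) // true
```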