@@ -45,6 +45,7 @@ import { LiteData } from "@opencode-ai/console-core/lite.js"
4545import { Resource } from "@opencode-ai/console-resource"
4646import { i18n , type Key } from "~/i18n"
4747import { localeFromRequest } from "~/lib/language"
48+ import { createModelTpmLimiter } from "./modelTpmLimiter"
4849
4950type ZenData = Awaited < ReturnType < typeof ZenData . list > >
5051type RetryOptions = {
@@ -121,6 +122,8 @@ export async function handler(
121122 const authInfo = await authenticate ( modelInfo , zenApiKey )
122123 const billingSource = validateBilling ( authInfo , modelInfo )
123124 logger . metric ( { source : billingSource } )
125+ const modelTpmLimiter = createModelTpmLimiter ( modelInfo . providers )
126+ const modelTpmLimits = await modelTpmLimiter ?. check ( )
124127
125128 const retriableRequest = async ( retry : RetryOptions = { excludeProviders : [ ] , retryCount : 0 } ) => {
126129 const providerInfo = selectProvider (
@@ -133,6 +136,7 @@ export async function handler(
133136 trialProviders ,
134137 retry ,
135138 stickyProvider ,
139+ modelTpmLimits ,
136140 )
137141 validateModelSettings ( billingSource , authInfo )
138142 updateProviderKey ( authInfo , providerInfo )
@@ -229,6 +233,7 @@ export async function handler(
229233 const usageInfo = providerInfo . normalizeUsage ( json . usage )
230234 const costInfo = calculateCost ( modelInfo , usageInfo )
231235 await trialLimiter ?. track ( usageInfo )
236+ await modelTpmLimiter ?. track ( providerInfo . id , providerInfo . model , usageInfo )
232237 await trackUsage ( sessionId , billingSource , authInfo , modelInfo , providerInfo , usageInfo , costInfo )
233238 await reload ( billingSource , authInfo , costInfo )
234239 json . cost = calculateOccurredCost ( billingSource , costInfo )
@@ -278,6 +283,7 @@ export async function handler(
278283 const usageInfo = providerInfo . normalizeUsage ( usage )
279284 const costInfo = calculateCost ( modelInfo , usageInfo )
280285 await trialLimiter ?. track ( usageInfo )
286+ await modelTpmLimiter ?. track ( providerInfo . id , providerInfo . model , usageInfo )
281287 await trackUsage ( sessionId , billingSource , authInfo , modelInfo , providerInfo , usageInfo , costInfo )
282288 await reload ( billingSource , authInfo , costInfo )
283289 const cost = calculateOccurredCost ( billingSource , costInfo )
@@ -433,12 +439,16 @@ export async function handler(
433439 trialProviders : string [ ] | undefined ,
434440 retry : RetryOptions ,
435441 stickyProvider : string | undefined ,
442+ modelTpmLimits : Record < string , number > | undefined ,
436443 ) {
437444 const modelProvider = ( ( ) => {
445+ // BYOK takes top priority: if the user has set their own API key, we should use it
446+ // instead of the sticky provider for the same session
438447 if ( authInfo ?. provider ?. credentials ) {
439448 return modelInfo . providers . find ( ( provider ) => provider . id === modelInfo . byokProvider )
440449 }
441450
451+ // Always use the same provider for the same session
442452 if ( stickyProvider ) {
443453 const provider = modelInfo . providers . find ( ( provider ) => provider . id === stickyProvider )
444454 if ( provider ) return provider
@@ -451,10 +461,20 @@ export async function handler(
451461 }
452462
453463 if ( retry . retryCount !== MAX_FAILOVER_RETRIES ) {
454- const providers = modelInfo . providers
464+ const allProviders = modelInfo . providers
455465 . filter ( ( provider ) => ! provider . disabled )
466+ . filter ( ( provider ) => provider . weight !== 0 )
456467 . filter ( ( provider ) => ! retry . excludeProviders . includes ( provider . id ) )
457- . flatMap ( ( provider ) => Array < typeof provider > ( provider . weight ?? 1 ) . fill ( provider ) )
468+ . filter ( ( provider ) => {
469+ if ( ! provider . tpmLimit ) return true
470+ const usage = modelTpmLimits ?. [ `${ provider . id } /${ provider . model } ` ] ?? 0
471+ return usage < provider . tpmLimit * 1_000_000
472+ } )
473+
474+ const topPriority = Math . min ( ...allProviders . map ( ( p ) => p . priority ) )
475+ const providers = allProviders
476+ . filter ( ( p ) => p . priority <= topPriority )
477+ . flatMap ( ( provider ) => Array < typeof provider > ( provider . weight ) . fill ( provider ) )
458478
459479 // Use the last 4 characters of session ID to select a provider
460480 const identifier = sessionId . length ? sessionId : ip
0 commit comments