Skip to content

Commit cac632c

Browse files
committed
Merge branch 'dev' into fix/flush-stdin-on-exit
2 parents 4b55ab2 + cc063d4 commit cac632c

32 files changed

Lines changed: 2806 additions & 134 deletions

File tree

bun.lock

Lines changed: 30 additions & 30 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

nix/hashes.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
{
22
"nodeModules": {
3-
"x86_64-linux": "sha256-OPbZUo/fQv2Xsf+NEZV08GLBMN/DXovhRvn2JkesFtY=",
4-
"aarch64-linux": "sha256-WK7xlVLuirKDN5LaqjBn7qpv5bYVtYHZw0qRNKX4xXg=",
5-
"aarch64-darwin": "sha256-BAoAdeLQ+lXDD7Klxoxij683OVVug8KXEMRUqIQAjc8=",
6-
"x86_64-darwin": "sha256-ZOBwNR2gZgc5f+y3VIBBT4qZpeZfg7Of6AaGDOfqsG8="
3+
"x86_64-linux": "sha256-yGb+EPlFNDptIi4yFdJ0z7fhAyfOCRXu0GpNxrOnLVA=",
4+
"aarch64-linux": "sha256-h8oBwLB6LnFN4xGB0/ocvxRbBMwewrEck91ADihTUCk=",
5+
"aarch64-darwin": "sha256-/0IOgoihi4OR2AhDDfstLn2DcY/261v/KRQV4Pwhbmk=",
6+
"x86_64-darwin": "sha256-rPkqF+led93s1plBbhFpszrnzF2H+EUz8QlfbUkSHvM="
77
}
88
}

packages/app/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@opencode-ai/app",
3-
"version": "1.4.7",
3+
"version": "1.4.8",
44
"description": "",
55
"type": "module",
66
"exports": {

packages/console/app/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@opencode-ai/console-app",
3-
"version": "1.4.7",
3+
"version": "1.4.8",
44
"type": "module",
55
"license": "MIT",
66
"scripts": {

packages/console/app/src/routes/zen/util/handler.ts

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ import { LiteData } from "@opencode-ai/console-core/lite.js"
4545
import { Resource } from "@opencode-ai/console-resource"
4646
import { i18n, type Key } from "~/i18n"
4747
import { localeFromRequest } from "~/lib/language"
48+
import { createModelTpmLimiter } from "./modelTpmLimiter"
4849

4950
type ZenData = Awaited<ReturnType<typeof ZenData.list>>
5051
type RetryOptions = {
@@ -121,6 +122,8 @@ export async function handler(
121122
const authInfo = await authenticate(modelInfo, zenApiKey)
122123
const billingSource = validateBilling(authInfo, modelInfo)
123124
logger.metric({ source: billingSource })
125+
const modelTpmLimiter = createModelTpmLimiter(modelInfo.providers)
126+
const modelTpmLimits = await modelTpmLimiter?.check()
124127

125128
const retriableRequest = async (retry: RetryOptions = { excludeProviders: [], retryCount: 0 }) => {
126129
const providerInfo = selectProvider(
@@ -133,6 +136,7 @@ export async function handler(
133136
trialProviders,
134137
retry,
135138
stickyProvider,
139+
modelTpmLimits,
136140
)
137141
validateModelSettings(billingSource, authInfo)
138142
updateProviderKey(authInfo, providerInfo)
@@ -229,6 +233,7 @@ export async function handler(
229233
const usageInfo = providerInfo.normalizeUsage(json.usage)
230234
const costInfo = calculateCost(modelInfo, usageInfo)
231235
await trialLimiter?.track(usageInfo)
236+
await modelTpmLimiter?.track(providerInfo.id, providerInfo.model, usageInfo)
232237
await trackUsage(sessionId, billingSource, authInfo, modelInfo, providerInfo, usageInfo, costInfo)
233238
await reload(billingSource, authInfo, costInfo)
234239
json.cost = calculateOccurredCost(billingSource, costInfo)
@@ -278,6 +283,7 @@ export async function handler(
278283
const usageInfo = providerInfo.normalizeUsage(usage)
279284
const costInfo = calculateCost(modelInfo, usageInfo)
280285
await trialLimiter?.track(usageInfo)
286+
await modelTpmLimiter?.track(providerInfo.id, providerInfo.model, usageInfo)
281287
await trackUsage(sessionId, billingSource, authInfo, modelInfo, providerInfo, usageInfo, costInfo)
282288
await reload(billingSource, authInfo, costInfo)
283289
const cost = calculateOccurredCost(billingSource, costInfo)
@@ -433,12 +439,16 @@ export async function handler(
433439
trialProviders: string[] | undefined,
434440
retry: RetryOptions,
435441
stickyProvider: string | undefined,
442+
modelTpmLimits: Record<string, number> | undefined,
436443
) {
437444
const modelProvider = (() => {
445+
// BYOK takes top priority: if the user has set their own API key, we should use it
446+
// instead of the sticky provider for the same session
438447
if (authInfo?.provider?.credentials) {
439448
return modelInfo.providers.find((provider) => provider.id === modelInfo.byokProvider)
440449
}
441450

451+
// Always use the same provider for the same session
442452
if (stickyProvider) {
443453
const provider = modelInfo.providers.find((provider) => provider.id === stickyProvider)
444454
if (provider) return provider
@@ -451,10 +461,20 @@ export async function handler(
451461
}
452462

453463
if (retry.retryCount !== MAX_FAILOVER_RETRIES) {
454-
const providers = modelInfo.providers
464+
const allProviders = modelInfo.providers
455465
.filter((provider) => !provider.disabled)
466+
.filter((provider) => provider.weight !== 0)
456467
.filter((provider) => !retry.excludeProviders.includes(provider.id))
457-
.flatMap((provider) => Array<typeof provider>(provider.weight ?? 1).fill(provider))
468+
.filter((provider) => {
469+
if (!provider.tpmLimit) return true
470+
const usage = modelTpmLimits?.[`${provider.id}/${provider.model}`] ?? 0
471+
return usage < provider.tpmLimit * 1_000_000
472+
})
473+
474+
const topPriority = Math.min(...allProviders.map((p) => p.priority))
475+
const providers = allProviders
476+
.filter((p) => p.priority <= topPriority)
477+
.flatMap((provider) => Array<typeof provider>(provider.weight).fill(provider))
458478

459479
// Use the last 4 characters of session ID to select a provider
460480
const identifier = sessionId.length ? sessionId : ip
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import { and, Database, eq, inArray, sql } from "@opencode-ai/console-core/drizzle/index.js"
2+
import { ModelRateLimitTable } from "@opencode-ai/console-core/schema/ip.sql.js"
3+
import { UsageInfo } from "./provider/provider"
4+
5+
/**
 * Builds a tokens-per-minute usage limiter for the providers that declare a `tpmLimit`.
 *
 * Usage is kept in `ModelRateLimitTable`, one row per `"<provider id>/<model>"` key and
 * per UTC minute bucket formatted as `yyyyMMddHHmm`.
 *
 * @param providers Candidate providers; entries without a truthy `tpmLimit` are ignored.
 * @returns `undefined` when no provider declares a limit (callers use `limiter?.check()`),
 *   otherwise an object exposing `check` and `track`.
 */
export function createModelTpmLimiter(providers: { id: string; model: string; tpmLimit?: number }[]) {
  const keys = providers.filter((p) => p.tpmLimit).map((p) => `${p.id}/${p.model}`)
  if (keys.length === 0) return

  // The bucket is resolved on every call instead of once at construction: `track` runs
  // after the upstream response has completed, which can be in a later minute than the
  // one in which the limiter was created. Writing into the creation-time bucket would
  // record that usage in a row that no subsequent `check` ever reads.
  // `toISOString()` is always UTC, so every instance agrees on the bucket name.
  const currentMinute = () =>
    new Date(Date.now())
      .toISOString()
      .replace(/[^0-9]/g, "")
      .substring(0, 12)

  return {
    /**
     * Reads the usage recorded so far in the current minute.
     *
     * @returns Map of `"<provider id>/<model>"` to token count; keys with no row yet are absent.
     */
    check: async (): Promise<Record<string, number>> => {
      const interval = currentMinute()
      const rows = await Database.use((tx) =>
        tx
          .select()
          .from(ModelRateLimitTable)
          .where(and(inArray(ModelRateLimitTable.key, keys), eq(ModelRateLimitTable.interval, interval))),
      )

      const usageByKey: Record<string, number> = {}
      for (const row of rows) {
        usageByKey[row.key] = row.count
      }
      return usageByKey
    },
    /**
     * Adds a finished request's token usage to the current minute's counter.
     *
     * @param id Provider id the request was served by.
     * @param model Provider-side model name.
     * @param usageInfo Normalized usage; input, output, reasoning and all cache token counts are summed.
     */
    track: async (id: string, model: string, usageInfo: UsageInfo) => {
      const usage =
        usageInfo.inputTokens +
        usageInfo.outputTokens +
        (usageInfo.reasoningTokens ?? 0) +
        (usageInfo.cacheReadTokens ?? 0) +
        (usageInfo.cacheWrite5mTokens ?? 0) +
        (usageInfo.cacheWrite1hTokens ?? 0)
      // Written as a negated `>` so that NaN (e.g. a missing token field) is skipped
      // together with zero and negative totals instead of reaching the INSERT.
      if (!(usage > 0)) return
      const interval = currentMinute()
      await Database.use((tx) =>
        tx
          .insert(ModelRateLimitTable)
          .values({ key: `${id}/${model}`, interval, count: usage })
          // Upsert: the first write of the minute creates the row, later writes add to it.
          .onDuplicateKeyUpdate({ set: { count: sql`${ModelRateLimitTable.count} + ${usage}` } }),
      )
    },
  }
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
-- Counter table with exactly one row per (`key`, `interval`) pair, enforced by the
-- composite primary key; that uniqueness is what an upsert-style increment relies on.
-- NOTE(review): presumably the table behind `ModelRateLimitTable`, whose writer stores
-- "<provider id>/<model>" in `key`, a 12-digit yyyyMMddHHmm string in `interval`, and a
-- running token total in `count` -- confirm against the schema definition.
CREATE TABLE `model_rate_limit` (
2+
`key` varchar(255) NOT NULL,
3+
`interval` varchar(40) NOT NULL,
4+
`count` int NOT NULL,
5+
CONSTRAINT PRIMARY KEY(`key`,`interval`)
6+
);

0 commit comments

Comments
 (0)