diff --git a/docs/docs/configure/tools/config.md b/docs/docs/configure/tools/config.md index 636a229ff8..b3f1062ede 100644 --- a/docs/docs/configure/tools/config.md +++ b/docs/docs/configure/tools/config.md @@ -52,3 +52,21 @@ When [LSP servers](../lsp.md) are configured, the `lsp` tool provides: - Go-to-definition - Hover information - Completions + +### Tool Retrieval + +With the full data-engineering toolset (~78 tools), sending every tool definition on every turn floods the context window and adds distractors that hurt the model's tool selection. **Tool retrieval** trims the exposed set per turn to a relevant subset, cutting input tokens substantially at the same task quality. + +It is **off by default** and enabled with an environment variable: + +```bash +ALTIMATE_TOOL_RETRIEVAL=1 altimate-code run "..." +``` + +When enabled, each turn exposes: + +- an always-on **core** set of essentials that are never trimmed (`bash`, `read`, `write`, `edit`, `glob`, `grep`, `list`, `task`, `todowrite`, `skill`), +- any tool already **referenced by an in-flight tool call** (so a mid-trajectory tool is never dropped), and +- the highest-scoring remaining tools by a deterministic lexical match against the turn's request, up to a fixed budget. + +Selection is deterministic and dependency-free; small tool sets are left untouched (nothing to gain). In internal benchmarks this cut input tokens by ~50% at an identical task-resolution rate. diff --git a/packages/opencode/src/session/llm.ts b/packages/opencode/src/session/llm.ts index b06112f2c6..d593c50e1b 100644 --- a/packages/opencode/src/session/llm.ts +++ b/packages/opencode/src/session/llm.ts @@ -13,6 +13,9 @@ import { } from "ai" import { mergeDeep, pipe } from "remeda" import { ProviderTransform } from "@/provider/transform" +// altimate_change start — tool retrieval +import { Retrieval } from "@/tool/retrieval" +// altimate_change end import { Config } from "@/config/config" import { Instance } from "@/project/instance" import type { Agent } from "@/agent/agent" @@ -179,6 +182,29 @@ export namespace LLM { } // altimate_change end + // altimate_change start — tool retrieval + // Expose only the relevant top-k tools this turn (flag-gated). Keeps the + // always-on core + any in-flight (referenced) tools; no-op for small sets. + if (Retrieval.enabled()) { + const lastUser = [...input.messages].reverse().find((m) => m.role === "user") + const c = lastUser?.content as any + const query = + typeof c === "string" + ? c + : Array.isArray(c) + ? c.map((p: any) => (typeof p === "string" ? p : (p?.text ?? ""))).join(" ") + : "" + const list = Object.entries(tools).map(([name, t]) => ({ name, description: (t as any)?.description })) + const keep = Retrieval.select(query, list, { keep: referencedTools }) + for (const name of Object.keys(tools)) { + // Never delete "invalid": it's the AI-SDK fallback tool the runtime relies + // on for malformed tool calls, not a user-facing tool, so it's exempt from + // retrieval rather than being listed in Retrieval.CORE. + if (name !== "invalid" && !keep.has(name)) delete tools[name] + } + } + // altimate_change end + return streamText({ onError(error) { l.error("stream error", { diff --git a/packages/opencode/src/tool/retrieval.ts b/packages/opencode/src/tool/retrieval.ts new file mode 100644 index 0000000000..f81161d215 --- /dev/null +++ b/packages/opencode/src/tool/retrieval.ts @@ -0,0 +1,93 @@ +/** + * Tool retrieval — pick a relevant subset of tools per turn. + * + * With ~78 tools, sending the full set every turn floods context and adds + * distractors that hurt tool SELECTION. This picks a relevant subset per turn: + * a fixed always-on CORE + lexically-ranked top-k of the rest, and NEVER drops a + * tool that's mid-trajectory (referenced by an in-flight tool call) — dropping + * those would corrupt the conversation. + * + * v1 is lexical (dependency-free, deterministic, testable). An embedding + + * cross-encoder rerank pass is a later enhancement; the `select` signature is + * stable so wiring doesn't change. + */ + +export namespace Retrieval { + /** + * Always-available agent essentials — never retrieved out. + * NOTE: every entry must be a REAL registered tool id (see tool/registry.ts). + * A name that isn't registered is silently skipped by `select` (the `all.has` + * guard), so it's harmless but dead — keep this list honest. There is no + * directory-listing tool registered (file discovery is `glob` / `bash ls`), + * so no "list"/"ls" entry here. + */ + export const CORE = [ + "bash", "read", "write", "edit", "glob", "grep", + "task", "todowrite", "skill", + ] + + export interface Tool { + name: string + description?: string + } + + export interface Options { + /** + * Target size of the exposed set. NOT a hard cap: core essentials + any + * forced `keep` (in-flight tools) are ALWAYS retained even if together they + * exceed `topk` — dropping a referenced/core tool would corrupt the turn. + * `topk` bounds how many extra lexically-ranked tools are added on top. + */ + topk?: number + /** names that MUST stay (e.g. tools referenced by in-flight tool calls). */ + keep?: Iterable + /** only retrieve when the tool count exceeds this (no-op for small sets). */ + minToolsToRetrieve?: number + } + + export function enabled(): boolean { + return process.env["ALTIMATE_TOOL_RETRIEVAL"] === "1" + } + + function score(query: string, t: Tool): number { + // Tokenize on alphanumerics + underscore so digits survive (e.g. "v2", "s3") + // and hyphenated names split into matchable parts (e.g. "dbt-schema-verify"). + const words = new Set(query.toLowerCase().match(/[a-z0-9_]+/g) ?? []) + const hay = (t.name + " " + (t.description ?? "")).toLowerCase() + let s = 0 + // length >= 3 so high-signal domain terms count (sql, dbt, pii, ddl, api, ssh); + // stopwords this short rarely appear in a tool name/description so add little noise. + for (const w of words) if (w.length >= 3 && hay.includes(w)) s += 1 + // small boost for a direct name mention + if (words.has(t.name.toLowerCase())) s += 3 + return s + } + + /** + * Return the SUBSET of tool names to expose this turn. Caller deletes the rest. + * Deterministic: core + forced-keep first, then highest-scoring others up to topk + * (ties broken by original order for stability). + */ + export function select(query: string, tools: Tool[], opts: Options = {}): Set { + const topk = opts.topk ?? 12 + const minToRetrieve = opts.minToolsToRetrieve ?? topk + const all = new Set(tools.map((t) => t.name)) + // No-op for small tool sets — nothing to gain. + if (tools.length <= minToRetrieve) return all + + const keep = new Set() + for (const n of opts.keep ?? []) if (all.has(n)) keep.add(n) + for (const n of CORE) if (all.has(n)) keep.add(n) + + const rest = tools.filter((t) => !keep.has(t.name)) + const ranked = rest + .map((t, i) => ({ name: t.name, s: score(query, t), i })) + .sort((a, b) => b.s - a.s || a.i - b.i) + + for (const r of ranked) { + if (keep.size >= topk) break + keep.add(r.name) + } + return keep + } +} diff --git a/packages/opencode/test/tool/retrieval.test.ts b/packages/opencode/test/tool/retrieval.test.ts new file mode 100644 index 0000000000..09e7835cca --- /dev/null +++ b/packages/opencode/test/tool/retrieval.test.ts @@ -0,0 +1,66 @@ +import { describe, expect, test } from "bun:test" +import { Retrieval } from "../../src/tool/retrieval" + +const TOOLS = [ + ...Retrieval.CORE.map((name) => ({ name })), + ...Array.from({ length: 20 }, (_, i) => ({ name: `warehouse_op${i}`, description: `warehouse operation ${i}` })), + { name: "dbt_run", description: "run dbt models build" }, + { name: "sql_execute", description: "execute SQL query against warehouse" }, +] + +describe("Retrieval.select", () => { + test("always keeps core tools", () => { + const sel = Retrieval.select("run the dbt models", TOOLS, { topk: 12 }) + expect(sel.has("bash")).toBe(true) + expect(sel.has("read")).toBe(true) + }) + + test("picks lexically relevant tools", () => { + expect(Retrieval.select("run the dbt models and build", TOOLS, { topk: 12 }).has("dbt_run")).toBe(true) + expect(Retrieval.select("execute a SQL query on the warehouse", TOOLS, { topk: 12 }).has("sql_execute")).toBe(true) + }) + + test("never drops in-flight (keep) tools, even if irrelevant", () => { + const sel = Retrieval.select("hello", TOOLS, { topk: 12, keep: ["warehouse_op19"] }) + expect(sel.has("warehouse_op19")).toBe(true) + }) + + test("no-op for small tool sets (returns all)", () => { + const small = [{ name: "a" }, { name: "b" }] + expect(Retrieval.select("x", small, { topk: 12 }).size).toBe(2) + }) + + test("CORE entries are all real (no phantom like the old 'list'/'ls')", () => { + // Regression for the review finding: CORE must not contain unregistered ids. + expect(Retrieval.CORE).not.toContain("list") + expect(Retrieval.CORE).not.toContain("ls") + expect(Retrieval.CORE).toContain("glob") + }) + + test("3-char domain tokens count toward score (length>=3, not >3)", () => { + // "sql" (len 3) must lexically match a sql tool's description. + const tools = [ + ...Array.from({ length: 20 }, (_, i) => ({ name: `op${i}`, description: `generic operation ${i}` })), + { name: "sql_execute", description: "execute SQL query against the warehouse" }, + ] + expect(Retrieval.select("sql", tools, { topk: 11 }).has("sql_execute")).toBe(true) + }) + + test("topk is not a hard cap: core + many in-flight tools all survive", () => { + // Documents the validated semantics — referenced/core tools are never dropped + // to honor topk; topk only bounds the extra ranked additions. + const inflight = Array.from({ length: 8 }, (_, i) => `warehouse_op${i}`) + const sel = Retrieval.select("hello", TOOLS, { topk: 12, keep: inflight }) + for (const n of inflight) expect(sel.has(n)).toBe(true) + for (const c of Retrieval.CORE) expect(sel.has(c)).toBe(true) // core also retained + }) + + test("enabled() reads the env flag", () => { + const prev = process.env["ALTIMATE_TOOL_RETRIEVAL"] + process.env["ALTIMATE_TOOL_RETRIEVAL"] = "1" + expect(Retrieval.enabled()).toBe(true) + delete process.env["ALTIMATE_TOOL_RETRIEVAL"] + expect(Retrieval.enabled()).toBe(false) + if (prev !== undefined) process.env["ALTIMATE_TOOL_RETRIEVAL"] = prev + }) +})