-
Notifications
You must be signed in to change notification settings - Fork 86
feat: per-turn tool retrieval (trim tool-definition context flood) #858
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
278a3fa
e6b70c6
38f8737
0729fc6
71db185
c770459
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,93 @@ | ||||||||||||||
| /** | ||||||||||||||
| * Tool retrieval — pick a relevant subset of tools per turn. | ||||||||||||||
| * | ||||||||||||||
| * With ~78 tools, sending the full set every turn floods context and adds | ||||||||||||||
| * distractors that hurt tool SELECTION. This picks a relevant subset per turn: | ||||||||||||||
| * a fixed always-on CORE + lexically-ranked top-k of the rest, and NEVER drops a | ||||||||||||||
| * tool that's mid-trajectory (referenced by an in-flight tool call) — dropping | ||||||||||||||
| * those would corrupt the conversation. | ||||||||||||||
| * | ||||||||||||||
| * v1 is lexical (dependency-free, deterministic, testable). An embedding + | ||||||||||||||
| * cross-encoder rerank pass is a later enhancement; the `select` signature is | ||||||||||||||
| * stable so wiring doesn't change. | ||||||||||||||
| */ | ||||||||||||||
|
|
||||||||||||||
| export namespace Retrieval { | ||||||||||||||
| /** | ||||||||||||||
| * Always-available agent essentials — never retrieved out. | ||||||||||||||
| * NOTE: every entry must be a REAL registered tool id (see tool/registry.ts). | ||||||||||||||
| * A name that isn't registered is silently skipped by `select` (the `all.has` | ||||||||||||||
| * guard), so it's harmless but dead — keep this list honest. There is no | ||||||||||||||
| * directory-listing tool registered (file discovery is `glob` / `bash ls`), | ||||||||||||||
| * so no "list"/"ls" entry here. | ||||||||||||||
| */ | ||||||||||||||
| export const CORE = [ | ||||||||||||||
| "bash", "read", "write", "edit", "glob", "grep", | ||||||||||||||
| "task", "todowrite", "skill", | ||||||||||||||
| ] | ||||||||||||||
|
coderabbitai[bot] marked this conversation as resolved.
|
||||||||||||||
|
|
||||||||||||||
| export interface Tool { | ||||||||||||||
| name: string | ||||||||||||||
| description?: string | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| export interface Options { | ||||||||||||||
| /** | ||||||||||||||
| * Target size of the exposed set. NOT a hard cap: core essentials + any | ||||||||||||||
| * forced `keep` (in-flight tools) are ALWAYS retained even if together they | ||||||||||||||
| * exceed `topk` — dropping a referenced/core tool would corrupt the turn. | ||||||||||||||
| * `topk` bounds how many extra lexically-ranked tools are added on top. | ||||||||||||||
| */ | ||||||||||||||
| topk?: number | ||||||||||||||
| /** names that MUST stay (e.g. tools referenced by in-flight tool calls). */ | ||||||||||||||
| keep?: Iterable<string> | ||||||||||||||
| /** only retrieve when the tool count exceeds this (no-op for small sets). */ | ||||||||||||||
| minToolsToRetrieve?: number | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| export function enabled(): boolean { | ||||||||||||||
| return process.env["ALTIMATE_TOOL_RETRIEVAL"] === "1" | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| function score(query: string, t: Tool): number { | ||||||||||||||
| // Tokenize on alphanumerics + underscore so digits survive (e.g. "v2", "s3") | ||||||||||||||
| // and hyphenated names split into matchable parts (e.g. "dbt-schema-verify"). | ||||||||||||||
| const words = new Set(query.toLowerCase().match(/[a-z0-9_]+/g) ?? []) | ||||||||||||||
| const hay = (t.name + " " + (t.description ?? "")).toLowerCase() | ||||||||||||||
|
Comment on lines
+52
to
+56
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [🟠 MEDIUM] The regular expression Consider including numbers and hyphens in the regular expression. Suggested change:
Suggested change
|
||||||||||||||
| let s = 0 | ||||||||||||||
| // length >= 3 so high-signal domain terms count (sql, dbt, pii, ddl, api, ssh); | ||||||||||||||
| // stopwords this short rarely appear in a tool name/description so add little noise. | ||||||||||||||
| for (const w of words) if (w.length >= 3 && hay.includes(w)) s += 1 | ||||||||||||||
| // small boost for a direct name mention | ||||||||||||||
| if (words.has(t.name.toLowerCase())) s += 3 | ||||||||||||||
| return s | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| /** | ||||||||||||||
| * Return the SUBSET of tool names to expose this turn. Caller deletes the rest. | ||||||||||||||
| * Deterministic: core + forced-keep first, then highest-scoring others up to topk | ||||||||||||||
| * (ties broken by original order for stability). | ||||||||||||||
| */ | ||||||||||||||
| export function select(query: string, tools: Tool[], opts: Options = {}): Set<string> { | ||||||||||||||
| const topk = opts.topk ?? 12 | ||||||||||||||
| const minToRetrieve = opts.minToolsToRetrieve ?? topk | ||||||||||||||
|
Comment on lines
+71
to
+73
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [🟠 MEDIUM] The default Consider increasing the default Suggested change:
Suggested change
|
||||||||||||||
| const all = new Set(tools.map((t) => t.name)) | ||||||||||||||
| // No-op for small tool sets — nothing to gain. | ||||||||||||||
| if (tools.length <= minToRetrieve) return all | ||||||||||||||
|
|
||||||||||||||
| const keep = new Set<string>() | ||||||||||||||
| for (const n of opts.keep ?? []) if (all.has(n)) keep.add(n) | ||||||||||||||
| for (const n of CORE) if (all.has(n)) keep.add(n) | ||||||||||||||
|
|
||||||||||||||
| const rest = tools.filter((t) => !keep.has(t.name)) | ||||||||||||||
| const ranked = rest | ||||||||||||||
| .map((t, i) => ({ name: t.name, s: score(query, t), i })) | ||||||||||||||
| .sort((a, b) => b.s - a.s || a.i - b.i) | ||||||||||||||
|
|
||||||||||||||
| for (const r of ranked) { | ||||||||||||||
| if (keep.size >= topk) break | ||||||||||||||
| keep.add(r.name) | ||||||||||||||
| } | ||||||||||||||
| return keep | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,66 @@ | ||
| import { describe, expect, test } from "bun:test" | ||
| import { Retrieval } from "../../src/tool/retrieval" | ||
|
|
||
| const TOOLS = [ | ||
| ...Retrieval.CORE.map((name) => ({ name })), | ||
| ...Array.from({ length: 20 }, (_, i) => ({ name: `warehouse_op${i}`, description: `warehouse operation ${i}` })), | ||
| { name: "dbt_run", description: "run dbt models build" }, | ||
| { name: "sql_execute", description: "execute SQL query against warehouse" }, | ||
| ] | ||
|
|
||
| describe("Retrieval.select", () => { | ||
| test("always keeps core tools", () => { | ||
| const sel = Retrieval.select("run the dbt models", TOOLS, { topk: 12 }) | ||
| expect(sel.has("bash")).toBe(true) | ||
| expect(sel.has("read")).toBe(true) | ||
| }) | ||
|
|
||
| test("picks lexically relevant tools", () => { | ||
| expect(Retrieval.select("run the dbt models and build", TOOLS, { topk: 12 }).has("dbt_run")).toBe(true) | ||
| expect(Retrieval.select("execute a SQL query on the warehouse", TOOLS, { topk: 12 }).has("sql_execute")).toBe(true) | ||
| }) | ||
|
|
||
| test("never drops in-flight (keep) tools, even if irrelevant", () => { | ||
| const sel = Retrieval.select("hello", TOOLS, { topk: 12, keep: ["warehouse_op19"] }) | ||
| expect(sel.has("warehouse_op19")).toBe(true) | ||
| }) | ||
|
|
||
| test("no-op for small tool sets (returns all)", () => { | ||
| const small = [{ name: "a" }, { name: "b" }] | ||
| expect(Retrieval.select("x", small, { topk: 12 }).size).toBe(2) | ||
| }) | ||
|
|
||
| test("CORE entries are all real (no phantom like the old 'list'/'ls')", () => { | ||
| // Regression for the review finding: CORE must not contain unregistered ids. | ||
| expect(Retrieval.CORE).not.toContain("list") | ||
| expect(Retrieval.CORE).not.toContain("ls") | ||
| expect(Retrieval.CORE).toContain("glob") | ||
| }) | ||
|
|
||
| test("3-char domain tokens count toward score (length>=3, not >3)", () => { | ||
| // "sql" (len 3) must lexically match a sql tool's description. | ||
| const tools = [ | ||
| ...Array.from({ length: 20 }, (_, i) => ({ name: `op${i}`, description: `generic operation ${i}` })), | ||
| { name: "sql_execute", description: "execute SQL query against the warehouse" }, | ||
| ] | ||
| expect(Retrieval.select("sql", tools, { topk: 11 }).has("sql_execute")).toBe(true) | ||
| }) | ||
|
|
||
| test("topk is not a hard cap: core + many in-flight tools all survive", () => { | ||
| // Documents the validated semantics — referenced/core tools are never dropped | ||
| // to honor topk; topk only bounds the extra ranked additions. | ||
| const inflight = Array.from({ length: 8 }, (_, i) => `warehouse_op${i}`) | ||
| const sel = Retrieval.select("hello", TOOLS, { topk: 12, keep: inflight }) | ||
| for (const n of inflight) expect(sel.has(n)).toBe(true) | ||
| for (const c of Retrieval.CORE) expect(sel.has(c)).toBe(true) // core also retained | ||
| }) | ||
|
|
||
| test("enabled() reads the env flag", () => { | ||
| const prev = process.env["ALTIMATE_TOOL_RETRIEVAL"] | ||
| process.env["ALTIMATE_TOOL_RETRIEVAL"] = "1" | ||
| expect(Retrieval.enabled()).toBe(true) | ||
| delete process.env["ALTIMATE_TOOL_RETRIEVAL"] | ||
| expect(Retrieval.enabled()).toBe(false) | ||
| if (prev !== undefined) process.env["ALTIMATE_TOOL_RETRIEVAL"] = prev | ||
| }) | ||
| }) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[🔵 LOW] According to the code quality checklist:
queryassignment is prohibited. Please refactor this to useif-elsestatements.anytype (c as any,p: any,t as any). If they are strictly necessary due to third-party SDK typing issues, please provide comments explaining the reason.Suggested change: