Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions crates/rag-rat-cli/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -285,13 +285,16 @@ pub(crate) enum OracleToolArg {
RustAnalyzer,
#[value(name = "scip-clang")]
ScipClang,
#[value(name = "scip-python")]
ScipPython,
}

impl OracleToolArg {
    /// Translates this CLI-level tool selector into the corresponding `rag_rat_core` oracle tool.
    ///
    /// Variants map one-to-one by name. The `match` deliberately has no wildcard arm, so a variant
    /// added to this enum without a mapping here is a compile error rather than a silent default.
    pub(crate) fn core(self) -> rag_rat_core::index::oracle::OracleTool {
        use rag_rat_core::index::oracle::OracleTool as CoreTool;

        match self {
            Self::RustAnalyzer => CoreTool::RustAnalyzer,
            Self::ScipClang => CoreTool::ScipClang,
            Self::ScipPython => CoreTool::ScipPython,
        }
    }
}
Expand Down
13 changes: 13 additions & 0 deletions crates/rag-rat-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,20 @@ fn spawn_detached_oracle_auto_run(config: &rag_rat_core::Config) {
None => return Ok(()),
}
};
// The languages this checkout actually indexes. Gating background runs to these (#176)
// stops the auto-run loop from invoking a backend whose language isn't present —
// e.g. scip-python installed but no Python target: it would index nothing, fail,
// the error would be swallowed with no `oracle_runs` row recorded, and the loop
// would retry the doomed run every poll.
let configured_languages: std::collections::HashSet<&str> =
config.targets.iter().map(|target| target.language.as_str()).collect();
for &tool in OracleTool::ALL {
// Skip a backend whose language this checkout doesn't index — never auto-run it here
// (the status registry stays broad; only background runs are gated).
let manifest = oracle::ToolManifest::for_tool(tool);
if !manifest.languages.iter().any(|lang| configured_languages.contains(lang)) {
continue;
}
// Cheap probe before any decision: an uninstalled tool can never run, so don't even
// read its run history.
if matches!(oracle::probe_oracle_tool(tool), oracle::ToolAvailability::Blocked { .. }) {
Expand Down
78 changes: 75 additions & 3 deletions crates/rag-rat-core/src/index/oracle/manifest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,15 @@ impl ToolManifest {
scripts/clang-tools/gen_compile_commands.py), or pass a pre-built \
index with `--scip <path>`.",
},
OracleTool::ScipPython => ToolManifest {
tool,
program: "scip-python",
languages: &["python"],
install_hint: "scip-python not found on PATH. Install it (e.g. `npm install -g \
@sourcegraph/scip-python`) AND install the project's dependencies \
(e.g. into a virtualenv) so imports resolve, or pass a pre-built \
index with `--scip <path>`.",
},
}
}

Expand Down Expand Up @@ -109,6 +118,13 @@ impl ToolManifest {
.output()
.is_ok_and(|output| output.status.success()),
OracleTool::ScipClang => true,
// scip-python emits via an `index` subcommand; `index --help` exiting 0 is the analog
// of rust-analyzer's `scip --help` capability check.
OracleTool::ScipPython => Command::new(self.program)
.arg("index")
.arg("--help")
.output()
.is_ok_and(|output| output.status.success()),
}
}

Expand All @@ -119,7 +135,11 @@ impl ToolManifest {
/// pre-built `--scip` path never reaches here.
pub fn prerequisite_blocked(&self, root: &Path) -> Option<String> {
match self.tool {
OracleTool::RustAnalyzer => None,
// scip-python's "deps must be installed" prerequisite has no single sentinel file to
// check (it's whatever the corpus `prepare` venv installs); a failed environment shows
// up as a near-zero moniker count the report health gate catches, so there's nothing to
// block on here.
OracleTool::RustAnalyzer | OracleTool::ScipPython => None,
OracleTool::ScipClang => (!root.join("compile_commands.json").exists()).then(|| {
format!(
"scip-clang requires a compile_commands.json at {} — generate one (e.g. `bear \
Expand Down Expand Up @@ -152,6 +172,32 @@ impl ToolManifest {
.arg(format!("--index-output-path={}", output.display()));
cmd
},
// scip-python indexes a working directory (not a source root arg) via its `index`
// subcommand. `--cwd <root>` is where it resolves the project + its installed deps;
// `--project-name` (the root's dir name) becomes the package component of in-corpus
// monikers, so a non-empty name is what lets `count_symbols_with_moniker` see them.
// `--project-version _` is PINNED (Codex on #176): scip-python otherwise defaults the
// version to the checkout's git revision, which is embedded in every SCIP symbol
// string, so every commit would churn all Python monikers — breaking
// moniker-anchored memory relocation (which resolves by exact moniker per
// tool). A constant version keeps a symbol's moniker stable across commits
// (and sidesteps scip-python's crash on a non-git checkout, where the
// git-rev default is undefined). `--output` is absolute, so it's unaffected
// by `--cwd`.
OracleTool::ScipPython => {
let project_name = root.file_name().and_then(|n| n.to_str()).unwrap_or("project");
let mut cmd = Command::new(self.program);
cmd.arg("index")
.arg("--project-name")
.arg(project_name)
Comment on lines +190 to +192

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Pin scip-python project version

This invocation never passes --project-version, so scip-python falls back to the checkout's git revision (the new docs note this behavior). Because rag-rat stores the full SCIP symbol string in logical_symbol_monikers and later resolves memories by exact moniker for the same tool, every commit changes all Python monikers even when symbols did not move, breaking the advertised stable memory relocation for scip-python runs. Pass a stable project version (for example the conventional _) when invoking the indexer.

Useful? React with 👍 / 👎.

.arg("--project-version")
.arg("_")
.arg("--cwd")
.arg(root)
.arg("--output")
.arg(output);
cmd
},
}
}
}
Expand All @@ -177,8 +223,8 @@ mod tests {
#[test]
fn every_tool_has_a_manifest_entry() {
// Exhaustive over the OracleTool registry: each variant must resolve to a manifest with a
// non-empty program + hint, so `oracle run`/`status` can always describe it. (One variant
// today; the `match` is the exhaustiveness guard a new variant trips.)
// non-empty program + hint, so `oracle run`/`status` can always describe it. (The `match`
// in `for_tool` is the exhaustiveness guard a new variant trips.)
for &tool in OracleTool::ALL {
let manifest = ToolManifest::for_tool(tool);
assert_eq!(manifest.tool, tool);
Expand Down Expand Up @@ -270,4 +316,30 @@ mod tests {
assert_eq!(cmd.get_program().to_string_lossy(), "rust-analyzer");
assert_eq!(args, vec!["scip", "/repo", "--output", "/tmp/out.scip"]);
}

#[test]
fn scip_python_indexes_a_cwd_with_a_project_name() {
    // Pins the exact argv built for scip-python:
    //   index --project-name <root basename> --project-version _ --cwd <root> --output <path>
    // The project name is taken from the last component of the root path, the project version is
    // the fixed placeholder `_`, and the root is passed via `--cwd` rather than positionally.
    // The manifest must also report no blocking prerequisite, even for a root that does not exist.
    let root = Path::new("/work/requests");
    let output = Path::new("/tmp/out.scip");

    let manifest = ToolManifest::for_tool(OracleTool::ScipPython);
    assert_eq!(manifest.program, "scip-python");
    assert_eq!(manifest.languages, &["python"]);

    let command = manifest.scip_command(root, output);
    let actual: Vec<String> =
        command.get_args().map(|arg| arg.to_string_lossy().into_owned()).collect();
    let expected = vec![
        "index",
        "--project-name",
        "requests",
        // Constant version: the argv must not vary with the checkout's revision.
        "--project-version",
        "_",
        "--cwd",
        "/work/requests",
        "--output",
        "/tmp/out.scip",
    ];
    assert_eq!(actual, expected);

    let missing_root = Path::new("/no/such/repo/xyzzy");
    assert!(manifest.prerequisite_blocked(missing_root).is_none());
}
}
29 changes: 28 additions & 1 deletion crates/rag-rat-core/src/index/oracle/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,27 @@ pub fn latest_run_tool_version(
store::latest_run_tool_version(conn, tool, commit_sha, worktree_id)
}

/// Lists each oracle tool that has at least one recorded run for this checkout, paired with the
/// `tool_version` of its latest run.
///
/// Tools are visited in [`OracleTool::ALL`] order; a tool for which the store reports no run at
/// `commit_sha` / `worktree_id` is simply omitted from the result. This is the multi-language
/// surfacing seam (#176): graph read paths are meant to iterate this list instead of hardcoding a
/// single backend, so a checkout indexed in several languages surfaces every backend's verdicts.
/// Because an edge belongs to exactly one language, the per-tool verdict sets are expected to be
/// disjoint and to merge without conflicts.
///
/// # Errors
///
/// Returns the first error raised by the underlying store lookup; tools after the failing one are
/// not queried.
pub fn latest_runs_in_scope(
    conn: &Connection,
    commit_sha: &str,
    worktree_id: &str,
) -> anyhow::Result<Vec<(OracleTool, String)>> {
    let mut in_scope = Vec::new();
    for tool in OracleTool::ALL.iter().copied() {
        let latest = store::latest_run_tool_version(conn, tool, commit_sha, worktree_id)?;
        // No run recorded for this tool in this checkout: nothing to surface.
        let Some(tool_version) = latest else {
            continue;
        };
        in_scope.push((tool, tool_version));
    }
    Ok(in_scope)
}

/// The `started_at` (Unix-epoch ms) of the most recent run for `tool` in the active checkout, or
/// `None` when no run exists — the staleness clock the background auto-fresh oracle compares
/// against the index's `indexed_at_ms`. See [`auto_run_decision`].
Expand Down Expand Up @@ -499,24 +520,30 @@ pub enum OracleTool {
/// database rather than a source root, and is the SCIP emitter directly (no `scip`
/// subcommand), so its probe + invocation differ from rust-analyzer's — see `ToolManifest`.
ScipClang,
/// `scip-python index` — Python (#164 B6). Resolves imports against the project's INSTALLED
/// dependencies, so the corpus must install them (a venv) first; an unresolved environment
/// shows up as a near-zero moniker count the report's health gate catches.
ScipPython,

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Route scip-python through graph surfacing

When users run rag-rat oracle run --tool scip-python on a Python repo, the verdicts are persisted under the new scip-python tool, but the graph read paths still only query OracleTool::RustAnalyzer in enrich_hops_with_oracle and compare_graph_to_scip (crates/rag-rat-core/src/index/query_api/graph.rs). That means the new backend can complete and show up in status while trace_callees/find_callers/SCIP comparisons never see its rows, so Python edges do not get the advertised compiler tier. Please route surfacing/comparison through the tool registry or select the oracle tool for the edge language before exposing this variant.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Count Python constructor references in recall

Enabling ScipPython sends Python runs through the existing recall-gap code, but `count_uncovered_calls` only counts SCIP symbols that `scip::symbol_is_callable` accepts, i.e. symbols ending in `).`. scip-python represents class constructor call references such as `ClientSession()` as references to the class/type symbol (`...ClientSession#`), while our Python extractor explicitly emits these as `CallsName`, so constructor calls that the heuristic misses never increment `oracle_only_calls`; this inflates Python recall and can make the new py-requests oracle report look healthier than it is. Please make the callable filter language/tool-aware before treating scip-python reports as compiler-grade metrics.

Useful? React with 👍 / 👎.

}

impl OracleTool {
/// Every known oracle tool, for "report on all tools" surfaces (`oracle status` with no
/// `--tool`). Later language backends (#72 Kotlin) extend this alongside the enum.
pub const ALL: &[OracleTool] = &[Self::RustAnalyzer, Self::ScipClang];
pub const ALL: &[OracleTool] = &[Self::RustAnalyzer, Self::ScipClang, Self::ScipPython];

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Filter auto-run tools by configured language

Adding ScipPython to OracleTool::ALL also feeds the MCP background auto-run loop, which iterates OracleTool::ALL without checking config.targets. In an auto-run repo where scip-python is installed but Python is not indexed, the server will still invoke the Python indexer against the checkout; if it fails, the error is swallowed and no oracle_runs row is recorded, so the loop retries on every poll. Keep the status registry broad, but gate background runs to tools whose languages are present in the config.

Useful? React with 👍 / 👎.


pub fn as_db_str(self) -> &'static str {
match self {
Self::RustAnalyzer => "rust-analyzer",
Self::ScipClang => "scip-clang",
Self::ScipPython => "scip-python",
}
}

/// Parses a tool name as produced by [`OracleTool::as_db_str`].
///
/// Matching is exact (case-sensitive, no trimming). Any string that is not one of the known
/// tool names yields `None` instead of an error, leaving the caller to decide how to treat it.
pub fn from_db_str(value: &str) -> Option<Self> {
    let tool = match value {
        "rust-analyzer" => Self::RustAnalyzer,
        "scip-clang" => Self::ScipClang,
        "scip-python" => Self::ScipPython,
        // Unknown name: not an oracle tool this build knows about.
        _ => return None,
    };
    Some(tool)
}
Expand Down
17 changes: 14 additions & 3 deletions crates/rag-rat-core/src/index/oracle/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -277,12 +277,23 @@ pub(crate) fn run(conn: &Connection, input: &OracleRunInput<'_>) -> anyhow::Resu
// join selected (reference-preferred, full containment), not a re-derived start-only match
// — on overlapping occurrences the two could pick different occurrences. Every
// verdict marks `matched_occurrences` (so it's never a recall gap); only a CALL
// (`calls_name`) edge also marks `covered_call_occurrences` (the recall numerator
// population — finding 1).
// (`calls_name`) edge whose matched SCIP symbol is itself callable also marks
// `covered_call_occurrences` (the recall numerator population — finding 1).
//
// The `symbol_is_callable` filter keeps the covered side over the SAME population as the
// uncovered side (`count_uncovered_calls`), which already requires it (#176 review):
// without it, a constructor call our extractor emits as `calls_name` but SCIP
// represents as a reference to the CLASS symbol (`…Foo#`, not `…).`) — e.g.
// scip-python's `Foo()` — counted as covered while a MISSED such call could never
// be counted as oracle-only, inflating recall. Requiring callability on both sides
// confines recall to method/function calls (the `).` kind) uniformly;
// constructor-via-class-symbol refs are out of the recall population because SCIP
// can't distinguish a constructor *call* from a type *annotation* of the same
// `Foo#` symbol.
let (occ_start, occ_end) = verdict.matched_occurrence;
let key = (candidate.source_path.clone(), occ_start, occ_end);
matched_occurrences.insert(key.clone());
if candidate.edge_kind == CALL_EDGE_KIND {
if candidate.edge_kind == CALL_EDGE_KIND && scip::symbol_is_callable(&verdict.scip_symbol) {
covered_call_occurrences.insert(key);
}

Expand Down
57 changes: 57 additions & 0 deletions crates/rag-rat-core/src/index/oracle/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2228,6 +2228,63 @@ fn covered_side_ignores_references_type_confirmation() {
assert_eq!(m.covered_calls, 1);
}

/// #176 (covered side): a `calls_name` edge only counts towards `covered_calls` when the SCIP
/// symbol its verdict matched is CALLABLE (ends in `).`) — the same filter `count_uncovered_calls`
/// applies on the uncovered side. A constructor-style call matched to a CLASS symbol (`…Thing#`,
/// which is how scip-python records `Thing()`, while our extractor emits it as `CallsName`) must
/// still receive a verdict but must NOT be counted as covered. If it were, the covered and
/// uncovered sides would measure different populations: a MISSED constructor is invisible to the
/// callable-filtered uncovered side and could never offset a covered one, inflating recall.
#[test]
fn covered_side_requires_a_callable_scip_symbol() {
    let h = Harness::new();

    // Corpus: `caller.rs` holds a plain call `target` at bytes 14..20 and a constructor-style
    // call `Thing` at 24..29; `defs.rs` defines both targets.
    let caller = h.add_file("caller.rs", "fn caller() { target(); Thing(); }\n");
    let defs = h.add_file("defs.rs", "fn target() {}\nstruct Thing;\n");
    let target_def = h.add_symbol(defs, "target", 3, 9);
    let thing_def = h.add_symbol(defs, "Thing", 22, 27);

    // Both edges use the same kind: the extractor models a constructor call as a call edge too.
    let call_edge = h.add_edge(caller, "target", 14, 20, "Exact", Some(target_def));
    let ctor_edge = h.add_edge(caller, "Thing", 24, 29, "Exact", Some(thing_def));

    // Callable symbol: descriptor ends in `).`.
    let callable_symbol = "scip-rust crate v1 `target`().";
    // Class/type symbol: descriptor ends in `#`, so it is not callable.
    let class_symbol = "scip-rust crate v1 `Thing`#";
    let reference_role = SymbolRole::UnspecifiedSymbolRole as i32;
    let definition_role = SymbolRole::Definition as i32;

    let caller_doc = Document {
        relative_path: "caller.rs".to_string(),
        occurrences: vec![
            occurrence(0, 14, 20, callable_symbol, reference_role),
            occurrence(0, 24, 29, class_symbol, reference_role),
        ],
        position_encoding: EnumOrUnknown::new(PositionEncoding::UTF8CodeUnitOffsetFromLineStart),
        ..Default::default()
    };
    let defs_doc = Document {
        relative_path: "defs.rs".to_string(),
        occurrences: vec![
            occurrence(0, 3, 9, callable_symbol, definition_role),
            occurrence(1, 7, 12, class_symbol, definition_role),
        ],
        position_encoding: EnumOrUnknown::new(PositionEncoding::UTF8CodeUnitOffsetFromLineStart),
        ..Default::default()
    };
    // Document order is kept as caller first, definitions second.
    let index = Index { documents: vec![caller_doc, defs_doc], ..Default::default() };
    let bytes = index.write_to_bytes().unwrap();

    let report =
        run_oracle(&h.conn, TOOL, VERSION, COMMIT, WORKTREE, &bytes, h.root(), None, None).unwrap();

    // Each edge joins an occurrence and resolves in-corpus, so both get a verdict…
    assert!(h.verdict(call_edge).is_some(), "call edge verdicted");
    assert!(h.verdict(ctor_edge).is_some(), "constructor edge verdicted");
    // …yet only the edge matched to the callable symbol is tallied as covered. The class-symbol
    // edge is excluded from both sides of the recall population, so no phantom gap appears either.
    assert_eq!(report.covered_calls, 1, "constructor (class symbol) must NOT inflate covered");
    assert_eq!(report.oracle_only_calls, 0);
}

/// Finding 2: a candidate whose recorded `file_sha` no longer matches the disk bytes (content drift
/// between the index build and the `.scip`) is SKIPPED — no verdict is emitted from mismatched
/// content — and tallied in `skipped_drifted`. The same edge, with a matching `file_sha`, IS
Expand Down
Loading
Loading