diff --git a/crates/rag-rat-cli/src/cli.rs b/crates/rag-rat-cli/src/cli.rs index 858fa6b..715c137 100644 --- a/crates/rag-rat-cli/src/cli.rs +++ b/crates/rag-rat-cli/src/cli.rs @@ -285,6 +285,8 @@ pub(crate) enum OracleToolArg { RustAnalyzer, #[value(name = "scip-clang")] ScipClang, + #[value(name = "scip-python")] + ScipPython, } impl OracleToolArg { @@ -292,6 +294,7 @@ impl OracleToolArg { match self { OracleToolArg::RustAnalyzer => rag_rat_core::index::oracle::OracleTool::RustAnalyzer, OracleToolArg::ScipClang => rag_rat_core::index::oracle::OracleTool::ScipClang, + OracleToolArg::ScipPython => rag_rat_core::index::oracle::OracleTool::ScipPython, } } } diff --git a/crates/rag-rat-cli/src/main.rs b/crates/rag-rat-cli/src/main.rs index 153f488..5a188a9 100644 --- a/crates/rag-rat-cli/src/main.rs +++ b/crates/rag-rat-cli/src/main.rs @@ -199,7 +199,20 @@ fn spawn_detached_oracle_auto_run(config: &rag_rat_core::Config) { None => return Ok(()), } }; + // The languages this checkout actually indexes. Gating background runs to these (#176) + // stops the auto-run loop from invoking a backend whose language isn't present — + // e.g. scip-python installed but no Python target: it would index nothing, fail, + // the error would be swallowed with no `oracle_runs` row recorded, and the loop + // would retry the doomed run every poll. + let configured_languages: std::collections::HashSet<&str> = + config.targets.iter().map(|target| target.language.as_str()).collect(); for &tool in OracleTool::ALL { + // Skip a backend whose language this checkout doesn't index — never auto-run it here + // (the status registry stays broad; only background runs are gated). + let manifest = oracle::ToolManifest::for_tool(tool); + if !manifest.languages.iter().any(|lang| configured_languages.contains(lang)) { + continue; + } // Cheap probe before any decision: an uninstalled tool can never run, so don't even // read its run history. 
if matches!(oracle::probe_oracle_tool(tool), oracle::ToolAvailability::Blocked { .. }) { diff --git a/crates/rag-rat-core/src/index/oracle/manifest.rs b/crates/rag-rat-core/src/index/oracle/manifest.rs index bd2678f..ff342f3 100644 --- a/crates/rag-rat-core/src/index/oracle/manifest.rs +++ b/crates/rag-rat-core/src/index/oracle/manifest.rs @@ -68,6 +68,15 @@ impl ToolManifest { scripts/clang-tools/gen_compile_commands.py), or pass a pre-built \ index with `--scip `.", }, + OracleTool::ScipPython => ToolManifest { + tool, + program: "scip-python", + languages: &["python"], + install_hint: "scip-python not found on PATH. Install it (e.g. `npm install -g \ + @sourcegraph/scip-python`) AND install the project's dependencies \ + (e.g. into a virtualenv) so imports resolve, or pass a pre-built \ + index with `--scip `.", + }, } } @@ -109,6 +118,13 @@ impl ToolManifest { .output() .is_ok_and(|output| output.status.success()), OracleTool::ScipClang => true, + // scip-python emits via an `index` subcommand; `index --help` exiting 0 is the analog + // of rust-analyzer's `scip --help` capability check. + OracleTool::ScipPython => Command::new(self.program) + .arg("index") + .arg("--help") + .output() + .is_ok_and(|output| output.status.success()), } } @@ -119,7 +135,11 @@ impl ToolManifest { /// pre-built `--scip` path never reaches here. pub fn prerequisite_blocked(&self, root: &Path) -> Option { match self.tool { - OracleTool::RustAnalyzer => None, + // scip-python's "deps must be installed" prerequisite has no single sentinel file to + // check (it's whatever the corpus `prepare` venv installs); a failed environment shows + // up as a near-zero moniker count the report health gate catches, so there's nothing to + // block on here. + OracleTool::RustAnalyzer | OracleTool::ScipPython => None, OracleTool::ScipClang => (!root.join("compile_commands.json").exists()).then(|| { format!( "scip-clang requires a compile_commands.json at {} — generate one (e.g. 
`bear \ @@ -152,6 +172,32 @@ impl ToolManifest { .arg(format!("--index-output-path={}", output.display())); cmd }, + // scip-python indexes a working directory (not a source root arg) via its `index` + // subcommand. `--cwd ` is where it resolves the project + its installed deps; + // `--project-name` (the root's dir name) becomes the package component of in-corpus + // monikers, so a non-empty name is what lets `count_symbols_with_moniker` see them. + // `--project-version _` is PINNED (Codex on #176): scip-python otherwise defaults the + // version to the checkout's git revision, which is embedded in every SCIP symbol + // string, so every commit would churn all Python monikers — breaking + // moniker-anchored memory relocation (which resolves by exact moniker per + // tool). A constant version keeps a symbol's moniker stable across commits + // (and sidesteps scip-python's crash on a non-git checkout, where the + // git-rev default is undefined). `--output` is absolute, so it's unaffected + // by `--cwd`. + OracleTool::ScipPython => { + let project_name = root.file_name().and_then(|n| n.to_str()).unwrap_or("project"); + let mut cmd = Command::new(self.program); + cmd.arg("index") + .arg("--project-name") + .arg(project_name) + .arg("--project-version") + .arg("_") + .arg("--cwd") + .arg(root) + .arg("--output") + .arg(output); + cmd + }, } } } @@ -177,8 +223,8 @@ mod tests { #[test] fn every_tool_has_a_manifest_entry() { // Exhaustive over the OracleTool registry: each variant must resolve to a manifest with a - // non-empty program + hint, so `oracle run`/`status` can always describe it. (One variant - // today; the `match` is the exhaustiveness guard a new variant trips.) + // non-empty program + hint, so `oracle run`/`status` can always describe it. (The `match` + // in `for_tool` is the exhaustiveness guard a new variant trips.) 
/// Pins the `scip-python` invocation built by `ToolManifest::scip_command`:
/// `scip-python index --project-name <name> --project-version _ --cwd <root> --output <path>`.
///
/// * `--project-name` is the root's directory name (`requests` for `/work/requests`).
/// * `--project-version` is pinned to the constant `_` rather than left to the tool's default.
/// * `--cwd` is the checkout root and `--output` is the path handed in by the caller.
///
/// Also pins that `scip-python` declares no on-disk prerequisite: `prerequisite_blocked` returns
/// `None` even for a root that does not exist.
#[test]
fn scip_python_indexes_a_cwd_with_a_project_name() {
    let manifest = ToolManifest::for_tool(OracleTool::ScipPython);
    assert_eq!(manifest.program, "scip-python");
    assert_eq!(manifest.languages, &["python"]);

    let cmd = manifest.scip_command(Path::new("/work/requests"), Path::new("/tmp/out.scip"));
    // Same check the rust-analyzer command test makes: the spawned binary must be the manifest's
    // program, not merely a field that happens to carry the right name.
    assert_eq!(cmd.get_program().to_string_lossy(), "scip-python");

    let args: Vec<_> = cmd.get_args().map(|a| a.to_string_lossy().into_owned()).collect();
    assert_eq!(args, vec![
        "index",
        "--project-name",
        "requests",
        // Pinned constant version (Codex #176): keeps monikers stable across commits.
        "--project-version",
        "_",
        "--cwd",
        "/work/requests",
        "--output",
        "/tmp/out.scip",
    ]);

    // No sentinel file is required for scip-python, so a missing root must not block the run.
    assert!(manifest.prerequisite_blocked(Path::new("/no/such/repo/xyzzy")).is_none());
}
The multi-language surfacing seam (#176): the graph read paths +/// (`enrich_hops_with_oracle`, `compare_graph_to_scip`) iterate THIS rather than hardcoding +/// `RustAnalyzer`, so a repo indexed in several languages surfaces each backend's verdicts on its +/// own edges. An edge belongs to one language, so at most one tool ever has a verdict for it — the +/// per-tool verdict sets are disjoint and merge cleanly. Tools with no run in scope are skipped. +pub fn latest_runs_in_scope( + conn: &Connection, + commit_sha: &str, + worktree_id: &str, +) -> anyhow::Result> { + let mut runs = Vec::new(); + for &tool in OracleTool::ALL { + if let Some(version) = store::latest_run_tool_version(conn, tool, commit_sha, worktree_id)? + { + runs.push((tool, version)); + } + } + Ok(runs) +} + /// The `started_at` (Unix-epoch ms) of the most recent run for `tool` in the active checkout, or /// `None` when no run exists — the staleness clock the background auto-fresh oracle compares /// against the index's `indexed_at_ms`. See [`auto_run_decision`]. @@ -499,17 +520,22 @@ pub enum OracleTool { /// database rather than a source root, and is the SCIP emitter directly (no `scip` /// subcommand), so its probe + invocation differ from rust-analyzer's — see `ToolManifest`. ScipClang, + /// `scip-python index` — Python (#164 B6). Resolves imports against the project's INSTALLED + /// dependencies, so the corpus must install them (a venv) first; an unresolved environment + /// shows up as a near-zero moniker count the report's health gate catches. + ScipPython, } impl OracleTool { /// Every known oracle tool, for "report on all tools" surfaces (`oracle status` with no /// `--tool`). Later language backends (#72 Kotlin) extend this alongside the enum. 
- pub const ALL: &[OracleTool] = &[Self::RustAnalyzer, Self::ScipClang]; + pub const ALL: &[OracleTool] = &[Self::RustAnalyzer, Self::ScipClang, Self::ScipPython]; pub fn as_db_str(self) -> &'static str { match self { Self::RustAnalyzer => "rust-analyzer", Self::ScipClang => "scip-clang", + Self::ScipPython => "scip-python", } } @@ -517,6 +543,7 @@ impl OracleTool { match value { "rust-analyzer" => Some(Self::RustAnalyzer), "scip-clang" => Some(Self::ScipClang), + "scip-python" => Some(Self::ScipPython), _ => None, } } diff --git a/crates/rag-rat-core/src/index/oracle/run.rs b/crates/rag-rat-core/src/index/oracle/run.rs index 345953d..fc5aa74 100644 --- a/crates/rag-rat-core/src/index/oracle/run.rs +++ b/crates/rag-rat-core/src/index/oracle/run.rs @@ -277,12 +277,23 @@ pub(crate) fn run(conn: &Connection, input: &OracleRunInput<'_>) -> anyhow::Resu // join selected (reference-preferred, full containment), not a re-derived start-only match // — on overlapping occurrences the two could pick different occurrences. Every // verdict marks `matched_occurrences` (so it's never a recall gap); only a CALL - // (`calls_name`) edge also marks `covered_call_occurrences` (the recall numerator - // population — finding 1). + // (`calls_name`) edge whose matched SCIP symbol is itself callable also marks + // `covered_call_occurrences` (the recall numerator population — finding 1). + // + // The `symbol_is_callable` filter keeps the covered side over the SAME population as the + // uncovered side (`count_uncovered_calls`), which already requires it (#176 review): + // without it, a constructor call our extractor emits as `calls_name` but SCIP + // represents as a reference to the CLASS symbol (`…Foo#`, not `…).`) — e.g. + // scip-python's `Foo()` — counted as covered while a MISSED such call could never + // be counted as oracle-only, inflating recall. 
/// #176, covered side of the recall metric: a `calls_name` edge only counts toward
/// `covered_calls` when the SCIP symbol its verdict matched is callable (descriptor ends `).`).
///
/// Our extractor emits a constructor call such as `Thing()` as `calls_name`, while SCIP may record
/// it as a reference to the class symbol (`…Thing#`). This test pins that such an edge still gets
/// a verdict, is left out of `covered_calls`, and does not show up as an oracle-only call either.
#[test]
fn covered_side_requires_a_callable_scip_symbol() {
    let harness = Harness::new();

    // `caller.rs`: a function call `target` at 14..20 and a constructor call `Thing` at 24..29.
    let caller_file = harness.add_file("caller.rs", "fn caller() { target(); Thing(); }\n");
    let defs_file = harness.add_file("defs.rs", "fn target() {}\nstruct Thing;\n");
    let target_id = harness.add_symbol(defs_file, "target", 3, 9);
    let thing_id = harness.add_symbol(defs_file, "Thing", 22, 27);

    // Both edges are `calls_name`: the extractor does not tell a constructor from a plain call.
    let call_edge = harness.add_edge(caller_file, "target", 14, 20, "Exact", Some(target_id));
    let ctor_edge = harness.add_edge(caller_file, "Thing", 24, 29, "Exact", Some(thing_id));

    // Callable symbol: descriptor ends in `).`.
    let call_sym = "scip-rust crate v1 `target`().";
    // Class symbol: descriptor ends in `#`, so it is not callable.
    let class_sym = "scip-rust crate v1 `Thing`#";

    let utf8_offsets =
        || EnumOrUnknown::new(PositionEncoding::UTF8CodeUnitOffsetFromLineStart);
    let reference = SymbolRole::UnspecifiedSymbolRole as i32;
    let definition = SymbolRole::Definition as i32;

    // References at the two callsites.
    let caller_doc = Document {
        relative_path: "caller.rs".to_string(),
        occurrences: vec![
            occurrence(0, 14, 20, call_sym, reference),
            occurrence(0, 24, 29, class_sym, reference),
        ],
        position_encoding: utf8_offsets(),
        ..Default::default()
    };
    // Definitions the references resolve to, both inside the corpus.
    let defs_doc = Document {
        relative_path: "defs.rs".to_string(),
        occurrences: vec![
            occurrence(0, 3, 9, call_sym, definition),
            occurrence(1, 7, 12, class_sym, definition),
        ],
        position_encoding: utf8_offsets(),
        ..Default::default()
    };
    let index = Index { documents: vec![caller_doc, defs_doc], ..Default::default() };
    let bytes = index.write_to_bytes().unwrap();

    let report = run_oracle(
        &harness.conn,
        TOOL,
        VERSION,
        COMMIT,
        WORKTREE,
        &bytes,
        harness.root(),
        None,
        None,
    )
    .unwrap();

    // Each edge joins an occurrence and resolves in-corpus, so each one receives a verdict.
    assert!(harness.verdict(call_edge).is_some(), "call edge verdicted");
    assert!(harness.verdict(ctor_edge).is_some(), "constructor edge verdicted");
    // Only the callable-symbol call is counted as covered, and nothing is reported as missed.
    assert_eq!(report.covered_calls, 1, "constructor (class symbol) must NOT inflate covered");
    assert_eq!(report.oracle_only_calls, 0);
}
return Ok(false); - }; + } let edge_ids = hops.iter().map(|hop| hop.edge_id).collect::>(); - let verdicts = oracle::current_oracle_verdicts_for_edges( - self.storage.connection(), - tool, - &tool_version, - &self.active_commit_sha, - &self.active_worktree_id, - &edge_ids, - )?; + let mut verdicts = std::collections::HashMap::new(); + for (tool, tool_version) in &runs { + verdicts.extend(oracle::current_oracle_verdicts_for_edges( + conn, + *tool, + tool_version, + &self.active_commit_sha, + &self.active_worktree_id, + &edge_ids, + )?); + } if verdicts.is_empty() { return Ok(false); } @@ -428,17 +432,16 @@ impl IndexDatabase { pub fn compare_graph_to_scip( &self, ) -> anyhow::Result { - let tool = oracle::OracleTool::RustAnalyzer; let conn = self.storage.connection(); - let tool_version = oracle::latest_run_tool_version( - conn, - tool, - &self.active_commit_sha, - &self.active_worktree_id, - )?; + // Compare against EVERY backend with a run in this checkout, not just rust-analyzer (#176): + // a mixed-language repo's contradictions span tools (a C edge under scip-clang, a Python + // edge under scip-python). Verdict sets are disjoint by edge language, so aggregating is a + // plain concatenation. 
+ let runs = + oracle::latest_runs_in_scope(conn, &self.active_commit_sha, &self.active_worktree_id)?; let mut summary = crate::query::graph::CompareGraphScipSummary::default(); let mut contradictions = Vec::new(); - let Some(version) = tool_version.clone() else { + if runs.is_empty() { summary.no_oracle_data = true; summary.warnings.push( "no oracle run for this checkout; run `rag-rat oracle run` to populate compiler \ @@ -447,7 +450,7 @@ impl IndexDatabase { ); return Ok(crate::query::graph::CompareGraphScipReport { query: crate::query::graph::CompareGraphScipQuery { - tool: tool.as_db_str().to_string(), + tool: String::new(), tool_version: None, commit_sha: self.active_commit_sha.clone(), worktree_id: self.active_worktree_id.clone(), @@ -455,15 +458,51 @@ impl IndexDatabase { summary, contradictions, }); - }; - let comparisons = oracle::current_oracle_comparisons( - conn, - tool, - &version, - &self.active_commit_sha, - &self.active_worktree_id, - )?; - summary.verdicts_examined = u64::try_from(comparisons.len()).unwrap_or(u64::MAX); + } + for (tool, version) in &runs { + let comparisons = oracle::current_oracle_comparisons( + conn, + *tool, + version, + &self.active_commit_sha, + &self.active_worktree_id, + )?; + summary.verdicts_examined += u64::try_from(comparisons.len()).unwrap_or(u64::MAX); + for comparison in comparisons { + if comparison.kind != oracle::OracleResolutionKind::Contradict { + continue; + } + contradictions.push(crate::query::graph::GraphScipContradiction { + edge_id: comparison.edge_id, + edge_kind: comparison.edge_kind, + heuristic_confidence: crate::query::graph::normalize_confidence( + &comparison.heuristic_confidence, + ) + .to_string(), + heuristic_target: comparison.heuristic_target, + callee_name: comparison.callee_name, + // Label `resolved-external` ONLY for a contradiction the compiler resolved + // OUTSIDE the corpus (`resolved_symbol_id IS NULL`). 
A Rust SCIP symbol carries + // a crate/package component even for the LOCAL crate (`scip-rust crate + // held-mini …`), so deriving the label from `scip_symbol` + // alone would mislabel an IN-CORPUS contradiction (the + // compiler resolved to a *different* in-corpus symbol) as + // `resolved-external()` (#82 finding 1). An in-corpus + // contradiction is a same-corpus disagreement, not an external placement. + resolved_external: comparison + .resolved_symbol_id + .is_none() + .then(|| resolved_external_label(&comparison.scip_symbol)) + .flatten(), + scip_symbol: comparison.scip_symbol, + callsite: Some(crate::query::graph::Callsite { + path: comparison.callsite_path, + line: comparison.callsite_line, + span: [comparison.callsite_line, comparison.callsite_line], + }), + }); + } + } // A run exists for this checkout but produced ZERO in-scope verdicts to compare. This is // NOT "the compiler agrees with the graph" — it is "the run found nothing in this // checkout's scope," which is exactly the silent-no-op symptom the #82 P0 scope bug @@ -477,44 +516,15 @@ impl IndexDatabase { .to_string(), ); } - for comparison in comparisons { - if comparison.kind != oracle::OracleResolutionKind::Contradict { - continue; - } - contradictions.push(crate::query::graph::GraphScipContradiction { - edge_id: comparison.edge_id, - edge_kind: comparison.edge_kind, - heuristic_confidence: crate::query::graph::normalize_confidence( - &comparison.heuristic_confidence, - ) - .to_string(), - heuristic_target: comparison.heuristic_target, - callee_name: comparison.callee_name, - // Label `resolved-external` ONLY for a contradiction the compiler resolved OUTSIDE - // the corpus (`resolved_symbol_id IS NULL`). 
A Rust SCIP symbol carries a - // crate/package component even for the LOCAL crate (`scip-rust crate held-mini …`), - // so deriving the label from `scip_symbol` alone would mislabel an IN-CORPUS - // contradiction (the compiler resolved to a *different* in-corpus symbol) as - // `resolved-external()` (#82 finding 1). An in-corpus contradiction is - // a same-corpus disagreement, not an external placement. - resolved_external: comparison - .resolved_symbol_id - .is_none() - .then(|| resolved_external_label(&comparison.scip_symbol)) - .flatten(), - scip_symbol: comparison.scip_symbol, - callsite: Some(crate::query::graph::Callsite { - path: comparison.callsite_path, - line: comparison.callsite_line, - span: [comparison.callsite_line, comparison.callsite_line], - }), - }); - } summary.contradictions = u64::try_from(contradictions.len()).unwrap_or(u64::MAX); Ok(crate::query::graph::CompareGraphScipReport { query: crate::query::graph::CompareGraphScipQuery { - tool: tool.as_db_str().to_string(), - tool_version, + // The tools (and their versions) that contributed verdicts, joined — the report now + // spans every backend with a run, not a single hardcoded tool. + tool: runs.iter().map(|(tool, _)| tool.as_db_str()).collect::>().join(","), + tool_version: Some( + runs.iter().map(|(_, version)| version.clone()).collect::>().join(","), + ), commit_sha: self.active_commit_sha.clone(), worktree_id: self.active_worktree_id.clone(), }, diff --git a/crates/rag-rat-core/src/index/query_api/oracle_surfacing_tests.rs b/crates/rag-rat-core/src/index/query_api/oracle_surfacing_tests.rs index a79085a..be2c785 100644 --- a/crates/rag-rat-core/src/index/query_api/oracle_surfacing_tests.rs +++ b/crates/rag-rat-core/src/index/query_api/oracle_surfacing_tests.rs @@ -329,6 +329,50 @@ fn compare_graph_to_scip_reports_contradiction() { let _ = fs::remove_dir_all(&root); } +/// #176: surfacing must NOT be hardcoded to rust-analyzer. 
/// #176: `compare_graph_to_scip` must report verdicts from any backend that has a run, not only
/// rust-analyzer.
///
/// The test resolves the one call edge to `target` with `Exact` confidence, then records an
/// oracle run under scip-clang whose SCIP symbol for that callsite lives in a different package.
/// The resulting contradiction has to appear in the report, and the report's `tool` field has to
/// name scip-clang.
#[test]
fn compare_graph_to_scip_surfaces_non_rust_analyzer_tools() {
    let root = temp_root();
    fs::write(root.join("src/lib.rs"), "fn caller() { target(); } fn target() {}\n").unwrap();
    let config = rust_config(root.clone());
    let db = IndexDatabase::rebuild(&config).unwrap();

    let (edge_id, call_start, call_end, path) = call_edge(&db);
    {
        // Pin the heuristic side: point the call edge at the in-corpus `target` symbol.
        let conn = db.storage.connection();
        let target_id: i64 = conn
            .query_row("SELECT id FROM symbols WHERE name = 'target' LIMIT 1", [], |row| {
                row.get(0)
            })
            .unwrap();
        conn.execute(
            "UPDATE edges SET confidence = 'Exact', resolution = 'exact', to_symbol_id = ?2 WHERE \
             id = ?1",
            params![edge_id, target_id],
        )
        .unwrap();
    }

    // The verdict is recorded under scip-clang; no rust-analyzer run exists in this checkout.
    let scip = scip_with(
        &path,
        call_start,
        call_end,
        "scip-rust cargo other 1.0 `target`().",
        None,
        None,
    );
    db.run_oracle_from_scip(OracleTool::ScipClang, "clang-vtest", &scip).unwrap();

    let compare = db.compare_graph_to_scip().unwrap();
    assert!(!compare.summary.no_oracle_data, "a scip-clang run exists: {compare:?}");
    assert_eq!(compare.summary.contradictions, 1, "scip-clang verdict must surface: {compare:?}");
    assert_eq!(compare.contradictions[0].edge_id, edge_id);
    assert!(
        compare.query.tool.contains("scip-clang"),
        "the report must name the contributing backend, got `{}`",
        compare.query.tool
    );

    // Best-effort cleanup of the temporary checkout.
    let _ = fs::remove_dir_all(&root);
}
Here /// a run writes a verdict, then the callsite file drifts so the current-content gate diff --git a/docs/oracle.md b/docs/oracle.md index db21296..79d0b7a 100644 --- a/docs/oracle.md +++ b/docs/oracle.md @@ -3,8 +3,8 @@ The code graph is tree-sitter-derived, so edges are heuristic and confidence-labeled (`Exact` / `Syntactic` / `NameOnly` / `Ambiguous`). The **oracle** is an opt-in pass that consumes a pre-built [SCIP](https://docs.sourcegraph.com/code_navigation/explanations/scip) index from a real -language tool (`rust-analyzer scip`, `scip-typescript`, …) and uses it as a *resolution oracle* to -upgrade those edges to a `Compiler` confidence tier. +language tool (`rust-analyzer scip` for Rust, `scip-clang` for C/C++, `scip-python` for Python) and +uses it as a *resolution oracle* to upgrade those edges to a `Compiler` confidence tier. It is opt-in, batch, content-addressed, and network-free — indexing stays fast and dependency-free without it. @@ -39,11 +39,18 @@ One-shot, by hand: ```bash rag-rat oracle run # uses rust-analyzer by default -rag-rat oracle run --tool rust-analyzer +rag-rat oracle run --tool rust-analyzer # Rust +rag-rat oracle run --tool scip-clang # C/C++ (needs a compile_commands.json) +rag-rat oracle run --tool scip-python # Python (resolves against installed deps) rag-rat oracle run --scip path/to/index.scip # consume a pre-built SCIP index directly rag-rat oracle status ``` +`scip-python` resolves imports against the project's **installed** dependencies, so the checkout's +deps must be importable (e.g. installed into a virtualenv) for cross-package edges to resolve. Its +SCIP project version is pinned to a constant (not the git revision) so a symbol's moniker stays +stable across commits — keeping moniker-anchored memory relocation working. + A missing/unrunnable tool degrades to `Blocked` with an install hint and exit 0 — never an error. ## Keeping it fresh automatically