diff --git a/crates/rag-rat-core/src/index/edges/extract.rs b/crates/rag-rat-core/src/index/edges/extract.rs index 76ee12e..36ac626 100644 --- a/crates/rag-rat-core/src/index/edges/extract.rs +++ b/crates/rag-rat-core/src/index/edges/extract.rs @@ -688,21 +688,29 @@ pub(crate) fn python_edges( { continue; } + // A call-shaped base (`class Sub(factory())`, even parenthesized + // `class Sub((factory()))`) is a DYNAMIC base — its head is a callable, not the + // base class — so don't claim an Implements edge to it (the resolver's Python + // class preference would otherwise mis-bind it to a same-named class, #172 + // review). ReferencesType below still records the reference. + // // Implements points at the base HEAD only (`Generic`, not `T`); ReferencesType // covers the head AND every type argument (`Mapping[str, Api]` → `Mapping`, // `str`, `Api`). - let head = python_type_head(base); - if let Some(name) = last_identifier_text(head, text) { - out.push(symbol_edge( - symbols, - base, - name, - EdgeKind::Implements, - EdgeConfidence::NameOnly, - last_identifier_node(head) - .map(final_segment_node) - .map(CalleeRange::of_node), - )); + if !python_base_is_dynamic(base) { + let head = python_type_head(base); + if let Some(name) = last_identifier_text(head, text) { + out.push(symbol_edge( + symbols, + base, + name, + EdgeKind::Implements, + EdgeConfidence::NameOnly, + last_identifier_node(head) + .map(final_segment_node) + .map(CalleeRange::of_node), + )); + } } emit_python_type_refs(base, symbols, text, out); } @@ -818,6 +826,36 @@ fn python_type_head(node: Node<'_>) -> Node<'_> { head } +/// Whether a Python base-class expression is DYNAMIC (call-shaped) — a `call`, possibly wrapped in +/// parentheses (`class Sub(factory())`, `class Sub((factory()))`). Such a base's head is a +/// callable, not a class, so it must not claim an Implements edge (the resolver's Python class +/// preference would otherwise mis-bind it to a same-named class, #172 review). The immediate-kind +/// check missed the parenthesized form, where tree-sitter nests the `call` under a +/// `parenthesized_expression`; unwrap those layers before deciding. Bounded loop guards against a +/// pathological tree. +fn python_base_is_dynamic(base: Node<'_>) -> bool { + let mut node = base; + for _ in 0..8 { + match node.kind() { + "call" => return true, + // Unwrap the SAME wrappers `python_type_head` strips to find the Implements head, plus + // parentheses — so a call hidden under any of them (`(factory())`, `factory()[T]`) is + // still seen as dynamic and doesn't get a bogus Implements edge (#172 review). If the + // Implements path would land on a `call`, this must catch it. + "parenthesized_expression" | "type" | "generic_type" => match node.named_child(0) { + Some(inner) => node = inner, + None => return false, + }, + "subscript" => match node.child_by_field_name("value") { + Some(value) => node = value, + None => return false, + }, + _ => return false, + } + } + false +} + /// Emit an Imports edge for one import clause (`dotted_name` or `aliased_import`), targeting the /// imported name's dotted tail — NEVER the `as` alias (which is a local binding, not the import). A /// comma / parenthesized list (`from pkg import A, B`) nests its clauses under an `import_list`, so @@ -1073,6 +1111,65 @@ mod python_edge_tests { assert!(!has(&e, EdgeKind::Implements, "T"), "must not resolve to type arg T: {e:?}"); } + #[test] + fn call_shaped_base_emits_no_implements() { + // `class Sub(factory())` — a DYNAMIC base (the head is a callable, not the base class). No + // Implements edge (the resolver's class preference would mis-bind it), but the base call is + // still captured as a CallsName so the dependency on `factory` isn't lost (#172 review). + let e = edges("class Sub(factory()):\n pass\n"); + assert!( + !has(&e, EdgeKind::Implements, "factory"), + "a call-shaped base must NOT emit Implements: {e:?}" + ); + assert!( + has(&e, EdgeKind::CallsName, "factory"), + "the base call is still captured as a CallsName: {e:?}" + ); + } + + #[test] + fn parenthesized_call_shaped_base_emits_no_implements() { + // `class Sub((factory()))` — tree-sitter nests the `call` under a + // `parenthesized_expression`, so the immediate-kind check missed it (#172 review round 2). + // Still a DYNAMIC base: no Implements, but the call dependency is captured. + let e = edges("class Sub((factory())):\n pass\n"); + assert!( + !has(&e, EdgeKind::Implements, "factory"), + "a parenthesized call-shaped base must NOT emit Implements: {e:?}" + ); + assert!( + has(&e, EdgeKind::CallsName, "factory"), + "the base call is still captured as a CallsName: {e:?}" + ); + } + + #[test] + fn subscript_call_shaped_base_emits_no_implements() { + // `class Sub(factory()[T])` — tree-sitter exposes the base as a `subscript` whose value is + // the `call`; `python_type_head` unwraps the subscript to that call, so the dynamic-base + // check must unwrap it too (#172 review round 3). Still dynamic: no Implements. + let e = edges("class Sub(factory()[T]):\n pass\n"); + assert!( + !has(&e, EdgeKind::Implements, "factory"), + "a subscript-on-call base must NOT emit Implements: {e:?}" + ); + assert!( + has(&e, EdgeKind::CallsName, "factory"), + "the base call is still captured as a CallsName: {e:?}" + ); + } + + #[test] + fn parenthesized_class_base_still_emits_implements() { + // `class Sub((Base))` — a parenthesized but STATIC base is a real superclass, so the + // Implements edge must survive the dynamic-base guard. + let e = edges("class Sub((Base)):\n pass\n"); + assert!( + has(&e, EdgeKind::Implements, "Base"), + "a parenthesized class base must still emit Implements: {e:?}" + ); + } + #[test] fn metaclass_keyword_argument_is_not_a_base_class() { // `class Model(Base, metaclass=Meta)` — `Base` is a base; `metaclass=Meta` is not. diff --git a/crates/rag-rat-core/src/index/edges/resolve/mod.rs b/crates/rag-rat-core/src/index/edges/resolve/mod.rs index fdc1a83..ce9927d 100644 --- a/crates/rag-rat-core/src/index/edges/resolve/mod.rs +++ b/crates/rag-rat-core/src/index/edges/resolve/mod.rs @@ -590,7 +590,7 @@ pub(crate) fn resolve_symbol<'a>( .copied() .filter(|symbol| kind_matches(symbol)) .collect::>(); - let preferred = preferred_matches(request.edge_kind, &matches); + let preferred = preferred_matches(request.edge_kind, request.source_language, &matches); // In languages with separate type/value namespaces (Rust, C, C++), a `references_type` // reference must resolve to a type DEFINITION (struct/enum/trait/type/…). If none of the // same-named candidates is one, do NOT fall back to a non-type symbol (an `impl` block, a @@ -605,6 +605,15 @@ pub(crate) fn resolve_symbol<'a>( { return None; } + // A Python `implements` (base class) that found no in-corpus PYTHON class must NOT fall back to + // the all-matches set: that would bind the base to a same-named non-class or a foreign-language + // class (#172 review). Leave it unresolved — the ReferencesType edge still records the base. + if preferred.is_empty() + && request.edge_kind == EdgeKind::Implements.as_str() + && request.source_language == Some(Language::Python.as_str()) + { + return None; + } let matches = if preferred.is_empty() { matches.as_slice() } else { preferred.as_slice() }; match matches { [symbol] => Some((*symbol, EdgeConfidence::Syntactic, "target_name_fallback")), @@ -736,8 +745,25 @@ pub(crate) fn is_common_member_name(value: &str) -> bool { } pub(crate) fn preferred_matches<'a>( edge_kind: &str, + source_language: Option<&str>, matches: &[&'a IndexedSymbol], ) -> Vec<&'a IndexedSymbol> { + // Python base-class inheritance emits an `implements` edge to a CLASS — Python has no + // traits/interfaces — so prefer class-like kinds for Python (#172). This case ALSO filters by + // LANGUAGE: the name buckets are repo-wide, so without it a Python base would bind to a + // same-named TS/Kotlin/C++ class or Kotlin `object` in a mixed-language index (#172 review). + // Kept language-scoped so Kotlin/TS are otherwise UNCHANGED (`implements`/`: I` targets an + // interface, and a same-named class must not be preferred over it). + if edge_kind == "implements" && source_language == Some(Language::Python.as_str()) { + return matches + .iter() + .copied() + .filter(|symbol| { + matches!(symbol.kind.as_str(), "class" | "object") + && symbol.language.as_str() == Language::Python.as_str() + }) + .collect(); + } let preferred_kinds: &[&str] = match edge_kind { "calls_name" => &["function", "method"], "constructs" => &["struct", "class", "object"], diff --git a/crates/rag-rat-core/src/index/edges/resolve/tests.rs b/crates/rag-rat-core/src/index/edges/resolve/tests.rs index bbae81c..d4f91d4 100644 --- a/crates/rag-rat-core/src/index/edges/resolve/tests.rs +++ b/crates/rag-rat-core/src/index/edges/resolve/tests.rs @@ -572,3 +572,96 @@ fn oracle_unaffected_by_import_scope_columns() { scope columns must not pull it in" ); } + +/// #172: a Python `class Sub(Base)` emits an `implements` edge to `Base`, which is a CLASS (Python +/// has no traits/interfaces). The resolver must prefer the base CLASS over a same-named non-class +/// (e.g. a function) — language-scoped, so Kotlin/TS `implements` still prefers an interface. +#[test] +fn python_implements_prefers_a_base_class_over_a_non_class() { + let conn = seeded_conn(); + let sub = py_source(&conn, "sub.py"); + let base = py_source(&conn, "base.py"); + let other = py_source(&conn, "other.py"); + // A symbol in sub.py so the resolver knows the reference's source language is Python (a file's + // language is inferred from its symbols, and the language-scoped `implements` preference is + // what this exercises). + py_sym(&conn, sub, "Sub", "sub.py::Sub", "class"); + // The real base class, and a DECOY same-named non-class (a module-level function `Base`). + let base_class = py_sym(&conn, base, "Base", "base.py::Base", "class"); + let _decoy_fn = py_sym(&conn, other, "Base", "other.py::Base", "function"); + conn.execute( + "INSERT INTO edges(source_file_id, to_name, edge_kind, confidence, resolution, \ + source_start_byte) VALUES (?1, 'Base', 'implements', 'NameOnly', 'unresolved', 10)", + params![sub], + ) + .unwrap(); + let edge: i64 = conn.query_row("SELECT MAX(id) FROM edges_data", [], |r| r.get(0)).unwrap(); + + crate::index::install_scope_view(&conn, NEW, "").unwrap(); + resolve_all_edges(&conn).unwrap(); + + let (to, _confidence, _resolution) = edge_state(&conn, edge); + assert_eq!( + to, + Some(base_class), + "implements must prefer the base CLASS, not the decoy function" + ); +} + +fn py_source(conn: &Connection, path: &str) -> i64 { + conn.execute( + "INSERT INTO main.files(path, language, kind, sha256, modified_at_ms, indexed_at_ms, \ + commit_sha, worktree_id) VALUES (?1, 'python', 'source', ?2, 0, 0, ?3, '')", + params![path, format!("sha-{path}"), NEW], + ) + .unwrap(); + conn.last_insert_rowid() +} + +fn py_sym(conn: &Connection, file_id: i64, name: &str, qualified: &str, kind: &str) -> i64 { + conn.execute( + "INSERT INTO symbols(file_id, language, name, qualified_name, kind, start_byte, end_byte, \ + start_line, end_line) VALUES (?1, 'python', ?2, ?3, ?4, 0, 10, 1, 1)", + params![file_id, name, qualified, kind], + ) + .unwrap(); + conn.last_insert_rowid() +} + +/// #172 review: a Python `implements` (base class) must NOT bind to a same-named class in another +/// language. With only a TypeScript `class Base` in the index (the Python base is external), the +/// edge stays unresolved rather than wrongly binding cross-language. +#[test] +fn python_implements_ignores_a_foreign_language_class() { + let conn = seeded_conn(); + let sub = py_source(&conn, "sub.py"); + py_sym(&conn, sub, "Sub", "sub.py::Sub", "class"); + // The only same-named `Base` is a TYPESCRIPT class. + conn.execute( + "INSERT INTO main.files(path, language, kind, sha256, modified_at_ms, indexed_at_ms, \ + commit_sha, worktree_id) VALUES ('w.ts', 'typescript', 'source', 'sha-w', 0, 0, ?1, '')", + params![NEW], + ) + .unwrap(); + let ts = conn.last_insert_rowid(); + conn.execute( + "INSERT INTO symbols(file_id, language, name, qualified_name, kind, start_byte, end_byte, \ + start_line, end_line) VALUES (?1, 'typescript', 'Base', 'w.ts::Base', 'class', 0, 10, 1, \ + 1)", + params![ts], + ) + .unwrap(); + conn.execute( + "INSERT INTO edges(source_file_id, to_name, edge_kind, confidence, resolution, \ + source_start_byte) VALUES (?1, 'Base', 'implements', 'NameOnly', 'unresolved', 10)", + params![sub], + ) + .unwrap(); + let edge: i64 = conn.query_row("SELECT MAX(id) FROM edges_data", [], |r| r.get(0)).unwrap(); + + crate::index::install_scope_view(&conn, NEW, "").unwrap(); + resolve_all_edges(&conn).unwrap(); + + let (to, _confidence, _resolution) = edge_state(&conn, edge); + assert_eq!(to, None, "a Python base must not bind to a foreign-language class"); +}