From acd8d90f994c35b48031643a45c1a5eaf4947c0f Mon Sep 17 00:00:00 2001 From: Kristaps Karlsons Date: Tue, 16 Jun 2026 01:38:48 +0300 Subject: [PATCH 1/4] fix(resolve): prefer a base CLASS for Python implements edges (#172) Python base-class inheritance emits an `implements` edge to the base, which is a CLASS (Python has no traits/interfaces). The resolver's `implements` preference was `[trait, interface]`, so a Python base never matched the preference and could mis-resolve to a same-named non-class or fall back ambiguously. `preferred_matches` is now language-aware: for Python, `implements` prefers `[class, object]`; Kotlin/TS/Rust are UNCHANGED (`implements`/`: I` targets an interface, and a same-named class must not be preferred over it). Added a resolve test (the base class is preferred over a decoy same-named function). --- .../src/index/edges/resolve/mod.rs | 8 ++- .../src/index/edges/resolve/tests.rs | 55 +++++++++++++++++++ 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/crates/rag-rat-core/src/index/edges/resolve/mod.rs b/crates/rag-rat-core/src/index/edges/resolve/mod.rs index fdc1a83..2f68259 100644 --- a/crates/rag-rat-core/src/index/edges/resolve/mod.rs +++ b/crates/rag-rat-core/src/index/edges/resolve/mod.rs @@ -590,7 +590,7 @@ pub(crate) fn resolve_symbol<'a>( .copied() .filter(|symbol| kind_matches(symbol)) .collect::>(); - let preferred = preferred_matches(request.edge_kind, &matches); + let preferred = preferred_matches(request.edge_kind, request.source_language, &matches); // In languages with separate type/value namespaces (Rust, C, C++), a `references_type` // reference must resolve to a type DEFINITION (struct/enum/trait/type/…). If none of the // same-named candidates is one, do NOT fall back to a non-type symbol (an `impl` block, a @@ -736,12 +736,18 @@ pub(crate) fn is_common_member_name(value: &str) -> bool { } pub(crate) fn preferred_matches<'a>( edge_kind: &str, + source_language: Option<&str>, matches: &[&'a IndexedSymbol], ) -> Vec<&'a IndexedSymbol> { let preferred_kinds: &[&str] = match edge_kind { "calls_name" => &["function", "method"], "constructs" => &["struct", "class", "object"], "uses_macro" => &["macro"], + // Python base-class inheritance emits an `implements` edge to a CLASS — Python has no + // traits/interfaces — so prefer class-like kinds for Python (#172). Kept language-scoped so + // Kotlin/TS are UNCHANGED: there `implements`/`: I` targets an interface, and a same-named + // class must not be preferred over it. + "implements" if source_language == Some(Language::Python.as_str()) => &["class", "object"], "implements" => &["trait", "interface"], "references_type" => &["struct", "enum", "trait", "type", "class", "interface", "object"], _ => &[], diff --git a/crates/rag-rat-core/src/index/edges/resolve/tests.rs b/crates/rag-rat-core/src/index/edges/resolve/tests.rs index bbae81c..9d2f386 100644 --- a/crates/rag-rat-core/src/index/edges/resolve/tests.rs +++ b/crates/rag-rat-core/src/index/edges/resolve/tests.rs @@ -572,3 +572,58 @@ fn oracle_unaffected_by_import_scope_columns() { scope columns must not pull it in" ); } + +/// #172: a Python `class Sub(Base)` emits an `implements` edge to `Base`, which is a CLASS (Python +/// has no traits/interfaces). The resolver must prefer the base CLASS over a same-named non-class +/// (e.g. a function) — language-scoped, so Kotlin/TS `implements` still prefers an interface. +#[test] +fn python_implements_prefers_a_base_class_over_a_non_class() { + let conn = seeded_conn(); + let sub = py_source(&conn, "sub.py"); + let base = py_source(&conn, "base.py"); + let other = py_source(&conn, "other.py"); + // A symbol in sub.py so the resolver knows the reference's source language is Python (a file's + // language is inferred from its symbols, and the language-scoped `implements` preference is + // what this exercises). + py_sym(&conn, sub, "Sub", "sub.py::Sub", "class"); + // The real base class, and a DECOY same-named non-class (a module-level function `Base`). + let base_class = py_sym(&conn, base, "Base", "base.py::Base", "class"); + let _decoy_fn = py_sym(&conn, other, "Base", "other.py::Base", "function"); + conn.execute( + "INSERT INTO edges(source_file_id, to_name, edge_kind, confidence, resolution, \ + source_start_byte) VALUES (?1, 'Base', 'implements', 'NameOnly', 'unresolved', 10)", + params![sub], + ) + .unwrap(); + let edge: i64 = conn.query_row("SELECT MAX(id) FROM edges_data", [], |r| r.get(0)).unwrap(); + + crate::index::install_scope_view(&conn, NEW, "").unwrap(); + resolve_all_edges(&conn).unwrap(); + + let (to, _confidence, _resolution) = edge_state(&conn, edge); + assert_eq!( + to, + Some(base_class), + "implements must prefer the base CLASS, not the decoy function" + ); +} + +fn py_source(conn: &Connection, path: &str) -> i64 { + conn.execute( + "INSERT INTO main.files(path, language, kind, sha256, modified_at_ms, indexed_at_ms, \ + commit_sha, worktree_id) VALUES (?1, 'python', 'source', ?2, 0, 0, ?3, '')", + params![path, format!("sha-{path}"), NEW], + ) + .unwrap(); + conn.last_insert_rowid() +} + +fn py_sym(conn: &Connection, file_id: i64, name: &str, qualified: &str, kind: &str) -> i64 { + conn.execute( + "INSERT INTO symbols(file_id, language, name, qualified_name, kind, start_byte, end_byte, \ + start_line, end_line) VALUES (?1, 'python', ?2, ?3, ?4, 0, 10, 1, 1)", + params![file_id, name, qualified, kind], + ) + .unwrap(); + conn.last_insert_rowid() +} From a528135e412ef468f2a3cb0e035fd40d170fb6d2 Mon Sep 17 00:00:00 2001 From: Kristaps Karlsons Date: Tue, 16 Jun 2026 09:21:20 +0300 Subject: [PATCH 2/4] fix(resolve): restrict Python implements preference to in-corpus Python classes (#172 review) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two Codex P2s on the Python implements class-preference: - Cross-language: preferred_matches keyed by kind only over repo-wide name buckets, so a Python base could bind to a same-named TS/Kotlin/C++ class or Kotlin object. The Python implements branch now filters by LANGUAGE too (python class/object only), and a Python implements that finds no in-corpus python class no longer falls back to the all-matches set — it stays unresolved (the ReferencesType edge still records the base) rather than binding a foreign / non-class symbol. - Call-shaped bases: `class Sub(factory())` is a DYNAMIC base (head is a callable). Extraction no longer emits an Implements edge for a call-shaped base (the class preference would mis-bind it to a same-named class); the base call is still captured as a CallsName. Tests: foreign-language class is not bound; a call-shaped base emits no Implements (but keeps the CallsName). Full core suite (484) green. --- .../rag-rat-core/src/index/edges/extract.rs | 48 ++++++++++++++----- .../src/index/edges/resolve/mod.rs | 30 ++++++++++-- .../src/index/edges/resolve/tests.rs | 38 +++++++++++++++ 3 files changed, 99 insertions(+), 17 deletions(-) diff --git a/crates/rag-rat-core/src/index/edges/extract.rs b/crates/rag-rat-core/src/index/edges/extract.rs index 76ee12e..84c0251 100644 --- a/crates/rag-rat-core/src/index/edges/extract.rs +++ b/crates/rag-rat-core/src/index/edges/extract.rs @@ -688,21 +688,29 @@ pub(crate) fn python_edges( { continue; } + // A call-shaped base (`class Sub(factory())`) is a DYNAMIC base — its head is a + // callable, not the base class — so don't claim an Implements edge to it (the + // resolver's Python class preference would otherwise mis-bind it to a + // same-named class, #172 review). ReferencesType below + // still records the reference. + // // Implements points at the base HEAD only (`Generic`, not `T`); ReferencesType // covers the head AND every type argument (`Mapping[str, Api]` → `Mapping`, // `str`, `Api`). - let head = python_type_head(base); - if let Some(name) = last_identifier_text(head, text) { - out.push(symbol_edge( - symbols, - base, - name, - EdgeKind::Implements, - EdgeConfidence::NameOnly, - last_identifier_node(head) - .map(final_segment_node) - .map(CalleeRange::of_node), - )); + if base.kind() != "call" { + let head = python_type_head(base); + if let Some(name) = last_identifier_text(head, text) { + out.push(symbol_edge( + symbols, + base, + name, + EdgeKind::Implements, + EdgeConfidence::NameOnly, + last_identifier_node(head) + .map(final_segment_node) + .map(CalleeRange::of_node), + )); + } } emit_python_type_refs(base, symbols, text, out); } @@ -1073,6 +1081,22 @@ mod python_edge_tests { assert!(!has(&e, EdgeKind::Implements, "T"), "must not resolve to type arg T: {e:?}"); } + #[test] + fn call_shaped_base_emits_no_implements() { + // `class Sub(factory())` — a DYNAMIC base (the head is a callable, not the base class). No + // Implements edge (the resolver's class preference would mis-bind it), but the base call is + // still captured as a CallsName so the dependency on `factory` isn't lost (#172 review). + let e = edges("class Sub(factory()):\n pass\n"); + assert!( + !has(&e, EdgeKind::Implements, "factory"), + "a call-shaped base must NOT emit Implements: {e:?}" + ); + assert!( + has(&e, EdgeKind::CallsName, "factory"), + "the base call is still captured as a CallsName: {e:?}" + ); + } + #[test] fn metaclass_keyword_argument_is_not_a_base_class() { // `class Model(Base, metaclass=Meta)` — `Base` is a base; `metaclass=Meta` is not. diff --git a/crates/rag-rat-core/src/index/edges/resolve/mod.rs b/crates/rag-rat-core/src/index/edges/resolve/mod.rs index 2f68259..ce9927d 100644 --- a/crates/rag-rat-core/src/index/edges/resolve/mod.rs +++ b/crates/rag-rat-core/src/index/edges/resolve/mod.rs @@ -605,6 +605,15 @@ pub(crate) fn resolve_symbol<'a>( { return None; } + // A Python `implements` (base class) that found no in-corpus PYTHON class must NOT fall back to + // the all-matches set: that would bind the base to a same-named non-class or a foreign-language + // class (#172 review). Leave it unresolved — the ReferencesType edge still records the base. + if preferred.is_empty() + && request.edge_kind == EdgeKind::Implements.as_str() + && request.source_language == Some(Language::Python.as_str()) + { + return None; + } let matches = if preferred.is_empty() { matches.as_slice() } else { preferred.as_slice() }; match matches { [symbol] => Some((*symbol, EdgeConfidence::Syntactic, "target_name_fallback")), @@ -739,15 +748,26 @@ pub(crate) fn preferred_matches<'a>( source_language: Option<&str>, matches: &[&'a IndexedSymbol], ) -> Vec<&'a IndexedSymbol> { + // Python base-class inheritance emits an `implements` edge to a CLASS — Python has no + // traits/interfaces — so prefer class-like kinds for Python (#172). This case ALSO filters by + // LANGUAGE: the name buckets are repo-wide, so without it a Python base would bind to a + // same-named TS/Kotlin/C++ class or Kotlin `object` in a mixed-language index (#172 review). + // Kept language-scoped so Kotlin/TS are otherwise UNCHANGED (`implements`/`: I` targets an + // interface, and a same-named class must not be preferred over it). + if edge_kind == "implements" && source_language == Some(Language::Python.as_str()) { + return matches + .iter() + .copied() + .filter(|symbol| { + matches!(symbol.kind.as_str(), "class" | "object") + && symbol.language.as_str() == Language::Python.as_str() + }) + .collect(); + } let preferred_kinds: &[&str] = match edge_kind { "calls_name" => &["function", "method"], "constructs" => &["struct", "class", "object"], "uses_macro" => &["macro"], - // Python base-class inheritance emits an `implements` edge to a CLASS — Python has no - // traits/interfaces — so prefer class-like kinds for Python (#172). Kept language-scoped so - // Kotlin/TS are UNCHANGED: there `implements`/`: I` targets an interface, and a same-named - // class must not be preferred over it. - "implements" if source_language == Some(Language::Python.as_str()) => &["class", "object"], "implements" => &["trait", "interface"], "references_type" => &["struct", "enum", "trait", "type", "class", "interface", "object"], _ => &[], diff --git a/crates/rag-rat-core/src/index/edges/resolve/tests.rs b/crates/rag-rat-core/src/index/edges/resolve/tests.rs index 9d2f386..d4f91d4 100644 --- a/crates/rag-rat-core/src/index/edges/resolve/tests.rs +++ b/crates/rag-rat-core/src/index/edges/resolve/tests.rs @@ -627,3 +627,41 @@ fn py_sym(conn: &Connection, file_id: i64, name: &str, qualified: &str, kind: &s .unwrap(); conn.last_insert_rowid() } + +/// #172 review: a Python `implements` (base class) must NOT bind to a same-named class in another +/// language. With only a TypeScript `class Base` in the index (the Python base is external), the +/// edge stays unresolved rather than wrongly binding cross-language. +#[test] +fn python_implements_ignores_a_foreign_language_class() { + let conn = seeded_conn(); + let sub = py_source(&conn, "sub.py"); + py_sym(&conn, sub, "Sub", "sub.py::Sub", "class"); + // The only same-named `Base` is a TYPESCRIPT class. + conn.execute( + "INSERT INTO main.files(path, language, kind, sha256, modified_at_ms, indexed_at_ms, \ + commit_sha, worktree_id) VALUES ('w.ts', 'typescript', 'source', 'sha-w', 0, 0, ?1, '')", + params![NEW], + ) + .unwrap(); + let ts = conn.last_insert_rowid(); + conn.execute( + "INSERT INTO symbols(file_id, language, name, qualified_name, kind, start_byte, end_byte, \ + start_line, end_line) VALUES (?1, 'typescript', 'Base', 'w.ts::Base', 'class', 0, 10, 1, \ + 1)", + params![ts], + ) + .unwrap(); + conn.execute( + "INSERT INTO edges(source_file_id, to_name, edge_kind, confidence, resolution, \ + source_start_byte) VALUES (?1, 'Base', 'implements', 'NameOnly', 'unresolved', 10)", + params![sub], + ) + .unwrap(); + let edge: i64 = conn.query_row("SELECT MAX(id) FROM edges_data", [], |r| r.get(0)).unwrap(); + + crate::index::install_scope_view(&conn, NEW, "").unwrap(); + resolve_all_edges(&conn).unwrap(); + + let (to, _confidence, _resolution) = edge_state(&conn, edge); + assert_eq!(to, None, "a Python base must not bind to a foreign-language class"); +} From 3b20178f6017f3b4d1cd7576d90ab3a3b40ca220 Mon Sep 17 00:00:00 2001 From: Kristaps Karlsons Date: Tue, 16 Jun 2026 09:43:09 +0300 Subject: [PATCH 3/4] fix(extract): treat parenthesized call-shaped Python bases as dynamic (#172 review) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The call-shaped-base guard only checked the base's immediate kind, so `class Sub((factory()))` — where tree-sitter nests the `call` under a `parenthesized_expression` — still entered the Implements path and mis-bound to `factory`. Replace the `base.kind() != "call"` check with `python_base_is_dynamic`, which unwraps parenthesized layers before deciding. A parenthesized STATIC base (`class Sub((Base))`) still emits Implements. --- .../rag-rat-core/src/index/edges/extract.rs | 61 +++++++++++++++++-- 1 file changed, 55 insertions(+), 6 deletions(-) diff --git a/crates/rag-rat-core/src/index/edges/extract.rs b/crates/rag-rat-core/src/index/edges/extract.rs index 84c0251..bc6d39e 100644 --- a/crates/rag-rat-core/src/index/edges/extract.rs +++ b/crates/rag-rat-core/src/index/edges/extract.rs @@ -688,16 +688,16 @@ pub(crate) fn python_edges( { continue; } - // A call-shaped base (`class Sub(factory())`) is a DYNAMIC base — its head is a - // callable, not the base class — so don't claim an Implements edge to it (the - // resolver's Python class preference would otherwise mis-bind it to a - // same-named class, #172 review). ReferencesType below - // still records the reference. + // A call-shaped base (`class Sub(factory())`, even parenthesized + // `class Sub((factory()))`) is a DYNAMIC base — its head is a callable, not the + // base class — so don't claim an Implements edge to it (the resolver's Python + // class preference would otherwise mis-bind it to a same-named class, #172 + // review). ReferencesType below still records the reference. // // Implements points at the base HEAD only (`Generic`, not `T`); ReferencesType // covers the head AND every type argument (`Mapping[str, Api]` → `Mapping`, // `str`, `Api`). - if base.kind() != "call" { + if !python_base_is_dynamic(base) { let head = python_type_head(base); if let Some(name) = last_identifier_text(head, text) { out.push(symbol_edge( @@ -826,6 +826,28 @@ fn python_type_head(node: Node<'_>) -> Node<'_> { head } +/// Whether a Python base-class expression is DYNAMIC (call-shaped) — a `call`, possibly wrapped in +/// parentheses (`class Sub(factory())`, `class Sub((factory()))`). Such a base's head is a +/// callable, not a class, so it must not claim an Implements edge (the resolver's Python class +/// preference would otherwise mis-bind it to a same-named class, #172 review). The immediate-kind +/// check missed the parenthesized form, where tree-sitter nests the `call` under a +/// `parenthesized_expression`; unwrap those layers before deciding. Bounded loop guards against a +/// pathological tree. +fn python_base_is_dynamic(base: Node<'_>) -> bool { + let mut node = base; + for _ in 0..8 { + match node.kind() { + "call" => return true, + "parenthesized_expression" => match node.named_child(0) { + Some(inner) => node = inner, + None => return false, + }, + _ => return false, + } + } + false +} + /// Emit an Imports edge for one import clause (`dotted_name` or `aliased_import`), targeting the /// imported name's dotted tail — NEVER the `as` alias (which is a local binding, not the import). A /// comma / parenthesized list (`from pkg import A, B`) nests its clauses under an `import_list`, so @@ -1097,6 +1119,33 @@ mod python_edge_tests { ); } + #[test] + fn parenthesized_call_shaped_base_emits_no_implements() { + // `class Sub((factory()))` — tree-sitter nests the `call` under a + // `parenthesized_expression`, so the immediate-kind check missed it (#172 review round 2). + // Still a DYNAMIC base: no Implements, but the call dependency is captured. + let e = edges("class Sub((factory())):\n pass\n"); + assert!( + !has(&e, EdgeKind::Implements, "factory"), + "a parenthesized call-shaped base must NOT emit Implements: {e:?}" + ); + assert!( + has(&e, EdgeKind::CallsName, "factory"), + "the base call is still captured as a CallsName: {e:?}" + ); + } + + #[test] + fn parenthesized_class_base_still_emits_implements() { + // `class Sub((Base))` — a parenthesized but STATIC base is a real superclass, so the + // Implements edge must survive the dynamic-base guard. + let e = edges("class Sub((Base)):\n pass\n"); + assert!( + has(&e, EdgeKind::Implements, "Base"), + "a parenthesized class base must still emit Implements: {e:?}" + ); + } + #[test] fn metaclass_keyword_argument_is_not_a_base_class() { // `class Model(Base, metaclass=Meta)` — `Base` is a base; `metaclass=Meta` is not. From 024df5fa4734522e13c781d5d64fba0e0bf249ee Mon Sep 17 00:00:00 2001 From: Kristaps Karlsons Date: Tue, 16 Jun 2026 09:58:58 +0300 Subject: [PATCH 4/4] fix(extract): unwrap subscript/generic wrappers when detecting dynamic Python bases (#172 review) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `class Sub(factory()[T])` exposes the base as a `subscript` whose value is the `call`; the Implements path runs `python_type_head`, which unwraps the subscript to that call and mis-bound to `factory`. Align `python_base_is_dynamic` with `python_type_head` — unwrap `subscript` (→ value) and `type`/`generic_type` (→ first child) in addition to parentheses — so any call reachable as the Implements head is recognized as dynamic and gets no Implements edge. --- .../rag-rat-core/src/index/edges/extract.rs | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/crates/rag-rat-core/src/index/edges/extract.rs b/crates/rag-rat-core/src/index/edges/extract.rs index bc6d39e..36ac626 100644 --- a/crates/rag-rat-core/src/index/edges/extract.rs +++ b/crates/rag-rat-core/src/index/edges/extract.rs @@ -838,10 +838,18 @@ fn python_base_is_dynamic(base: Node<'_>) -> bool { for _ in 0..8 { match node.kind() { "call" => return true, - "parenthesized_expression" => match node.named_child(0) { + // Unwrap the SAME wrappers `python_type_head` strips to find the Implements head, plus + // parentheses — so a call hidden under any of them (`(factory())`, `factory()[T]`) is + // still seen as dynamic and doesn't get a bogus Implements edge (#172 review). If the + // Implements path would land on a `call`, this must catch it. + "parenthesized_expression" | "type" | "generic_type" => match node.named_child(0) { Some(inner) => node = inner, None => return false, }, + "subscript" => match node.child_by_field_name("value") { + Some(value) => node = value, + None => return false, + }, _ => return false, } } @@ -1135,6 +1143,22 @@ mod python_edge_tests { ); } + #[test] + fn subscript_call_shaped_base_emits_no_implements() { + // `class Sub(factory()[T])` — tree-sitter exposes the base as a `subscript` whose value is + // the `call`; `python_type_head` unwraps the subscript to that call, so the dynamic-base + // check must unwrap it too (#172 review round 3). Still dynamic: no Implements. + let e = edges("class Sub(factory()[T]):\n pass\n"); + assert!( + !has(&e, EdgeKind::Implements, "factory"), + "a subscript-on-call base must NOT emit Implements: {e:?}" + ); + assert!( + has(&e, EdgeKind::CallsName, "factory"), + "the base call is still captured as a CallsName: {e:?}" + ); + } + #[test] fn parenthesized_class_base_still_emits_implements() { // `class Sub((Base))` — a parenthesized but STATIC base is a real superclass, so the