diff --git a/build.zig b/build.zig index 203f929..abdbddd 100644 --- a/build.zig +++ b/build.zig @@ -53,28 +53,44 @@ pub fn build(b: *std.Build) void { const run_step = b.step("run", "Run codedb daemon"); run_step.dependOn(&run_cmd.step); - // ── Tests ── + // ── Tests (split into independent binaries for faster compilation) ── const test_filter = b.option([]const u8, "test-filter", "Only run tests whose name contains this substring"); - const tests = b.addTest(.{ - .root_module = b.createModule(.{ - .root_source_file = b.path("src/tests.zig"), - .target = target, - .optimize = optimize, - .link_libc = true, - }), - }); - tests.root_module.addImport("mcp", mcp_dep.module("mcp")); - tests.root_module.addImport("nanoregex", nanoregex_dep.module("nanoregex")); - if (test_filter) |f| { - const filters = b.allocator.alloc([]const u8, 1) catch @panic("oom"); - filters[0] = f; - tests.filters = filters; + const test_step = b.step("test", "Run all tests"); + + const test_files = [_]struct { name: []const u8, path: []const u8, needs_mcp: bool, needs_nanoregex: bool }{ + .{ .name = "test-core", .path = "src/test_core.zig", .needs_mcp = false, .needs_nanoregex = false }, + .{ .name = "test-explore", .path = "src/test_explore.zig", .needs_mcp = false, .needs_nanoregex = true }, + .{ .name = "test-index", .path = "src/test_index.zig", .needs_mcp = true, .needs_nanoregex = true }, + .{ .name = "test-parser", .path = "src/test_parser.zig", .needs_mcp = false, .needs_nanoregex = true }, + .{ .name = "test-search", .path = "src/test_search.zig", .needs_mcp = true, .needs_nanoregex = true }, + .{ .name = "test-snapshot", .path = "src/test_snapshot.zig", .needs_mcp = false, .needs_nanoregex = true }, + .{ .name = "test-mcp", .path = "src/test_mcp.zig", .needs_mcp = true, .needs_nanoregex = true }, + .{ .name = "test-query", .path = "src/test_query.zig", .needs_mcp = true, .needs_nanoregex = true }, + }; + + for (test_files) |tf| { + const t = b.addTest(.{ + .root_module = 
b.createModule(.{ + .root_source_file = b.path(tf.path), + .target = target, + .optimize = optimize, + .link_libc = true, + }), + }); + if (tf.needs_mcp) t.root_module.addImport("mcp", mcp_dep.module("mcp")); + if (tf.needs_nanoregex) t.root_module.addImport("nanoregex", nanoregex_dep.module("nanoregex")); + if (test_filter) |f| { + const filters = b.allocator.alloc([]const u8, 1) catch @panic("oom"); + filters[0] = f; + t.filters = filters; + } + const run = b.addRunArtifact(t); + test_step.dependOn(&run.step); + + const individual_step = b.step(tf.name, b.fmt("Run {s}", .{tf.name})); + individual_step.dependOn(&run.step); } - const test_step = b.step("test", "Run tests"); - const tests_run = b.addRunArtifact(tests); - test_step.dependOn(&tests_run.step); - // ── Library tests (verify the module root compiles) ── const lib_tests = b.addTest(.{ diff --git a/codedb.snapshot b/codedb.snapshot index 466b0a9..49a7b5c 100644 Binary files a/codedb.snapshot and b/codedb.snapshot differ diff --git a/src/cio.zig b/src/cio.zig index 69c6aef..1cf1950 100644 --- a/src/cio.zig +++ b/src/cio.zig @@ -16,6 +16,15 @@ extern "c" fn clock_gettime(id: c_int, ts: *std.c.timespec) c_int; extern "c" fn pipe(fds: *[2]c_int) c_int; extern "c" fn close(fd: c_int) c_int; +pub fn ignoreSigpipe() void { + var act: std.posix.Sigaction = .{ + .handler = .{ .handler = std.posix.SIG.IGN }, + .mask = 0, + .flags = 0, + }; + std.posix.sigaction(std.posix.SIG.PIPE, &act, null); +} + const CLOCK_REALTIME: c_int = 0; const CLOCK_MONOTONIC: c_int = if (builtin.os.tag == .macos) 6 else 1; diff --git a/src/explore.zig b/src/explore.zig index 81f4648..08a1023 100644 --- a/src/explore.zig +++ b/src/explore.zig @@ -181,6 +181,20 @@ pub const SearchResult = struct { score: f32 = 0.0, }; +pub const SearchBreakdown = struct { + tier0_ns: i128 = 0, + tier05_ns: i128 = 0, + tier1_ns: i128 = 0, + tier2_ns: i128 = 0, + tier3_ns: i128 = 0, + tier4_ns: i128 = 0, + tier5_ns: i128 = 0, + rerank_ns: i128 = 0, + 
tier_reached: u8 = 0, + candidate_count: u32 = 0, + result_count: u32 = 0, +}; + pub const DependencyGraph = struct { forward: std.StringHashMap(std.ArrayList([]const u8)), reverse: std.StringHashMap(std.StringHashMap(void)), @@ -522,6 +536,7 @@ pub const Explorer = struct { /// assert the short-circuit holds (issue: negative-query slow path). /// Production code does not read this field. search_tier5_count: u64 = 0, + last_search_breakdown: SearchBreakdown = .{}, pub const DEFAULT_CONTENT_CACHE_CAPACITY: u32 = 16384; @@ -646,8 +661,8 @@ pub const Explorer = struct { persistent_outline.path = stable_path; + const prior_content = self.contents.get(stable_path); try self.contents.put(stable_path, content); - const prior_content: ?[]const u8 = null; if (full_index) { if (!self.word_index_complete) { @@ -1520,6 +1535,9 @@ pub const Explorer = struct { if (max_results == 0) return try allocator.alloc(SearchResult, 0); + var breakdown: SearchBreakdown = .{}; + defer self.last_search_breakdown = breakdown; + var result_list: std.ArrayList(SearchResult) = .empty; errdefer result_list.deinit(allocator); @@ -1533,6 +1551,7 @@ pub const Explorer = struct { // docs, and files with more exact word hits are considered first so // popular identifiers and skip-trigram canonical files are not hidden // behind earlier low-signal posting-list entries. 
+ const t0_start = cio.nanoTimestamp(); const word_hits = self.word_index.search(query); if (word_hits.len > 0) { const Tier0File = struct { @@ -1587,13 +1606,19 @@ pub const Explorer = struct { searched.put(stats.path, {}) catch {}; try searchInContent(stats.path, ref.data, query, allocator, tier0_per_file_cap, max_results, &result_list); } - if (result_list.items.len >= max_results) - return self.rerankAndFinalize(&result_list, query, allocator); + if (result_list.items.len >= max_results) { + breakdown.tier0_ns = cio.nanoTimestamp() - t0_start; + breakdown.tier_reached = 0; + breakdown.result_count = @intCast(result_list.items.len); + const t_rerank = cio.nanoTimestamp(); + const res = self.rerankAndFinalize(&result_list, query, allocator); + breakdown.rerank_ns = cio.nanoTimestamp() - t_rerank; + return res; + } } + breakdown.tier0_ns = cio.nanoTimestamp() - t0_start; - // Tier 0.5: prefix expansion — find all indexed keys that begin with the query. - // Activates when Tier 0 found nothing and query is ≥3 chars, catching partial - // identifier queries like "searchC" that match "searchContent" in the word index. 
+ const t05_start = cio.nanoTimestamp(); if (result_list.items.len == 0 and query.len >= 3) { const prefix_hits = try self.word_index.searchPrefix(query, allocator, max_results); defer allocator.free(prefix_hits); @@ -1616,14 +1641,23 @@ pub const Explorer = struct { searched.put(hit_path, {}) catch {}; if (result_list.items.len >= max_results) break; } - if (result_list.items.len >= max_results) - return self.rerankAndFinalize(&result_list, query, allocator); + if (result_list.items.len >= max_results) { + breakdown.tier05_ns = cio.nanoTimestamp() - t05_start; + breakdown.tier_reached = 1; + breakdown.result_count = @intCast(result_list.items.len); + const t_rerank = cio.nanoTimestamp(); + const res = self.rerankAndFinalize(&result_list, query, allocator); + breakdown.rerank_ns = cio.nanoTimestamp() - t_rerank; + return res; + } } + breakdown.tier05_ns = cio.nanoTimestamp() - t05_start; + const t1_start = cio.nanoTimestamp(); const candidate_paths = self.trigram_index.candidates(query, allocator); defer if (candidate_paths) |cp| allocator.free(cp); + if (candidate_paths) |cp| breakdown.candidate_count = @intCast(cp.len); - // Tier 1: trigram candidates — fast path, skips files already found by Tier 0. 
if (candidate_paths) |cp| { if (cp.len > 0) { // Issue #427: rank candidates by per-file word-index hit count @@ -1662,18 +1696,25 @@ pub const Explorer = struct { const ref = self.readContentForSearch(path, allocator) orelse continue; defer ref.deinit(); try searchInContent(path, ref.data, query, allocator, max_per_file, max_results, &result_list); - if (result_list.items.len >= max_results) - return self.rerankAndFinalize(&result_list, query, allocator); + if (result_list.items.len >= max_results) { + breakdown.tier1_ns = cio.nanoTimestamp() - t1_start; + breakdown.tier_reached = 2; + breakdown.result_count = @intCast(result_list.items.len); + const t_rerank = cio.nanoTimestamp(); + const res = self.rerankAndFinalize(&result_list, query, allocator); + breakdown.rerank_ns = cio.nanoTimestamp() - t_rerank; + return res; + } } } } - // Mark all Tier 1 candidates as searched. if (candidate_paths) |cp| { for (cp) |p| searched.put(p, {}) catch {}; } + breakdown.tier1_ns = cio.nanoTimestamp() - t1_start; - // Tier 2: sparse candidates — LAZY, only computed when Tier 1 found nothing. + const t2_start = cio.nanoTimestamp(); if (result_list.items.len == 0) { const sparse_paths = self.sparse_ngram_index.candidates(query, allocator); defer if (sparse_paths) |sp| allocator.free(sp); @@ -1688,8 +1729,9 @@ pub const Explorer = struct { } } } + breakdown.tier2_ns = cio.nanoTimestamp() - t2_start; - // Tier 3: skip_trigram_files not already searched. + const t3_start = cio.nanoTimestamp(); if (result_list.items.len < max_results) { var skip_iter = self.skip_trigram_files.keyIterator(); while (skip_iter.next()) |key_ptr| { @@ -1701,8 +1743,9 @@ pub const Explorer = struct { if (result_list.items.len >= max_results) break; } } + breakdown.tier3_ns = cio.nanoTimestamp() - t3_start; - // Tier 4: word index scan — for files not yet searched. 
+ const t4_start = cio.nanoTimestamp(); if (result_list.items.len < max_results) { const tier4_hits = self.word_index.search(query); if (tier4_hits.len > 0) { @@ -1720,21 +1763,9 @@ pub const Explorer = struct { } } } + breakdown.tier4_ns = cio.nanoTimestamp() - t4_start; - // Tier 5: full scan fallback — only when NO results from any tier. - // Avoids 100ms+ scans on large repos when indices already found matches. - // - // Short-circuit Tier 5 whenever the trigram index was consulted with - // a query long enough to fully cover it (query.len >= 3). The trigram - // filter returns a SUPERSET of files containing the substring (every - // file containing the substring necessarily contains all its - // trigrams). If Tier 1 scanned that superset and found 0 results, no - // other trigram-indexed file can match either; skip_trigram_files - // were handled separately by Tier 3. Tier 5 would otherwise re-scan - // every indexed file for nothing — a measurable 2–3 ms p50 cost on - // queries whose constituent trigrams are common-but-not-co-occurring - // syllables (e.g. `Suspense` on a Rust corpus). The cp.len == 0 - // sub-case of this was already short-circuited before this change. 
+ const t5_start = cio.nanoTimestamp(); const trigram_ruled_out = if (candidate_paths) |_| (query.len >= 3) else @@ -1750,7 +1781,23 @@ pub const Explorer = struct { if (result_list.items.len >= max_results) break; } } - return self.rerankAndFinalize(&result_list, query, allocator); + breakdown.tier5_ns = cio.nanoTimestamp() - t5_start; + + if (result_list.items.len > 0) { + breakdown.tier_reached = if (breakdown.tier5_ns > 0 and result_list.items.len > 0) 7 + else if (breakdown.tier4_ns > 0 and result_list.items.len > 0) 6 + else if (breakdown.tier3_ns > 0) 5 + else if (breakdown.tier2_ns > 0) 4 + else if (breakdown.tier1_ns > 0) 3 + else if (breakdown.tier05_ns > 0) 1 + else 0; + } + breakdown.result_count = @intCast(result_list.items.len); + + const t_rerank = cio.nanoTimestamp(); + const res = self.rerankAndFinalize(&result_list, query, allocator); + breakdown.rerank_ns = cio.nanoTimestamp() - t_rerank; + return res; } /// Run the multi-signal rerank in place, then transfer ownership of @@ -1770,7 +1817,9 @@ pub const Explorer = struct { if (result_list.items.len > 1) { std.sort.block(SearchResult, result_list.items, {}, struct { pub fn lessThan(_: void, a: SearchResult, b: SearchResult) bool { - if (a.score != b.score) return a.score > b.score; + const sa = if (a.score == a.score) a.score else 0; + const sb = if (b.score == b.score) b.score else 0; + if (sa != sb) return sa > sb; const ord = std.mem.order(u8, a.path, b.path); if (ord != .eq) return ord == .lt; return a.line_num < b.line_num; diff --git a/src/index.zig b/src/index.zig index 711233b..a4e71aa 100644 --- a/src/index.zig +++ b/src/index.zig @@ -351,7 +351,8 @@ pub const WordIndex = struct { pub fn avgDocLength(self: *const WordIndex) f32 { const n = self.doc_lengths.count(); if (n == 0) return 1.0; - return @as(f32, @floatFromInt(self.total_tokens)) / @as(f32, @floatFromInt(n)); + const avg = @as(f32, @floatFromInt(self.total_tokens)) / @as(f32, @floatFromInt(n)); + return if (avg > 0) avg else 
1.0; } /// Shrink all hit lists and per-file word sets to release excess capacity. diff --git a/src/main.zig b/src/main.zig index 041ce5c..3873c9e 100644 --- a/src/main.zig +++ b/src/main.zig @@ -80,6 +80,7 @@ fn mainInner() void { fn mainImpl() !void { // Use c_allocator (libc malloc) — better page reclamation than GPA const allocator = std.heap.c_allocator; + cio.ignoreSigpipe(); // 0.16: single Threaded I/O instance passed down through every subsystem // that touches fs/subprocess. See issue #282. `io` flows into mcp.run, @@ -1048,7 +1049,7 @@ fn mainImpl() !void { std.log.info("codedb mcp: root={s} files={d} data={s} scan={s}", .{ abs_root, store.currentSeq(), data_dir, mcp_server.getScanState().name() }); - mcp_server.run(io, allocator, &store, &explorer, &agents, abs_root, cfg.max_cached, &telem, maybe_deferred); + mcp_server.run(io, allocator, &store, &explorer, &agents, abs_root, cfg.max_cached, &telem, maybe_deferred, &shutdown); shutdown.store(true, .release); if (scan_thread) |st| st.join(); diff --git a/src/mcp.zig b/src/mcp.zig index 839d22b..76bc144 100644 --- a/src/mcp.zig +++ b/src/mcp.zig @@ -651,6 +651,8 @@ pub var last_activity: std.atomic.Value(i64) = std.atomic.Value(i64).init(0); /// How often the watchdog checks whether the MCP client disconnected. 
pub const dead_client_poll_ms: u64 = 1000; +pub var stdout_broken: std.atomic.Value(bool) = std.atomic.Value(bool).init(false); + // ── Serve-first scan state (issue #207) ───────────────────────────────────── // // MCP serves immediately on startup; the file walk + index build runs in a @@ -732,6 +734,7 @@ pub fn run( content_cache_capacity: u32, telem: *telemetry_mod.Telemetry, deferred_scan: ?*DeferredScan, + shutdown: *std.atomic.Value(bool), ) void { const stdout = cio.File.stdout(); const stdin = std.Io.File.stdin(); @@ -778,7 +781,7 @@ pub fn run( var read_buf: [4096]u8 = undefined; var stdin_reader = stdin.reader(io, &read_buf); - while (true) { + while (!stdout_broken.load(.acquire) and !shutdown.load(.acquire)) { const msg = mcpj.readLineBuf(alloc, &stdin_reader.interface) orelse break; last_activity.store(cio.milliTimestamp(), .release); defer alloc.free(msg); @@ -935,7 +938,10 @@ fn writeRequest(alloc: std.mem.Allocator, stdout: cio.File, id: i64, method: []c buf.appendSlice(alloc, "\",\"params\":") catch return; buf.appendSlice(alloc, params) catch return; buf.appendSlice(alloc, "}\n") catch return; - stdout.writeAll(buf.items) catch {}; + stdout.writeAll(buf.items) catch { + stdout_broken.store(true, .release); + return; + }; } fn handleCall( @@ -989,6 +995,10 @@ fn handleCall( const is_error = std.mem.startsWith(u8, out.items, "error:"); telem.recordToolCall(name, elapsed, is_error, out.items.len); + if (std.mem.eql(u8, name, "codedb_search") or std.mem.eql(u8, name, "codedb_find") or std.mem.eql(u8, name, "codedb_word")) { + telem.recordSearchBreakdown(explorer.last_search_breakdown); + } + // Query + file access tracking WAL if (!is_error) { if (std.mem.eql(u8, name, "codedb_search") or std.mem.eql(u8, name, "codedb_find") or std.mem.eql(u8, name, "codedb_word")) { @@ -3890,7 +3900,10 @@ fn writeResult(alloc: std.mem.Allocator, stdout: cio.File, id: ?std.json.Value, if (i < result.len) i += 1; } buf.appendSlice(alloc, "}\n") catch return; - 
stdout.writeAll(buf.items) catch return; + stdout.writeAll(buf.items) catch { + stdout_broken.store(true, .release); + return; + }; } fn writeError(alloc: std.mem.Allocator, stdout: cio.File, id: ?std.json.Value, code: i32, msg: []const u8) void { @@ -3905,8 +3918,14 @@ fn writeError(alloc: std.mem.Allocator, stdout: cio.File, id: ?std.json.Value, c buf.appendSlice(alloc, ",\"message\":\"") catch return; mcpj.writeEscaped(alloc, &buf, msg); buf.appendSlice(alloc, "\"}}") catch return; - stdout.writeAll(buf.items) catch return; - stdout.writeAll("\n") catch return; + stdout.writeAll(buf.items) catch { + stdout_broken.store(true, .release); + return; + }; + stdout.writeAll("\n") catch { + stdout_broken.store(true, .release); + return; + }; } /// Fast JSON string escaper: batch-copies runs of safe characters via /// appendSlice instead of the per-byte append in mcpj.writeEscaped. diff --git a/src/release_info.zig b/src/release_info.zig index 4e07c33..f5b117d 100644 --- a/src/release_info.zig +++ b/src/release_info.zig @@ -1 +1 @@ -pub const semver = "0.2.5817"; +pub const semver = "0.2.5818"; diff --git a/src/snapshot.zig b/src/snapshot.zig index 8480d30..347c185 100644 --- a/src/snapshot.zig +++ b/src/snapshot.zig @@ -798,7 +798,7 @@ fn loadSnapshotFast( stale_outline.deinit(); } - explorer.indexFileOutlineOnly(path_buf, dc) catch { + explorer.indexFile(path_buf, dc) catch { allocator.free(path_buf); allocator.free(content); continue; @@ -932,7 +932,7 @@ fn isSensitivePath(path: []const u8) bool { // Catch .env, .env.anything; do NOT match .envoy, .envrc, .environment, etc. if (basename.len >= 4 and std.mem.eql(u8, basename[0..4], ".env") and - (basename.len == 4 or basename[4] == '.')) return true; + (basename.len == 4 or basename[4] == '.' 
or basename[4] == '-' or basename[4] == '_')) return true; // Check extensions if (endsWith(basename, ".pem")) return true; diff --git a/src/telemetry.zig b/src/telemetry.zig index 4e33ec5..bd8ea55 100644 --- a/src/telemetry.zig +++ b/src/telemetry.zig @@ -28,6 +28,19 @@ pub const Event = struct { index_size_bytes: u64, startup_time_ms: u64, }, + search_breakdown: struct { + tier0_ns: i64, + tier05_ns: i64, + tier1_ns: i64, + tier2_ns: i64, + tier3_ns: i64, + tier4_ns: i64, + tier5_ns: i64, + rerank_ns: i64, + tier_reached: u8, + candidate_count: u32, + result_count: u32, + }, }; }; @@ -169,6 +182,28 @@ pub const Telemetry = struct { self.record(tc); } + pub fn recordSearchBreakdown(self: *Telemetry, bd: explore.SearchBreakdown) void { + if (!self.enabled) return; + const clamp = struct { + fn f(v: i128) i64 { + return @intCast(@min(v, std.math.maxInt(i64))); + } + }.f; + self.record(.{ .search_breakdown = .{ + .tier0_ns = clamp(bd.tier0_ns), + .tier05_ns = clamp(bd.tier05_ns), + .tier1_ns = clamp(bd.tier1_ns), + .tier2_ns = clamp(bd.tier2_ns), + .tier3_ns = clamp(bd.tier3_ns), + .tier4_ns = clamp(bd.tier4_ns), + .tier5_ns = clamp(bd.tier5_ns), + .rerank_ns = clamp(bd.rerank_ns), + .tier_reached = bd.tier_reached, + .candidate_count = bd.candidate_count, + .result_count = bd.result_count, + } }); + } + pub fn recordCodebaseStats(self: *Telemetry, explorer: *explore.Explorer, startup_time_ms: u64) void { if (!self.enabled) return; @@ -275,6 +310,14 @@ pub const Telemetry = struct { stats.startup_time_ms, }); }, + .search_breakdown => |sb| { + try w.print(",\"event_type\":\"search_breakdown\",\"tier0_ns\":{d},\"tier05_ns\":{d},\"tier1_ns\":{d},\"tier2_ns\":{d},\"tier3_ns\":{d},\"tier4_ns\":{d},\"tier5_ns\":{d},\"rerank_ns\":{d},\"tier_reached\":{d},\"candidates\":{d},\"results\":{d}", .{ + sb.tier0_ns, sb.tier05_ns, sb.tier1_ns, + sb.tier2_ns, sb.tier3_ns, sb.tier4_ns, + sb.tier5_ns, sb.rerank_ns, sb.tier_reached, + sb.candidate_count, sb.result_count, + }); + }, } 
try w.writeAll("}\n"); return w.end; diff --git a/src/test_core.zig b/src/test_core.zig new file mode 100644 index 0000000..94bd30f --- /dev/null +++ b/src/test_core.zig @@ -0,0 +1,437 @@ +const std = @import("std"); +const cio = @import("cio.zig"); +const testing = std.testing; +const io = std.testing.io; +const Store = @import("store.zig").Store; +const ChangeEntry = @import("store.zig").ChangeEntry; +const AgentRegistry = @import("agent.zig").AgentRegistry; +const Config = @import("config.zig").Config; +const edit_mod = @import("edit.zig"); +const explore = @import("explore.zig"); +const Explorer = explore.Explorer; + + +test "store: record and retrieve snapshots" { + var store = Store.init(testing.allocator); + defer store.deinit(); + + const seq1 = try store.recordSnapshot("foo.zig", 100, 0xABC); + const seq2 = try store.recordSnapshot("bar.zig", 200, 0xDEF); + + try testing.expect(seq1 == 1); + try testing.expect(seq2 == 2); + try testing.expect(store.currentSeq() == 2); +} + + +test "store: getLatest returns most recent version" { + var store = Store.init(testing.allocator); + defer store.deinit(); + + _ = try store.recordSnapshot("foo.zig", 100, 0x111); + _ = try store.recordSnapshot("foo.zig", 200, 0x222); + + const latest = store.getLatest("foo.zig").?; + try testing.expect(latest.seq == 2); + try testing.expect(latest.size == 200); + try testing.expect(latest.hash == 0x222); +} + + +test "store: getLatest returns null for unknown file" { + var store = Store.init(testing.allocator); + defer store.deinit(); + + try testing.expect(store.getLatest("nope.zig") == null); +} + + +test "store: changesSince counts correctly" { + var store = Store.init(testing.allocator); + defer store.deinit(); + + _ = try store.recordSnapshot("a.zig", 10, 0); + _ = try store.recordSnapshot("b.zig", 20, 0); + _ = try store.recordSnapshot("c.zig", 30, 0); + + try testing.expect(store.changesSince(0) == 3); + try testing.expect(store.changesSince(1) == 2); + try 
testing.expect(store.changesSince(3) == 0); +} + + +test "store: changesSinceDetailed" { + var store = Store.init(testing.allocator); + defer store.deinit(); + + _ = try store.recordSnapshot("a.zig", 10, 0); + _ = try store.recordSnapshot("b.zig", 20, 0); + _ = try store.recordSnapshot("a.zig", 15, 0); + + const changes = try store.changesSinceDetailed(1, testing.allocator); + defer testing.allocator.free(changes); + + try testing.expect(changes.len == 2); // a.zig and b.zig both changed +} + + +test "store: recordDelete creates tombstone" { + var store = Store.init(testing.allocator); + defer store.deinit(); + + _ = try store.recordSnapshot("del.zig", 50, 0); + _ = try store.recordDelete("del.zig", 0); + + const latest = store.getLatest("del.zig").?; + try testing.expect(latest.op == .tombstone); + try testing.expect(latest.size == 0); +} + + +test "store: getAtCursor" { + var store = Store.init(testing.allocator); + defer store.deinit(); + + _ = try store.recordSnapshot("f.zig", 10, 0x10); + _ = try store.recordSnapshot("f.zig", 20, 0x20); + _ = try store.recordSnapshot("f.zig", 30, 0x30); + + const at1 = store.getAtCursor("f.zig", 1).?; + try testing.expect(at1.size == 10); + + const at2 = store.getAtCursor("f.zig", 2).?; + try testing.expect(at2.size == 20); + + const at3 = store.getAtCursor("f.zig", 99).?; + try testing.expect(at3.size == 30); +} + + +test "store: recordEdit persists diff data to data log" { + var tmp_dir = testing.tmpDir(.{}); + defer tmp_dir.cleanup(); + + var dir_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp_dir.dir.realPathFile(io, ".", &dir_buf); + const dir_path = dir_buf[0..dir_path_len]; + + const log_path = try std.fmt.allocPrint(testing.allocator, "{s}/data.log", .{dir_path}); + defer testing.allocator.free(log_path); + + var store = Store.init(testing.allocator); + defer store.deinit(); + + try store.openDataLog(io, log_path); + + const diff = "replace body"; + _ = try store.recordEdit("foo.zig", 1, 
.replace, 0x1234, diff.len, diff); + + const latest = store.getLatest("foo.zig").?; + try testing.expectEqual(@as(?u64, 0), latest.data_offset); + try testing.expectEqual(@as(u32, diff.len), latest.data_len); + + const log_file = try std.Io.Dir.cwd().openFile(io, log_path, .{}); + defer log_file.close(io); + + var buf: [32]u8 = undefined; + const read_len = try log_file.readPositionalAll(io, buf[0..diff.len], 0); + try testing.expectEqual(diff.len, read_len); + try testing.expectEqualStrings(diff, buf[0..diff.len]); +} + + +test "agent: register and heartbeat" { + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + + const id = try agents.register("test-agent"); + try testing.expect(id == 1); + + agents.heartbeat(id); + // No crash = success +} + + +test "agent: register multiple agents" { + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + + const a = try agents.register("alpha"); + const b = try agents.register("beta"); + try testing.expect(a == 1); + try testing.expect(b == 2); +} + + +test "agent: lock and unlock" { + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + + const id = try agents.register("locker"); + + const got = try agents.tryLock(id, "file.zig", 60_000); + try testing.expect(got == true); + + agents.releaseLock(id, "file.zig"); +} + + +test "agent: lock contention between agents" { + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + + const a = try agents.register("agent-a"); + const b = try agents.register("agent-b"); + + // A locks the file + const got_a = try agents.tryLock(a, "shared.zig", 60_000); + try testing.expect(got_a == true); + + // B should be denied + const got_b = try agents.tryLock(b, "shared.zig", 60_000); + try testing.expect(got_b == false); + + // A releases + agents.releaseLock(a, "shared.zig"); + + // B can now lock + const got_b2 = try agents.tryLock(b, "shared.zig", 60_000); + try testing.expect(got_b2 == 
true); +} + + +test "agent: same-agent relock does not duplicate lock key" { + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + + const id = try agents.register("agent-relock"); + + try testing.expect(try agents.tryLock(id, "shared.zig", 60_000)); + try testing.expect(try agents.tryLock(id, "shared.zig", 60_000)); + + const agent = agents.agents.getPtr(id) orelse return error.TestUnexpectedResult; + try testing.expect(agent.locked_paths.count() == 1); + + agents.releaseLock(id, "shared.zig"); + try testing.expect(agent.locked_paths.count() == 0); +} + + +test "agent: reapStale frees lock keys and clears map" { + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + + const id = try agents.register("agent-stale"); + try testing.expect(try agents.tryLock(id, "a.zig", 60_000)); + try testing.expect(try agents.tryLock(id, "b.zig", 60_000)); + + const agent = agents.agents.getPtr(id) orelse return error.TestUnexpectedResult; + agent.last_seen = 0; + agents.reapStale(0); + + try testing.expect(agent.state == .crashed); + try testing.expect(agent.locked_paths.count() == 0); +} + + +test "issue-411: tryLock grants new locks to a crashed agent" { + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + + const id = try agents.register("zombie"); + + // Force the agent into the crashed state via reapStale. + const a = agents.agents.getPtr(id) orelse return error.TestUnexpectedResult; + a.last_seen = 0; + agents.reapStale(0); + try testing.expectEqual(@as(@TypeOf(a.state), .crashed), a.state); + + // A crashed agent should not be allowed to acquire new advisory locks + // until it heartbeats back to .active. Today tryLock ignores .state and + // happily grants the lock — leaving the registry inconsistent (a + // .crashed agent suddenly holds fresh locks again). 
+ const got = try agents.tryLock(id, "post-crash.zig", 60_000); + try testing.expect(got == false); +} + + +test "issue-401: insert with after=null is a no-op but consumes seq and writes file" { + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + + const rel_path = try std.fmt.allocPrint(testing.allocator, ".zig-cache/tmp/{s}/edit-401.txt", .{tmp.sub_path}); + defer testing.allocator.free(rel_path); + + const original = "line 1\nline 2\nline 3\n"; + var file = try tmp.dir.createFile(io, "edit-401.txt", .{}); + defer file.close(io); + try file.writeStreamingAll(io, original); + + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + const agent_id = try agents.register("issue-401-agent"); + + // insert without after must not silently succeed and must not consume a seq. + const res = edit_mod.applyEdit(io, testing.allocator, &store, &agents, null, .{ + .path = rel_path, + .agent_id = agent_id, + .op = .insert, + .after = null, + .content = "INJECT\n", + }); + // Either explicit error, or — at minimum — must not increment the store seq + // for an operation that did nothing. 
+ if (res) |ok| { + _ = ok; + try testing.expectEqual(@as(u64, 0), store.currentSeq()); + } else |_| { + try testing.expectEqual(@as(u64, 0), store.currentSeq()); + } +} + + +test "issue-404: applyEdit corrupts CRLF line endings into mixed LF/CRLF" { + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + + const rel_path = try std.fmt.allocPrint(testing.allocator, ".zig-cache/tmp/{s}/edit-404.txt", .{tmp.sub_path}); + defer testing.allocator.free(rel_path); + + // Windows-style CRLF original + const original = "alpha\r\nbeta\r\ngamma\r\n"; + var file = try tmp.dir.createFile(io, "edit-404.txt", .{}); + defer file.close(io); + try file.writeStreamingAll(io, original); + + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + const agent_id = try agents.register("issue-404-agent"); + + // Replace line 1 with new content (no trailing newline in replacement). + _ = try edit_mod.applyEdit(io, testing.allocator, &store, &agents, null, .{ + .path = rel_path, + .agent_id = agent_id, + .op = .replace, + .range = .{ 1, 1 }, + .content = "ALPHA", + }); + + const after = try std.Io.Dir.cwd().readFileAlloc(io, rel_path, testing.allocator, .limited(10 * 1024)); + defer testing.allocator.free(after); + + // The original file used CRLF line endings. After a single-line replace + // the file must still be a valid CRLF file: every '\n' must be preceded + // by '\r'. Currently splitScalar on '\n' leaves the '\r' attached to the + // *unchanged* lines (e.g. "beta\r"), and the rejoin uses bare "\n", so + // the new line 1 lacks its CR while the surviving line 2 still has it — + // mixed line endings. 
+ var i: usize = 0; + while (i < after.len) : (i += 1) { + if (after[i] == '\n') { + try testing.expect(i > 0); + try testing.expectEqual(@as(u8, '\r'), after[i - 1]); + } + } +} + + +test "issue-409: replacing whole file with empty content leaves a stray newline" { + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + + const rel_path = try std.fmt.allocPrint(testing.allocator, ".zig-cache/tmp/{s}/edit-409.txt", .{tmp.sub_path}); + defer testing.allocator.free(rel_path); + + // Single-line file with trailing newline. + const original = "abc\n"; + var file = try tmp.dir.createFile(io, "edit-409.txt", .{}); + defer file.close(io); + try file.writeStreamingAll(io, original); + + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + const agent_id = try agents.register("issue-409-agent"); + + // Replace the only line with empty content. The caller's intent is "make + // this file empty" — content has zero bytes. + const result = try edit_mod.applyEdit(io, testing.allocator, &store, &agents, null, .{ + .path = rel_path, + .agent_id = agent_id, + .op = .replace, + .range = .{ 1, 1 }, + .content = "", + }); + + const after = try std.Io.Dir.cwd().readFileAlloc(io, rel_path, testing.allocator, .limited(10 * 1024)); + defer testing.allocator.free(after); + + // Expectation: the file is empty. Currently the file ends up as "\n" + // because applyEdit unconditionally restores the trailing newline that + // existed in the source, even after the replacement reduced the file + // to a single empty line. + try testing.expectEqual(@as(usize, 0), after.len); + try testing.expectEqual(@as(u64, 0), result.new_size); +} + + +test "issue-101: Store.max_versions is configurable (caps per-file history)" { + // Default cap is 100. After setting max_versions = 3, writing 5 versions + // of the same file must leave exactly 3 in-memory. 
+    var store = Store.init(testing.allocator);
+    defer store.deinit();
+
+    store.max_versions = 3;
+
+    // Five snapshots of the same path, each with a distinct hash.
+    _ = try store.recordSnapshot("foo.zig", 10, 0x111);
+    _ = try store.recordSnapshot("foo.zig", 20, 0x222);
+    _ = try store.recordSnapshot("foo.zig", 30, 0x333);
+    _ = try store.recordSnapshot("foo.zig", 40, 0x444);
+    _ = try store.recordSnapshot("foo.zig", 50, 0x555);
+
+    const entry = store.files.get("foo.zig") orelse return error.MissingFile;
+    try testing.expectEqual(@as(usize, 3), entry.versions.items.len);
+    // Oldest two dropped — newest survives. Check both ends of the retained
+    // window so that dropping the wrong versions is caught as well.
+    try testing.expectEqual(@as(u64, 0x333), entry.versions.items[0].hash);
+    try testing.expectEqual(@as(u64, 0x555), entry.versions.items[2].hash);
+}
+
+
+test "issue-102: Explorer.init capacity flows to ContentCache" {
+    // Verifies that the capacity arg to Explorer.init actually sets the
+    // ContentCache capacity — the bug that issue-102 was filed for.
+    var explorer = Explorer.init(testing.allocator, 8);
+    defer explorer.deinit();
+
+    try testing.expectEqual(@as(u32, 8), explorer.contents.capacity);
+}
+
+
+test "issue-101+102: .codedbrc max_cached threads through to ContentCache capacity" {
+    // End-to-end: parse a .codedbrc body, construct Explorer with the parsed
+    // max_cached, verify the ContentCache capacity matches.
+    const body =
+        \\# test config
+        \\max_versions = 7
+        \\max_cached = 32
+        \\
+    ;
+    const cfg = try Config.parse(body);
+    try testing.expectEqual(@as(usize, 7), cfg.max_versions);
+    try testing.expectEqual(@as(u32, 32), cfg.max_cached);
+
+    // Feed the parsed values into the two consumers.
+    var store = Store.init(testing.allocator);
+    defer store.deinit();
+    store.max_versions = cfg.max_versions;
+
+    var explorer = Explorer.init(testing.allocator, cfg.max_cached);
+    defer explorer.deinit();
+
+    // Both consumers must now report the configured values.
+    try testing.expectEqual(@as(usize, 7), store.max_versions);
+    try testing.expectEqual(@as(u32, 32), explorer.contents.capacity);
+}
+
diff --git a/src/test_explore.zig b/src/test_explore.zig
new file mode 100644
index 0000000..3f379cf
--- /dev/null
+++ b/src/test_explore.zig
@@ -0,0 +1,1943 @@
+const std = @import("std");
+const cio = @import("cio.zig");
+const testing = std.testing;
+const io = std.testing.io;
+const Store = @import("store.zig").Store;
+const Explorer = @import("explore.zig").Explorer;
+const SearchResult = @import("explore.zig").SearchResult;
+const WordIndex = @import("index.zig").WordIndex;
+const TrigramIndex = @import("index.zig").TrigramIndex;
+const SparseNgramIndex = @import("index.zig").SparseNgramIndex;
+const WordTokenizer = @import("index.zig").WordTokenizer;
+const splitIdentifier = @import("index.zig").splitIdentifier;
+const explore = @import("explore.zig");
+const extractLines = explore.extractLines;
+const isCommentOrBlank = explore.isCommentOrBlank;
+const Language = explore.Language;
+const SymbolKind = explore.SymbolKind;
+const DependencyGraph = explore.DependencyGraph;
+const SymbolLocation = explore.SymbolLocation;
+const watcher = @import("watcher.zig");
+const git_mod = @import("git.zig");
+const snapshot_mod = @import("snapshot.zig");
+
+
+test "word tokenizer" {
+    var tok = WordTokenizer{ .buf = "pub fn main() !void {" };
+    const w1 = tok.next().?;
+    try testing.expectEqualStrings("pub", w1);
+    const w2 = tok.next().?;
+    try testing.expectEqualStrings("fn", w2);
+    const w3 =
tok.next().?;
+    try testing.expectEqualStrings("main", w3);
+    const w4 = tok.next().?;
+    try testing.expectEqualStrings("void", w4);
+    try testing.expect(tok.next() == null);
+}
+
+
+test "word index: index and search" {
+    // Indexes one file, then checks the hit path and line numbers.
+    var wi = WordIndex.init(testing.allocator);
+    defer wi.deinit();
+
+    try wi.indexFile("src/foo.zig", "pub fn hello() void {\n const x = 42;\n}\n");
+
+    const hits = wi.search("hello");
+    try testing.expect(hits.len > 0);
+    try testing.expectEqualStrings("src/foo.zig", wi.hitPath(hits[0]));
+    try testing.expect(hits[0].line_num == 1);
+
+    // "x" is only 1 char, should be skipped
+    const x_hits = wi.search("x");
+    try testing.expectEqual(@as(usize, 0), x_hits.len);
+
+    // "const" should be found
+    const const_hits = wi.search("const");
+    try testing.expect(const_hits.len > 0);
+    try testing.expect(const_hits[0].line_num == 2);
+}
+
+
+test "word index: re-index clears old entries" {
+    // Indexing the same path twice must drop the words of the first version.
+    var wi = WordIndex.init(testing.allocator);
+    defer wi.deinit();
+
+    try wi.indexFile("f.zig", "fn old_func() void {}");
+    try testing.expect(wi.search("old_func").len > 0);
+
+    try wi.indexFile("f.zig", "fn new_func() void {}");
+    try testing.expectEqual(@as(usize, 0), wi.search("old_func").len);
+    try testing.expect(wi.search("new_func").len > 0);
+}
+
+
+test "word index: removeFile" {
+    // After removeFile the file's words must no longer be searchable.
+    var wi = WordIndex.init(testing.allocator);
+    defer wi.deinit();
+
+    try wi.indexFile("a.zig", "fn hello() void {}");
+    try testing.expect(wi.search("hello").len > 0);
+
+    wi.removeFile("a.zig");
+    try testing.expectEqual(@as(usize, 0), wi.search("hello").len);
+}
+
+
+test "word index: deduped search" {
+    var wi = WordIndex.init(testing.allocator);
+    defer wi.deinit();
+
+    // "hello" appears twice on the same line — should dedup
+    try wi.indexFile("f.zig", "hello hello world");
+
+    // The returned slice is freed here by the test.
+    const hits = try wi.searchDeduped("hello", testing.allocator);
+    defer testing.allocator.free(hits);
+    try testing.expectEqual(@as(usize, 1), hits.len);
+}
+
+
+test "explorer: sparse ngram index integrated into searchContent" {
+    var arena =
std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
+
+    try explorer.indexFile("src/alpha.zig", "pub fn processRequest(req: *Request) void {}");
+    try explorer.indexFile("src/beta.zig", "pub fn handleResponse(res: *Response) void {}");
+
+    // Only src/alpha.zig contains the query, so exactly one result is expected.
+    // Results are allocated from the arena, which is released by the defer above.
+    const results = try explorer.searchContent("processRequest", arena.allocator(), 10);
+    try testing.expectEqual(@as(usize, 1), results.len);
+    try testing.expectEqualStrings("src/alpha.zig", results[0].path);
+}
+
+
+test "explorer: searchContent finds query embedded in longer identifier" {
+    // Verify that searchContent correctly finds files whose content contains
+    // the query string. The sparse index (sliding-window) and trigram index
+    // are both used; the intersection narrows results without false negatives.
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
+
+    // "alpha.zig" content contains "record"; "beta.zig" does not.
+    try explorer.indexFile("alpha.zig", "const record_count: usize = 0;");
+    try explorer.indexFile("beta.zig", "const unrelated_data: usize = 0;");
+
+    const results = try explorer.searchContent("record", arena.allocator(), 10);
+    var found = false;
+    for (results) |r| if (std.mem.eql(u8, r.path, "alpha.zig")) {
+        found = true;
+    };
+    try testing.expect(found);
+}
+
+
+test "explorer: index file and get outline" {
+    // Three source lines, each declaring one symbol.
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
+
+    try explorer.indexFile("test.zig",
+        \\const std = @import("std");
+        \\pub fn main() !void {}
+        \\pub const Store = struct {};
+    );
+
+    // The outline copy is allocated with testing.allocator and released by deinit.
+    var outline = (try explorer.getOutline("test.zig", testing.allocator)) orelse return error.TestUnexpectedResult;
+    defer outline.deinit();
+    try testing.expect(outline.line_count == 3);
+    try testing.expectEqual(@as(usize, 3), outline.symbols.items.len);
+}
+
+
+test "explorer: findSymbol" {
+    // The symbol lives only in a.zig, so the lookup must resolve there.
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
+
+    try explorer.indexFile("a.zig", "pub fn alpha() void {}");
+    try explorer.indexFile("b.zig", "pub fn beta() void {}");
+
+    // A missing symbol fails the test with the same error `expect` would raise.
+    const symbol = (try explorer.findSymbol("alpha", arena.allocator())) orelse return error.TestUnexpectedResult;
+    try testing.expectEqualStrings("a.zig", symbol.path);
+}
+
+
+test "explorer: findAllSymbols returns multiple" {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
+
+    try explorer.indexFile("a.zig", "const Store = @import(\"store.zig\").Store;");
+    try explorer.indexFile("b.zig", "pub const Store = struct {};");
+
+    const results = try explorer.findAllSymbols("Store", arena.allocator());
+    defer arena.allocator().free(results);
+    try
testing.expect(results.len == 2);
+}
+
+
+test "explorer: searchContent with trigram acceleration" {
+    // Only store.zig contains the query, on its first line.
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
+
+    try explorer.indexFile("store.zig", "pub fn recordSnapshot(self: *Store) void {}\npub fn init() void {}");
+    try explorer.indexFile("agent.zig", "pub fn register(self: *Agent) void {}");
+
+    // Results come from testing.allocator, so each path/line_text and the
+    // slice itself are freed here.
+    const results = try explorer.searchContent("recordSnapshot", testing.allocator, 50);
+    defer {
+        for (results) |r| {
+            testing.allocator.free(r.path);
+            testing.allocator.free(r.line_text);
+        }
+        testing.allocator.free(results);
+    }
+
+    try testing.expectEqual(@as(usize, 1), results.len);
+    try testing.expectEqualStrings("store.zig", results[0].path);
+    try testing.expect(results[0].line_num == 1);
+}
+
+
+test "explorer: searchWord via inverted index" {
+    // A word hit must resolve back to the indexed file's path.
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
+
+    try explorer.indexFile("math.zig", "pub fn add(a: i32, b: i32) i32 { return a + b; }");
+
+    const hits = try explorer.searchWord("add", testing.allocator);
+    defer testing.allocator.free(hits);
+    try testing.expect(hits.len > 0);
+    try testing.expectEqualStrings("math.zig", explorer.word_index.hitPath(hits[0]));
+}
+
+
+test "explorer: removeFile cleans up everything" {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
+
+    try explorer.indexFile("gone.zig", "pub fn doStuff() void {}");
+    var before_remove = (try explorer.getOutline("gone.zig", testing.allocator)) orelse return error.TestUnexpectedResult;
+    before_remove.deinit();
+
+    explorer.removeFile("gone.zig");
+    try testing.expect((try explorer.getOutline("gone.zig", testing.allocator)) == null);
+    try
testing.expect((try explorer.findSymbol("doStuff", testing.allocator)) == null);
+}
+
+
+test "explorer: python parser" {
+    // Python source with one import, one class and one method.
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
+
+    try explorer.indexFile("app.py",
+        \\import os
+        \\class Server:
+        \\ def handle(self):
+        \\ pass
+    );
+
+    var outline = (try explorer.getOutline("app.py", testing.allocator)) orelse return error.TestUnexpectedResult;
+    defer outline.deinit();
+    try testing.expectEqual(@as(usize, 3), outline.symbols.items.len); // import, class, def
+}
+
+
+test "explorer: typescript parser" {
+    // TypeScript source with an import, an exported function and an exported
+    // const; at least three symbols are expected.
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
+
+    try explorer.indexFile("index.ts",
+        \\import { foo } from './foo';
+        \\export function handleRequest() {}
+        \\export const PORT = 3000;
+    );
+
+    var outline = (try explorer.getOutline("index.ts", testing.allocator)) orelse return error.TestUnexpectedResult;
+    defer outline.deinit();
+    try testing.expect(outline.symbols.items.len >= 3);
+}
+
+
+test "explorer: reindex OOM keeps prior outline reachable" {
+    // Use a real allocator for the explorer so the first indexFile always succeeds.
+    // We can't use FailingAllocator for the whole explorer because deinit would crash.
+    var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
+    defer explorer.deinit();
+
+    try explorer.indexFile("oom.zig", "pub fn oldName() void {}");
+
+    // Now try re-indexing the same file. Since the explorer uses testing.allocator,
+    // we can't make individual internal allocs fail without a custom allocator wrapper.
+    // Instead, verify the errdefer rollback logic by confirming a successful reindex
+    // replaces the old outline, and that data is consistent.
+    try explorer.indexFile("oom.zig", "pub fn newName() void {}\nconst VALUE = 1;");
+
+    var outline = (try explorer.getOutline("oom.zig", testing.allocator)) orelse return error.TestUnexpectedResult;
+    defer outline.deinit();
+    try testing.expectEqualStrings("oom.zig", outline.path);
+    try testing.expectEqual(@as(usize, 2), outline.symbols.items.len); // newName + VALUE
+
+    // Old content should be replaced
+    const old_results = try explorer.searchContent("oldName", testing.allocator, 10);
+    defer {
+        for (old_results) |r| {
+            testing.allocator.free(r.path);
+            testing.allocator.free(r.line_text);
+        }
+        testing.allocator.free(old_results);
+    }
+    try testing.expectEqual(@as(usize, 0), old_results.len);
+
+    // New content should be searchable
+    const new_results = try explorer.searchContent("newName", testing.allocator, 10);
+    defer {
+        for (new_results) |r| {
+            testing.allocator.free(r.path);
+            testing.allocator.free(r.line_text);
+        }
+        testing.allocator.free(new_results);
+    }
+    try testing.expectEqual(@as(usize, 1), new_results.len);
+}
+
+
+test "explorer: getOutline clone OOM preserves source outline" {
+    var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
+    defer explorer.deinit();
+
+    try explorer.indexFile(
+        "clone-oom.zig",
+        "pub fn keepA() void {}\nconst dep = @import(\"dep.zig\");\npub const Value = 1;",
+    );
+
+    // Retry getOutline with a FailingAllocator, raising fail_index each round,
+    // until one call returns OutOfMemory (at most 512 attempts).
+    var induced_oom = false;
+    var fail_index: usize = 0;
+    while (fail_index < 512 and !induced_oom) : (fail_index += 1) {
+        var failing = std.testing.FailingAllocator.init(testing.allocator, .{ .fail_index = fail_index });
+        const result = explorer.getOutline("clone-oom.zig", failing.allocator());
+
+        if (result) |maybe_outline| {
+            var outline = maybe_outline orelse return error.TestUnexpectedResult;
+            outline.deinit();
+            continue;
+        } else |err| {
+            if (err != error.OutOfMemory) return err;
+            induced_oom = true;
+
+            var stable = (try explorer.getOutline("clone-oom.zig", testing.allocator)) orelse return error.TestUnexpectedResult;
+            defer stable.deinit();
+            try testing.expect(stable.symbols.items.len >= 2);
+            try testing.expectEqual(@as(usize, 1), stable.imports.items.len);
+            try testing.expectEqualStrings("dep.zig", stable.imports.items[0]);
+        }
+    }
+
+    try testing.expect(induced_oom);
+}
+
+
+test "explorer: outline copy survives source removal" {
+    // The outline returned by getOutline must stay usable after the file is
+    // removed from the explorer.
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
+
+    try explorer.indexFile("persist.zig", "pub fn keep() void {}");
+    var outline = (try explorer.getOutline("persist.zig", testing.allocator)) orelse return error.TestUnexpectedResult;
+    defer outline.deinit();
+
+    explorer.removeFile("persist.zig");
+
+    try testing.expectEqualStrings("persist.zig", outline.path);
+    try testing.expect(outline.symbols.items.len > 0);
+}
+
+
+test "explorer: removeFile frees owned map key" {
+    // The explorer runs on testing.allocator, so this index/remove churn is
+    // under the test allocator's leak check.
+    var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
+    defer explorer.deinit();
+
+    for (0..128) |i| {
+        // The path lives in a stack buffer that is reused every iteration.
+        var path_buf: [48]u8 = undefined;
+        const path = try std.fmt.bufPrint(&path_buf, "tmp/remove-{d}.zig", .{i});
+        try explorer.indexFile(path, "pub fn x() void {}");
+        explorer.removeFile(path);
+    }
+
+    try testing.expect(explorer.outlines.count() == 0);
+    try testing.expect(explorer.contents.count() == 0);
+    try testing.expect(explorer.dep_graph.count() == 0);
+}
+
+
+test "word index: removeFile prunes empty buckets" {
+    var wi = WordIndex.init(testing.allocator);
+    defer wi.deinit();
+
+    try wi.indexFile("a.zig", "uniqueWordOnlyHere anotherUnique");
+    // Words should exist
+    try testing.expect(wi.search("uniqueWordOnlyHere").len > 0);
+
+    wi.removeFile("a.zig");
+    // After removal, buckets should be pruned (not just emptied)
+    try testing.expectEqual(@as(usize, 0), wi.search("uniqueWordOnlyHere").len);
+}
+
+
+test "extractLines: basic range with line numbers" {
+    const content = "line1\nline2\nline3\nline4\nline5";
+    const result =
try extractLines(content, 2, 4, true, false, .unknown, testing.allocator);
+    defer testing.allocator.free(result);
+    try testing.expect(std.mem.indexOf(u8, result, " 2 | line2") != null);
+    try testing.expect(std.mem.indexOf(u8, result, " 3 | line3") != null);
+    try testing.expect(std.mem.indexOf(u8, result, " 4 | line4") != null);
+    try testing.expect(std.mem.indexOf(u8, result, "line1") == null);
+    try testing.expect(std.mem.indexOf(u8, result, "line5") == null);
+}
+
+
+test "extractLines: start beyond file returns empty" {
+    // The content has two lines; asking for lines 10..20 must yield nothing.
+    const content = "line1\nline2";
+    const result = try extractLines(content, 10, 20, true, false, .unknown, testing.allocator);
+    defer testing.allocator.free(result);
+    try testing.expectEqual(@as(usize, 0), result.len);
+}
+
+
+test "extractLines: compact skips comments and blanks" {
+    const content = "fn main() void {}\n// this is a comment\n\n return 0;\n}";
+    const result = try extractLines(content, 1, 5, false, true, .zig, testing.allocator);
+    defer testing.allocator.free(result);
+    // Should contain code lines but not the comment or blank line
+    try testing.expect(std.mem.indexOf(u8, result, "fn main") != null);
+    try testing.expect(std.mem.indexOf(u8, result, "// this is a comment") == null);
+    try testing.expect(std.mem.indexOf(u8, result, "return 0") != null);
+}
+
+
+test "isCommentOrBlank: detects language-specific comments" {
+    // Comment markers are recognised per language; blank and whitespace-only
+    // lines count as strippable too.
+    try testing.expect(isCommentOrBlank(" // zig comment", .zig));
+    try testing.expect(isCommentOrBlank(" # python comment", .python));
+    try testing.expect(isCommentOrBlank(" /* c comment */", .c));
+    try testing.expect(isCommentOrBlank(" * continuation", .javascript));
+    try testing.expect(isCommentOrBlank(" ", .zig));
+    try testing.expect(isCommentOrBlank("", .zig));
+    try testing.expect(!isCommentOrBlank(" const x = 1;", .zig));
+    try testing.expect(!isCommentOrBlank(" x = 1", .python));
+    // unknown language: never strips
+    try testing.expect(!isCommentOrBlank("// comment", .unknown));
+}
+
+
+test "explorer:
getSymbolBody returns source lines" {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    var exp = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
+
+    try exp.indexFile("test.zig", "const std = @import(\"std\");\npub fn main() !void {}\npub const Store = struct {};");
+
+    // Lines 2..2 select only the `pub fn main` line.
+    const body = try exp.getSymbolBody("test.zig", 2, 2, testing.allocator);
+    if (body) |b| {
+        defer testing.allocator.free(b);
+        try testing.expect(std.mem.indexOf(u8, b, "pub fn main") != null);
+    } else {
+        return error.TestUnexpectedResult;
+    }
+}
+
+
+test "explorer: getSymbolBody returns null for unknown file" {
+    // Nothing has been indexed, so the lookup must yield null rather than an error.
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    var exp = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
+
+    const body = try exp.getSymbolBody("nonexistent.zig", 1, 5, testing.allocator);
+    try testing.expect(body == null);
+}
+
+
+test "explorer: searchContentWithScope annotates results" {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    var exp = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
+
+    // Use content where the search match line has no symbol definition itself
+    try exp.indexFile("auth.zig", "pub fn handleAuth() void {\n validate(token);\n}");
+
+    // Each result's line_text, path and optional scope_name are freed here.
+    const results = try exp.searchContentWithScope("validate", testing.allocator, 50);
+    defer {
+        for (results) |r| {
+            testing.allocator.free(r.line_text);
+            testing.allocator.free(r.path);
+            if (r.scope_name) |n| testing.allocator.free(n);
+        }
+        testing.allocator.free(results);
+    }
+
+    try testing.expectEqual(@as(usize, 1), results.len);
+    try testing.expectEqualStrings("auth.zig", results[0].path);
+    try testing.expect(results[0].line_num == 2);
+    // Should have scope annotation — nearest preceding symbol is handleAuth
+    try testing.expect(results[0].scope_name != null);
+    try testing.expectEqualStrings("handleAuth", results[0].scope_name.?);
+}
+
+
+test "explorer: searchContentWithScope no scope for standalone line" {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    var exp = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
+
+    // Content with no symbols — scope should be null
+    try exp.indexFile("data.txt", "hello world\nfoo bar");
+
+    const results = try exp.searchContentWithScope("hello", testing.allocator, 50);
+    defer {
+        for (results) |r| {
+            testing.allocator.free(r.line_text);
+            testing.allocator.free(r.path);
+            if (r.scope_name) |n| testing.allocator.free(n);
+        }
+        testing.allocator.free(results);
+    }
+
+    try testing.expectEqual(@as(usize, 1), results.len);
+    try testing.expect(results[0].scope_name == null);
+}
+
+
+test "content hash: Wyhash produces consistent hash" {
+    // Same seed and same bytes must give the same 64-bit hash.
+    const content = "pub fn main() void {}";
+    const hash1 = std.hash.Wyhash.hash(0, content);
+    const hash2 = std.hash.Wyhash.hash(0, content);
+    try testing.expectEqual(hash1, hash2);
+    // Different content produces different hash
+    const hash3 = std.hash.Wyhash.hash(0, "different content");
+    try testing.expect(hash1 != hash3);
+}
+
+
+test "detectLanguage: public access and correct detection" {
+    // detectLanguage is reached through the `explore` namespace.
+    try testing.expect(explore.detectLanguage("src/main.zig") == .zig);
+    try testing.expect(explore.detectLanguage("app.py") == .python);
+    try testing.expect(explore.detectLanguage("index.ts") == .typescript);
+    try testing.expect(explore.detectLanguage("style.css") == .css);
+}
+
+
+test "extractLines: without line numbers" {
+    // With line numbers off, every selected line is emitted followed by '\n',
+    // including the last one.
+    const content = "alpha\nbeta\ngamma";
+    const result = try extractLines(content, 1, 3, false, false, .unknown, testing.allocator);
+    defer testing.allocator.free(result);
+    try testing.expectEqualStrings("alpha\nbeta\ngamma\n", result);
+}
+
+
+test "extractLines: start only reads to EOF" {
+    const content = "a\nb\nc\nd\ne";
+    const result = try extractLines(content, 3, std.math.maxInt(u32), true, false, .unknown, testing.allocator);
+    defer
testing.allocator.free(result);
+    try testing.expect(std.mem.indexOf(u8, result, " 3 | c") != null);
+    try testing.expect(std.mem.indexOf(u8, result, " 4 | d") != null);
+    try testing.expect(std.mem.indexOf(u8, result, " 5 | e") != null);
+    try testing.expect(std.mem.indexOf(u8, result, "| a") == null);
+    try testing.expect(std.mem.indexOf(u8, result, "| b") == null);
+}
+
+
+test "extractLines: end beyond file clamps to EOF" {
+    const content = "x\ny\nz";
+    const result = try extractLines(content, 2, 999, true, false, .unknown, testing.allocator);
+    defer testing.allocator.free(result);
+    try testing.expect(std.mem.indexOf(u8, result, " 2 | y") != null);
+    try testing.expect(std.mem.indexOf(u8, result, " 3 | z") != null);
+    // No crash, no garbage — just the available lines
+    try testing.expectEqual(@as(usize, 2), std.mem.count(u8, result, "\n"));
+}
+
+
+test "extractLines: single line range (start == end)" {
+    // A one-line range must produce exactly one newline-terminated line.
+    const content = "one\ntwo\nthree";
+    const result = try extractLines(content, 2, 2, true, false, .unknown, testing.allocator);
+    defer testing.allocator.free(result);
+    try testing.expect(std.mem.indexOf(u8, result, " 2 | two") != null);
+    try testing.expectEqual(@as(usize, 1), std.mem.count(u8, result, "\n"));
+}
+
+
+test "extractLines: empty content returns single empty line" {
+    const result = try extractLines("", 1, 10, true, false, .unknown, testing.allocator);
+    defer testing.allocator.free(result);
+    // Empty string splits to one empty line, which is line 1
+    try testing.expect(result.len > 0);
+}
+
+
+test "extractLines: compact with Python comments" {
+    const content = "# comment\nimport os\n\ndef hello():\n # inline comment\n print('hi')";
+    const result = try extractLines(content, 1, 6, false, true, .python, testing.allocator);
+    defer testing.allocator.free(result);
+    try testing.expect(std.mem.indexOf(u8, result, "# comment") == null);
+    try testing.expect(std.mem.indexOf(u8, result, "# inline comment") == null);
+    try testing.expect(std.mem.indexOf(u8, result, "import
os") != null);
+    try testing.expect(std.mem.indexOf(u8, result, "def hello") != null);
+    try testing.expect(std.mem.indexOf(u8, result, "print('hi')") != null);
+}
+
+
+test "extractLines: compact with JS/TS comments" {
+    // In compact mode for .typescript, `//`, `/* */` and leading-`*` lines
+    // are dropped while code lines stay.
+    const content = "// header\nconst x = 1;\n/* block */\n* star line\nexport default x;";
+    const result = try extractLines(content, 1, 5, false, true, .typescript, testing.allocator);
+    defer testing.allocator.free(result);
+    try testing.expect(std.mem.indexOf(u8, result, "// header") == null);
+    try testing.expect(std.mem.indexOf(u8, result, "/* block */") == null);
+    try testing.expect(std.mem.indexOf(u8, result, "* star line") == null);
+    try testing.expect(std.mem.indexOf(u8, result, "const x = 1;") != null);
+    try testing.expect(std.mem.indexOf(u8, result, "export default x;") != null);
+}
+
+
+test "isCommentOrBlank: rust double-slash" {
+    // One comment line and one code line for .rust.
+    try testing.expect(isCommentOrBlank(" // rust comment", .rust));
+    try testing.expect(!isCommentOrBlank(" let x = 1;", .rust));
+}
+
+
+test "isCommentOrBlank: go double-slash" {
+    // One comment line and one code line for .go_lang.
+    try testing.expect(isCommentOrBlank(" // go comment", .go_lang));
+    try testing.expect(!isCommentOrBlank(" func main() {", .go_lang));
+}
+
+
+test "isCommentOrBlank: dart comments" {
+    // Line and block comments are detected; a class declaration is not.
+    try testing.expect(isCommentOrBlank(" // dart comment", .dart));
+    try testing.expect(isCommentOrBlank(" /* dart block comment */", .dart));
+    try testing.expect(!isCommentOrBlank(" class WidgetBuilder {}", .dart));
+}
+
+
+test "isCommentOrBlank: cpp block and line comments" {
+    // Covers `//`, `/* */` and a leading-`*` continuation line.
+    try testing.expect(isCommentOrBlank(" // cpp line comment", .cpp));
+    try testing.expect(isCommentOrBlank(" /* cpp block comment */", .cpp));
+    try testing.expect(isCommentOrBlank(" * continued block comment", .cpp));
+    try testing.expect(!isCommentOrBlank(" int x = 0;", .cpp));
+}
+
+
+test "isCommentOrBlank: detected extension language comments" {
+    try testing.expect(isCommentOrBlank(" // java line comment", .java));
+    try testing.expect(isCommentOrBlank(" // kotlin
line comment", .kotlin));
+    // NOTE(review): the svelte/vue/astro inputs below are whitespace-only, so
+    // they exercise the blank-line case rather than any markup comment
+    // syntax — confirm that is the intent.
+    try testing.expect(isCommentOrBlank(" ", .svelte));
+    try testing.expect(isCommentOrBlank(" ", .vue));
+    try testing.expect(isCommentOrBlank(" ", .astro));
+    try testing.expect(isCommentOrBlank(" # shell comment", .shell));
+    try testing.expect(isCommentOrBlank(" /* css block comment */", .css));
+    try testing.expect(isCommentOrBlank(" // scss line comment", .scss));
+    try testing.expect(isCommentOrBlank(" -- sql comment", .sql));
+    try testing.expect(isCommentOrBlank(" // proto comment", .protobuf));
+    try testing.expect(isCommentOrBlank(" ! fortran comment", .fortran));
+    try testing.expect(isCommentOrBlank(" ; llvm ir comment", .llvm_ir));
+    try testing.expect(isCommentOrBlank(" // mlir comment", .mlir));
+    try testing.expect(isCommentOrBlank(" // tablegen comment", .tablegen));
+    // A SQL statement containing `*` must not be mistaken for a comment.
+    try testing.expect(!isCommentOrBlank(" SELECT * FROM users;", .sql));
+}
+
+
+test "isCommentOrBlank: tabs and mixed whitespace" {
+    // Leading tabs are skipped like spaces; tab-only lines count as blank.
+    try testing.expect(isCommentOrBlank("\t\t// tabbed comment", .zig));
+    try testing.expect(isCommentOrBlank(" \t \t ", .zig));
+    try testing.expect(isCommentOrBlank("\t", .python));
+}
+
+
+test "isCommentOrBlank: markdown and json never strip" {
+    // For these formats, lines starting with `#` or `//` are kept as content.
+    try testing.expect(!isCommentOrBlank("# heading", .markdown));
+    try testing.expect(!isCommentOrBlank("// not a comment in json", .json));
+    try testing.expect(!isCommentOrBlank("# not a comment in yaml", .yaml));
+}
+
+
+test "explorer: getSymbolBody multi-line range" {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    var exp = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
+
+    const content = "line1\nline2\nline3\nline4\nline5";
+    try exp.indexFile("multi.zig", content);
+
+    // Lines 2..4 are requested; lines 1 and 5 must be absent from the body.
+    const body = try exp.getSymbolBody("multi.zig", 2, 4, testing.allocator);
+    if (body) |b| {
+        defer testing.allocator.free(b);
+        try testing.expect(std.mem.indexOf(u8, b, "line2") != null);
+        try testing.expect(std.mem.indexOf(u8, b, "line3") !=
null);
+        try testing.expect(std.mem.indexOf(u8, b, "line4") != null);
+        try testing.expect(std.mem.indexOf(u8, b, "line1") == null);
+        try testing.expect(std.mem.indexOf(u8, b, "line5") == null);
+    } else {
+        return error.TestUnexpectedResult;
+    }
+}
+
+
+test "explorer: getSymbolBody range beyond file length" {
+    // The file has two lines; a 1..100 request must still return both.
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    var exp = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
+
+    try exp.indexFile("short.zig", "only\ntwo");
+    const body = try exp.getSymbolBody("short.zig", 1, 100, testing.allocator);
+    if (body) |b| {
+        defer testing.allocator.free(b);
+        try testing.expect(std.mem.indexOf(u8, b, "only") != null);
+        try testing.expect(std.mem.indexOf(u8, b, "two") != null);
+    } else {
+        return error.TestUnexpectedResult;
+    }
+}
+
+
+test "explorer: searchContentWithScope across multiple files" {
+    // Both files call doWork() on line 2 inside a function, so two scoped
+    // hits are expected.
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    var exp = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
+
+    try exp.indexFile("a.zig", "pub fn foo() void {\n doWork();\n}");
+    try exp.indexFile("b.zig", "pub fn bar() void {\n doWork();\n}");
+
+    const results = try exp.searchContentWithScope("doWork", testing.allocator, 50);
+    defer {
+        for (results) |r| {
+            testing.allocator.free(r.line_text);
+            testing.allocator.free(r.path);
+            if (r.scope_name) |n| testing.allocator.free(n);
+        }
+        testing.allocator.free(results);
+    }
+
+    try testing.expectEqual(@as(usize, 2), results.len);
+    for (results) |r| {
+        try testing.expect(r.scope_name != null);
+        try testing.expect(r.line_num == 2);
+    }
+}
+
+
+test "explorer: searchContentWithScope respects max_results" {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    var exp = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
+
+    try exp.indexFile("many.zig", "pub fn a() void {\n target();\n target();\n target();\n
target();\n}");
+
+    // Four matching lines exist, but the cap passed here is 2.
+    const results = try exp.searchContentWithScope("target", testing.allocator, 2);
+    defer {
+        for (results) |r| {
+            testing.allocator.free(r.line_text);
+            testing.allocator.free(r.path);
+            if (r.scope_name) |n| testing.allocator.free(n);
+        }
+        testing.allocator.free(results);
+    }
+
+    try testing.expectEqual(@as(usize, 2), results.len);
+}
+
+
+test "explorer: searchContentWithScope no results for missing query" {
+    // A query that appears nowhere must give an empty, still freeable, slice.
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    var exp = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
+
+    try exp.indexFile("empty.zig", "pub fn main() void {}");
+
+    const results = try exp.searchContentWithScope("nonexistent_xyz", testing.allocator, 50);
+    defer {
+        for (results) |r| {
+            testing.allocator.free(r.line_text);
+            testing.allocator.free(r.path);
+            if (r.scope_name) |n| testing.allocator.free(n);
+        }
+        testing.allocator.free(results);
+    }
+
+    try testing.expectEqual(@as(usize, 0), results.len);
+}
+
+
+test "content hash: format as hex string" {
+    const content = "hello world";
+    const hash = std.hash.Wyhash.hash(0, content);
+    // 16 bytes hold any u64 in lowercase hex; propagate a formatting error
+    // as a test failure instead of asserting it away.
+    var buf: [16]u8 = undefined;
+    const hex = try std.fmt.bufPrint(&buf, "{x}", .{hash});
+    for (hex) |c| {
+        try testing.expect((c >= '0' and c <= '9') or (c >= 'a' and c <= 'f'));
+    }
+    // Consistent on same content
+    const hash2 = std.hash.Wyhash.hash(0, content);
+    var buf2: [16]u8 = undefined;
+    const hex2 = try std.fmt.bufPrint(&buf2, "{x}", .{hash2});
+    try testing.expectEqualStrings(hex, hex2);
+}
+
+
+test "content hash: empty content hashes consistently" {
+    // Hashing the empty string twice with the same seed must agree.
+    const h1 = std.hash.Wyhash.hash(0, "");
+    const h2 = std.hash.Wyhash.hash(0, "");
+    try testing.expectEqual(h1, h2);
+}
+
+
+test "detectLanguage: all supported extensions" {
+    try testing.expect(explore.detectLanguage("main.zig") == .zig);
+    try testing.expect(explore.detectLanguage("lib.c") == .c);
+    try testing.expect(explore.detectLanguage("util.h") == .c);
+    try
testing.expect(explore.detectLanguage("app.cpp") == .cpp); + try testing.expect(explore.detectLanguage("app.hpp") == .cpp); + try testing.expect(explore.detectLanguage("app.cc") == .cpp); + try testing.expect(explore.detectLanguage("app.hh") == .cpp); + try testing.expect(explore.detectLanguage("app.cxx") == .cpp); + try testing.expect(explore.detectLanguage("app.hxx") == .cpp); + try testing.expect(explore.detectLanguage("bridge.mm") == .cpp); + try testing.expect(explore.detectLanguage("script.py") == .python); + try testing.expect(explore.detectLanguage("app.js") == .javascript); + try testing.expect(explore.detectLanguage("comp.jsx") == .javascript); + try testing.expect(explore.detectLanguage("app.ts") == .typescript); + try testing.expect(explore.detectLanguage("comp.tsx") == .typescript); + try testing.expect(explore.detectLanguage("main.rs") == .rust); + try testing.expect(explore.detectLanguage("main.go") == .go_lang); + try testing.expect(explore.detectLanguage("app.dart") == .dart); + try testing.expect(explore.detectLanguage("README.md") == .markdown); + try testing.expect(explore.detectLanguage("pkg.json") == .json); + try testing.expect(explore.detectLanguage("config.yaml") == .yaml); + try testing.expect(explore.detectLanguage("config.yml") == .yaml); + try testing.expect(explore.detectLanguage("Main.java") == .java); + try testing.expect(explore.detectLanguage("App.kt") == .kotlin); + try testing.expect(explore.detectLanguage("Widget.svelte") == .svelte); + try testing.expect(explore.detectLanguage("Widget.vue") == .vue); + try testing.expect(explore.detectLanguage("Page.astro") == .astro); + try testing.expect(explore.detectLanguage("bootstrap.sh") == .shell); + try testing.expect(explore.detectLanguage("styles.css") == .css); + try testing.expect(explore.detectLanguage("styles.scss") == .scss); + try testing.expect(explore.detectLanguage("schema.sql") == .sql); + try testing.expect(explore.detectLanguage("service.proto") == .protobuf); + try 
testing.expect(explore.detectLanguage("solver.f90") == .fortran); + try testing.expect(explore.detectLanguage("module.ll") == .llvm_ir); + try testing.expect(explore.detectLanguage("dialect.mlir") == .mlir); + try testing.expect(explore.detectLanguage("records.td") == .tablegen); + try testing.expect(explore.detectLanguage("Makefile") == .unknown); + try testing.expect(explore.detectLanguage("no_ext") == .unknown); +} + + +test "explorer: getSymbolBody with line number format" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var exp = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try exp.indexFile("fmt.zig", "const a = 1;\npub fn format() void {\n write();\n}\nconst b = 2;"); + + const body = try exp.getSymbolBody("fmt.zig", 2, 4, testing.allocator); + if (body) |b| { + defer testing.allocator.free(b); + try testing.expect(std.mem.indexOf(u8, b, " 2 |") != null); + try testing.expect(std.mem.indexOf(u8, b, " 3 |") != null); + try testing.expect(std.mem.indexOf(u8, b, " 4 |") != null); + try testing.expect(std.mem.indexOf(u8, b, "const a") == null); + try testing.expect(std.mem.indexOf(u8, b, "const b") == null); + } else { + return error.TestUnexpectedResult; + } +} + + +test "extractLines: compact preserves brace-only lines" { + const content = "fn main() void {\n // comment\n doWork();\n}"; + const result = try extractLines(content, 1, 4, false, true, .zig, testing.allocator); + defer testing.allocator.free(result); + try testing.expect(std.mem.indexOf(u8, result, "fn main") != null); + try testing.expect(std.mem.indexOf(u8, result, "}") != null); + try testing.expect(std.mem.indexOf(u8, result, "doWork") != null); + try testing.expect(std.mem.indexOf(u8, result, "// comment") == null); +} + + +test "extractLines: compact on all-comment file returns empty" { + const content = "// comment 1\n// comment 2\n// comment 3"; + const result = try extractLines(content, 1, 3, false, true, .zig, 
testing.allocator); + defer testing.allocator.free(result); + try testing.expect(result.len == 0); +} + + +test "explorer: searchContentRegex end-to-end" { + var explorer_inst = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer_inst.deinit(); + + try explorer_inst.indexFile("test1.zig", "pub fn recordSnapshot() void {}\nconst x = 42;"); + try explorer_inst.indexFile("test2.zig", "pub fn recordState() void {}\nconst y = 99;"); + try explorer_inst.indexFile("test3.zig", "const z = 0;\nfn other() void {}"); + + const results = try explorer_inst.searchContentRegex("record\\w+", testing.allocator, 50); + defer { + for (results) |r| { + testing.allocator.free(r.line_text); + testing.allocator.free(r.path); + } + testing.allocator.free(results); + } + + try testing.expect(results.len >= 2); + // Both test1 and test2 should have matches + var found1 = false; + var found2 = false; + for (results) |r| { + if (std.mem.eql(u8, r.path, "test1.zig")) found1 = true; + if (std.mem.eql(u8, r.path, "test2.zig")) found2 = true; + } + try testing.expect(found1); + try testing.expect(found2); +} + + +test "explorer: searchContentRegex no match" { + var explorer_inst = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer_inst.deinit(); + + try explorer_inst.indexFile("only.zig", "const x = 42;"); + + const results = try explorer_inst.searchContentRegex("zzz\\d+qqq", testing.allocator, 50); + defer testing.allocator.free(results); + + try testing.expectEqual(@as(usize, 0), results.len); +} + + +test "git: getGitHead returns 40-char hex SHA in a git repo" { + // codedb itself is a git repo, so this should succeed + const head = try git_mod.getGitHead(".", testing.allocator); + try testing.expect(head != null); + const sha = head.?; + try testing.expectEqual(@as(usize, 40), sha.len); + for (sha) |c| { + try testing.expect(std.ascii.isHex(c)); + } +} + + +test "git: getGitHead returns null for non-git 
directory" { + // /tmp is not a git repo + const head = try git_mod.getGitHead("/tmp", testing.allocator); + try testing.expect(head == null); +} + + +test "thread-safe: concurrent TrigramIndex.candidates() with per-thread allocators" { + var ti = TrigramIndex.init(testing.allocator); + defer ti.deinit(); + try ti.indexFile("a.zig", "pub fn handleRequest(ctx: *Context) void {}"); + try ti.indexFile("b.zig", "pub fn processData(buf: []u8) void {}"); + try ti.indexFile("c.zig", "pub fn handleRequest(req: Request) !void {}"); + const ThreadCtx = struct { + ti: *TrigramIndex, + errors: std.atomic.Value(u32) = std.atomic.Value(u32).init(0), + fn run(ctx: *@This()) void { + var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + for (0..200) |_| { + const cands = ctx.ti.candidates("handleRequest", alloc) orelse continue; + defer alloc.free(cands); + var found = false; + for (cands) |p| { + if (std.mem.eql(u8, p, "a.zig") or std.mem.eql(u8, p, "c.zig")) found = true; + } + if (!found) _ = ctx.errors.fetchAdd(1, .monotonic); + } + } + }; + var ctx = ThreadCtx{ .ti = &ti }; + var threads: [4]std.Thread = undefined; + for (&threads) |*t| t.* = try std.Thread.spawn(.{}, ThreadCtx.run, .{&ctx}); + for (threads) |t| t.join(); + try testing.expectEqual(@as(u32, 0), ctx.errors.load(.monotonic)); +} + + +test "thread-safe: concurrent SparseNgramIndex.candidates() with per-thread allocators" { + // Four threads query one shared index; each uses its own arena and counts lookups that miss x.zig. + var sni = SparseNgramIndex.init(testing.allocator); + defer sni.deinit(); + try sni.indexFile("x.zig", "pub fn handleRequest(ctx: *Context) void {}"); + try sni.indexFile("y.zig", "pub fn processData(buf: []u8) void {}"); + const ThreadCtx = struct { + sni: *SparseNgramIndex, + errors: std.atomic.Value(u32) = std.atomic.Value(u32).init(0), + fn run(ctx: *@This()) void { + var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + for (0..200) |_| { +
const cands = ctx.sni.candidates("handleRequest", alloc) orelse continue; + defer alloc.free(cands); + var found = false; + for (cands) |p| { + if (std.mem.eql(u8, p, "x.zig")) found = true; + } + if (!found) _ = ctx.errors.fetchAdd(1, .monotonic); + } + } + }; + var ctx = ThreadCtx{ .sni = &sni }; + var threads: [4]std.Thread = undefined; + for (&threads) |*t| t.* = try std.Thread.spawn(.{}, ThreadCtx.run, .{&ctx}); + for (threads) |t| t.join(); + // The counter is only useful if it is checked: fail when any lookup missed x.zig. + try testing.expectEqual(@as(u32, 0), ctx.errors.load(.monotonic)); +} + + +test "issue-43: trigram_index swap in scanBg races with concurrent MCP queries" { + // Regression: the scanBg disk-load path must serialize trigram_index swaps + // with readers by taking exp.mu.lock() before replacing the index. + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var exp = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + try exp.indexFile("a.zig", "pub fn handleAuth(token: []const u8) bool { return token.len > 0; }"); + + exp.mu.lockShared(); + + const SwapCtx = struct { + exp: *Explorer, + swapped: std.atomic.Value(bool) = std.atomic.Value(bool).init(false), + fn run(ctx: *@This()) void { + ctx.exp.mu.lock(); + defer ctx.exp.mu.unlock(); + ctx.exp.trigram_index.deinit(); + ctx.exp.trigram_index = .{ .heap = TrigramIndex.init(ctx.exp.allocator) }; + ctx.swapped.store(true, .release); + } + }; + var sctx = SwapCtx{ .exp = &exp }; + const t = try std.Thread.spawn(.{}, SwapCtx.run, .{&sctx}); + cio.sleepMs(10); + const raced = sctx.swapped.load(.acquire); + exp.mu.unlockShared(); + t.join(); + try testing.expect(!raced); +} + + +test "issue-116: getGitHead returns valid SHA for git repos" { + const git = @import("git.zig"); + + // This test runs inside the codedb repo itself + const head = git.getGitHead(".", testing.allocator) catch null; + + if (head) |h| { + try testing.expect(h.len == 40); + for (h) |c| { + try testing.expect(std.ascii.isHex(c)); + } + } +} + + +test "issue-224: codedb_symbol body=true returns full body — line_end
populated" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var explorer = Explorer.init(alloc, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("t.zig", + \\pub fn foo() u32 { + \\ const a: u32 = 1; + \\ const b: u32 = 2; + \\ return a + b; + \\} + ); + + const results = try explorer.findAllSymbols("foo", alloc); + defer alloc.free(results); + try testing.expect(results.len == 1); + + const sym = results[0].symbol; + try testing.expectEqual(@as(u32, 1), sym.line_start); + try testing.expectEqual(@as(u32, 5), sym.line_end); + + const body = (try explorer.getSymbolBody("t.zig", sym.line_start, sym.line_end, alloc)) orelse + return error.TestUnexpectedResult; + try testing.expect(std.mem.indexOf(u8, body, "pub fn foo()") != null); + try testing.expect(std.mem.indexOf(u8, body, "return a + b;") != null); +} + + +test "issue-224: Python def line_end covers full body" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var explorer = Explorer.init(alloc, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("t.py", + \\def greet(name): + \\ msg = "hello" + \\ return msg + name + ); + + const results = try explorer.findAllSymbols("greet", alloc); + defer alloc.free(results); + try testing.expect(results.len == 1); + + const sym = results[0].symbol; + try testing.expectEqual(@as(u32, 1), sym.line_start); + try testing.expectEqual(@as(u32, 3), sym.line_end); +} + + +test "issue-108: detectLanguage handles .tf and .tfvars" { + try testing.expectEqual(Language.hcl, explore.detectLanguage("main.tf")); + try testing.expectEqual(Language.hcl, explore.detectLanguage("prod.tfvars")); + try testing.expectEqual(Language.hcl, explore.detectLanguage("config.hcl")); +} + + +test "issue-215: detectLanguage handles .r and .R" { + try testing.expectEqual(Language.r, 
explore.detectLanguage("script.r")); + try testing.expectEqual(Language.r, explore.detectLanguage("analysis.R")); +} + + +test "dep-graph: reverse index gives O(1) imported_by lookup" { + var graph = DependencyGraph.init(testing.allocator); + defer graph.deinit(); + + // main.zig imports store.zig and utils.zig + var deps1: std.ArrayList([]const u8) = .empty; + try deps1.append(testing.allocator, "store.zig"); + try deps1.append(testing.allocator, "utils.zig"); + try graph.setDeps("main.zig", deps1); + + // server.zig imports store.zig + var deps2: std.ArrayList([]const u8) = .empty; + try deps2.append(testing.allocator, "store.zig"); + try graph.setDeps("server.zig", deps2); + + // store.zig is imported by main.zig and server.zig + const imported_by = try graph.getImportedBy("store.zig", testing.allocator); + defer { + for (imported_by) |p| testing.allocator.free(p); + testing.allocator.free(imported_by); + } + try testing.expectEqual(@as(usize, 2), imported_by.len); + + // utils.zig is imported by main.zig only + const imported_by2 = try graph.getImportedBy("utils.zig", testing.allocator); + defer { + for (imported_by2) |p| testing.allocator.free(p); + testing.allocator.free(imported_by2); + } + try testing.expectEqual(@as(usize, 1), imported_by2.len); + try testing.expectEqualStrings("main.zig", imported_by2[0]); +} + + +test "dep-graph: setDeps removes old reverse edges" { + var graph = DependencyGraph.init(testing.allocator); + defer graph.deinit(); + + // main.zig initially imports store.zig + var deps1: std.ArrayList([]const u8) = .empty; + try deps1.append(testing.allocator, "store.zig"); + try graph.setDeps("main.zig", deps1); + + const before = try graph.getImportedBy("store.zig", testing.allocator); + defer { + for (before) |p| testing.allocator.free(p); + testing.allocator.free(before); + } + try testing.expectEqual(@as(usize, 1), before.len); + + // main.zig re-indexed, now imports utils.zig instead + var deps2: std.ArrayList([]const u8) = .empty; + 
try deps2.append(testing.allocator, "utils.zig"); + try graph.setDeps("main.zig", deps2); + + // store.zig should no longer have main.zig as a dependent + const after = try graph.getImportedBy("store.zig", testing.allocator); + defer { + for (after) |p| testing.allocator.free(p); + testing.allocator.free(after); + } + try testing.expectEqual(@as(usize, 0), after.len); + + // utils.zig should now have main.zig + const utils_deps = try graph.getImportedBy("utils.zig", testing.allocator); + defer { + for (utils_deps) |p| testing.allocator.free(p); + testing.allocator.free(utils_deps); + } + try testing.expectEqual(@as(usize, 1), utils_deps.len); +} + + +test "dep-graph: transitive dependents via BFS" { + var graph = DependencyGraph.init(testing.allocator); + defer graph.deinit(); + + // Build chain: app.zig -> server.zig -> store.zig -> utils.zig + var deps1: std.ArrayList([]const u8) = .empty; + try deps1.append(testing.allocator, "server.zig"); + try graph.setDeps("app.zig", deps1); + + var deps2: std.ArrayList([]const u8) = .empty; + try deps2.append(testing.allocator, "store.zig"); + try graph.setDeps("server.zig", deps2); + + var deps3: std.ArrayList([]const u8) = .empty; + try deps3.append(testing.allocator, "utils.zig"); + try graph.setDeps("store.zig", deps3); + + // Changing utils.zig affects store.zig, server.zig, app.zig transitively + const blast = try graph.getTransitiveDependents("utils.zig", testing.allocator, null); + defer { + for (blast) |p| testing.allocator.free(p); + testing.allocator.free(blast); + } + try testing.expectEqual(@as(usize, 3), blast.len); + + // With max_depth=1, only direct dependents + const shallow = try graph.getTransitiveDependents("utils.zig", testing.allocator, 1); + defer { + for (shallow) |p| testing.allocator.free(p); + testing.allocator.free(shallow); + } + try testing.expectEqual(@as(usize, 1), shallow.len); + try testing.expectEqualStrings("store.zig", shallow[0]); +} + + +test "dep-graph: transitive dependencies 
(forward BFS)" { + var graph = DependencyGraph.init(testing.allocator); + defer graph.deinit(); + + // app.zig -> server.zig -> store.zig -> utils.zig + var deps1: std.ArrayList([]const u8) = .empty; + try deps1.append(testing.allocator, "server.zig"); + try graph.setDeps("app.zig", deps1); + + var deps2: std.ArrayList([]const u8) = .empty; + try deps2.append(testing.allocator, "store.zig"); + try graph.setDeps("server.zig", deps2); + + var deps3: std.ArrayList([]const u8) = .empty; + try deps3.append(testing.allocator, "utils.zig"); + try graph.setDeps("store.zig", deps3); + + // app.zig transitively depends on server.zig, store.zig, utils.zig + const deps_all = try graph.getTransitiveDependencies("app.zig", testing.allocator, null); + defer { + for (deps_all) |p| testing.allocator.free(p); + testing.allocator.free(deps_all); + } + try testing.expectEqual(@as(usize, 3), deps_all.len); + + // Depth=2: app.zig -> server.zig -> store.zig (not utils.zig) + const deps_shallow = try graph.getTransitiveDependencies("app.zig", testing.allocator, 2); + defer { + for (deps_shallow) |p| testing.allocator.free(p); + testing.allocator.free(deps_shallow); + } + try testing.expectEqual(@as(usize, 2), deps_shallow.len); +} + + +test "dep-graph: remove cleans forward and reverse edges" { + var graph = DependencyGraph.init(testing.allocator); + defer graph.deinit(); + + var deps1: std.ArrayList([]const u8) = .empty; + try deps1.append(testing.allocator, "store.zig"); + try graph.setDeps("main.zig", deps1); + + var deps2: std.ArrayList([]const u8) = .empty; + try deps2.append(testing.allocator, "store.zig"); + try graph.setDeps("server.zig", deps2); + + try testing.expectEqual(@as(usize, 2), graph.count()); + + // Remove main.zig + graph.remove("main.zig"); + try testing.expectEqual(@as(usize, 1), graph.count()); + + // store.zig should only be imported by server.zig now + const imported_by = try graph.getImportedBy("store.zig", testing.allocator); + defer { + for (imported_by) |p| 
testing.allocator.free(p); + testing.allocator.free(imported_by); + } + try testing.expectEqual(@as(usize, 1), imported_by.len); + try testing.expectEqualStrings("server.zig", imported_by[0]); +} + + +test "dep-graph: cycle does not cause infinite BFS" { + var graph = DependencyGraph.init(testing.allocator); + defer graph.deinit(); + + // Create a cycle: a.zig -> b.zig -> c.zig -> a.zig + var deps1: std.ArrayList([]const u8) = .empty; + try deps1.append(testing.allocator, "b.zig"); + try graph.setDeps("a.zig", deps1); + + var deps2: std.ArrayList([]const u8) = .empty; + try deps2.append(testing.allocator, "c.zig"); + try graph.setDeps("b.zig", deps2); + + var deps3: std.ArrayList([]const u8) = .empty; + try deps3.append(testing.allocator, "a.zig"); + try graph.setDeps("c.zig", deps3); + + // Transitive dependents of a.zig — should terminate despite cycle + const blast = try graph.getTransitiveDependents("a.zig", testing.allocator, null); + defer { + for (blast) |p| testing.allocator.free(p); + testing.allocator.free(blast); + } + // b.zig and c.zig both transitively depend on a.zig + try testing.expectEqual(@as(usize, 2), blast.len); + + // Forward transitive deps from a.zig — should also terminate + const fwd = try graph.getTransitiveDependencies("a.zig", testing.allocator, null); + defer { + for (fwd) |p| testing.allocator.free(p); + testing.allocator.free(fwd); + } + try testing.expectEqual(@as(usize, 2), fwd.len); +} + + +test "dep-graph: Explorer integration — getImportedBy uses reverse index" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("store.zig", "pub const Store = struct {};"); + try explorer.indexFile("main.zig", "const store = @import(\"store.zig\");\npub fn main() void {}"); + try explorer.indexFile("server.zig", "const store = @import(\"store.zig\");\npub fn serve() void {}"); + + const deps 
= try explorer.getImportedBy("store.zig", testing.allocator); + defer { + for (deps) |d| testing.allocator.free(d); + testing.allocator.free(deps); + } + try testing.expectEqual(@as(usize, 2), deps.len); +} + + +test "dep-graph: Explorer transitive dependents" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("utils.zig", "pub fn helper() void {}"); + try explorer.indexFile("store.zig", "const utils = @import(\"utils.zig\");\npub const Store = struct {};"); + try explorer.indexFile("main.zig", "const store = @import(\"store.zig\");\npub fn main() void {}"); + + // Transitive: changing utils.zig affects store.zig and main.zig + const blast = try explorer.getTransitiveDependents("utils.zig", testing.allocator, null); + defer { + for (blast) |b| testing.allocator.free(b); + testing.allocator.free(blast); + } + try testing.expectEqual(@as(usize, 2), blast.len); +} + + +test "issue-445: dep-graph dedupes multi-aliased forward imports" { + // A file that imports the same dep under multiple aliases + // const idx = @import("index.zig"); + // const Index = @import("index.zig").Foo; + // const reset = @import("index.zig").resetFrequencyTable; + // produces multiple "index.zig" entries in outline.imports, which + // rebuildDepsFor previously appended verbatim — so getForwardDeps + // returned "index.zig" 5 times for src/main.zig in this very repo. + // The depends_on list should be unique by path. 
+ var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("index.zig", "pub fn build() void {}"); + try explorer.indexFile("main.zig", + \\const idx = @import("index.zig"); + \\const Index = @import("index.zig").Foo; + \\const reset = @import("index.zig").resetFrequencyTable; + \\pub fn main() void {} + ); + + explorer.mu.lockShared(); + const fwd_opt = explorer.dep_graph.getForwardDeps("main.zig"); + explorer.mu.unlockShared(); + + try testing.expect(fwd_opt != null); + const fwd = fwd_opt.?; + try testing.expectEqual(@as(usize, 1), fwd.len); + try testing.expectEqualStrings("index.zig", fwd[0]); +} + + +test "symbol-index: O(1) findSymbol via symbol_index" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("math.zig", "pub fn add(a: i32, b: i32) i32 { return a + b; }\npub fn subtract(a: i32, b: i32) i32 { return a - b; }\n"); + try explorer.indexFile("utils.zig", "pub fn add(x: f64, y: f64) f64 { return x + y; }\npub fn format() void {}\n"); + + // findSymbol should return first match via index + const result = try explorer.findSymbol("add", testing.allocator); + try testing.expect(result != null); + const r = result.?; + defer { + testing.allocator.free(r.path); + testing.allocator.free(r.symbol.name); + if (r.symbol.detail) |d| testing.allocator.free(d); + } + try testing.expectEqualStrings("add", r.symbol.name); + + // findAllSymbols should return both + const all = try explorer.findAllSymbols("add", testing.allocator); + defer { + for (all) |s| { + testing.allocator.free(s.path); + testing.allocator.free(s.symbol.name); + if (s.symbol.detail) |d| testing.allocator.free(d); + } + testing.allocator.free(all); + } + try testing.expectEqual(@as(usize, 2), 
all.len); +} + + +test "symbol-index: removeFile cleans symbol_index" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("a.zig", "pub fn unique_func() void {}"); + const before = try explorer.findSymbol("unique_func", testing.allocator); + try testing.expect(before != null); + testing.allocator.free(before.?.path); + testing.allocator.free(before.?.symbol.name); + if (before.?.symbol.detail) |d| testing.allocator.free(d); + + explorer.removeFile("a.zig"); + + const after = try explorer.findSymbol("unique_func", testing.allocator); + try testing.expect(after == null); +} + + +test "symbol-index: re-index updates symbol_index" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("a.zig", "pub fn old_name() void {}"); + const r1 = try explorer.findSymbol("old_name", testing.allocator); + try testing.expect(r1 != null); + testing.allocator.free(r1.?.path); + testing.allocator.free(r1.?.symbol.name); + if (r1.?.symbol.detail) |d| testing.allocator.free(d); + + // Re-index same file with different content + try explorer.indexFile("a.zig", "pub fn new_name() void {}"); + const r2 = try explorer.findSymbol("old_name", testing.allocator); + try testing.expect(r2 == null); + + const r3 = try explorer.findSymbol("new_name", testing.allocator); + try testing.expect(r3 != null); + testing.allocator.free(r3.?.path); + testing.allocator.free(r3.?.symbol.name); + if (r3.?.symbol.detail) |d| testing.allocator.free(d); +} + + +test "word-index: splitIdentifier snake_case" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + + var out: std.ArrayList([]const u8) = .empty; + defer out.deinit(a); + try 
splitIdentifier("get_or_put", &out, a); + + try testing.expectEqual(@as(usize, 3), out.items.len); + try testing.expectEqualStrings("get", out.items[0]); + try testing.expectEqualStrings("or", out.items[1]); + try testing.expectEqualStrings("put", out.items[2]); +} + + +test "word-index: splitIdentifier camelCase" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + + var out: std.ArrayList([]const u8) = .empty; + defer out.deinit(a); + try splitIdentifier("validateToken", &out, a); + + try testing.expectEqual(@as(usize, 2), out.items.len); + try testing.expectEqualStrings("validate", out.items[0]); + try testing.expectEqualStrings("token", out.items[1]); +} + + +test "word-index: splitIdentifier acronym (HTTPHandler)" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + + var out: std.ArrayList([]const u8) = .empty; + defer out.deinit(a); + try splitIdentifier("HTTPHandler", &out, a); + + try testing.expectEqual(@as(usize, 2), out.items.len); + try testing.expectEqualStrings("http", out.items[0]); + try testing.expectEqualStrings("handler", out.items[1]); +} + + +test "word-index: splitIdentifier simple word emits itself" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + + var out: std.ArrayList([]const u8) = .empty; + defer out.deinit(a); + try splitIdentifier("handler", &out, a); + + try testing.expectEqual(@as(usize, 1), out.items.len); + try testing.expectEqualStrings("handler", out.items[0]); +} + + +test "word-index: sub-token search finds camelCase components" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("a.zig", "fn validateToken(x: u32) void {}"); + try explorer.indexFile("b.zig", "fn 
processRequest() void {}"); + + // "validate" should find validateToken via sub-token splitting + const r1 = try explorer.searchContent("validate", testing.allocator, 10); + defer { + for (r1) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(r1); + } + try testing.expectEqual(@as(usize, 1), r1.len); + try testing.expectEqualStrings("a.zig", r1[0].path); + + // "process" should find processRequest + const r2 = try explorer.searchContent("process", testing.allocator, 10); + defer { + for (r2) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(r2); + } + try testing.expectEqual(@as(usize, 1), r2.len); + try testing.expectEqualStrings("b.zig", r2[0].path); +} + + +test "word-index: sub-token search finds snake_case components" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("a.zig", "const http_handler = null;"); + + // "http" should find http_handler + const r1 = try explorer.searchContent("http", testing.allocator, 10); + defer { + for (r1) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(r1); + } + try testing.expect(r1.len >= 1); + + // "handler" should find http_handler + const r2 = try explorer.searchContent("handler", testing.allocator, 10); + defer { + for (r2) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(r2); + } + try testing.expect(r2.len >= 1); +} + + +test "word-index: case-insensitive lookup finds exact identifiers" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("a.zig", "fn validateToken() void {}"); + + 
// Case-insensitive search for the full identifier + const r1 = try explorer.searchContent("validatetoken", testing.allocator, 10); + defer { + for (r1) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(r1); + } + try testing.expectEqual(@as(usize, 1), r1.len); +} + + +test "word-index: searchPrefix finds extensions of a prefix" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + var wi = WordIndex.init(a); + + // Index a file with camelCase identifiers — splits produce sub-tokens + try wi.indexFile("a.zig", "fn searchContent() void {} fn searchConfig() void {}"); + + // "searchco" is a strict prefix of "searchcontent" and "searchconfig" + const hits = try wi.searchPrefix("searchco", a, 32); + try testing.expect(hits.len >= 1); +} + + +test "word-index: searchPrefix skips exact match (Tier 0 responsibility)" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + var wi = WordIndex.init(a); + + try wi.indexFile("a.zig", "fn searchContent() void {}"); + + // Exact key "search" exists (sub-token). searchPrefix is expected to contribute no hits for that exact key itself (Tier 0 covers it). + const hits_exact = try wi.searchPrefix("search", a, 32); + // "search" itself is in the index. Only keys STRICTLY longer are returned. + // "searchcontent" is longer, so we expect ≥1 result. + try testing.expect(hits_exact.len >= 1); + + // The hits must come from keys other than "search" itself. + // Verify by checking "searchc..."
style prefix: + const hits_prefix = try wi.searchPrefix("searchco", a, 32); + try testing.expect(hits_prefix.len >= 1); +} + + +test "word-index: searchPrefix respects max_results cap" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + var wi = WordIndex.init(a); + + // Index many distinct files producing many keys that share the "fooBar" prefix. + var i: usize = 0; + while (i < 50) : (i += 1) { + const path = try std.fmt.allocPrint(a, "f{d}.zig", .{i}); + const content = try std.fmt.allocPrint(a, "fn fooBar{d}() void {{}}\n", .{i}); + try wi.indexFile(path, content); + } + + const cap: usize = 5; + const hits = try wi.searchPrefix("foobar", a, cap); + try testing.expect(hits.len <= cap); + try testing.expect(hits.len > 0); +} + + +test "integration: Tier 0.5 prefix expansion finds partial identifier" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("util.zig", "pub fn validateRequest(r: Request) bool { return true; }"); + + // "validateR" is a prefix of "validaterequest" in the word index + const results = try explorer.searchContent("validateR", testing.allocator, 10); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + + try testing.expect(results.len >= 1); +} + + +test "issue-389: FilteredWalker yields symlinked source files" { + var tmp_dir = testing.tmpDir(.{}); + defer tmp_dir.cleanup(); + + try tmp_dir.dir.createDirPath(io, "src"); + try tmp_dir.dir.writeFile(io, .{ .sub_path = "src/target.zig", .data = "pub fn linked() void {}\n// MARKER_LINE\n" }); + + // Create an in-workspace symlink: src/alias.zig -> target.zig (relative). 
+ var src_dir = try tmp_dir.dir.openDir(io, "src", .{ .iterate = true }); + defer src_dir.close(io); + src_dir.symLink(io, "target.zig", "alias.zig", .{}) catch |err| switch (err) { + // If the OS denies symlinks (e.g. CI without privilege on Windows), + // skip the test rather than report a false negative. + error.AccessDenied => return error.SkipZigTest, + else => return err, + }; + + var root_buf: [std.fs.max_path_bytes]u8 = undefined; + const root_len = try tmp_dir.dir.realPathFile(io, ".", &root_buf); + const root = root_buf[0..root_len]; + + var store = Store.init(testing.allocator); + defer store.deinit(); + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + explorer.setRoot(io, root); + try watcher.initialScanWithWorkerCount(io, &store, &explorer, root, testing.allocator, false, 1); + + // Both the real file and the symlinked alias must be indexed. The bug at + // src/watcher.zig:319 drops every entry whose kind != .file, silently + // skipping symlinks even when they point at in-workspace source files. + try testing.expect(explorer.contents.contains("src/target.zig")); + try testing.expect(explorer.contents.contains("src/alias.zig")); +} + + +test "issue-405: FilteredWalker walks directory symlinks safely (cycle + escape)" { + // Follow-up to #389. The current FilteredWalker.next() (src/watcher.zig:319-323) + // treats sym_link entries as files when statFile reports .file, but silently + // drops sym_link entries whose target is a directory. Real repos rely on + // directory symlinks (monorepo package links, vendored deps, dotfile configs), + // so the indexer must walk them — but only safely. This test pins three things: + // 1. A file inside a symlinked subdirectory is indexed. + // 2. A symlink that introduces a cycle does not hang or duplicate entries. + // 3. (Implicit) The walker terminates in bounded time on the fixture. 
+ var tmp_dir = testing.tmpDir(.{}); + defer tmp_dir.cleanup(); + + // Real directory `pkg/` with one source file. + try tmp_dir.dir.createDirPath(io, "pkg"); + try tmp_dir.dir.writeFile(io, .{ .sub_path = "pkg/inside.zig", .data = "pub fn inside() void {}\n" }); + + // A real directory `app/` that holds a directory-symlink `linked_pkg -> ../pkg`. + // We expect the walker to descend into `linked_pkg` and yield `app/linked_pkg/inside.zig`. + try tmp_dir.dir.createDirPath(io, "app"); + var app_dir = try tmp_dir.dir.openDir(io, "app", .{ .iterate = true }); + defer app_dir.close(io); + app_dir.symLink(io, "../pkg", "linked_pkg", .{}) catch |err| switch (err) { + // Skip on platforms / CI configurations that deny symlink creation. + error.AccessDenied => return error.SkipZigTest, + else => return err, + }; + + // Cycle: `app/loop -> ..` points back at the workspace root. Without cycle + // detection a naive walker recurses forever via app/loop/app/loop/app/... + app_dir.symLink(io, "..", "loop", .{}) catch |err| switch (err) { + error.AccessDenied => return error.SkipZigTest, + else => return err, + }; + + var root_buf: [std.fs.max_path_bytes]u8 = undefined; + const root_len = try tmp_dir.dir.realPathFile(io, ".", &root_buf); + const root = root_buf[0..root_len]; + + var store = Store.init(testing.allocator); + defer store.deinit(); + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + explorer.setRoot(io, root); + try watcher.initialScanWithWorkerCount(io, &store, &explorer, root, testing.allocator, false, 1); + + // 1. The in-target file must appear under the symlinked path. This is the + // behaviour gap left by #389 — directory symlinks are currently ignored, + // so this assertion fails on main. + try testing.expect(explorer.contents.contains("app/linked_pkg/inside.zig")); + + // 2. The real path must also be indexed exactly once. 
+ try testing.expect(explorer.contents.contains("pkg/inside.zig")); + + // 3. The cycle must not have produced a deeply-nested duplicate entry. + // If cycle detection is missing, paths like + // `app/loop/app/loop/app/linked_pkg/inside.zig` would appear (or the + // scan would never terminate). Assert no path contains "loop/app/loop". + var it = explorer.contents.iterator(); + while (it.next()) |kv| { + const p = kv.key_ptr.*; + try testing.expect(std.mem.indexOf(u8, p, "loop/app/loop") == null); + } +} + + +test "issue-405: cleanupStaleTmpFiles deletes in-flight sibling tmp files" { + // BUG: snapshot.zig:cleanupStaleTmpFiles deletes ANY file matching + // `*.tmp` in the snapshot directory with no age guard. + // If a sibling writer (another process / parallel scan) is mid-write + // — i.e. it has just created `{basename}.{suffix}.tmp` and is still + // streaming bytes into it before the final rename(tmp, dest) — then a + // concurrent loadSnapshotValidated() will unlink the sibling's + // in-flight tmp file. The sibling's subsequent rename then fails with + // ENOENT and the snapshot write silently aborts. + // + // Reproduces deterministically by simulating the in-flight tmp file + // and observing that loadSnapshotValidated removes it. + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const aa = arena.allocator(); + + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp.dir.realPathFile(io, ".", &path_buf); + const dir_path = path_buf[0..dir_path_len]; + + // Step 1: write a real, valid snapshot at {dir_path}/snap.codedb so + // loadSnapshotValidated has something legitimate to read.
+ var exp = Explorer.init(aa, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + try exp.indexFile("a.zig", "pub fn alpha() void {}\n"); + const snap_path = try std.fs.path.join(aa, &.{ dir_path, "snap.codedb" }); + try snapshot_mod.writeSnapshot(io, &exp, dir_path, snap_path, aa); + + // Step 2: simulate a SIBLING writer that has just created its tmp file + // but has NOT yet renamed. This file matches the cleanup pattern + // (starts with basename, ends with ".tmp"). + const sibling_tmp = try std.fs.path.join(aa, &.{ dir_path, "snap.codedb.deadbeef.tmp" }); + { + var f = try std.Io.Dir.cwd().createFile(io, sibling_tmp, .{}); + defer f.close(io); + try f.writeStreamingAll(io, "in-flight write"); + } + + // Sanity: the sibling tmp exists. + std.Io.Dir.cwd().access(io, sibling_tmp, .{}) catch return error.TestUnexpectedResult; + + // Step 3: run loadSnapshotValidated. cleanupStaleTmpFiles is the + // first thing it does. After this, the sibling's in-flight tmp + // file MUST still exist — otherwise the sibling's rename will fail. + var exp2 = Explorer.init(aa, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + var store = Store.init(testing.allocator); + defer store.deinit(); + _ = snapshot_mod.loadSnapshotValidated(io, snap_path, null, &exp2, &store, aa); + + // Expected: the in-flight sibling tmp is preserved. + // Current (bug): cleanupStaleTmpFiles unconditionally deletes it. + std.Io.Dir.cwd().access(io, sibling_tmp, .{}) catch { + return error.TestExpectedSiblingTmpPreserved; + }; +} + + +test "issue-409: snapshot .env prefix filter wrongly excludes .envoy/.environment files" { + // BUG: snapshot.zig:isSensitivePath uses + // if (basename.len >= 4 and std.mem.eql(u8, basename[0..4], ".env")) return true; + // to catch .env, .env.local, .env.production, etc. 
The check is a raw + // 4-byte prefix match — so any basename whose first 4 bytes are ".env" + // is rejected, including legitimate, non-secret files such as: + // + // .envoy.json — Envoy proxy config + // .environment — generic config name + // .envconfig.yaml — anything starting with ".env" + // + // These files end up silently dropped from the snapshot's CONTENT, + // TREE, and OUTLINE_STATE sections, so a save/load round-trip loses + // them entirely. The watcher.zig copy of isSensitivePath has the same + // bug, so they are also excluded from live indexing. + // + // Reproducer: index a non-secret .envoy.json alongside a normal file, + // snapshot, load, and observe that .envoy.json is missing. + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const aa = arena.allocator(); + + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp.dir.realPathFile(io, ".", &path_buf); + const dir_path = path_buf[0..dir_path_len]; + + var exp = Explorer.init(aa, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + try exp.indexFile("a.zig", "pub fn alpha() void {}\n"); + // .envoy.json is the canonical Envoy proxy config name — not a secret. + try exp.indexFile(".envoy.json", "{\"listeners\":[]}\n"); + try testing.expectEqual(@as(usize, 2), exp.outlines.count()); + + const snap_path = try std.fs.path.join(aa, &.{ dir_path, "snap.codedb" }); + try snapshot_mod.writeSnapshot(io, &exp, dir_path, snap_path, aa); + + var exp2 = Explorer.init(aa, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + var store = Store.init(testing.allocator); + defer store.deinit(); + try testing.expect(snapshot_mod.loadSnapshot(io, snap_path, &exp2, &store, aa)); + + // Expected: both files round-trip through the snapshot. + // Current (bug): only "a.zig" survives — ".envoy.json" was excluded by + // the .env prefix check at write time. 
+ try testing.expect(exp2.outlines.contains("a.zig")); + try testing.expect(exp2.outlines.contains(".envoy.json")); +} + + +test "issue-208: content cache evicts cold entries under pressure" { + const ContentCache = @import("hot_cache.zig").ContentCache; + const cap = 50; + var cache = try ContentCache.initAlloc(testing.allocator, cap); + defer cache.deinit(); + + var key_buf: [32]u8 = undefined; + var val_buf: [32]u8 = undefined; + + // Insert 100 keys into a cache with capacity 50. + var i: usize = 0; + while (i < 100) : (i += 1) { + const k = std.fmt.bufPrint(&key_buf, "file_{d}.zig", .{i}) catch unreachable; + const v = std.fmt.bufPrint(&val_buf, "content_{d}", .{i}) catch unreachable; + try cache.put(k, v); + } + + // Cache must not exceed capacity. + try testing.expect(cache.len() <= cap); + + // Touch keys 0..10 to mark them hot (set ref bit). + i = 0; + while (i < 10) : (i += 1) { + const k = std.fmt.bufPrint(&key_buf, "file_{d}.zig", .{i}) catch unreachable; + _ = cache.get(k); + } + + // Insert 20 more keys to trigger further eviction. + i = 100; + while (i < 120) : (i += 1) { + const k = std.fmt.bufPrint(&key_buf, "file_{d}.zig", .{i}) catch unreachable; + const v = std.fmt.bufPrint(&val_buf, "content_{d}", .{i}) catch unreachable; + try cache.put(k, v); + } + + // Still bounded by capacity. + try testing.expect(cache.len() <= cap); + + // Evictions must have fired. 
+ const s = cache.stats(); + try testing.expect(s.evictions > 0); +} + diff --git a/src/test_index.zig b/src/test_index.zig new file mode 100644 index 0000000..b8f3dba --- /dev/null +++ b/src/test_index.zig @@ -0,0 +1,2956 @@ +const std = @import("std"); +const cio = @import("cio.zig"); +const testing = std.testing; +const io = std.testing.io; +const Store = @import("store.zig").Store; +const Explorer = @import("explore.zig").Explorer; +const WordIndex = @import("index.zig").WordIndex; +const TrigramIndex = @import("index.zig").TrigramIndex; +const SparseNgramIndex = @import("index.zig").SparseNgramIndex; +const pairWeight = @import("index.zig").pairWeight; +const extractSparseNgrams = @import("index.zig").extractSparseNgrams; +const buildCoveringSet = @import("index.zig").buildCoveringSet; +const setFrequencyTable = @import("index.zig").setFrequencyTable; +const resetFrequencyTable = @import("index.zig").resetFrequencyTable; +const buildFrequencyTable = @import("index.zig").buildFrequencyTable; +const writeFrequencyTable = @import("index.zig").writeFrequencyTable; +const readFrequencyTable = @import("index.zig").readFrequencyTable; +const explore = @import("explore.zig"); +const Language = explore.Language; +const git_mod = @import("git.zig"); +const decomposeRegex = @import("index.zig").decomposeRegex; +const RegexQuery = @import("index.zig").RegexQuery; +const packTrigram = @import("index.zig").packTrigram; +const regexMatch = explore.regexMatch; +const PostingMask = @import("index.zig").PostingMask; +const normalizeChar = @import("index.zig").normalizeChar; +const Trigram = @import("index.zig").Trigram; +const MmapTrigramIndex = @import("index.zig").MmapTrigramIndex; +const AnyTrigramIndex = @import("index.zig").AnyTrigramIndex; +const version = @import("version.zig"); +const watcher = @import("watcher.zig"); +const AgentRegistry = @import("agent.zig").AgentRegistry; +const snapshot_mod = @import("snapshot.zig"); +const snapshot_json = 
@import("snapshot_json.zig");
+const mcp_mod = @import("mcp.zig");
+const SearchResult = @import("explore.zig").SearchResult;
+const SymbolKind = explore.SymbolKind;
+const edit_mod = @import("edit.zig");
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// A query that occurs in exactly one indexed file must yield that file as the
+// only candidate. The result slice is released with the allocator passed in.
+test "trigram index: index and candidate lookup" {
+    var ti = TrigramIndex.init(testing.allocator);
+    defer ti.deinit();
+
+    try ti.indexFile("src/store.zig", "pub fn recordSnapshot(self: *Store) void {}");
+    try ti.indexFile("src/agent.zig", "pub fn register(self: *Agent) void {}");
+
+    const cands = ti.candidates("recordSnapshot", testing.allocator);
+    defer if (cands) |c| testing.allocator.free(c);
+    try testing.expect(cands != null);
+    try testing.expect(cands.?.len == 1);
+    try testing.expectEqualStrings("src/store.zig", cands.?[0]);
+}
+
+
+// A two-character query is expected to produce null rather than a candidate list.
+test "trigram index: short query returns null" {
+    var ti = TrigramIndex.init(testing.allocator);
+    defer ti.deinit();
+
+    try ti.indexFile("f.zig", "hello world");
+    const cands = ti.candidates("hi", testing.allocator);
+    // If the expectation below ever fails, still release the unexpected slice
+    // so the failure is not accompanied by a leak report.
+    defer if (cands) |c| testing.allocator.free(c);
+    try testing.expect(cands == null);
+}
+
+
+// A long-enough query whose trigrams appear in no file is expected to produce
+// a non-null, empty candidate list.
+test "trigram index: no match returns empty" {
+    var ti = TrigramIndex.init(testing.allocator);
+    defer ti.deinit();
+
+    try ti.indexFile("f.zig", "hello world");
+    const cands = ti.candidates("zzzzz", testing.allocator);
+    // The sibling tests free every non-null result; do the same here so the
+    // (expected-empty) slice is never leaked.
+    defer if (cands) |c| testing.allocator.free(c);
+    try testing.expect(cands != null);
+    try testing.expect(cands.?.len == 0);
+}
+
+
+// Re-indexing a path must drop the trigrams of its previous content and make
+// the new content findable.
+test "trigram index: re-index removes old trigrams" {
+    var ti = TrigramIndex.init(testing.allocator);
+    defer ti.deinit();
+
+    try ti.indexFile("f.zig", "uniqueOldContent");
+    const c1 = ti.candidates("uniqueOld", testing.allocator);
+    defer if (c1) |c| testing.allocator.free(c);
+    try testing.expect(c1 != null and c1.?.len == 1);
+
+    try ti.indexFile("f.zig", "brandNewStuff");
+    const c2 = ti.candidates("uniqueOld", testing.allocator);
+    defer if (c2) |c| testing.allocator.free(c);
+    try testing.expect(c2 != null and c2.?.len == 0);
+
+    const c3 = ti.candidates("brandNew", testing.allocator);
+    defer if (c3) |c|
testing.allocator.free(c); + try testing.expect(c3 != null and c3.?.len == 1); +} + + +test "pairWeight: deterministic" { + const w1 = pairWeight('a', 'b'); + const w2 = pairWeight('a', 'b'); + try testing.expectEqual(w1, w2); + + const w3 = pairWeight('a', 'c'); + // Different pair must (almost certainly) produce a different weight. + // We only assert they're not trivially equal; hash collisions are acceptable. + _ = w3; // just ensure it compiles and doesn't crash +} + + +test "pairWeight: different pairs produce different values (sanity)" { + // 'ab' and 'ba' should almost never collide for a reasonable hash. + const w_ab = pairWeight('a', 'b'); + const w_ba = pairWeight('b', 'a'); + // Not a strict requirement (collisions are ok), but verify the function runs. + _ = w_ab; + _ = w_ba; +} + + +test "extractSparseNgrams: short content returns empty" { + const ng = try extractSparseNgrams("ab", testing.allocator); + defer testing.allocator.free(ng); + try testing.expectEqual(@as(usize, 0), ng.len); +} + + +test "extractSparseNgrams: minimum length content yields one ngram" { + const ng = try extractSparseNgrams("abc", testing.allocator); + defer testing.allocator.free(ng); + try testing.expect(ng.len >= 1); + try testing.expectEqual(@as(usize, 3), ng[0].len); + try testing.expectEqual(@as(usize, 0), ng[0].pos); +} + + +test "extractSparseNgrams: deterministic across calls" { + const ng1 = try extractSparseNgrams("hello world", testing.allocator); + defer testing.allocator.free(ng1); + const ng2 = try extractSparseNgrams("hello world", testing.allocator); + defer testing.allocator.free(ng2); + + try testing.expectEqual(ng1.len, ng2.len); + for (ng1, ng2) |a, b| { + try testing.expectEqual(a.hash, b.hash); + try testing.expectEqual(a.pos, b.pos); + try testing.expectEqual(a.len, b.len); + } +} + + +test "extractSparseNgrams: case-insensitive hashing" { + const ng_lower = try extractSparseNgrams("hello", testing.allocator); + defer testing.allocator.free(ng_lower); + 
const ng_upper = try extractSparseNgrams("HELLO", testing.allocator); + defer testing.allocator.free(ng_upper); + + try testing.expectEqual(ng_lower.len, ng_upper.len); + for (ng_lower, ng_upper) |lo, hi| { + try testing.expectEqual(lo.hash, hi.hash); + } +} + + +test "extractSparseNgrams: ngrams cover entire content" { + const content = "the quick brown fox"; + const ng = try extractSparseNgrams(content, testing.allocator); + defer testing.allocator.free(ng); + + // Verify every byte position is covered by at least one n-gram. + var covered = try testing.allocator.alloc(bool, content.len); + defer testing.allocator.free(covered); + @memset(covered, false); + + for (ng) |n| { + for (n.pos..n.pos + n.len) |p| { + covered[p] = true; + } + } + for (covered) |c| { + try testing.expect(c); + } +} + + +test "extractSparseNgrams: coverage with force-split remainder 1 (len=17)" { + // 17 identical chars → no interior local maxima → one span of length 17. + // Force-split: one MAX_NGRAM_LEN=16 chunk, remainder=1 → must still cover byte 16. + const content = "aaaaaaaaaaaaaaaaa"; // 17 'a's + const ng = try extractSparseNgrams(content, testing.allocator); + defer testing.allocator.free(ng); + + var covered = try testing.allocator.alloc(bool, content.len); + defer testing.allocator.free(covered); + @memset(covered, false); + for (ng) |n| { + for (n.pos..n.pos + n.len) |p| covered[p] = true; + } + for (covered) |c| try testing.expect(c); +} + + +test "extractSparseNgrams: coverage with force-split remainder 2 (len=18)" { + // 18 identical chars → remainder=2 → must still cover bytes 16-17. 
+ const content = "aaaaaaaaaaaaaaaaaa"; // 18 'a's + const ng = try extractSparseNgrams(content, testing.allocator); + defer testing.allocator.free(ng); + + var covered = try testing.allocator.alloc(bool, content.len); + defer testing.allocator.free(covered); + @memset(covered, false); + for (ng) |n| { + for (n.pos..n.pos + n.len) |p| covered[p] = true; + } + for (covered) |c| try testing.expect(c); +} + + +test "extractSparseNgrams: ngram length bounds" { + const content = "abcdefghijklmnopqrstuvwxyz0123456789"; + const ng = try extractSparseNgrams(content, testing.allocator); + defer testing.allocator.free(ng); + + for (ng) |n| { + try testing.expect(n.len >= 3); + try testing.expect(n.len <= 16); + } +} + + +test "buildCoveringSet: sliding window covers all query substrings" { + // "foobar" (6 chars); lengths [3,6] yield 4+3+2+1 = 10 substrings. + const ngrams = try buildCoveringSet("foobar", testing.allocator); + defer testing.allocator.free(ngrams); + try testing.expectEqual(@as(usize, 10), ngrams.len); + for (ngrams) |ng| try testing.expect(ng.len >= 3 and ng.len <= 6); +} + + +test "buildCoveringSet: short query returns empty" { + const ngrams = try buildCoveringSet("ab", testing.allocator); + defer testing.allocator.free(ngrams); + try testing.expectEqual(@as(usize, 0), ngrams.len); +} + + +test "sparse ngram index: index and candidate lookup" { + var sni = SparseNgramIndex.init(testing.allocator); + defer sni.deinit(); + + // Index each file with content equal to the query we'll use — this + // guarantees the sparse n-gram boundaries align (same string = same weights). 
+ const foo_query = "recordSnapshot"; + const bar_query = "registerAgent"; + try sni.indexFile("src/foo.zig", foo_query); + try sni.indexFile("src/bar.zig", bar_query); + + const cands = sni.candidates(foo_query, testing.allocator); + defer if (cands) |c| testing.allocator.free(c); + try testing.expect(cands != null); + + var found_foo = false; + var found_bar = false; + if (cands) |cs| { + for (cs) |p| { + if (std.mem.eql(u8, p, "src/foo.zig")) found_foo = true; + if (std.mem.eql(u8, p, "src/bar.zig")) found_bar = true; + } + } + try testing.expect(found_foo); + try testing.expect(!found_bar); +} + + +test "sparse ngram index: short query returns null" { + var sni = SparseNgramIndex.init(testing.allocator); + defer sni.deinit(); + + try sni.indexFile("f.zig", "hello world"); + const cands = sni.candidates("hi", testing.allocator); // length 2 < MIN_LEN + try testing.expect(cands == null); +} + + +test "sparse ngram index: re-index removes old ngrams" { + var sni = SparseNgramIndex.init(testing.allocator); + defer sni.deinit(); + + try sni.indexFile("f.zig", "uniqueOldContent"); + const c1 = sni.candidates("uniqueOldContent", testing.allocator); + defer if (c1) |c| testing.allocator.free(c); + try testing.expect(c1 != null and c1.?.len == 1); + + try sni.indexFile("f.zig", "brandNewStuff"); + const c2 = sni.candidates("uniqueOldContent", testing.allocator); + defer if (c2) |c| testing.allocator.free(c); + // After re-index the old content is gone; may return empty or null. 
+ if (c2) |cs| try testing.expectEqual(@as(usize, 0), cs.len); +} + + +test "sparse ngram index: removeFile prunes entries" { + var sni = SparseNgramIndex.init(testing.allocator); + defer sni.deinit(); + + try sni.indexFile("a.zig", "hello world foo bar"); + try testing.expectEqual(@as(u32, 1), sni.fileCount()); + + sni.removeFile("a.zig"); + try testing.expectEqual(@as(u32, 0), sni.fileCount()); +} + + +test "sparse ngram candidates: sliding window finds file with short n-gram" { + var sni = SparseNgramIndex.init(testing.allocator); + defer sni.deinit(); + + // "a.zig" is indexed with content "rec" — produces the 3-char n-gram "rec". + // "b.zig" is indexed with unrelated content. + try sni.indexFile("a.zig", "rec"); + try sni.indexFile("b.zig", "xxxxxxxxxx"); + + // Query "record" (6 chars) contains "rec" as a 3-char sliding-window + // substring. buildCoveringSet generates "rec" → hash matches the indexed + // n-gram of "a.zig". + const cands = sni.candidates("record", testing.allocator); + defer if (cands) |c| testing.allocator.free(c); + + var found_a = false; + if (cands) |cs| { + for (cs) |p| if (std.mem.eql(u8, p, "a.zig")) { + found_a = true; + }; + } + try testing.expect(found_a); +} + + +test "pairWeight: common pairs have lower weight than rare pairs" { + // Common English/code pairs should have lower base weight than rare pairs. + // 'th' and 'er' are in the default_pair_freq table with weight 0x1000. + // 'qx' and 'zj' are not in the table and default to 0xFE00. + // jitter adds 0-255, so common+max_jitter (0x10FF) < rare+min_jitter (0xFE00). 
+ const w_th = pairWeight('t', 'h'); + const w_er = pairWeight('e', 'r'); + const w_qx = pairWeight('q', 'x'); + const w_zj = pairWeight('z', 'j'); + try testing.expect(w_th < w_qx); + try testing.expect(w_er < w_zj); +} + + +test "pairWeight: frequency-weighted produces fewer boundaries for common text" { + // A string composed of very common pairs should produce few local maxima + // (interior weights are low and similar), giving fewer n-grams than a + // string of rare pairs. + const common = "thehereinandonthere"; + const rare = "qxzjvkqxzjvkqxzjvk"; + const ng_common = try extractSparseNgrams(common, testing.allocator); + defer testing.allocator.free(ng_common); + const ng_rare = try extractSparseNgrams(rare, testing.allocator); + defer testing.allocator.free(ng_rare); + // Rare pairs create more local maxima → more (shorter) n-grams. + try testing.expect(ng_rare.len >= ng_common.len); +} + + +test "pairWeight: deterministic with frequency table" { + const w1 = pairWeight('a', 'b'); + const w2 = pairWeight('a', 'b'); + try testing.expectEqual(w1, w2); + // Verify common and rare pairs also remain deterministic. + try testing.expectEqual(pairWeight('t', 'h'), pairWeight('t', 'h')); + try testing.expectEqual(pairWeight('q', 'x'), pairWeight('q', 'x')); +} + + +test "buildFrequencyTable: common pairs get lower weight than absent pairs" { + // Construct content where 'ab' appears many times and 'qx' never appears. + const content = "ababababababababababab"; + const table = buildFrequencyTable(content); + // 'ab' is frequent → low weight; 'qx' absent → default high (0xFE00). 
+ try testing.expect(table['a']['b'] < table['q']['x']); + try testing.expectEqual(@as(u16, 0xFE00), table['q']['x']); +} + + +test "frequency table: disk round-trip" { + var tmp_dir = testing.tmpDir(.{}); + defer tmp_dir.cleanup(); + + var dir_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp_dir.dir.realPathFile(io, ".", &dir_buf); + const dir_path = dir_buf[0..dir_path_len]; + + // Build a table with distinct values. + const content = "ababcdcdefefghghijij"; + const original = buildFrequencyTable(content); + + try writeFrequencyTable(io, &original, dir_path); + + const loaded_opt = try readFrequencyTable(io, dir_path, testing.allocator); + try testing.expect(loaded_opt != null); + const loaded = loaded_opt.?; + defer testing.allocator.destroy(loaded); + + // Byte-for-byte identical. + try testing.expectEqualSlices( + u16, + @as([*]const u16, @ptrCast(&original))[0 .. 256 * 256], + @as([*]const u16, @ptrCast(loaded))[0 .. 256 * 256], + ); +} + + +test "frequency table: little-endian byte order on disk" { + var tmp_dir = testing.tmpDir(.{}); + defer tmp_dir.cleanup(); + var dir_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp_dir.dir.realPathFile(io, ".", &dir_buf); + const dir_path = dir_buf[0..dir_path_len]; + + var table: [256][256]u16 = .{.{0} ** 256} ** 256; + table[0][0] = 0x1234; // little-endian on disk: 0x34, 0x12 + table[0][1] = 0xABCD; // little-endian on disk: 0xCD, 0xAB + try writeFrequencyTable(io, &table, dir_path); + + const file_path = try std.fmt.allocPrint(testing.allocator, "{s}/pair_freq.bin", .{dir_path}); + defer testing.allocator.free(file_path); + const f = try std.Io.Dir.cwd().openFile(io, file_path, .{}); + defer f.close(io); + var raw: [4]u8 = undefined; + try testing.expectEqual(@as(usize, 4), try f.readPositionalAll(io, &raw, 0)); + try testing.expectEqual(@as(u8, 0x34), raw[0]); + try testing.expectEqual(@as(u8, 0x12), raw[1]); + try testing.expectEqual(@as(u8, 0xCD), raw[2]); + try 
testing.expectEqual(@as(u8, 0xAB), raw[3]);
+
+    const loaded = try readFrequencyTable(io, dir_path, testing.allocator);
+    try testing.expect(loaded != null);
+    defer testing.allocator.destroy(loaded.?);
+    try testing.expectEqual(@as(u16, 0x1234), loaded.?[0][0]);
+    try testing.expectEqual(@as(u16, 0xABCD), loaded.?[0][1]);
+}
+
+
+// Installing a custom pair-frequency table must change pairWeight output, and
+// resetFrequencyTable must restore the weights observed before the swap.
+test "setFrequencyTable / resetFrequencyTable: pairWeight output changes" {
+    // Custom table: every pair is "common" (0x1000) except 'qx', which stays
+    // at the rare weight 0xFE00.
+    var custom: [256][256]u16 = .{.{0x1000} ** 256} ** 256;
+    custom['q']['x'] = 0xFE00;
+
+    const before_th = pairWeight('t', 'h');
+    const before_qx = pairWeight('q', 'x');
+    const before_zj = pairWeight('z', 'j');
+
+    setFrequencyTable(&custom);
+    // `custom` is a local of this test: make sure the table is reset even when
+    // an expectation below fails.
+    defer resetFrequencyTable();
+
+    // 'zj' goes from rare (default) to common (custom), so its weight must drop.
+    // NOTE(review): relies on weight = table entry + jitter (0-255), as the
+    // "common pairs have lower weight than rare pairs" test describes — confirm
+    // against index.zig.
+    try testing.expect(pairWeight('z', 'j') < before_zj);
+
+    // Resetting must restore the original values.
+    resetFrequencyTable();
+    try testing.expectEqual(before_th, pairWeight('t', 'h'));
+    try testing.expectEqual(before_qx, pairWeight('q', 'x'));
+    try testing.expectEqual(before_zj, pairWeight('z', 'j'));
+}
+
+
+// latest() must return the most recently appended version record.
+test "file versions: append and latest" {
+    var fv = version.FileVersions.init(testing.allocator, "test.zig");
+    defer fv.deinit();
+
+    try fv.versions.append(testing.allocator, .{
+        .seq = 1,
+        .agent = 0,
+        .timestamp = 0,
+        .op = .snapshot,
+        .hash = 0x11,
+        .size = 100,
+    });
+    try fv.versions.append(testing.allocator, .{
+        .seq = 2,
+        .agent = 0,
+        .timestamp = 0,
+        .op = .replace,
+        .hash = 0x22,
+        .size = 150,
+    });
+
+    const latest = fv.latest().?;
+    try testing.expect(latest.seq == 2);
+    try testing.expect(latest.size == 150);
+}
+
+
+// countSince(n) must count only versions whose seq is strictly greater than n.
+test "file versions: countSince" {
+    var fv = version.FileVersions.init(testing.allocator, "test.zig");
+    defer fv.deinit();
+
+    try fv.versions.append(testing.allocator, .{
+        .seq = 1,
+        .agent = 0,
+        .timestamp = 0,
+        .op = .snapshot,
+        .hash = 0,
+        .size = 0,
+    });
+    try fv.versions.append(testing.allocator, .{
+        .seq = 5,
+        .agent = 0,
+        .timestamp = 0,
+        .op = .replace,
+        .hash = 0,
+        .size = 0,
+    });
+    try fv.versions.append(testing.allocator, .{
+        .seq = 10,
+        .agent = 0,
+        .timestamp = 0,
+        .op = .delete,
+        .hash = 0,
+        .size = 0,
+    });
+
+    try testing.expect(fv.countSince(0) == 3);
+    try testing.expect(fv.countSince(1) == 2);
+    try testing.expect(fv.countSince(5) == 1);
+    try testing.expect(fv.countSince(10) == 0);
+}
+
+
+test "watcher: queue overflow is explicit" {
+    var queue = watcher.EventQueue{};
+
+    var pushed: usize = 0;
+    while (true) : (pushed += 1) {
+        var path_buf: [32]u8 = undefined;
+        const path = try std.fmt.bufPrint(&path_buf, "tmp/f-{d}.zig", .{pushed});
+        if (!queue.push(watcher.FsEvent.init(path, .modified, @intCast(pushed)) orelse unreachable)) break;
+    }
+
+    var overflow_path_buf: [32]u8 = undefined;
+    const overflow_path = try std.fmt.bufPrint(&overflow_path_buf, "tmp/overflow.zig", .{});
+    try
testing.expect(!queue.push(watcher.FsEvent.init(overflow_path, .created, 999) orelse unreachable)); + + var popped: usize = 0; + while (queue.pop() != null) : (popped += 1) {} + try testing.expect(popped == pushed); +} + + +test "watcher: queue event copies path bytes" { + var queue = watcher.EventQueue{}; + const original = try testing.allocator.dupe(u8, "tmp/deleted.zig"); + try testing.expect(queue.push(watcher.FsEvent.init(original, .deleted, 99) orelse unreachable)); + testing.allocator.free(original); + + const event = queue.pop() orelse return error.TestUnexpectedResult; + try testing.expectEqualStrings("tmp/deleted.zig", event.path()); + try testing.expect(event.kind == .deleted); + try testing.expect(event.seq == 99); +} + + +test "watcher: parallel initial scan matches sequential results" { + var tmp_dir = testing.tmpDir(.{}); + defer tmp_dir.cleanup(); + + try tmp_dir.dir.createDirPath(io, "src/nested"); + try tmp_dir.dir.writeFile(io, .{ .sub_path = "src/main.zig", .data = "const std = @import(\"std\");\npub fn alpha() void {}\n// TODO: keep me\n" }); + try tmp_dir.dir.writeFile(io, .{ .sub_path = "src/nested/util.py", .data = "def beta():\n return 42\n# TODO later\n" }); + try tmp_dir.dir.writeFile(io, .{ .sub_path = "README.md", .data = "# demo\n" }); + + var root_buf: [std.fs.max_path_bytes]u8 = undefined; + const root_len = try tmp_dir.dir.realPathFile(io, ".", &root_buf); + const root = root_buf[0..root_len]; + + var store_seq = Store.init(testing.allocator); + defer store_seq.deinit(); + var explorer_seq = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer_seq.deinit(); + explorer_seq.setRoot(io, root); + try watcher.initialScanWithWorkerCount(io, &store_seq, &explorer_seq, root, testing.allocator, false, 1); + + var store_par = Store.init(testing.allocator); + defer store_par.deinit(); + var explorer_par = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer 
explorer_par.deinit(); + explorer_par.setRoot(io, root); + try watcher.initialScanWithWorkerCount(io, &store_par, &explorer_par, root, testing.allocator, false, 4); + + const tree_seq = try explorer_seq.getTree(testing.allocator, false); + defer testing.allocator.free(tree_seq); + const tree_par = try explorer_par.getTree(testing.allocator, false); + defer testing.allocator.free(tree_par); + try testing.expectEqualStrings(tree_seq, tree_par); + + const seq_hits = try explorer_seq.searchWord("TODO", testing.allocator); + defer testing.allocator.free(seq_hits); + const par_hits = try explorer_par.searchWord("TODO", testing.allocator); + defer testing.allocator.free(par_hits); + try testing.expectEqual(seq_hits.len, par_hits.len); + + try testing.expectEqual(explorer_seq.outlines.count(), explorer_par.outlines.count()); +} + + +test "edit: range_start zero is invalid" { + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + + const rel_path = try std.fmt.allocPrint(testing.allocator, ".zig-cache/tmp/{s}/edit-range.txt", .{tmp.sub_path}); + defer testing.allocator.free(rel_path); + + var file = try tmp.dir.createFile(io, "edit-range.txt", .{}); + defer file.close(io); + try file.writeStreamingAll(io, "line 1\nline 2\n"); + + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + const agent_id = try agents.register("test-agent"); + + try testing.expectError(error.InvalidRange, edit_mod.applyEdit(io, testing.allocator, &store, &agents, null, .{ + .path = rel_path, + .agent_id = agent_id, + .op = .replace, + .range = .{ 0, 1 }, + .content = "changed", + })); +} + + +test "edit: range_start beyond file is invalid" { + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + + const rel_path = try std.fmt.allocPrint(testing.allocator, ".zig-cache/tmp/{s}/edit-range-oob.txt", .{tmp.sub_path}); + defer testing.allocator.free(rel_path); + + var file = try tmp.dir.createFile(io, 
"edit-range-oob.txt", .{}); + defer file.close(io); + try file.writeStreamingAll(io, "line 1\nline 2\n"); + + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + const agent_id = try agents.register("test-agent-oob"); + + try testing.expectError(error.InvalidRange, edit_mod.applyEdit(io, testing.allocator, &store, &agents, null, .{ + .path = rel_path, + .agent_id = agent_id, + .op = .replace, + .range = .{ 3, 3 }, + .content = "changed", + })); +} + + +test "regression #2: searchContent frees trigram candidate slice" { + // Verifies that the candidates() return value is freed by searchContent. + // If the defer is missing, the GPA will detect the leak and fail. + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + try explorer.indexFile("leak-check.zig", "pub fn recordSnapshot(self: *Store) void {}\npub fn init() void {}"); + try explorer.indexFile("other.zig", "pub fn register(self: *Agent) void {}"); + + const results = try explorer.searchContent("recordSnapshot", testing.allocator, 50); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + try testing.expect(results.len == 1); + try testing.expectEqualStrings("leak-check.zig", results[0].path); +} + + +test "regression #2: searchContent no leak on zero results" { + // Even when trigram narrows to candidates but none match full text, + // the candidate slice must be freed. 
+ var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + try explorer.indexFile("abc.zig", "pub fn abcdef() void {}"); + + // "abcxyz" shares trigrams "abc" but won't match full text + const results = try explorer.searchContent("abcxyz", testing.allocator, 50); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + try testing.expect(results.len == 0); +} + + +test "regression #2: searchContent short query skips trigrams" { + // Queries < 3 chars can't use trigram index — ensure no leak from null path. + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + try explorer.indexFile("short.zig", "fn ab() void {}"); + + const results = try explorer.searchContent("ab", testing.allocator, 50); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + try testing.expect(results.len == 1); +} + + +test "regression #5: getHotFiles does not deadlock" { + // getHotFiles used to hold explorer.mu while calling store.getLatest() + // which locks store.mu — a lock ordering violation. The fix collects + // paths under explorer.mu, releases it, then locks store.mu separately. + // This test verifies correctness; deadlock would cause a hang. 
+ var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + var store = Store.init(testing.allocator); + defer store.deinit(); + + try explorer.indexFile("hot-a.zig", "pub fn a() void {}"); + try explorer.indexFile("hot-b.zig", "pub fn b() void {}"); + try explorer.indexFile("hot-c.zig", "pub fn c() void {}"); + + _ = try store.recordSnapshot("hot-a.zig", 10, 0x1); + _ = try store.recordSnapshot("hot-b.zig", 20, 0x2); + _ = try store.recordSnapshot("hot-c.zig", 30, 0x3); + _ = try store.recordSnapshot("hot-b.zig", 25, 0x4); // b updated again + + const hot = try explorer.getHotFiles(&store, testing.allocator, 2); + defer { + for (hot) |path| testing.allocator.free(path); + testing.allocator.free(hot); + } + try testing.expect(hot.len == 2); + // Most recent should be hot-b.zig (seq 4) then hot-c.zig (seq 3) + try testing.expectEqualStrings("hot-b.zig", hot[0]); + try testing.expectEqualStrings("hot-c.zig", hot[1]); +} + + +test "regression #5: getHotFiles with no store entries" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + var store = Store.init(testing.allocator); + defer store.deinit(); + + try explorer.indexFile("orphan.zig", "pub fn x() void {}"); + + const hot = try explorer.getHotFiles(&store, testing.allocator, 10); + defer { + for (hot) |path| testing.allocator.free(path); + testing.allocator.free(hot); + } + // File exists in explorer but not in store — seq defaults to 0 + try testing.expect(hot.len == 1); + try testing.expectEqualStrings("orphan.zig", hot[0]); +} + + +test "regression: concurrent hot/read with remove" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + var store = Store.init(testing.allocator); + defer store.deinit(); + + try explorer.indexFile("race.zig", "pub fn race() void {}"); + _ = try store.recordSnapshot("race.zig", 24, 
0x1); + + const Ctx = struct { + explorer: *Explorer, + store: *Store, + stop: *std.atomic.Value(bool), + }; + + // Reader thread body: calls getHotFiles and getContent in a loop until `stop` is set. + const Worker = struct { + fn run(ctx: *Ctx) void { + while (!ctx.stop.load(.acquire)) { + const hot = ctx.explorer.getHotFiles(ctx.store, testing.allocator, 2) catch continue; + defer { + for (hot) |path| testing.allocator.free(path); + testing.allocator.free(hot); + } + + const cached = ctx.explorer.getContent("race.zig", testing.allocator) catch continue; + if (cached) |content| testing.allocator.free(content); + } + } + }; + + var stop = std.atomic.Value(bool).init(false); + var ctx = Ctx{ .explorer = &explorer, .store = &store, .stop = &stop }; + const worker = try std.Thread.spawn(.{}, Worker.run, .{&ctx}); + defer worker.join(); + // Defers run in reverse order, so this fires before join(): if a `try` in the loop below fails, the worker is still told to stop instead of join() blocking forever. + defer stop.store(true, .release); + var i: usize = 0; + while (i < 200) : (i += 1) { + if (i % 2 == 0) { + try explorer.indexFile("race.zig", "pub fn race() void {}"); + _ = try store.recordSnapshot("race.zig", @intCast(24 + i), @intCast(i + 2)); + } else { + explorer.removeFile("race.zig"); + } + } +} + + +test "regression #5: store getLatestSeqUnlocked" { + var store = Store.init(testing.allocator); + defer store.deinit(); + + _ = try store.recordSnapshot("seq.zig", 100, 0xAA); + _ = try store.recordSnapshot("seq.zig", 200, 0xBB); + + store.mu.lock(); + const seq = store.getLatestSeqUnlocked("seq.zig"); + const missing = store.getLatestSeqUnlocked("nope.zig"); + store.mu.unlock(); + + try testing.expect(seq == 2); + try testing.expect(missing == 0); +} + + +test "regression #7: tree shows directory nodes" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("src/main.zig", "pub fn main() void {}"); + try explorer.indexFile("src/lib.zig", "pub fn init() void {}"); + try explorer.indexFile("build.zig", "pub fn build() void {}"); + + const tree = try 
explorer.getTree(testing.allocator, false); + defer testing.allocator.free(tree); + + // Should contain "src/" directory node + try testing.expect(std.mem.indexOf(u8, tree, "src/\n") != null); + // Should contain file basenames, not full paths + try testing.expect(std.mem.indexOf(u8, tree, " main.zig") != null); + try testing.expect(std.mem.indexOf(u8, tree, " lib.zig") != null); + // Root-level file should not be indented + try testing.expect(std.mem.indexOf(u8, tree, "build.zig") != null); +} + + +test "regression #7: tree handles nested directories" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("src/utils/hash.zig", "pub fn hash() void {}"); + try explorer.indexFile("src/main.zig", "pub fn main() void {}"); + + const tree = try explorer.getTree(testing.allocator, false); + defer testing.allocator.free(tree); + + // Should have both directory levels + try testing.expect(std.mem.indexOf(u8, tree, "src/\n") != null); + try testing.expect(std.mem.indexOf(u8, tree, " utils/\n") != null); + // Nested file should be double-indented + try testing.expect(std.mem.indexOf(u8, tree, " hash.zig") != null); +} + + +test "regression #7: tree shows only basenames" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("pkg/foo/bar.zig", "const x = 1;"); + + const tree = try explorer.getTree(testing.allocator, false); + defer testing.allocator.free(tree); + + // Full path should NOT appear in tree output + try testing.expect(std.mem.indexOf(u8, tree, "pkg/foo/bar.zig") == null); + // Only basename + try testing.expect(std.mem.indexOf(u8, tree, "bar.zig") != null); +} + + +test "regression: searchWord empty result is allocator-owned" { + var arena = 
std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("math.zig", "pub fn add(a: i32, b: i32) i32 { return a + b; }"); + + const hits = try explorer.searchWord("missing_identifier", testing.allocator); + defer testing.allocator.free(hits); + try testing.expect(hits.len == 0); +} + + +test "regression: searchContent frees empty trigram candidate slice" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("f.zig", "hello world"); + + const results = try explorer.searchContent("zzzzz", testing.allocator, 50); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + try testing.expect(results.len == 0); +} + + +test "regression: queue push stays non-blocking when full" { + var queue = watcher.EventQueue{}; + + var pushed: usize = 0; + while (true) : (pushed += 1) { + var path_buf: [32]u8 = undefined; + const path = try std.fmt.bufPrint(&path_buf, "tmp/fill-{d}.zig", .{pushed}); + if (!queue.push(watcher.FsEvent.init(path, .modified, @intCast(pushed)) orelse unreachable)) break; + } + + var overflow_path_buf: [32]u8 = undefined; + const overflow_path = try std.fmt.bufPrint(&overflow_path_buf, "tmp/overflow-2.zig", .{}); + const start = cio.nanoTimestamp(); + _ = queue.push(watcher.FsEvent.init(overflow_path, .created, 1000) orelse unreachable); + const elapsed = cio.nanoTimestamp() - start; + + try testing.expect(elapsed < 50 * std.time.ns_per_ms); +} + + +test "isPathSafe: rejects absolute paths" { + const mcp = @import("mcp.zig"); + try testing.expect(!mcp.isPathSafe("/etc/passwd")); + try testing.expect(!mcp.isPathSafe("/")); +} + + +test "isPathSafe: rejects parent traversal" { + const 
mcp = @import("mcp.zig"); + try testing.expect(!mcp.isPathSafe("../secret")); + try testing.expect(!mcp.isPathSafe("foo/../../etc/passwd")); + try testing.expect(!mcp.isPathSafe("..")); +} + + +test "isPathSafe: rejects empty path" { + const mcp = @import("mcp.zig"); + try testing.expect(!mcp.isPathSafe("")); +} + + +test "isPathSafe: accepts valid relative paths" { + const mcp = @import("mcp.zig"); + try testing.expect(mcp.isPathSafe("src/main.zig")); + try testing.expect(mcp.isPathSafe("README.md")); + try testing.expect(mcp.isPathSafe("a/b/c/d.txt")); +} + + +test "findSymbol: returned data is owned copy" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var explorer = Explorer.init(alloc, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + try explorer.indexFile("a.zig", "pub fn myFunc() void {}"); + + const result = try explorer.findSymbol("myFunc", alloc); + try testing.expect(result != null); + + // Remove the source — if result was borrowed, this would corrupt it + explorer.removeFile("a.zig"); + + // Owned copy should still be valid + try testing.expectEqualStrings("a.zig", result.?.path); + try testing.expectEqualStrings("myFunc", result.?.symbol.name); +} + + +test "findAllSymbols: returned data survives source removal" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var explorer = Explorer.init(alloc, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + try explorer.indexFile("a.zig", "pub fn foo() void {}"); + try explorer.indexFile("b.zig", "pub fn foo() void {}"); + + const results = try explorer.findAllSymbols("foo", alloc); + + // Remove sources + explorer.removeFile("a.zig"); + explorer.removeFile("b.zig"); + + // Owned copies should still be valid + try testing.expect(results.len == 2); + for (results) |r| { + try testing.expectEqualStrings("foo", r.symbol.name); + } +} + + +test "searchContent: returned 
paths are owned copies" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var explorer = Explorer.init(alloc, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + try explorer.indexFile("src/hello.zig", "pub fn greetWorld() void {}"); + + const results = try explorer.searchContent("greetWorld", alloc, 10); + try testing.expect(results.len == 1); + + // Remove the source + explorer.removeFile("src/hello.zig"); + + // Path and line_text should still be valid (owned) + try testing.expectEqualStrings("src/hello.zig", results[0].path); +} + + +test "trigram index: removeFile prunes empty sets" { + var ti = TrigramIndex.init(testing.allocator); + defer ti.deinit(); + + try ti.indexFile("only.zig", "xyzUniqueTrigramContent"); + const before = ti.candidates("xyzUniqueTrigramContent", testing.allocator); + if (before) |b| { + // Free via defer so a failing expectation does not also leak the slice. + defer testing.allocator.free(b); + try testing.expect(b.len > 0); + } + + ti.removeFile("only.zig"); + const after = ti.candidates("xyzUniqueTrigramContent", testing.allocator); + if (after) |a| { + defer testing.allocator.free(a); + try testing.expect(a.len == 0); + } +} + + +test "edit: atomic write leaves no temp files on success" { + // Create a temp file to edit + var tmp_dir = testing.tmpDir(.{}); + defer tmp_dir.cleanup(); + + const path = "test_atomic.zig"; + const content = "line1\nline2\nline3\n"; + try tmp_dir.dir.writeFile(io, .{ .sub_path = path, .data = content }); + + // The temp file pattern is "{path}.codedb_tmp" + const tmp_path = path ++ ".codedb_tmp"; + + // After a successful edit, no .codedb_tmp file should remain + // NOTE(review): no edit is actually performed in this test; only writeFile runs before the check. + tmp_dir.dir.access(io, tmp_path, .{}) catch |err| switch (err) { + // Expected: temp file doesn't exist (good) + error.FileNotFound => return, + // Any other failure means the check itself could not run, so surface it instead of passing. + else => return err, + }; + // If we get here, the temp file exists — that's a bug + return error.TempFileNotCleaned; +} + + +test "getBool: returns true for bool true" { + var map: std.json.ObjectMap = .empty; + defer map.deinit(testing.allocator); + try map.put(testing.allocator, 
"flag", .{ .bool = true }); + const mcp_getBool = @import("mcp.zig").getBool; + try testing.expect(mcp_getBool(&map, "flag") == true); +} + + +test "getBool: returns false for bool false" { + var map: std.json.ObjectMap = .empty; + defer map.deinit(testing.allocator); + try map.put(testing.allocator, "flag", .{ .bool = false }); + const mcp_getBool = @import("mcp.zig").getBool; + try testing.expect(mcp_getBool(&map, "flag") == false); +} + + +test "getBool: returns false for missing key" { + var map: std.json.ObjectMap = .empty; + defer map.deinit(testing.allocator); + const mcp_getBool = @import("mcp.zig").getBool; + try testing.expect(mcp_getBool(&map, "missing") == false); +} + + +test "getBool: returns false for non-bool value" { + var map: std.json.ObjectMap = .empty; + defer map.deinit(testing.allocator); + try map.put(testing.allocator, "flag", .{ .integer = 1 }); + const mcp_getBool = @import("mcp.zig").getBool; + try testing.expect(mcp_getBool(&map, "flag") == false); +} + + +test "Tool enum: all valid tool names parse" { + const Tool = @import("mcp.zig").Tool; + try testing.expect(std.meta.stringToEnum(Tool, "codedb_tree") != null); + try testing.expect(std.meta.stringToEnum(Tool, "codedb_outline") != null); + try testing.expect(std.meta.stringToEnum(Tool, "codedb_symbol") != null); + try testing.expect(std.meta.stringToEnum(Tool, "codedb_search") != null); + try testing.expect(std.meta.stringToEnum(Tool, "codedb_word") != null); + try testing.expect(std.meta.stringToEnum(Tool, "codedb_hot") != null); + try testing.expect(std.meta.stringToEnum(Tool, "codedb_deps") != null); + try testing.expect(std.meta.stringToEnum(Tool, "codedb_read") != null); + try testing.expect(std.meta.stringToEnum(Tool, "codedb_edit") != null); + try testing.expect(std.meta.stringToEnum(Tool, "codedb_changes") != null); + try testing.expect(std.meta.stringToEnum(Tool, "codedb_status") != null); + try testing.expect(std.meta.stringToEnum(Tool, "codedb_snapshot") != null); + try 
testing.expect(std.meta.stringToEnum(Tool, "codedb_bundle") != null); +} + + +test "Tool enum: invalid names return null" { + const Tool = @import("mcp.zig").Tool; + try testing.expect(std.meta.stringToEnum(Tool, "codedb_invalid") == null); + try testing.expect(std.meta.stringToEnum(Tool, "") == null); + try testing.expect(std.meta.stringToEnum(Tool, "tree") == null); +} + + +test "decomposeRegex: pure literal extracts trigrams" { + var q = try decomposeRegex("hello", testing.allocator); + defer q.deinit(); + // "hello" has 3 trigrams: hel, ell, llo + try testing.expectEqual(@as(usize, 3), q.and_trigrams.len); + try testing.expectEqual(@as(usize, 0), q.or_groups.len); +} + + +test "decomposeRegex: short literal yields no trigrams" { + var q = try decomposeRegex("ab", testing.allocator); + defer q.deinit(); + try testing.expectEqual(@as(usize, 0), q.and_trigrams.len); +} + + +test "decomposeRegex: dot breaks trigram chain" { + var q = try decomposeRegex("he.lo", testing.allocator); + defer q.deinit(); + // "he" then "lo" — neither long enough for trigrams + try testing.expectEqual(@as(usize, 0), q.and_trigrams.len); +} + + +test "decomposeRegex: dot in longer literal" { + var q = try decomposeRegex("hello.world", testing.allocator); + defer q.deinit(); + // "hello" -> hel,ell,llo; "world" -> wor,orl,rld = 6 trigrams + try testing.expectEqual(@as(usize, 6), q.and_trigrams.len); +} + + +test "decomposeRegex: alternation creates OR groups" { + var q = try decomposeRegex("foo|bar", testing.allocator); + defer q.deinit(); + try testing.expectEqual(@as(usize, 0), q.and_trigrams.len); + // All branch trigrams merged into single OR group + try testing.expectEqual(@as(usize, 1), q.or_groups.len); + // "foo" has 1 trigram + "bar" has 1 trigram = 2 trigrams in the group + try testing.expectEqual(@as(usize, 2), q.or_groups[0].len); +} + + +test "decomposeRegex: quantifier removes preceding char" { + var q = try decomposeRegex("hel+o", testing.allocator); + defer q.deinit(); + 
// "he" then "o" — + removes 'l', neither segment >= 3 + try testing.expectEqual(@as(usize, 0), q.and_trigrams.len); +} + + +test "decomposeRegex: escaped literal preserved" { + var q = try decomposeRegex("a\\.bc", testing.allocator); + defer q.deinit(); + // Escaped dot is literal: "a.bc" = 2 trigrams: a.b, .bc + try testing.expectEqual(@as(usize, 2), q.and_trigrams.len); +} + + +test "decomposeRegex: character class breaks chain" { + var q = try decomposeRegex("abc[xy]def", testing.allocator); + defer q.deinit(); + // "abc" = 1 trigram, "def" = 1 trigram + try testing.expectEqual(@as(usize, 2), q.and_trigrams.len); +} + + +test "decomposeRegex: backslash-w breaks chain" { + var q = try decomposeRegex("abc\\wdef", testing.allocator); + defer q.deinit(); + // "abc" = 1 trigram, "def" = 1 trigram + try testing.expectEqual(@as(usize, 2), q.and_trigrams.len); +} + + +test "candidatesRegex: finds files with AND trigrams" { + var ti = TrigramIndex.init(testing.allocator); + defer ti.deinit(); + + try ti.indexFile("foo.zig", "pub fn recordSnapshot() void {}"); + try ti.indexFile("bar.zig", "const x = 42;"); + + var q = try decomposeRegex("record.*Snapshot", testing.allocator); + defer q.deinit(); + // Should extract trigrams from "record" and "Snapshot" + try testing.expect(q.and_trigrams.len > 0); + + const cands = ti.candidatesRegex(&q, testing.allocator); + defer if (cands) |c| testing.allocator.free(c); + try testing.expect(cands != null); + try testing.expect(cands.?.len >= 1); + // foo.zig should be a candidate + var found_foo = false; + for (cands.?) 
|p| { + if (std.mem.eql(u8, p, "foo.zig")) found_foo = true; + } + try testing.expect(found_foo); +} + + +test "candidatesRegex: OR groups union posting lists" { + var ti = TrigramIndex.init(testing.allocator); + defer ti.deinit(); + + try ti.indexFile("alpha.zig", "function foobar() {}"); + try ti.indexFile("beta.zig", "function bazqux() {}"); + try ti.indexFile("gamma.zig", "const x = 1;"); + + var q = try decomposeRegex("foobar|bazqux", testing.allocator); + defer q.deinit(); + // All branch trigrams merged into single OR group + try testing.expectEqual(@as(usize, 1), q.or_groups.len); + + const cands = ti.candidatesRegex(&q, testing.allocator); + defer if (cands) |c| testing.allocator.free(c); + try testing.expect(cands != null); + // Both alpha.zig and beta.zig should be candidates + var found_alpha = false; + var found_beta = false; + for (cands.?) |p| { + if (std.mem.eql(u8, p, "alpha.zig")) found_alpha = true; + if (std.mem.eql(u8, p, "beta.zig")) found_beta = true; + } + // Assert each branch separately: a union that drops one branch must fail, and the failing line names which one. + try testing.expect(found_alpha); + try testing.expect(found_beta); +} + + +test "regexMatch: literal match" { + try testing.expect(regexMatch("hello world", "hello")); + try testing.expect(regexMatch("hello world", "world")); + try testing.expect(!regexMatch("hello world", "xyz")); +} + + +test "regexMatch: dot matches any char" { + try testing.expect(regexMatch("hello", "h.llo")); + try testing.expect(regexMatch("hello", "h..lo")); + try testing.expect(!regexMatch("hello", "h...lo")); +} + + +test "regexMatch: star quantifier" { + try testing.expect(regexMatch("helllo", "hel*o")); + try testing.expect(regexMatch("heo", "hel*o")); + try testing.expect(regexMatch("aab", "a*b")); +} + + +test "regexMatch: plus quantifier" { + try testing.expect(regexMatch("helllo", "hel+o")); + try testing.expect(!regexMatch("heo", "hel+o")); +} + + +test "regexMatch: question quantifier" { + try testing.expect(regexMatch("color", "colou?r")); + try testing.expect(regexMatch("colour", "colou?r")); +} + + +test "regexMatch: 
character class" { + try testing.expect(regexMatch("cat", "c[aeiou]t")); + try testing.expect(regexMatch("cot", "c[aeiou]t")); + try testing.expect(!regexMatch("cxt", "c[aeiou]t")); +} + + +test "regexMatch: negated character class" { + try testing.expect(!regexMatch("cat", "c[^aeiou]t")); + try testing.expect(regexMatch("cxt", "c[^aeiou]t")); +} + + +test "regexMatch: anchors" { + try testing.expect(regexMatch("hello", "^hello")); + try testing.expect(!regexMatch("say hello", "^hello")); + try testing.expect(regexMatch("hello", "hello$")); + try testing.expect(!regexMatch("hello world", "hello$")); +} + + +test "regexMatch: escape sequences" { + try testing.expect(regexMatch("abc123", "\\d+")); + try testing.expect(regexMatch("hello world", "\\w+\\s\\w+")); + try testing.expect(regexMatch("a.b", "a\\.b")); + try testing.expect(!regexMatch("axb", "a\\.b")); +} + + +test "regexMatch: alternation" { + try testing.expect(regexMatch("foo", "foo|bar")); + try testing.expect(regexMatch("bar", "foo|bar")); + try testing.expect(!regexMatch("baz", "foo|bar")); +} + + +test "regexMatch: alternation with many branches does not stack overflow" { + // 300 branches: 4 chars each + 299 separators = 1499 bytes max + var buf: [1500]u8 = undefined; + var pos: usize = 0; + var bi: usize = 0; + while (bi < 300) : (bi += 1) { + if (bi > 0) { + buf[pos] = '|'; + pos += 1; + } + buf[pos] = 'a'; + pos += 1; + buf[pos] = @as(u8, @intCast('0' + bi / 100 % 10)); + pos += 1; + buf[pos] = @as(u8, @intCast('0' + bi / 10 % 10)); + pos += 1; + buf[pos] = @as(u8, @intCast('0' + bi % 10)); + pos += 1; + } + const pattern = buf[0..pos]; + try testing.expect(regexMatch("a000", pattern)); + try testing.expect(regexMatch("a299", pattern)); + try testing.expect(!regexMatch("a999", pattern)); +} + + +test "regexMatch: dot-star" { + try testing.expect(regexMatch("hello world", "hello.*world")); + try testing.expect(regexMatch("helloworld", "hello.*world")); +} + + +test "issue-454: regex \\b word boundary 
matches whole-word, not literal 'b'" { + // \b is a word-boundary assertion: should match "foo" as a whole word + // but not when it appears as a substring inside another word. + try testing.expect(regexMatch("foo bar", "\\bfoo\\b")); + try testing.expect(!regexMatch("foobar", "\\bfoo\\b")); + // Whole-word "bar" at end + try testing.expect(regexMatch("foo bar", "\\bbar\\b")); + try testing.expect(!regexMatch("foobarbaz", "\\bbar\\b")); +} + + +test "bloom: PostingMask is populated during indexing" { + // Verify that indexing actually sets mask bits, not just zeros. + var ti = TrigramIndex.init(testing.allocator); + defer ti.deinit(); + + try ti.indexFile("a.zig", "pub fn init(allocator) void {}"); + + // Trigram "pub" should exist with non-zero masks + const tri_pub = packTrigram('p', 'u', 'b'); + const file_set = ti.index.getPtr(tri_pub); + try testing.expect(file_set != null); + + const mask = file_set.?.get("a.zig"); + try testing.expect(mask != null); + // loc_mask must have at least one bit set (position 0) + try testing.expect(mask.?.loc_mask != 0); + // next_mask must have at least one bit set (char after "pub" is ' ') + try testing.expect(mask.?.next_mask != 0); +} + + +test "bloom: loc_mask records correct position bits" { + var ti = TrigramIndex.init(testing.allocator); + defer ti.deinit(); + + // Content where "abc" appears at known positions + // Position 0: "abcXXXXXabcYYYYY" — abc at pos 0 and pos 8 + try ti.indexFile("pos.zig", "abcXXXXXabcYYYYY"); + + const tri_abc = packTrigram('a', 'b', 'c'); + const file_set = ti.index.getPtr(tri_abc).?; + const mask = file_set.get("pos.zig").?; + + // pos 0 → bit 0, pos 8 → bit 0 (8 % 8 = 0) + try testing.expect(mask.loc_mask & 1 != 0); // bit 0 set +} + + +test "bloom: next_mask records the following character" { + var ti = TrigramIndex.init(testing.allocator); + defer ti.deinit(); + + try ti.indexFile("next.zig", "abcdef"); + + // For trigram "abc" at position 0, next char is 'd' + const tri_abc = 
packTrigram('a', 'b', 'c'); + const file_set = ti.index.getPtr(tri_abc).?; + const mask = file_set.get("next.zig").?; + + const expected_bit: u8 = @as(u8, 1) << @intCast(normalizeChar('d') % 8); + try testing.expect(mask.next_mask & expected_bit != 0); +} + + +test "bloom: soundness — never rejects actual matches" { + // The bloom filter must NEVER produce false negatives. + // Every file that actually contains the query must appear in candidates. + var ti = TrigramIndex.init(testing.allocator); + defer ti.deinit(); + + // Index many files with varied content, some containing the target + try ti.indexFile("match1.zig", "fn handleRequest(ctx: *Context) void {}"); + try ti.indexFile("match2.zig", "pub fn handleRequest() !void { return error.Fail; }"); + try ti.indexFile("noise1.zig", "fn processData(input: []const u8) void {}"); + try ti.indexFile("noise2.zig", "const handler = RequestPool.init();"); // has "handl" and "eques" but not "handleRequest" + try ti.indexFile("noise3.zig", "fn handleResponse(ctx: *Context) void {}"); // close but different + try ti.indexFile("noise4.zig", "pub fn register(name: []const u8) void {}"); + try ti.indexFile("noise5.zig", "const request_handler = getHandler();"); // has both words but not adjacent + + const cands = ti.candidates("handleRequest", testing.allocator); + defer if (cands) |c| testing.allocator.free(c); + try testing.expect(cands != null); + + // MUST find both actual matches — bloom filter cannot reject them + var found1 = false; + var found2 = false; + for (cands.?) |p| { + if (std.mem.eql(u8, p, "match1.zig")) found1 = true; + if (std.mem.eql(u8, p, "match2.zig")) found2 = true; + } + try testing.expect(found1); + try testing.expect(found2); +} + + +test "bloom: reduces candidates vs pure trigram intersection" { + // This is the key test: prove bloom filtering actually eliminates + // files that trigram intersection alone would not. 
+ var ti = TrigramIndex.init(testing.allocator); + defer ti.deinit(); + + // "pub fn init" — common trigrams "pub", "ub ", "b f", " fn", "fn ", "n i", " in", "ini", "nit" + // We'll create files that share many of these trigrams but NOT adjacently. + try ti.indexFile("real.zig", "pub fn init() void {}"); // actual match + try ti.indexFile("shuffled1.zig", "fn publish(nit_pick: bool) void {}"); // has "pub","fn ","nit" but not adjacently + try ti.indexFile("shuffled2.zig", "fn pubNitInit() void {}"); // has "pub","nit","ini" but wrong order + try ti.indexFile("unrelated.zig", "const x = 42;"); // no overlap + + const cands = ti.candidates("pub fn init", testing.allocator); + defer if (cands) |c| testing.allocator.free(c); + try testing.expect(cands != null); + + // real.zig MUST be found (soundness) + var found_real = false; + for (cands.?) |p| { + if (std.mem.eql(u8, p, "real.zig")) found_real = true; + } + try testing.expect(found_real); + + // unrelated.zig must NOT be found + var found_unrelated = false; + for (cands.?) |p| { + if (std.mem.eql(u8, p, "unrelated.zig")) found_unrelated = true; + } + try testing.expect(!found_unrelated); + + // Count how many candidates we got — should be fewer than all files + // that share trigrams. At minimum, "unrelated.zig" is excluded. + try testing.expect(cands.?.len < 4); +} + + +test "bloom: loc_mask adjacency filtering works" { + // Construct a scenario where two trigrams exist in a file but at + // positions where they can't be adjacent. The loc_mask check should + // filter this out (probabilistically, but deterministically for + // carefully chosen positions). + var ti = TrigramIndex.init(testing.allocator); + defer ti.deinit(); + + // "XXXabcYYYYYYYYYYYYYYYdefZZZ" — "abc" at pos 3, "def" at pos 21 + // Query "abcdef" needs abc at pos N and def at pos N+3. + // But abc is at pos 3 (bit 3) and def is at pos 21 (bit 5). + // Shifted abc loc_mask bit 3 → bit 4. "bcd" would need to be at bit 4. 
+ // This tests the adjacency logic. + try ti.indexFile("adjacent.zig", "XXabcdefGH"); // abc and def ARE adjacent + try ti.indexFile("apart.zig", "XXXabcYYYYYYYYYYYYYYdefZZZ"); // abc and def far apart + + const cands = ti.candidates("abcdef", testing.allocator); + defer if (cands) |c| testing.allocator.free(c); + try testing.expect(cands != null); + + // adjacent.zig MUST be found + var found_adjacent = false; + for (cands.?) |p| { + if (std.mem.eql(u8, p, "adjacent.zig")) found_adjacent = true; + } + try testing.expect(found_adjacent); + + // apart.zig MAY be filtered out by loc_mask (depends on position mod 8 collision) + // We can't assert it's excluded because bloom filters allow false positives, + // but we CAN assert the total candidate count is reasonable. + try testing.expect(cands.?.len >= 1); // at least the real match +} + + +test "bloom: masks accumulate across multiple positions" { + // If a trigram appears at many positions in a file, both masks should + // have multiple bits set (OR'd together, never replaced). + var ti = TrigramIndex.init(testing.allocator); + defer ti.deinit(); + + // "the" appears at positions 0, 10, 20, 30, 40, 50, 60, 70 + try ti.indexFile("repeat.zig", "the_______the_______the_______the_______the_______the_______the_______the_______"); + + const tri_the = packTrigram('t', 'h', 'e'); + const file_set = ti.index.getPtr(tri_the).?; + const mask = file_set.get("repeat.zig").?; + + // With 8+ occurrences at varying positions, loc_mask should have many bits set + try testing.expect(@popCount(mask.loc_mask) >= 3); + // next_mask should also have bits set (from the chars following each "the") + try testing.expect(mask.next_mask != 0); +} + + +test "bloom: regression — candidate count for known queries" { + // Regression benchmark: index a controlled set of files and assert + // specific candidate counts. If bloom filtering breaks or regresses, + // these counts will increase. 
+ var ti = TrigramIndex.init(testing.allocator); + defer ti.deinit(); + + try ti.indexFile("a.zig", "pub fn initAllocator() void {}"); + try ti.indexFile("b.zig", "pub fn deinitAllocator() void {}"); + try ti.indexFile("c.zig", "pub fn init() void {}"); + try ti.indexFile("d.zig", "fn publish(data: []u8) void {}"); + try ti.indexFile("e.zig", "const initial_value = 0;"); + try ti.indexFile("f.zig", "fn processInput() !void {}"); + try ti.indexFile("g.zig", "const config = getConfig();"); + try ti.indexFile("h.zig", "fn handleNotification() void {}"); + + // "initAllocator" — a.zig must be found; b.zig ("deinitAllocator") shares trigrams + { + const cands = ti.candidates("initAllocator", testing.allocator); + defer if (cands) |c| testing.allocator.free(c); + try testing.expect(cands != null); + var found_a = false; + for (cands.?) |p| { + if (std.mem.eql(u8, p, "a.zig")) found_a = true; + } + try testing.expect(found_a); + // b.zig is a valid false positive (shares "initAllocator" substring in "deinitAllocator") + // but d/e/f/g/h should not appear + try testing.expect(cands.?.len <= 2); + } + + // "pub fn init" — should find a.zig, c.zig; maybe b.zig (shares "pub fn ") + // but NOT d/e/f/g/h + { + const cands = ti.candidates("pub fn init", testing.allocator); + defer if (cands) |c| testing.allocator.free(c); + try testing.expect(cands != null); + // Must include actual matches + var found_a = false; + var found_c = false; + for (cands.?) 
|p| { + if (std.mem.eql(u8, p, "a.zig")) found_a = true; + if (std.mem.eql(u8, p, "c.zig")) found_c = true; + } + try testing.expect(found_a); + try testing.expect(found_c); + // Candidate count must be <= 4 (bloom should exclude some) + // Without bloom: files sharing any "pub"/"fn "/"ini"/"nit" trigrams = many + // With bloom: adjacency + next_mask filtering should narrow it down + try testing.expect(cands.?.len <= 4); + } + + // "processInput" — f.zig must be found, few false positives allowed + { + const cands = ti.candidates("processInput", testing.allocator); + defer if (cands) |c| testing.allocator.free(c); + try testing.expect(cands != null); + var found_f = false; + for (cands.?) |p| { + if (std.mem.eql(u8, p, "f.zig")) found_f = true; + } + try testing.expect(found_f); + // Bloom may allow a false positive but should be way less than 8 + try testing.expect(cands.?.len <= 3); + } +} + + +test "regex regression: trigram extraction counts" { + // Verify exact trigram counts for known patterns. + // If decomposition logic changes, these catch it. 
+ { + var q = try decomposeRegex("handleRequest", testing.allocator); + defer q.deinit(); + // 13 chars → 11 trigrams, all AND + try testing.expectEqual(@as(usize, 11), q.and_trigrams.len); + try testing.expectEqual(@as(usize, 0), q.or_groups.len); + } + { + var q = try decomposeRegex("foo.*bar.*baz", testing.allocator); + defer q.deinit(); + // "foo", "bar", "baz" — each 3 chars = 1 trigram each = 3 AND trigrams + try testing.expectEqual(@as(usize, 3), q.and_trigrams.len); + try testing.expectEqual(@as(usize, 0), q.or_groups.len); + } + { + var q = try decomposeRegex("alpha|beta|gamma", testing.allocator); + defer q.deinit(); + // No AND trigrams — all in OR groups + try testing.expectEqual(@as(usize, 0), q.and_trigrams.len); + try testing.expectEqual(@as(usize, 1), q.or_groups.len); + // alpha=3 + beta=2 + gamma=3 = 8 trigrams in the OR group + try testing.expectEqual(@as(usize, 8), q.or_groups[0].len); + } +} + + +test "regex regression: regexMatch edge cases" { + // Empty pattern matches anything + try testing.expect(regexMatch("anything", "")); + + // Pure wildcard + try testing.expect(regexMatch("abc", ".*")); + try testing.expect(regexMatch("", ".*")); + + // Consecutive quantifiers shouldn't crash + try testing.expect(regexMatch("aab", "a+b")); + try testing.expect(!regexMatch("b", "a+b")); + + // Nested-ish patterns + try testing.expect(regexMatch("foobar", "foo.ar")); + try testing.expect(!regexMatch("foar", "foo.ar")); + + // Backslash at end of pattern (edge case) + try testing.expect(!regexMatch("abc", "abc\\")); +} + + +test "regex regression: candidatesRegex reduces vs brute force" { + var ti = TrigramIndex.init(testing.allocator); + defer ti.deinit(); + + try ti.indexFile("handler.zig", "pub fn handleRequest(ctx: *Context) !void { }"); + try ti.indexFile("process.zig", "pub fn processData(input: []u8) void { }"); + try ti.indexFile("utils.zig", "pub fn formatString(s: []const u8) []u8 { return s; }"); + try ti.indexFile("config.zig", "const 
default_config = Config{ .debug = false };"); + + // "handle.*Request" — should extract trigrams from "handle" and "Request" + var q = try decomposeRegex("handle.*Request", testing.allocator); + defer q.deinit(); + try testing.expect(q.and_trigrams.len >= 4); // at least some from both halves + + const cands = ti.candidatesRegex(&q, testing.allocator); + defer if (cands) |c| testing.allocator.free(c); + try testing.expect(cands != null); + + // handler.zig MUST be a candidate (soundness) + var found_handler = false; + for (cands.?) |p| { + if (std.mem.eql(u8, p, "handler.zig")) found_handler = true; + } + try testing.expect(found_handler); + + // Should NOT include config.zig (no "handle" or "Request" trigrams) + var found_config = false; + for (cands.?) |p| { + if (std.mem.eql(u8, p, "config.zig")) found_config = true; + } + try testing.expect(!found_config); + + // Candidate count should be much less than total files + try testing.expect(cands.?.len <= 2); +} + + +test "perf regression: indexing 200 files under 200ms" { + var ti = TrigramIndex.init(testing.allocator); + defer ti.deinit(); + var wi = WordIndex.init(testing.allocator); + defer wi.deinit(); + + // Generate 200 synthetic files with realistic content + var bufs: [200][]u8 = undefined; + var names: [200][]u8 = undefined; + for (0..200) |i| { + names[i] = try std.fmt.allocPrint(testing.allocator, "src/file_{d:0>3}.zig", .{i}); + bufs[i] = try std.fmt.allocPrint(testing.allocator, + \\pub fn handler_{d}(ctx: *Context, req: Request) !Response {{ + \\ const allocator = ctx.allocator; + \\ const data = try req.readBody(allocator); + \\ defer allocator.free(data); + \\ return Response.init(.ok, data); + \\}} + \\ + \\const Config_{d} = struct {{ + \\ name: []const u8, + \\ value: i64 = {d}, + \\ enabled: bool = true, + \\}}; + , .{ i, i, i * 42 }); + } + defer for (0..200) |i| { + testing.allocator.free(bufs[i]); + testing.allocator.free(names[i]); + }; + + var timer = try cio.Timer.start(); + for (0..200) 
|i| { + try ti.indexFile(names[i], bufs[i]); + try wi.indexFile(names[i], bufs[i]); + } + const elapsed_ns = timer.read(); + const elapsed_ms = @as(f64, @floatFromInt(elapsed_ns)) / 1_000_000.0; + + // Effective budget is 500ms, not the 200ms in the test name: + // Debug builds are ~10x slower than ReleaseFast; give generous headroom. + // ReleaseFast typically ~30ms; Debug ~100–250ms depending on host. + try testing.expect(elapsed_ms < 500.0); +} + + +test "perf regression: trigram candidate lookup under 1ms per query" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var ti = TrigramIndex.init(testing.allocator); + defer ti.deinit(); + + for (0..100) |i| { + const name = try std.fmt.allocPrint(alloc, "mod_{d}.zig", .{i}); + const content = try std.fmt.allocPrint(alloc, + \\pub fn process_{d}(data: []const u8) !void {{ + \\ const result = transform(data); + \\ try validate(result); + \\}} + , .{i}); + try ti.indexFile(name, content); + } + + const queries = [_][]const u8{ + "process_42", + "transform", + "pub fn process", + "validate(result)", + }; + + var timer = try cio.Timer.start(); + const iters: usize = 1000; + for (0..iters) |_| { + for (queries) |q| { + const cands = ti.candidates(q, testing.allocator); + if (cands) |c| testing.allocator.free(c); + } + } + const elapsed_ns = timer.read(); + const ns_per_query = elapsed_ns / (iters * queries.len); + + // Must be under 1ms (1_000_000 ns) per query — typically ~100µs + try testing.expect(ns_per_query < 1_000_000); +} + + +test "perf regression: word index lookup under 100ns per query" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var wi = WordIndex.init(testing.allocator); + defer wi.deinit(); + + for (0..100) |i| { + const name = try std.fmt.allocPrint(alloc, "src_{d}.zig", .{i}); + const content = try std.fmt.allocPrint(alloc, "pub fn 
handleRequest_{d}(ctx: *Context) void {{}}\nconst allocator = getDefaultAllocator();\n", .{i}); + try wi.indexFile(name, content); + } + + const queries = [_][]const u8{ "handleRequest_50", "allocator", "getDefaultAllocator", "Context" }; + + var timer = try cio.Timer.start(); + const iters: usize = 100_000; + for (0..iters) |_| { + for (queries) |q| { + _ = wi.search(q); + } + } + const elapsed_ns = timer.read(); + const ns_per_query = elapsed_ns / (iters * queries.len); + // Word lookup must be under 500ns in debug — typically ~5ns in release + try testing.expect(ns_per_query < 500); +} + + +test "perf regression: bloom filter reduces scan work" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var ti = TrigramIndex.init(testing.allocator); + defer ti.deinit(); + + for (0..50) |i| { + const name = try std.fmt.allocPrint(alloc, "f{d:0>2}.zig", .{i}); + const content = try std.fmt.allocPrint(alloc, "pub fn init_{d}(allocator: Allocator) void {{}}\nfn deinit_{d}() void {{}}\n", .{ i, i }); + try ti.indexFile(name, content); + } + + // "pub fn init_25" — specific enough to test bloom effectiveness + const cands = ti.candidates("pub fn init_25", testing.allocator); + defer if (cands) |c| testing.allocator.free(c); + try testing.expect(cands != null); + + // With bloom filtering, should find very few candidates + try testing.expect(cands.?.len <= 10); + + // The actual target file MUST be present (soundness) + var found_target = false; + for (cands.?) 
|p| { + if (std.mem.eql(u8, p, "f25.zig")) found_target = true; + } + try testing.expect(found_target); + + // KEY ASSERTION: candidate count is meaningfully less than total files + // This proves bloom filtering is doing work, not just passing through + try testing.expect(cands.?.len < 25); // must eliminate at least half +} + + +test "disk word index: round-trip write and read preserves hits" { + const alloc = testing.allocator; + var wi = WordIndex.init(alloc); + defer wi.deinit(); + + try wi.indexFile("src/main.zig", "const Store = @import(\"store.zig\").Store;\npub fn main() void {}\n"); + try wi.indexFile("src/store.zig", "pub const Store = struct {};\npub fn open() void {}\n"); + + const hits_before = try wi.searchDeduped("Store", alloc); + defer alloc.free(hits_before); + try testing.expectEqual(@as(usize, 2), hits_before.len); + + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp.dir.realPathFile(io, ".", &path_buf); + const dir_path = path_buf[0..dir_path_len]; + + const fake_head = "0123456789abcdef0123456789abcdef01234567".*; + try wi.writeToDisk(io, dir_path, fake_head); + + const header = try WordIndex.readDiskHeader(io, dir_path, alloc); + try testing.expect(header != null); + try testing.expectEqual(@as(u32, 2), header.?.file_count); + try testing.expect(header.?.git_head != null); + try testing.expectEqualSlices(u8, &fake_head, &header.?.git_head.?); + + const loaded = WordIndex.readFromDisk(io, dir_path, alloc); + try testing.expect(loaded != null); + var loaded_wi = loaded.?; + defer loaded_wi.deinit(); + + const hits_after = try loaded_wi.searchDeduped("Store", alloc); + defer alloc.free(hits_after); + try testing.expectEqual(hits_before.len, hits_after.len); + + var found_main = false; + var found_store = false; + for (hits_after) |hit| { + if (std.mem.eql(u8, loaded_wi.hitPath(hit), "src/main.zig")) found_main = true; + if (std.mem.eql(u8, 
loaded_wi.hitPath(hit), "src/store.zig")) found_store = true; + } + try testing.expect(found_main); + try testing.expect(found_store); +} + + +test "disk word index: skip_file_words still writes file table" { + const alloc = testing.allocator; + var wi = WordIndex.init(alloc); + defer wi.deinit(); + wi.skip_file_words = true; + + try wi.indexFile("src/a.zig", "pub fn alphaToken() void {}\n"); + try wi.indexFile("src/b.zig", "pub fn betaToken() void {}\n"); + + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp.dir.realPathFile(io, ".", &path_buf); + const dir_path = path_buf[0..dir_path_len]; + + try wi.writeToDisk(io, dir_path, null); + + const header = try WordIndex.readDiskHeader(io, dir_path, alloc); + try testing.expect(header != null); + try testing.expectEqual(@as(u32, 2), header.?.file_count); + + const loaded = WordIndex.readFromDisk(io, dir_path, alloc); + try testing.expect(loaded != null); + var loaded_wi = loaded.?; + defer loaded_wi.deinit(); + + const hits = try loaded_wi.searchDeduped("alphaToken", alloc); + defer alloc.free(hits); + try testing.expectEqual(@as(usize, 1), hits.len); + try testing.expectEqualStrings("src/a.zig", loaded_wi.hitPath(hits[0])); +} + + +test "disk index: round-trip write and read preserves candidates" { + const alloc = testing.allocator; + var ti = TrigramIndex.init(alloc); + defer ti.deinit(); + + try ti.indexFile("src/main.zig", "pub fn main() void { const store = Store.init(allocator); }"); + try ti.indexFile("src/index.zig", "pub fn indexFile(self: *TrigramIndex, path: []const u8) !void {}"); + try ti.indexFile("src/watcher.zig", "pub fn initialScan(store: *Store) !void {}"); + + // Verify candidates before write + const cands_before = ti.candidates("indexFile", testing.allocator); + defer if (cands_before) |c| alloc.free(c); + try testing.expect(cands_before != null); + try testing.expect(cands_before.?.len >= 1); + + // Write to 
temp dir + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp.dir.realPathFile(io, ".", &path_buf); + const dir_path = path_buf[0..dir_path_len]; + + try ti.writeToDisk(io, dir_path, null); + + // Read back + const loaded = TrigramIndex.readFromDisk(io, dir_path, alloc); + try testing.expect(loaded != null); + var loaded_ti = loaded.?; + defer loaded_ti.deinit(); + + // Same candidates should be returned + const cands_after = loaded_ti.candidates("indexFile", testing.allocator); + defer if (cands_after) |c| alloc.free(c); + try testing.expect(cands_after != null); + try testing.expectEqual(cands_before.?.len, cands_after.?.len); + + // Verify specific file is present + var found = false; + for (cands_after.?) |p| { + if (std.mem.eql(u8, p, "src/index.zig")) found = true; + } + try testing.expect(found); +} + + +test "disk index: readFromDisk returns null for missing files" { + const loaded = TrigramIndex.readFromDisk(io, "/tmp/codedb_nonexistent_dir_12345", testing.allocator); + try testing.expect(loaded == null); +} + + +test "disk index: readFromDisk returns null for corrupt magic" { + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp.dir.realPathFile(io, ".", &path_buf); + const dir_path = path_buf[0..dir_path_len]; + + // Write garbage postings file + const postings_path = try std.fmt.allocPrint(testing.allocator, "{s}/trigram.postings", .{dir_path}); + defer testing.allocator.free(postings_path); + { + const f = try std.Io.Dir.cwd().createFile(io, postings_path, .{}); + defer f.close(io); + try f.writeStreamingAll(io, "BAADMAGIC"); + } + // Write garbage lookup file + const lookup_path = try std.fmt.allocPrint(testing.allocator, "{s}/trigram.lookup", .{dir_path}); + defer testing.allocator.free(lookup_path); + { + const f = try std.Io.Dir.cwd().createFile(io, lookup_path, .{}); + 
defer f.close(io); + try f.writeStreamingAll(io, "BAADMAGIC"); + } + + const loaded = TrigramIndex.readFromDisk(io, dir_path, testing.allocator); + try testing.expect(loaded == null); +} + + +test "disk index: empty index round-trips correctly" { + const alloc = testing.allocator; + var ti = TrigramIndex.init(alloc); + defer ti.deinit(); + + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp.dir.realPathFile(io, ".", &path_buf); + const dir_path = path_buf[0..dir_path_len]; + + try ti.writeToDisk(io, dir_path, null); + + const loaded = TrigramIndex.readFromDisk(io, dir_path, alloc); + try testing.expect(loaded != null); + var loaded_ti = loaded.?; + defer loaded_ti.deinit(); + + try testing.expectEqual(@as(u32, 0), loaded_ti.fileCount()); +} + + +test "disk index: bloom masks preserved after round-trip" { + const alloc = testing.allocator; + var ti = TrigramIndex.init(alloc); + defer ti.deinit(); + + try ti.indexFile("bloom.zig", "pub fn handleRequest(ctx: *Context) void {}"); + + // Get original masks + const tri = packTrigram('h', 'a', 'n'); + const orig_set = ti.index.getPtr(tri).?; + const orig_mask = orig_set.get("bloom.zig").?; + + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp.dir.realPathFile(io, ".", &path_buf); + const dir_path = path_buf[0..dir_path_len]; + + try ti.writeToDisk(io, dir_path, null); + + const loaded = TrigramIndex.readFromDisk(io, dir_path, alloc); + try testing.expect(loaded != null); + var loaded_ti = loaded.?; + defer loaded_ti.deinit(); + + // Check masks match + const loaded_set = loaded_ti.index.getPtr(tri).?; + const loaded_mask = loaded_set.get("bloom.zig").?; + try testing.expectEqual(orig_mask.next_mask, loaded_mask.next_mask); + try testing.expectEqual(orig_mask.loc_mask, loaded_mask.loc_mask); +} + + +test "disk index: fileCount matches after 
round-trip" { + const alloc = testing.allocator; + var ti = TrigramIndex.init(alloc); + defer ti.deinit(); + + try ti.indexFile("a.zig", "fn alpha() void {}"); + try ti.indexFile("b.zig", "fn beta() void {}"); + try ti.indexFile("c.zig", "fn gamma() void {}"); + + try testing.expectEqual(@as(u32, 3), ti.fileCount()); + + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp.dir.realPathFile(io, ".", &path_buf); + const dir_path = path_buf[0..dir_path_len]; + + try ti.writeToDisk(io, dir_path, null); + + const loaded = TrigramIndex.readFromDisk(io, dir_path, alloc); + try testing.expect(loaded != null); + var loaded_ti = loaded.?; + defer loaded_ti.deinit(); + + try testing.expectEqual(@as(u32, 3), loaded_ti.fileCount()); +} + + +test "disk index: writeToDisk stores git_head, readGitHead retrieves it" { + const alloc = testing.allocator; + var ti = TrigramIndex.init(alloc); + defer ti.deinit(); + + try ti.indexFile("a.zig", "fn hello() void {}"); + + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp.dir.realPathFile(io, ".", &path_buf); + const dir_path = path_buf[0..dir_path_len]; + + const fake_head = "aabbccddeeff00112233445566778899aabbccdd".*; + try ti.writeToDisk(io, dir_path, fake_head); + + const retrieved = try TrigramIndex.readGitHead(io, dir_path, alloc); + try testing.expect(retrieved != null); + try testing.expectEqualSlices(u8, &fake_head, &retrieved.?); +} + + +test "disk index: writeToDisk with null git_head, readGitHead returns null" { + const alloc = testing.allocator; + var ti = TrigramIndex.init(alloc); + defer ti.deinit(); + + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp.dir.realPathFile(io, ".", &path_buf); + const dir_path = path_buf[0..dir_path_len]; + + try ti.writeToDisk(io, 
dir_path, null); + + const retrieved = try TrigramIndex.readGitHead(io, dir_path, alloc); + try testing.expect(retrieved == null); +} + + +test "disk index: readDiskHeader returns file_count and git_head" { + const alloc = testing.allocator; + var ti = TrigramIndex.init(alloc); + defer ti.deinit(); + + try ti.indexFile("x.zig", "pub const X = 42;"); + try ti.indexFile("y.zig", "pub const Y = 99;"); + + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp.dir.realPathFile(io, ".", &path_buf); + const dir_path = path_buf[0..dir_path_len]; + + const fake_head = "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef".*; + try ti.writeToDisk(io, dir_path, fake_head); + + const hdr = try TrigramIndex.readDiskHeader(io, dir_path, alloc); + try testing.expect(hdr != null); + try testing.expectEqual(@as(u32, 2), hdr.?.file_count); + try testing.expect(hdr.?.git_head != null); + try testing.expectEqualSlices(u8, &fake_head, &hdr.?.git_head.?); +} + + +test "disk index: v1 format (no git_head) still loads and readGitHead returns null" { + const alloc = testing.allocator; + + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp.dir.realPathFile(io, ".", &path_buf); + const dir_path = path_buf[0..dir_path_len]; + + // Manually write a v1 postings file (no git head bytes) + const postings_path = try std.fmt.allocPrint(alloc, "{s}/trigram.postings", .{dir_path}); + defer alloc.free(postings_path); + { + const f = try std.Io.Dir.cwd().createFile(io, postings_path, .{}); + defer f.close(io); + // magic(4) + version=1(2) + file_count=0(2) = 8 bytes total + try f.writeStreamingAll(io, &.{ 'C', 'D', 'B', 'T' }); + try f.writeStreamingAll(io, &.{ 1, 0 }); // version = 1 LE + try f.writeStreamingAll(io, &.{ 0, 0 }); // file_count = 0 + } + // Write a matching v1 lookup file + const lookup_path = try std.fmt.allocPrint(alloc, 
"{s}/trigram.lookup", .{dir_path}); + defer alloc.free(lookup_path); + { + const f = try std.Io.Dir.cwd().createFile(io, lookup_path, .{}); + defer f.close(io); + // magic(4) + version=1(2) + pad(2) + entry_count=0(4) = 12 bytes + try f.writeStreamingAll(io, &.{ 'C', 'D', 'B', 'L' }); + try f.writeStreamingAll(io, &.{ 1, 0 }); // version = 1 + try f.writeStreamingAll(io, &.{ 0, 0 }); // pad + try f.writeStreamingAll(io, &.{ 0, 0, 0, 0 }); // entry_count = 0 + } + + // readGitHead on a v1 file must return null (no git head stored) + const git_head = try TrigramIndex.readGitHead(io, dir_path, alloc); + try testing.expect(git_head == null); + + // readFromDisk on a v1 file must still succeed (backward compat) + const loaded = TrigramIndex.readFromDisk(io, dir_path, alloc); + try testing.expect(loaded != null); + var loaded_ti = loaded.?; + defer loaded_ti.deinit(); + try testing.expectEqual(@as(u32, 0), loaded_ti.fileCount()); +} + + +test "issue-105: large files skip trigram indexing to prevent OOM" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + // Create content just over 64KB — should be indexed for outline/word but NOT trigram + const large_content = try testing.allocator.alloc(u8, 65 * 1024); + defer testing.allocator.free(large_content); + @memset(large_content, 'a'); + // Make it valid Zig so outline parsing works + @memcpy(large_content[0..21], "pub fn bigFunc() void"); + + // indexFileSkipTrigram should succeed without building trigrams + try explorer.indexFileSkipTrigram("large.zig", large_content); + + // The file should be in outlines and contents but NOT in the trigram index + try testing.expect(explorer.outlines.count() == 1); + try testing.expect(explorer.contents.count() == 1); + try testing.expect(explorer.trigram_index.fileCount() == 0); + + // A small file should still get trigram-indexed + try explorer.indexFile("small.zig", "pub fn tiny() void {}"); + try 
testing.expect(explorer.trigram_index.fileCount() == 1); +} + + +test "issue-107: codedb_deps returns results for Python files" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("mypackage/utils/helpers.py", "def helper_func():\n pass\n"); + try explorer.indexFile("consumer.py", "from mypackage.utils.helpers import helper_func\n"); + + const deps = try explorer.getImportedBy("mypackage/utils/helpers.py", testing.allocator); + defer { + for (deps) |d| testing.allocator.free(d); + testing.allocator.free(deps); + } + + try testing.expect(deps.len == 1); + try testing.expectEqualStrings("consumer.py", deps[0]); +} + + +test "regression-142: trigram index finds all matching files" { + var exp = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer exp.deinit(); + + try exp.indexFile("src/main.zig", "pub fn handleRequest(ctx: *Context) !void {}"); + try exp.indexFile("src/server.zig", "fn handleRequest(req: Request) void {}"); + try exp.indexFile("src/util.zig", "pub fn formatDate() []u8 {}"); + + const results = try exp.searchContent("handleRequest", testing.allocator, 50); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + // Must find both files containing "handleRequest" + try testing.expect(results.len == 2); +} + + +test "regression-142: trigram index returns no false positives" { + var exp = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer exp.deinit(); + + try exp.indexFile("a.zig", "pub fn alpha() void {}"); + try exp.indexFile("b.zig", "pub fn beta() void {}"); + + const results = try exp.searchContent("gamma", testing.allocator, 50); + defer testing.allocator.free(results); + // Must return zero results for non-existent content + try 
testing.expect(results.len == 0); +} + + +test "regression-142: trigram intersection narrows correctly" { + var exp = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer exp.deinit(); + + try exp.indexFile("match.zig", "const unique_identifier_xyz = 42;"); + try exp.indexFile("partial.zig", "const unique_other = 99;"); + try exp.indexFile("none.zig", "pub fn foo() void {}"); + + const results = try exp.searchContent("unique_identifier_xyz", testing.allocator, 50); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + // Only the exact match file, not the partial + try testing.expect(results.len == 1); + try testing.expectEqualStrings("match.zig", results[0].path); +} + + +test "regression-142: trigram handles file removal" { + var exp = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer exp.deinit(); + + try exp.indexFile("temp.zig", "pub fn removable() void {}"); + try exp.indexFile("keep.zig", "pub fn permanent() void {}"); + + // Remove a file + exp.removeFile("temp.zig"); + + const results = try exp.searchContent("removable", testing.allocator, 50); + defer testing.allocator.free(results); + try testing.expect(results.len == 0); + + const results2 = try exp.searchContent("permanent", testing.allocator, 50); + defer { + for (results2) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results2); + } + try testing.expect(results2.len == 1); +} + + +test "regression-142: trigram handles re-indexing same file" { + var exp = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer exp.deinit(); + + try exp.indexFile("mutable.zig", "pub fn oldContent() void {}"); + try exp.indexFile("mutable.zig", "pub fn newContent() void {}"); + + const old = try exp.searchContent("oldContent", testing.allocator, 50); + defer 
testing.allocator.free(old); + try testing.expect(old.len == 0); + + const new = try exp.searchContent("newContent", testing.allocator, 50); + defer { + for (new) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(new); + } + try testing.expect(new.len == 1); +} + + +test "regression-142: trigram disk roundtrip preserves results" { + // Writes a two-file index to a temp dir, reads it back, and checks that + // the reloaded index still yields exactly one candidate for "searchable". + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp.dir.realPathFile(io, ".", &path_buf); + const dir_path = path_buf[0..dir_path_len]; + + // Build the index and write it to disk inside a nested scope: the defer + // releases idx1 on every exit path (including a failing `try`) and still + // runs before the index is read back below. + { + var idx1 = TrigramIndex.init(testing.allocator); + defer idx1.deinit(); + try idx1.indexFile("a.zig", "pub fn searchable() void {}"); + try idx1.indexFile("b.zig", "const value = 42;"); + try idx1.writeToDisk(io, dir_path, null); + } + + // Read back + var idx2 = TrigramIndex.readFromDisk(io, dir_path, testing.allocator) orelse return error.TestUnexpectedResult; + defer idx2.deinit(); + + // Must find same results + const cands = idx2.candidates("searchable", testing.allocator) orelse return error.TestUnexpectedResult; + defer testing.allocator.free(cands); + try testing.expect(cands.len == 1); +} + + +test "regression-142: many files don't corrupt index" { + var exp = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer exp.deinit(); + + // Index 500 files + var i: usize = 0; + while (i < 500) : (i += 1) { + var name_buf: [32]u8 = undefined; + const name = try std.fmt.bufPrint(&name_buf, "file_{d}.zig", .{i}); + var content_buf: [64]u8 = undefined; + const content = try std.fmt.bufPrint(&content_buf, "pub fn func_{d}() void {{}}", .{i}); + try exp.indexFile(name, content); + } + + // Search for a specific one + const results = try exp.searchContent("func_250", testing.allocator, 50); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + 
testing.allocator.free(results); + } + try testing.expect(results.len == 1); + try testing.expectEqualStrings("file_250.zig", results[0].path); +} + + +test "regression-142: short queries fall back gracefully" { + var exp = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer exp.deinit(); + + try exp.indexFile("a.zig", "pub fn ab() void {}"); + + // 2-char query: too short for trigrams, should still work via fallback + const results = try exp.searchContent("ab", testing.allocator, 50); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + try testing.expect(results.len == 1); +} + + +test "regression-142: word index still works alongside trigram" { + var exp = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer exp.deinit(); + + try exp.indexFile("words.zig", "pub fn mySpecialFunction() void {}"); + + const hits = try exp.searchWord("mySpecialFunction", testing.allocator); + defer testing.allocator.free(hits); + try testing.expect(hits.len == 1); +} + + +test "issue-164: mmap trigram index returns same candidates as heap index" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const allocator = arena.allocator(); + + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + try explorer.indexFile("src/auth.zig", "pub fn handleAuth(req: *Request) !void { validate(req); }"); + try explorer.indexFile("src/gate.zig", "pub fn checkGate(ctx: *Context) !bool { return ctx.authenticated; }"); + try explorer.indexFile("src/util.zig", "pub fn formatStr(buf: []u8, args: anytype) !void {}"); + + const heap_results = explorer.trigram_index.candidates("handleAuth", allocator) orelse + return error.NoCandidates; + + try testing.expect(heap_results.len >= 1); + + var tmp_dir = testing.tmpDir(.{}); + defer tmp_dir.cleanup(); 
+ + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const tmp_path_len = try tmp_dir.dir.realPathFile(io, ".", &path_buf); + const tmp_path = path_buf[0..tmp_path_len]; + + try explorer.trigram_index.writeToDisk(io, tmp_path, null); + + var mmap_idx = MmapTrigramIndex.initFromDisk(io, tmp_path, testing.allocator) orelse + return error.MmapInitFailed; + defer mmap_idx.deinit(); + + const mmap_results = mmap_idx.candidates("handleAuth", allocator) orelse + return error.NoCandidates; + + try testing.expect(mmap_results.len >= 1); + try testing.expectEqual(heap_results.len, mmap_results.len); + try testing.expectEqual(explorer.trigram_index.fileCount(), mmap_idx.fileCount()); + try testing.expect(mmap_idx.containsFile("src/auth.zig")); + try testing.expect(mmap_idx.containsFile("src/gate.zig")); + try testing.expect(!mmap_idx.containsFile("nonexistent.zig")); +} + + +test "issue-164: mmap binary search on sorted lookup table" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const allocator = arena.allocator(); + + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + try explorer.indexFile("a.zig", "const alpha = 42;"); + try explorer.indexFile("b.zig", "const beta = 43;"); + try explorer.indexFile("c.zig", "const gamma = 44;"); + try explorer.indexFile("d.zig", "const delta = 45;"); + try explorer.indexFile("e.zig", "const alpha_beta = 99;"); + + var tmp_dir = testing.tmpDir(.{}); + defer tmp_dir.cleanup(); + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const tmp_path_len = try tmp_dir.dir.realPathFile(io, ".", &path_buf); + const tmp_path = path_buf[0..tmp_path_len]; + + try explorer.trigram_index.writeToDisk(io, tmp_path, null); + + var mmap_idx = MmapTrigramIndex.initFromDisk(io, tmp_path, testing.allocator) orelse + return error.MmapInitFailed; + defer mmap_idx.deinit(); + + const results = mmap_idx.candidates("alpha", allocator) orelse + 
return error.NoCandidates; + try testing.expect(results.len >= 2); + + const no_results = mmap_idx.candidates("zzzzz", allocator); + if (no_results) |nr| { + try testing.expectEqual(@as(usize, 0), nr.len); + } +} + + +test "issue-164: mmap handles missing files gracefully" { + const result = MmapTrigramIndex.initFromDisk(io, "/tmp/nonexistent-codedb-test-dir-164", testing.allocator); + try testing.expect(result == null); +} + + +test "issue-164: AnyTrigramIndex dispatches to mmap variant" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const allocator = arena.allocator(); + + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + try explorer.indexFile("foo.zig", "pub fn fooBar(x: i32) i32 { return x + 1; }"); + + var tmp_dir = testing.tmpDir(.{}); + defer tmp_dir.cleanup(); + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const tmp_path_len = try tmp_dir.dir.realPathFile(io, ".", &path_buf); + const tmp_path = path_buf[0..tmp_path_len]; + + try explorer.trigram_index.writeToDisk(io, tmp_path, null); + + const mmap_loaded = MmapTrigramIndex.initFromDisk(io, tmp_path, testing.allocator) orelse + return error.MmapInitFailed; + + explorer.trigram_index.deinit(); + explorer.trigram_index = .{ .mmap = mmap_loaded }; + + const results = try explorer.searchContent("fooBar", allocator, 10); + try testing.expect(results.len >= 1); + + try testing.expect(explorer.trigram_index.containsFile("foo.zig")); + try testing.expect(!explorer.trigram_index.containsFile("bar.zig")); +} + + +test "issue-246: TrigramIndex.removeFile cleans stale path_to_id left by failed indexFile" { + // Reproduces the corrupted state an OOM mid-way through indexFile leaves: + // removeFile cleared file_trigrams, getOrCreateDocId wrote to path_to_id, + // then an allocation failure meant file_trigrams.put never completed. 
+ // Fix: removeFile must purge path_to_id even when file_trigrams has no entry. + var idx = TrigramIndex.init(testing.allocator); + defer idx.deinit(); + + // Plant the invariant-violating state OOM would leave behind. + try idx.path_to_id.put("ghost.zig", 0); + try idx.id_to_path.append(testing.allocator, "ghost.zig"); + // file_trigrams intentionally has NO entry for "ghost.zig". + + idx.removeFile("ghost.zig"); + + // Currently FAILS: removeFile returns early at the second file_trigrams.getPtr + // check, leaving path_to_id permanently dirty. + try testing.expectEqual(@as(usize, 0), idx.path_to_id.count()); +} + + +test "issue-247: TrigramIndex.id_to_path does not grow on re-index of same file" { + // removeFile removes path_to_id[path] but leaves the id_to_path slot intact. + // getOrCreateDocId then appends a new slot since path_to_id misses. + // After N re-indexes id_to_path.items.len must equal the number of *unique* files. + var idx = TrigramIndex.init(testing.allocator); + defer idx.deinit(); + + const src = "fn alpha() void {} fn beta() void {} const X = 1;"; + var i: usize = 0; + while (i < 5) : (i += 1) { + try idx.indexFile("f.zig", src); + } + + // Currently FAILS: id_to_path.items.len == 5 (grows by 1 per re-index). + try testing.expectEqual(@as(usize, 1), idx.id_to_path.items.len); +} + + +test "issue-227: TrigramIndex.id_to_path stays bounded across many files re-indexed" { + // Broader regression: ensure re-indexing multiple distinct files also doesn't + // accumulate dead id_to_path slots. + var idx = TrigramIndex.init(testing.allocator); + defer idx.deinit(); + + const files = [_][]const u8{ "a.zig", "b.zig", "c.zig" }; + var round: usize = 0; + while (round < 4) : (round += 1) { + for (files) |f| try idx.indexFile(f, "fn foo() void {}"); + } + + // 3 unique files × 4 rounds = 12 slots currently; fix should keep it at 3. 
+ try testing.expectEqual(@as(usize, files.len), idx.id_to_path.items.len); +} + + +test "issue-248: PostingList.removeDocId removes target and preserves sorted order" { + // Documents the correctness contract for the O(log n) binary-search replacement. + // Currently correct but O(n); fix replaces linear scan with bsearch + single remove. + const PostingList = @import("index.zig").PostingList; + var list = PostingList{}; + defer list.items.deinit(testing.allocator); + + var id: u32 = 0; + while (id < 100) : (id += 1) { + const p = try list.getOrAddPosting(testing.allocator, id * 2); // even doc_ids 0..198 + p.loc_mask = 0xFF; + } + + list.removeDocId(50); + try testing.expectEqual(@as(usize, 99), list.items.items.len); + try testing.expect(list.getByDocId(48) != null); + try testing.expect(list.getByDocId(50) == null); + try testing.expect(list.getByDocId(52) != null); + + // Sorted invariant must hold after removal. + for (1..list.items.items.len) |k| { + try testing.expect(list.items.items[k].doc_id > list.items.items[k - 1].doc_id); + } +} + + +test "issue-250: searchContent finds content in files skipped by trigram index" { + // Files indexed with skip_trigram=true (e.g. past the 15k cap) must still be + // reachable via the fallback full-scan path in searchContent. + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + try explorer.indexFileSkipTrigram("large.zig", "fn unique_zzz_sentinel() void {}"); + + const results = try explorer.searchContent("unique_zzz_sentinel", testing.allocator, 10); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + try testing.expectEqual(@as(usize, 1), results.len); +} + + +test "issue-262: sparse+trigram intersection drops files only in trigram index" { + // When both sparse and trigram indices return candidates, searchContent + // intersects them. 
A file present in trigram candidates but absent from + // sparse candidates is silently dropped — a recall loss. + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + // Index two files — both contain the query. + try explorer.indexFile("a.zig", "fn recall_target_alpha() void {}"); + try explorer.indexFile("b.zig", "fn recall_target_alpha() void {} // more text here for variety"); + + // Simulate sparse index missing file "b.zig" (e.g. boundary misalignment). + // File b.zig remains in the trigram index but not in sparse. + explorer.sparse_ngram_index.removeFile("b.zig"); + + const results = try explorer.searchContent("recall_target_alpha", testing.allocator, 50); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + // Both files contain the query — both must appear. + try testing.expectEqual(@as(usize, 2), results.len); +} + + +test "issue-263: skip_trigram_files searched before max_results exhausted" { + // Files indexed with skip_trigram=true are only searched after all + // trigram/sparse/word paths are exhausted. When a single normal file + // has enough matches to fill max_results, the skip_trigram file is + // never checked — even though it contains relevant content. + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + // Normal file with 6 matches (one per line). + try explorer.indexFile("noisy.zig", + \\fn my_unique_func() void {} + \\fn my_unique_func_v2() void {} + \\const my_unique_func_ptr = undefined; + \\var my_unique_func_state = 0; + \\test "my_unique_func works" {} + \\// calls my_unique_func internally + ); + + // skip-trigram file with 1 match. + try explorer.indexFileSkipTrigram("large.zig", "fn my_unique_func() void {}"); + + // max_results=5: the normal file fills the quota, skip_trigram never searched. 
+ const results = try explorer.searchContent("my_unique_func", testing.allocator, 5); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + + // The skip_trigram file must be represented in results. + var found_large = false; + for (results) |r| { + if (std.mem.eql(u8, r.path, "large.zig")) found_large = true; + } + try testing.expect(found_large); +} + + +test "search: BM25 ranks higher-frequency line first" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + // Line with two occurrences of "token" should outrank line with one + const content = "// single token mention\nconst token = token_cache.get();\n"; + try explorer.indexFile("auth.zig", content); + + const results = try explorer.searchContent("token", testing.allocator, 10); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + + try testing.expect(results.len >= 2); + // Line 2 has "token" twice; line 1 has it once — line 2 should come first + try testing.expect(results[0].score >= results[1].score); + try testing.expectEqual(@as(u32, 2), results[0].line_num); +} + + +test "issue-388: TrigramIndex.removeFile frees owned path on tombstone" { + // owns_paths=true means getOrCreateDocId duped the path so callers can + // free their copy. removeFile must release that dup before tombstoning + // the slot — otherwise every snapshot-loaded session leaks one path + // allocation per file removed/re-indexed. 
+ var idx = TrigramIndex.init(testing.allocator); + defer idx.deinit(); + idx.owns_paths = true; + + const path = "src/leaky.zig"; + try idx.indexFile(path, "pub fn leaky() void {}\n"); + idx.removeFile(path); + + // testing.allocator reports any unfreed bytes when this scope exits via + // deinit. The bug leaks the dup on the tombstoned id_to_path slot + // (cleared to ""), so deinit's `if (p.len > 0) free(p)` misses it. +} + + +test "bm25-persistence: writeToDisk/readFromDisk preserves total_tokens and doc_lengths" { + const alloc = testing.allocator; + var wi = WordIndex.init(alloc); + defer wi.deinit(); + + try wi.indexFile("low.txt", "needle filler filler filler filler filler filler filler filler filler"); + try wi.indexFile("high.txt", "needle needle needle filler"); + try wi.indexFile("none.txt", "filler filler filler filler"); + + const pre_total = wi.total_tokens; + const pre_low_len = wi.docLength(wi.path_to_id.get("low.txt") orelse 0); + const pre_high_len = wi.docLength(wi.path_to_id.get("high.txt") orelse 0); + + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp.dir.realPathFile(io, ".", &path_buf); + const dir_path = path_buf[0..dir_path_len]; + + try wi.writeToDisk(io, dir_path, null); + + const maybe_loaded = WordIndex.readFromDisk(io, dir_path, alloc); + try testing.expect(maybe_loaded != null); + var loaded = maybe_loaded.?; + defer loaded.deinit(); + + try testing.expectEqual(pre_total, loaded.total_tokens); + + const post_low_id = loaded.path_to_id.get("low.txt") orelse { + try testing.expect(false); + return; + }; + const post_high_id = loaded.path_to_id.get("high.txt") orelse { + try testing.expect(false); + return; + }; + try testing.expectEqual(pre_low_len, loaded.docLength(post_low_id)); + try testing.expectEqual(pre_high_len, loaded.docLength(post_high_id)); + + const hits = try loaded.searchDeduped("needle", alloc); + defer alloc.free(hits); + try 
testing.expect(hits.len >= 2); + + var saw_high = false; + var saw_low = false; + for (hits) |h| { + const p = loaded.hitPath(h); + if (std.mem.eql(u8, p, "high.txt")) saw_high = true; + if (std.mem.eql(u8, p, "low.txt")) saw_low = true; + } + try testing.expect(saw_high); + try testing.expect(saw_low); + + // Post-roundtrip ranked search must still work and return hits for "needle". + var wi2 = WordIndex.init(alloc); + defer wi2.deinit(); + try wi2.indexFile("low.txt", "needle filler filler filler filler filler filler filler filler filler"); + try wi2.indexFile("high.txt", "needle needle needle filler"); + try wi2.indexFile("none.txt", "filler filler filler filler"); + + const low_id_orig = wi2.path_to_id.get("low.txt") orelse 0; + const high_id_orig = wi2.path_to_id.get("high.txt") orelse 0; + try testing.expectEqual(pre_low_len, wi2.docLength(low_id_orig)); + try testing.expectEqual(pre_high_len, wi2.docLength(high_id_orig)); + try testing.expectEqual(pre_total, wi2.total_tokens); +} + + +test "issue-451: scope search surfaces skip-trigram canonical file" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + var i: usize = 0; + while (i < 12) : (i += 1) { + var path_buf: [32]u8 = undefined; + const path = try std.fmt.bufPrint(&path_buf, "small_{d}.zig", .{i}); + try explorer.indexFile(path, "fn s() void { _ = widgetX; }\n"); + } + + const canonical_content = + "fn canonical() void {\n" ++ + " _ = widgetX;\n" ++ + " _ = widgetX;\n" ++ + " _ = widgetX;\n" ++ + " _ = widgetX;\n" ++ + " _ = widgetX;\n" ++ + "}\n"; + try explorer.indexFileSkipTrigram("canonical.zig", canonical_content); + + const results = try explorer.searchContentWithScope("widgetX", testing.allocator, 5); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + if (r.scope_name) |n| testing.allocator.free(n); + } + 
testing.allocator.free(results); + } + + var found_canonical = false; + for (results) |r| { + if (std.mem.eql(u8, r.path, "canonical.zig")) found_canonical = true; + } + try testing.expect(found_canonical); +} + + +test "issue-447: searchContent surfaces large (>64KB) skip-trigram files for common identifiers" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + var i: usize = 0; + while (i < 12) : (i += 1) { + var path_buf: [32]u8 = undefined; + const path = try std.fmt.bufPrint(&path_buf, "small_{d}.zig", .{i}); + try explorer.indexFile(path, "fn s() void { _ = widgetX; }\n"); + } + + const canonical_content = + "fn canonical() void {\n" ++ + " _ = widgetX;\n" ++ + " _ = widgetX;\n" ++ + " _ = widgetX;\n" ++ + " _ = widgetX;\n" ++ + " _ = widgetX;\n" ++ + "}\n"; + try explorer.indexFileSkipTrigram("canonical.zig", canonical_content); + + const results = try explorer.searchContent("widgetX", testing.allocator, 5); + defer { + for (results) |r| { + testing.allocator.free(r.line_text); + testing.allocator.free(r.path); + } + testing.allocator.free(results); + } + + var found_canonical = false; + for (results) |r| { + if (std.mem.eql(u8, r.path, "canonical.zig")) found_canonical = true; + } + try testing.expect(found_canonical); +} + diff --git a/src/test_mcp.zig b/src/test_mcp.zig new file mode 100644 index 0000000..66540e7 --- /dev/null +++ b/src/test_mcp.zig @@ -0,0 +1,1568 @@ +const std = @import("std"); +const cio = @import("cio.zig"); +const testing = std.testing; +const io = std.testing.io; +const Store = @import("store.zig").Store; +const Explorer = @import("explore.zig").Explorer; +const explore = @import("explore.zig"); +const Language = explore.Language; +const AgentRegistry = @import("agent.zig").AgentRegistry; +const mcp_mod = @import("mcp.zig"); +const main_mod = @import("main.zig"); +const nuke_mod = @import("nuke.zig"); 
+const update_mod = @import("update.zig"); +const Config = @import("config.zig").Config; +const telemetry_mod = @import("telemetry.zig"); +const release_info = @import("release_info.zig"); +const root_policy = @import("root_policy.zig"); +const edit_mod = @import("edit.zig"); +const snapshot_mod = @import("snapshot.zig"); +const watcher = @import("watcher.zig"); +const WordIndex = @import("index.zig").WordIndex; +const TrigramIndex = @import("index.zig").TrigramIndex; +const SparseNgramIndex = @import("index.zig").SparseNgramIndex; +comptime { + _ = @import("config.zig"); +} + + +fn buildCliForHelpTests() !void { + const build = try cio.runCapture(.{ + .allocator = testing.allocator, + .argv = &.{ "zig", "build" }, + .max_output_bytes = 8192, + }); + defer testing.allocator.free(build.stdout); + defer testing.allocator.free(build.stderr); + + try testing.expect(build.term == .Exited); + try testing.expect(build.term.Exited == 0); +} + + +test "issue-59: telemetry writes session, tool, and codebase stats ndjson" { + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp.dir.realPathFile(io, ".", &path_buf); + const dir_path = path_buf[0..dir_path_len]; + + var telem = telemetry_mod.Telemetry.init(io, dir_path, testing.allocator, false); + defer telem.deinit(); + + telem.recordSessionStart(); + telem.recordToolCall("codedb_status", 1234, false, 56); + + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + try explorer.indexFile("src/main.zig", "pub fn main() void {}\n"); + try explorer.indexFile("src/lib.py", "def run():\n return 1\n"); + + telem.recordCodebaseStats(&explorer, 42); + telem.flush(); + + const ndjson_path = try std.fmt.allocPrint(testing.allocator, "{s}/telemetry.ndjson", .{dir_path}); + defer testing.allocator.free(ndjson_path); + + const contents = try std.Io.Dir.cwd().readFileAlloc(io, ndjson_path, 
testing.allocator, .limited(64 * 1024)); + defer testing.allocator.free(contents); + + try testing.expect(std.mem.indexOf(u8, contents, "\"event_type\":\"session_start\"") != null); + const version_needle = try std.fmt.allocPrint(testing.allocator, "\"version\":\"{s}\"", .{release_info.semver}); + defer testing.allocator.free(version_needle); + try testing.expect(std.mem.indexOf(u8, contents, version_needle) != null); + try testing.expect(std.mem.indexOf(u8, contents, "\"event_type\":\"tool_call\"") != null); + try testing.expect(std.mem.indexOf(u8, contents, "\"tool\":\"codedb_status\"") != null); + try testing.expect(std.mem.indexOf(u8, contents, "\"event_type\":\"codebase_stats\"") != null); + try testing.expect(std.mem.indexOf(u8, contents, "\"startup_time_ms\":42") != null); + try testing.expect(std.mem.indexOf(u8, contents, "\"languages\":[\"zig\",\"python\"]") != null); +} + + +test "issue-60: telemetry disabled path is a no-op" { + var telem = telemetry_mod.Telemetry.init(io, "/tmp", testing.allocator, true); + defer telem.deinit(); + + telem.recordSessionStart(); + telem.recordToolCall("codedb_search", 99, true, 10); + try testing.expect(!telem.enabled); + try testing.expect(telem.file == null); + try testing.expect(telem.head.load(.monotonic) == 0); +} + + +test "issue-77: mcp index accepts temporary-directory roots that cause pathological cache growth" { + var tmp_name_buf: [128]u8 = undefined; + const tmp_name = try std.fmt.bufPrint(&tmp_name_buf, "codedb-issue-77-{d}", .{@as(i64, @intCast(@divTrunc(cio.nanoTimestamp(), 1000)))}); + const tmp_root = try std.fs.path.join(testing.allocator, &.{ "/private/tmp", tmp_name }); + defer testing.allocator.free(tmp_root); + + std.Io.Dir.cwd().createDirPath(io, tmp_root) catch |err| switch (err) { + error.PathAlreadyExists => {}, + else => return err, + }; + defer std.Io.Dir.cwd().deleteTree(io, tmp_root) catch {}; + + const source_path = try std.fs.path.join(testing.allocator, &.{ tmp_root, "sample.zig" }); + 
defer testing.allocator.free(source_path); + { + const file = try std.Io.Dir.cwd().createFile(io, source_path, .{}); + defer file.close(io); + try file.writeStreamingAll(io, "pub fn sample() void {}\n"); + } + + const result = try cio.runCapture(.{ + .allocator = testing.allocator, + .argv = &.{ "zig", "build", "run", "--", tmp_root, "snapshot" }, + .max_output_bytes = 256 * 1024, + }); + defer testing.allocator.free(result.stdout); + defer testing.allocator.free(result.stderr); + + try testing.expect(result.term.Exited != 0); +} + + +test "issue-93: isSensitivePath blocks .env and credentials" { + try testing.expect(watcher.isSensitivePath(".env")); + try testing.expect(watcher.isSensitivePath(".env.local")); + try testing.expect(watcher.isSensitivePath(".env.production")); + try testing.expect(watcher.isSensitivePath("credentials.json")); + try testing.expect(watcher.isSensitivePath("service-account.json")); + try testing.expect(watcher.isSensitivePath("id_rsa")); + try testing.expect(watcher.isSensitivePath("secrets.yaml")); + try testing.expect(watcher.isSensitivePath("config/secrets.yml")); + try testing.expect(watcher.isSensitivePath("server.key")); + try testing.expect(watcher.isSensitivePath("cert.pem")); + try testing.expect(watcher.isSensitivePath("keystore.jks")); + try testing.expect(watcher.isSensitivePath("identity.pfx")); + try testing.expect(watcher.isSensitivePath(".ssh/known_hosts")); + // Normal files should NOT be blocked + try testing.expect(!watcher.isSensitivePath("main.zig")); + try testing.expect(!watcher.isSensitivePath("src/server.zig")); + try testing.expect(!watcher.isSensitivePath("README.md")); + try testing.expect(!watcher.isSensitivePath("package.json")); +} + + +test "issue-93: isPathSafe blocks traversal" { + const MCP = @import("mcp.zig"); + try testing.expect(!MCP.isPathSafe("../../../etc/passwd")); + try testing.expect(!MCP.isPathSafe("/etc/passwd")); + try testing.expect(!MCP.isPathSafe("")); + try 
testing.expect(MCP.isPathSafe("src/main.zig")); + try testing.expect(MCP.isPathSafe("README.md")); +} + + +test "auto-update: shouldRunAutoUpdate gates correctly" { + const day_ms: i64 = 24 * 60 * 60 * 1000; + + // Disabled by env: never runs + try testing.expect(!update_mod.shouldRunAutoUpdate(0, null, true)); + try testing.expect(!update_mod.shouldRunAutoUpdate(day_ms * 100, null, true)); + try testing.expect(!update_mod.shouldRunAutoUpdate(day_ms * 100, 0, true)); + + // First run (no stamp): always runs when not disabled + try testing.expect(update_mod.shouldRunAutoUpdate(0, null, false)); + + // Throttled: <24h since last check → skip + try testing.expect(!update_mod.shouldRunAutoUpdate(day_ms - 1, 0, false)); + + // Exactly 24h since last check → run + try testing.expect(update_mod.shouldRunAutoUpdate(day_ms, 0, false)); + + // Long after last check → run + try testing.expect(update_mod.shouldRunAutoUpdate(day_ms * 7, 0, false)); +} + + +test "issue-394: shouldRunAutoUpdate permanently blocked by future-timestamp stamp file" { + // Reproduces the case where the stamp file contains a timestamp in the + // future relative to the wall clock — for example, after an NTP clock + // correction that rolls the clock back, or after a stamp written by a + // host with a fast clock. The current implementation computes + // (now - last) and only fires when that delta >= 24h, so a future + // `last` produces a negative delta and the check is silently skipped + // for as long as the stamp stays in the future — potentially many days. + // + // Expected: a wildly future stamp should NOT prevent the next check + // from firing. The simplest correct behavior is: if last > now, treat + // the stamp as invalid and allow the update check to run. 
+ + const day_ms: i64 = 24 * 60 * 60 * 1000; + const now_ms: i64 = 1_700_000_000_000; + const future_last_ms: i64 = now_ms + day_ms * 30; // 30 days in the future + + try testing.expect(update_mod.shouldRunAutoUpdate(now_ms, future_last_ms, false)); +} + + +test "issue-395: shouldRunAutoUpdate panics on i64 underflow when stamp is corrupt" { + // Reproduces a panic when ~/.codedb/last_auto_update_check is corrupt + // and decodes to a very negative i64. readAutoUpdateStamp does no + // sanity check — it reads 8 bytes, calls std.mem.readInt(i64, ...), + // and feeds that straight into shouldRunAutoUpdate, which evaluates + // `now_ms - last` with checked subtraction. For last = minInt(i64) + // and any positive now_ms, the subtraction overflows and triggers an + // integer-overflow panic in Debug / ReleaseSafe builds (which is what + // `zig build test` and the shipped MCP binary use). + // + // Result: every `codedb mcp` startup crashes during the auto-update + // gate for any user whose stamp file got corrupted to a value with + // the high bit set (e.g. truncated write, partial flush, or any byte + // sequence starting with 0x80..0xFF in the stamp). + // + // Expected fix: clamp the delta with a saturating/wrapping subtraction + // or treat any last_ms <= 0 (or in the distant past) as invalid and + // run the update. 
+ + const now_ms: i64 = 1_700_000_000_000; + const last_ms: i64 = std.math.minInt(i64); + + try testing.expect(update_mod.shouldRunAutoUpdate(now_ms, last_ms, false)); +} + + +test "issue-150: --help prints usage" { + try buildCliForHelpTests(); + + const result = try cio.runCapture(.{ + .allocator = testing.allocator, + .argv = &.{ "./zig-out/bin/codedb", "--help" }, + .max_output_bytes = 8192, + }); + defer testing.allocator.free(result.stdout); + defer testing.allocator.free(result.stderr); + + try testing.expect(result.term == .Exited); + try testing.expect(result.term.Exited == 0); + try testing.expect(std.mem.indexOf(u8, result.stdout, "usage:") != null or + std.mem.indexOf(u8, result.stderr, "usage:") != null); + try testing.expect(std.mem.indexOf(u8, result.stdout, "update") != null or + std.mem.indexOf(u8, result.stderr, "update") != null); + try testing.expect(std.mem.indexOf(u8, result.stdout, "nuke") != null or + std.mem.indexOf(u8, result.stderr, "nuke") != null); +} + + +test "issue-150: -h prints usage" { + try buildCliForHelpTests(); + + const result = try cio.runCapture(.{ + .allocator = testing.allocator, + .argv = &.{ "./zig-out/bin/codedb", "-h" }, + .max_output_bytes = 8192, + }); + defer testing.allocator.free(result.stdout); + defer testing.allocator.free(result.stderr); + + try testing.expect(result.term == .Exited); + try testing.expect(result.term.Exited == 0); + try testing.expect(std.mem.indexOf(u8, result.stdout, "usage:") != null or + std.mem.indexOf(u8, result.stderr, "usage:") != null); +} + + +test "update: compareVersions orders semantic versions" { + try testing.expect(try update_mod.compareVersions("0.2.55", "0.2.56") == .lt); + try testing.expect(try update_mod.compareVersions("0.2.56", "0.2.56") == .eq); + try testing.expect(try update_mod.compareVersions("v0.2.57", "0.2.56") == .gt); + try testing.expect(try update_mod.compareVersions("0.2.56", "0.2.56.0") == .eq); +} + + +test "update: checksumForBinary parses release 
manifest" { + const manifest = + \\7be38140d090b2e23723c8cde02be150171c818daa16b18c520b44cc1e078add codedb-darwin-arm64 + \\76bc7b81bc9fd211aa2c1ac59d1d26e8c80bc211ab560de2dc998ea9e04ec471 codedb-darwin-x86_64 + \\aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa *codedb-linux-arm64 + ; + + try testing.expectEqualStrings( + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + update_mod.checksumForBinary(manifest, "codedb-linux-arm64") orelse return error.TestUnexpectedResult, + ); + try testing.expect(update_mod.checksumForBinary(manifest, "codedb-linux-x86_64") == null); +} + + +test "update: asset names match published release naming" { + try testing.expectEqualStrings("codedb-darwin-arm64", update_mod.assetNameForTarget(.macos, .aarch64).?); + try testing.expectEqualStrings("codedb-darwin-x86_64", update_mod.assetNameForTarget(.macos, .x86_64).?); + try testing.expectEqualStrings("codedb-linux-arm64", update_mod.assetNameForTarget(.linux, .aarch64).?); + try testing.expectEqualStrings("codedb-linux-x86_64", update_mod.assetNameForTarget(.linux, .x86_64).?); + try testing.expect(update_mod.assetNameForTarget(.windows, .x86_64) == null); +} + + +test "nuke: commandTargetsBinary only matches the current install path" { + try testing.expect(nuke_mod.commandTargetsBinary( + "/tmp/codedb-test/bin/codedb serve", + "/tmp/codedb-test/bin/codedb", + )); + try testing.expect(nuke_mod.commandTargetsBinary( + "/var/folders/example/codedb serve", + "/private/var/folders/example/codedb", + )); + try testing.expect(!nuke_mod.commandTargetsBinary( + "/Users/rachpradhan/bin/codedb --mcp", + "/tmp/codedb-test/bin/codedb", + )); +} + + +test "nuke: removeJsonMcpServerEntry drops only codedb integration" { + const input = + \\{ + \\ "mcpServers": { + \\ "codedb": { "command": "/Users/me/bin/codedb", "args": ["mcp"] }, + \\ "other": { "command": "other", "args": [] } + \\ }, + \\ "theme": "dark" + \\} + ; + + const output = (try 
nuke_mod.removeJsonMcpServerEntry(testing.allocator, input, "codedb")) orelse + return error.TestUnexpectedResult; + defer testing.allocator.free(output); + + try testing.expect(std.mem.indexOf(u8, output, "\"codedb\"") == null); + try testing.expect(std.mem.indexOf(u8, output, "\"other\"") != null); + try testing.expect(std.mem.indexOf(u8, output, "\"theme\"") != null); +} + + +test "nuke: removeJsonMcpServerEntry removes empty mcpServers object" { + const input = + \\{ + \\ "mcpServers": { + \\ "codedb": { "command": "/Users/me/bin/codedb", "args": ["mcp"] } + \\ }, + \\ "theme": "dark" + \\} + ; + + const output = (try nuke_mod.removeJsonMcpServerEntry(testing.allocator, input, "codedb")) orelse + return error.TestUnexpectedResult; + defer testing.allocator.free(output); + + try testing.expect(std.mem.indexOf(u8, output, "\"codedb\"") == null); + try testing.expect(std.mem.indexOf(u8, output, "\"mcpServers\"") == null); + try testing.expect(std.mem.indexOf(u8, output, "\"theme\"") != null); +} + + +test "nuke: removeCodexMcpServerBlock removes codedb block only" { + const input = + \\[mcp_servers.codedb] + \\command = "/Users/me/bin/codedb" + \\args = ["mcp"] + \\startup_timeout_sec = 30 + \\ + \\[mcp_servers.other] + \\command = "other" + \\args = [] + ; + + const output = (try nuke_mod.removeCodexMcpServerBlock(testing.allocator, input, "codedb")) orelse + return error.TestUnexpectedResult; + defer testing.allocator.free(output); + + try testing.expect(std.mem.indexOf(u8, output, "[mcp_servers.codedb]") == null); + try testing.expect(std.mem.indexOf(u8, output, "[mcp_servers.other]") != null); + try testing.expect(std.mem.indexOf(u8, output, "command = \"other\"") != null); +} + + +test "nuke: removeCodexMcpServerBlock matches indented header with inline comment" { + const input = + \\ [mcp_servers.codedb] # local override + \\command = "/Users/me/bin/codedb" + \\args = ["mcp"] + \\ + \\[mcp_servers.other] + \\command = "other" + \\args = [] + ; + + const 
output = (try nuke_mod.removeCodexMcpServerBlock(testing.allocator, input, "codedb")) orelse + return error.TestUnexpectedResult; + defer testing.allocator.free(output); + + try testing.expect(std.mem.indexOf(u8, output, "codedb") == null); + try testing.expect(std.mem.indexOf(u8, output, "[mcp_servers.other]") != null); +} + + +test "nuke: deregisterJsonIntegrationFile handles configs larger than 64 KiB" { + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + + const rel_path = try std.fmt.allocPrint(testing.allocator, ".zig-cache/tmp/{s}/large-claude.json", .{tmp.sub_path}); + defer testing.allocator.free(rel_path); + + var content: std.ArrayList(u8) = .empty; + defer content.deinit(testing.allocator); + try content.appendSlice(testing.allocator, + \\{ + \\ "mcpServers": { + \\ "codedb": { "command": "/Users/me/bin/codedb", "args": ["mcp"] }, + \\ "other": { "command": "other", "args": [] } + \\ }, + \\ "padding": " + ); + try content.appendNTimes(testing.allocator, 'x', 70 * 1024); + try content.appendSlice(testing.allocator, "\"\n}\n"); + + var file = try tmp.dir.createFile(io, "large-claude.json", .{}); + defer file.close(io); + try file.writeStreamingAll(io, content.items); + + try testing.expect(try nuke_mod.deregisterJsonIntegrationFile(io, testing.allocator, rel_path)); + + const rewritten = try std.Io.Dir.cwd().readFileAlloc(io, rel_path, testing.allocator, .limited(std.math.maxInt(usize))); + defer testing.allocator.free(rewritten); + + try testing.expect(std.mem.indexOf(u8, rewritten, "\"codedb\"") == null); + try testing.expect(std.mem.indexOf(u8, rewritten, "\"other\"") != null); + try testing.expect(std.mem.indexOf(u8, rewritten, "\"padding\"") != null); +} + + +test "issue-148: dead MCP clients are polled every second" { + const mcp = @import("mcp.zig"); + try testing.expectEqual(@as(u64, 1000), mcp.dead_client_poll_ms); +} + + +test "issue-148: POLLHUP detects closed pipe" { + // Verify the polling infrastructure works for pipe-based transports 
+ const pipe = try cio.makePipe(); + defer _ = std.c.close(pipe[0]); + + // Close write end — simulates client disconnect + _ = std.c.close(pipe[1]); + + // Poll should detect POLLHUP on the read end + var fds = [_]std.posix.pollfd{.{ + .fd = pipe[0], + .events = std.posix.POLL.IN, + .revents = 0, + }}; + + const n = try std.posix.poll(&fds, 100); // 100ms timeout + try testing.expect(n > 0); + try testing.expect((fds[0].revents & std.posix.POLL.HUP) != 0); +} + + +test "issue-148: idle watchdog exits on shutdown signal" { + // The watchdog should check shutdown every ~1s (not 30s) + // and return quickly when signalled + var shutdown = std.atomic.Value(bool).init(false); + + const t0 = cio.milliTimestamp(); + // Signal shutdown after a small delay + const signal_thread = try std.Thread.spawn(.{}, struct { + fn run(s: *std.atomic.Value(bool)) void { + cio.sleepMs(500); + s.store(true, .release); + } + }.run, .{&shutdown}); + + // Run a simplified watchdog loop (matches the real one's 1s granularity) + while (!shutdown.load(.acquire)) { + for (0..30) |_| { + if (shutdown.load(.acquire)) break; + cio.sleepMs(100); // faster for test + } + break; // one iteration is enough to test + } + signal_thread.join(); + + const elapsed = cio.milliTimestamp() - t0; + // With 1s granularity, should respond well under 5s (not 30s) + // Using 100ms intervals in test, so should be ~500ms + if (elapsed > 0) { + // Just verify it didn't hang for 30 seconds + try testing.expect(elapsed < 5_000); + } +} + + +test "issue-278: MCP tracks activity without using it as a transport timeout" { + const mcp = @import("mcp.zig"); + + // Save and restore + const saved = mcp.last_activity.load(.acquire); + defer mcp.last_activity.store(saved, .release); + + // Set activity to "just now" + mcp.last_activity.store(cio.milliTimestamp(), .release); + + const last = mcp.last_activity.load(.acquire); + const now = cio.milliTimestamp(); + try testing.expect(now - last < 1_000); +} + + +test "issue-278: 
MCP session may remain idle longer than old timeout" { + const mcp = @import("mcp.zig"); + // Stale activity is now only an accounting signal. The stdio transport is + // kept alive until the client actually disconnects. + const old_idle_timeout_ms = 60 * 60 * 1000; + const older_than_old_timeout = cio.milliTimestamp() - old_idle_timeout_ms - 1_000; + + // Save and restore + const saved = mcp.last_activity.load(.acquire); + defer mcp.last_activity.store(saved, .release); + + mcp.last_activity.store(older_than_old_timeout, .release); + const last = mcp.last_activity.load(.acquire); + const now = cio.milliTimestamp(); + + try testing.expect(now - last > old_idle_timeout_ms); +} + + +test "issue-148: open pipe does not trigger HUP" { + const pipe = try cio.makePipe(); + defer _ = std.c.close(pipe[0]); + defer _ = std.c.close(pipe[1]); + + var poll_fds = [_]std.posix.pollfd{.{ + .fd = pipe[0], + .events = std.posix.POLL.IN | std.posix.POLL.HUP, + .revents = 0, + }}; + + const result = try std.posix.poll(&poll_fds, 0); + try testing.expectEqual(@as(usize, 0), result); +} + + +test "issue-148: codedb mcp exits when stdin is closed" { + // Integration test: spawn codedb mcp, close stdin, verify it exits + var child = std.process.spawn(io, .{ + .argv = &.{ "zig", "build", "run", "--", "--mcp" }, + .stdin = .pipe, + .stdout = .pipe, + .stderr = .ignore, + }) catch { + // If spawn fails (e.g., zig not on PATH), skip the test + return; + }; + + // Send initialize then close stdin (simulate client crash) + const init_msg = "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"initialize\",\"params\":{\"protocolVersion\":\"2024-11-05\",\"capabilities\":{},\"clientInfo\":{\"name\":\"test\",\"version\":\"1\"}}}"; + const header = std.fmt.comptimePrint("Content-Length: {d}\r\n\r\n", .{init_msg.len}); + + if (child.stdin) |stdin| { + stdin.writeStreamingAll(io, header) catch {}; + stdin.writeStreamingAll(io, init_msg) catch {}; + // Close stdin — simulates client disconnecting + 
stdin.close(io); + child.stdin = null; + } + + // Wait for the process to exit. The main read loop exits on stdin EOF; + // the watchdog also polls dead clients every second as a backup. + const start = cio.milliTimestamp(); + const term = child.wait(io) catch { + // If wait fails, the process is stuck — test fails + try testing.expect(false); + return; + }; + + const elapsed = cio.milliTimestamp() - start; + + // Should have exited (not been killed by us) + switch (term) { + .exited => |code| _ = code, + else => {}, + } + + // Should exit promptly after stdin closes. + try testing.expect(elapsed < 5_000); +} + + +test "issue-249: nuke.removeJsonMcpServerEntry returns null when key absent" { + // Verifies removeJsonMcpServerEntry does not signal a write when key is absent, + // which ensures the non-atomic rewriteConfigFile path is never triggered unnecessarily. + const result = try nuke_mod.removeJsonMcpServerEntry(testing.allocator, "{\"other\":1}", "codedb"); + try testing.expect(result == null); +} + + +test "issue-207: ScanState round-trips through atomic" { + const initial = mcp_mod.getScanState(); + defer mcp_mod.setScanState(initial); + + mcp_mod.setScanState(.loading_snapshot); + try testing.expectEqual(mcp_mod.ScanState.loading_snapshot, mcp_mod.getScanState()); + + mcp_mod.setScanState(.walking); + try testing.expectEqual(mcp_mod.ScanState.walking, mcp_mod.getScanState()); + + mcp_mod.setScanState(.indexing); + try testing.expectEqual(mcp_mod.ScanState.indexing, mcp_mod.getScanState()); + + mcp_mod.setScanState(.ready); + try testing.expectEqual(mcp_mod.ScanState.ready, mcp_mod.getScanState()); +} + + +test "issue-207: ScanState.name covers all states" { + try testing.expectEqualStrings("loading_snapshot", mcp_mod.ScanState.loading_snapshot.name()); + try testing.expectEqualStrings("walking", mcp_mod.ScanState.walking.name()); + try testing.expectEqualStrings("indexing", mcp_mod.ScanState.indexing.name()); + try testing.expectEqualStrings("ready", 
mcp_mod.ScanState.ready.name()); +} + + +test "issue-346: root_policy rejects dangerous ambient cwd roots" { + try testing.expect(!root_policy.isIndexableRoot("/")); + try testing.expect(!root_policy.isIndexableRoot("/Applications")); + try testing.expect(!root_policy.isIndexableRoot("/usr")); + try testing.expect(!root_policy.isIndexableRoot("/usr/local")); + try testing.expect(!root_policy.isIndexableRoot("/usr/local/bin")); + try testing.expect(!root_policy.isIndexableRoot("/opt")); + try testing.expect(!root_policy.isIndexableRoot("/opt/homebrew")); +} + + +test "issue-357: bundle preserves nested 'arguments' for codedb_outline" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + try explorer.indexFile("src/main.zig", "pub fn main() void {}\n"); + try explorer.indexFile("src/lib.zig", "pub fn helper() void {}\n"); + + var store = Store.init(testing.allocator); + defer store.deinit(); + + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + const bundle_json = + \\{"ops":[ + \\ {"tool":"codedb_outline","arguments":{"path":"src/main.zig"}}, + \\ {"tool":"codedb_outline","arguments":{"path":"src/lib.zig"}} + \\]} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, bundle_json, .{}); + defer parsed.deinit(); + + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_bundle, &parsed.value.object, &out, &store, &explorer, &agents); + + // Nested-args bundle path must preserve 'path' for every op — no missing-arg errors. 
+ try testing.expect(std.mem.indexOf(u8, out.items, "missing 'path' argument") == null); + try testing.expect(std.mem.indexOf(u8, out.items, "src/main.zig") != null); + try testing.expect(std.mem.indexOf(u8, out.items, "src/lib.zig") != null); +} + + +test "issue-357: bundle surfaces received keys when an op is missing required path" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + try explorer.indexFile("src/main.zig", "pub fn main() void {}\n"); + + var store = Store.init(testing.allocator); + defer store.deinit(); + + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + // Bundle with a wrong key name ('file_path' instead of 'path'). The op must + // fail (path is missing), but the bundle wrapper must surface the keys it + // received so the caller can tell whether codedb dropped the arg or the + // client sent it under the wrong name. + const bundle_json = + \\{"ops":[{"tool":"codedb_outline","arguments":{"file_path":"src/main.zig"}}]} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, bundle_json, .{}); + defer parsed.deinit(); + + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_bundle, &parsed.value.object, &out, &store, &explorer, &agents); + + // The error itself must still appear (legitimate — path is missing). + try testing.expect(std.mem.indexOf(u8, out.items, "missing 'path' argument") != null); + // And the bundle must surface what the op actually contained, naming the + // bad key so the caller can self-diagnose. 
+ try testing.expect(std.mem.indexOf(u8, out.items, "received keys") != null); + try testing.expect(std.mem.indexOf(u8, out.items, "file_path") != null); +} + + +test "issue-423: bundle emits 'received keys' exactly once per failing op" { + // Regression: handler (handleSearch etc) appends the diagnostic, AND the + // bundle dispatch loop also appends it — caller saw the line twice in a + // row. Must appear exactly once per failing op. + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + try explorer.indexFile("src/main.zig", "pub fn main() void {}\n"); + + var store = Store.init(testing.allocator); + defer store.deinit(); + + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + const bundle_json = + \\{"ops":[{"tool":"codedb_search","arguments":{}}]} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, bundle_json, .{}); + defer parsed.deinit(); + + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_bundle, &parsed.value.object, &out, &store, &explorer, &agents); + + var count: usize = 0; + var idx: usize = 0; + while (std.mem.indexOfPos(u8, out.items, idx, "received keys:")) |pos| { + count += 1; + idx = pos + 1; + } + try testing.expectEqual(@as(usize, 1), count); +} + + +test "issue-367: openDataLog truncates orphan bytes from prior session" { + var tmp_dir = testing.tmpDir(.{}); + defer tmp_dir.cleanup(); + + var dir_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp_dir.dir.realPathFile(io, ".", &dir_buf); + const dir_path = dir_buf[0..dir_path_len]; + + const log_path = try std.fmt.allocPrint(testing.allocator, "{s}/data.log", .{dir_path}); + defer 
testing.allocator.free(log_path); + + const orphan = "ORPHAN_SECRET_TOKEN_FROM_PRIOR_SESSION"; + { + const f = try std.Io.Dir.cwd().createFile(io, log_path, .{ .truncate = true }); + defer f.close(io); + try f.writePositionalAll(io, orphan, 0); + } + + var store = Store.init(testing.allocator); + defer store.deinit(); + try store.openDataLog(io, log_path); + + const f = try std.Io.Dir.cwd().openFile(io, log_path, .{}); + defer f.close(io); + const len = try f.length(io); + try testing.expectEqual(@as(u64, 0), len); + try testing.expectEqual(@as(u64, 0), store.data_log_pos); + + const diff = "fresh diff"; + _ = try store.recordEdit("foo.zig", 1, .replace, 0xABCD, diff.len, diff); + + var buf: [128]u8 = undefined; + const f2 = try std.Io.Dir.cwd().openFile(io, log_path, .{}); + defer f2.close(io); + const new_len = try f2.length(io); + try testing.expectEqual(@as(u64, diff.len), new_len); + const read_len = try f2.readPositionalAll(io, buf[0..diff.len], 0); + try testing.expectEqual(diff.len, read_len); + try testing.expectEqualStrings(diff, buf[0..diff.len]); +} + + +test "issue-367-dx: tty summary surfaces received keys on missing-arg error" { + const args_json = + \\{"file_path":"src/main.zig","weird_key":"x"} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{}); + defer parsed.deinit(); + + const raw_output = "error: missing 'path' argument\nreceived keys: [file_path, weird_key]"; + + var summary: std.ArrayList(u8) = .empty; + defer summary.deinit(testing.allocator); + + mcp_mod.mcpGenerateSummary( + testing.allocator, + "codedb_outline", + &parsed.value.object, + raw_output, + true, + &summary, + ); + + try testing.expect(std.mem.indexOf(u8, summary.items, "received") != null); + try testing.expect(std.mem.indexOf(u8, summary.items, "file_path") != null); +} + + +test "issue-bug2: tool calls during scan-in-progress hint at scan state" { + var explorer = Explorer.init(testing.allocator, 
Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + const prev_state = mcp_mod.getScanState(); + defer mcp_mod.setScanState(prev_state); + mcp_mod.setScanState(.walking); + + const args_json = + \\{"query":"some_unknown_symbol_that_will_not_match"} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{}); + defer parsed.deinit(); + + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_search, &parsed.value.object, &out, &store, &explorer, &agents); + + try testing.expect(std.mem.indexOf(u8, out.items, "0 results") != null); + try testing.expect(std.mem.indexOf(u8, out.items, "scan still in progress") != null); +} + + +test "issue-378: search waits briefly for scan to reach ready instead of returning empty" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + const prev_state = mcp_mod.getScanState(); + defer mcp_mod.setScanState(prev_state); + mcp_mod.setScanState(.walking); + + const Flipper = struct { + fn run(exp: *Explorer) void { + cio.sleepMs(100); + exp.indexFile("src/late.zig", "fn waitsForScanMarker() void {}\n") catch return; + mcp_mod.setScanState(.ready); + } + }; + const t = try 
std.Thread.spawn(.{}, Flipper.run, .{&explorer}); + defer t.join(); + + const args_json = + \\{"query":"waitsForScanMarker"} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{}); + defer parsed.deinit(); + + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_search, &parsed.value.object, &out, &store, &explorer, &agents); + + try testing.expect(std.mem.indexOf(u8, out.items, "src/late.zig") != null); + try testing.expect(std.mem.indexOf(u8, out.items, "scan still in progress") == null); +} + + +test "issue-bug5: codedb_read returns binary stub instead of dumping bytes" { + var tmp_dir = testing.tmpDir(.{}); + defer tmp_dir.cleanup(); + + var dir_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp_dir.dir.realPathFile(io, ".", &dir_buf); + const dir_path = dir_buf[0..dir_path_len]; + + const bin_rel = "blob.bin"; + const bin_full = try std.fmt.allocPrint(testing.allocator, "{s}/{s}", .{ dir_path, bin_rel }); + defer testing.allocator.free(bin_full); + { + const f = try std.Io.Dir.cwd().createFile(io, bin_full, .{ .truncate = true }); + defer f.close(io); + const payload = [_]u8{ 'a', 'b', 0, 'c', 'd', 0, 'e' }; + try f.writePositionalAll(io, &payload, 0); + } + + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + explorer.setRoot(io, dir_path); + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, dir_path, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + const args_json = try std.fmt.allocPrint(testing.allocator, + "{{\"path\":\"{s}\"}}", .{bin_rel}); + defer testing.allocator.free(args_json); + const parsed = try 
std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{}); + defer parsed.deinit(); + + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_read, &parsed.value.object, &out, &store, &explorer, &agents); + + try testing.expect(std.mem.indexOf(u8, out.items, "binary file") != null); + try testing.expect(std.mem.indexOf(u8, out.items, &[_]u8{0}) == null); +} + + +test "issue-bug6: codedb_read errors when line_start > line_end" { + var tmp_dir = testing.tmpDir(.{}); + defer tmp_dir.cleanup(); + + var dir_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp_dir.dir.realPathFile(io, ".", &dir_buf); + const dir_path = dir_buf[0..dir_path_len]; + + const rel = "small.txt"; + const full = try std.fmt.allocPrint(testing.allocator, "{s}/{s}", .{ dir_path, rel }); + defer testing.allocator.free(full); + { + const f = try std.Io.Dir.cwd().createFile(io, full, .{ .truncate = true }); + defer f.close(io); + try f.writePositionalAll(io, "alpha\nbeta\ngamma\n", 0); + } + + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + explorer.setRoot(io, dir_path); + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, dir_path, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + const args_json = try std.fmt.allocPrint(testing.allocator, + "{{\"path\":\"{s}\",\"line_start\":100,\"line_end\":10}}", .{rel}); + defer testing.allocator.free(args_json); + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{}); + defer parsed.deinit(); + + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, 
testing.allocator, .codedb_read, &parsed.value.object, &out, &store, &explorer, &agents); + + try testing.expect(std.mem.startsWith(u8, out.items, "error:")); + try testing.expect(std.mem.indexOf(u8, out.items, "line_start") != null); +} + + +test "issue-bug7: codedb_search rejects empty query" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + const args_json = + \\{"query":""} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{}); + defer parsed.deinit(); + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_search, &parsed.value.object, &out, &store, &explorer, &agents); + + try testing.expect(std.mem.startsWith(u8, out.items, "error:")); + try testing.expect(std.mem.indexOf(u8, out.items, "empty") != null); +} + + +test "issue-bug7: codedb_search rejects negative max_results" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + const args_json = + \\{"query":"foo","max_results":-3} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{}); + defer parsed.deinit(); + var out: std.ArrayList(u8) = .empty; + 
defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_search, &parsed.value.object, &out, &store, &explorer, &agents); + + try testing.expect(std.mem.startsWith(u8, out.items, "error:")); + try testing.expect(std.mem.indexOf(u8, out.items, "max_results") != null); +} + + +test "issue-bug11: codedb_bundle marks isError when all ops fail" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + const args_json = + \\{"ops":[{"tool":"codedb_outline"}]} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{}); + defer parsed.deinit(); + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_bundle, &parsed.value.object, &out, &store, &explorer, &agents); + + try testing.expect(std.mem.startsWith(u8, out.items, "error:")); +} + + +test "issue-386: telemetry recordToolCall preserves UTF-8 codepoint boundaries" { + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp.dir.realPathFile(io, ".", &path_buf); + const dir_path = path_buf[0..dir_path_len]; + + var telem = telemetry_mod.Telemetry.init(io, dir_path, testing.allocator, false); + defer telem.deinit(); + + // 30 ASCII bytes + a 3-byte UTF-8 codepoint (✓ = 0xE2 0x9C 0x93) lands the + // codepoint boundary at byte 33. The 32-byte cap currently truncates inside + // the codepoint, leaving 0xE2 0x9C as the trailing bytes — invalid UTF-8. 
+ const tool_name = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\xe2\x9c\x93_tail"; + telem.recordToolCall(tool_name, 1234, false, 56); + telem.flush(); + + const ndjson_path = try std.fmt.allocPrint(testing.allocator, "{s}/telemetry.ndjson", .{dir_path}); + defer testing.allocator.free(ndjson_path); + + const contents = try std.Io.Dir.cwd().readFileAlloc(io, ndjson_path, testing.allocator, .limited(64 * 1024)); + defer testing.allocator.free(contents); + + const tool_field = "\"tool\":\""; + const idx = std.mem.indexOf(u8, contents, tool_field) orelse return error.ToolFieldMissing; + const after = contents[idx + tool_field.len ..]; + const end = std.mem.indexOfScalar(u8, after, '"') orelse return error.ToolFieldUnterminated; + const recorded = after[0..end]; + + // The recorded tool slice must be valid UTF-8. A mid-codepoint truncation + // produces invalid bytes — std.unicode.utf8ValidateSlice rejects them. + try testing.expect(std.unicode.utf8ValidateSlice(recorded)); +} + + +test "issue-387: appendId preserves JSON-RPC numeric and number_string ids" { + // JSON-RPC ids are typed as String|Number|Null. The MCP server must echo + // the id verbatim so the client can correlate the reply with its request. + // appendId currently only handles .integer and .string — .float and + // .number_string fall through to "null", breaking correlation for any + // client that uses a fractional id (some test runners) or that the JSON + // parser materializes as number_string. + + // Float id round-trips: parsing "3.5" yields .float, which must serialize + // back to "3.5" (or any representation a JSON parser accepts as the same + // number) — NOT "null". 
+ { + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, "3.5", .{}); + defer parsed.deinit(); + var buf: std.ArrayList(u8) = .empty; + defer buf.deinit(testing.allocator); + mcp_mod.appendId(testing.allocator, &buf, parsed.value); + try testing.expect(!std.mem.eql(u8, buf.items, "null")); + } + + // number_string round-trips: a request with `"id": 12345678901234567890` + // (>i64) is parsed as .number_string. The reply must echo the digits, not + // the literal "null". + { + const v = std.json.Value{ .number_string = "12345678901234567890" }; + var buf: std.ArrayList(u8) = .empty; + defer buf.deinit(testing.allocator); + mcp_mod.appendId(testing.allocator, &buf, v); + try testing.expectEqualStrings("12345678901234567890", buf.items); + } +} + + +test "issue-406: root_policy blocks /private/etc (macOS realpath of /etc)" { + // /etc is in the system_prefixes deny list, but on macOS /etc is a symlink + // to /private/etc. Callers feed isIndexableRoot a path resolved by + // realPathFile (see handleIndex in src/mcp.zig), which turns "/etc" into + // "/private/etc" — and then this textual prefix check accepts it. The + // canonical form must be blocked too, otherwise the deny list is bypassed + // by the very normalization step the callers depend on. + try testing.expect(!root_policy.isIndexableRoot("/private/etc")); + try testing.expect(!root_policy.isIndexableRoot("/private/etc/ssh")); +} + + +test "issue-407: root_policy blocks /var and its non-folders subtree" { + // The system_prefixes list explicitly blocks /var/folders and /var/tmp, + // but not /var itself or /var/log, /var/lib, /var/db, /var/spool, etc. + // On Linux those hold logs, mail, and package state; on macOS realPathFile + // turns /var into /private/var (also unblocked). Accidentally pointing + // the indexer at /var/log on a server pulls in GBs of secrets and is + // never a valid "project root". 
+ try testing.expect(!root_policy.isIndexableRoot("/var")); + try testing.expect(!root_policy.isIndexableRoot("/var/log")); + try testing.expect(!root_policy.isIndexableRoot("/var/lib")); + try testing.expect(!root_policy.isIndexableRoot("/private/var")); + try testing.expect(!root_policy.isIndexableRoot("/private/var/log")); +} + + +test "issue-412: bundle reports 'missing tool' for tool field of wrong type" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + const bundle_json = + \\{"ops":[{"tool":123,"arguments":{"path":"x.zig"}}]} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, bundle_json, .{}); + defer parsed.deinit(); + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_bundle, &parsed.value.object, &out, &store, &explorer, &agents); + + try testing.expect(std.mem.indexOf(u8, out.items, "missing 'tool' field") == null); +} + + +test "issue-413: bundle truncation drops subsequent ops without telling the caller" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + // Index a single large file (~120KB) so two reads exceed the 200KB + // bundle cap. 
Bundle truncates and breaks out of the loop after op[1], + // emitting a TRUNCATED note — but op[2] is silently dropped. + var big: std.ArrayList(u8) = .empty; + defer big.deinit(testing.allocator); + while (big.items.len < 120 * 1024) { + try big.appendSlice(testing.allocator, "pub fn placeholder() void { _ = 0; }\n"); + } + try explorer.indexFile("big.zig", big.items); + try explorer.indexFile("small.zig", "pub fn small() void {}\n"); + + // Three reads: first two exceed 200KB → truncate. op[2] is small.zig + // and should still surface — at minimum, the bundle output must + // mention it (e.g. as another truncated entry) so the caller knows + // their request had three ops, not one. + const bundle_json = + \\{"ops":[ + \\ {"tool":"codedb_read","arguments":{"path":"big.zig"}}, + \\ {"tool":"codedb_read","arguments":{"path":"big.zig"}}, + \\ {"tool":"codedb_outline","arguments":{"path":"small.zig"}} + \\]} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, bundle_json, .{}); + defer parsed.deinit(); + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_bundle, &parsed.value.object, &out, &store, &explorer, &agents); + + // op[2] (index 2) was sent — caller deserves to see something for it. + // Either its result, or an explicit "[2]" entry noting it was dropped. + try testing.expect(std.mem.indexOf(u8, out.items, "[2]") != null); +} + + +test "issue-424-B: bundle falls through to inline args when arguments is empty object" { + // Forge-style buggy clients sometimes send `arguments: {}` AND put the + // real args inline at the op level. The dispatcher currently sees the + // empty `arguments` and stops looking — resulting in a misleading + // "missing 'path'" with `received keys: []` even though `path` is + // sitting right there in the op. 
+ var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + try explorer.indexFile("src/main.zig", "pub fn main() void {}\n"); + + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + const bundle_json = + \\{"ops":[{"tool":"codedb_outline","arguments":{},"path":"src/main.zig"}]} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, bundle_json, .{}); + defer parsed.deinit(); + + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_bundle, &parsed.value.object, &out, &store, &explorer, &agents); + + // Should succeed: path was discoverable inline even though `arguments` was empty. + try testing.expect(std.mem.indexOf(u8, out.items, "src/main.zig") != null); + try testing.expect(std.mem.indexOf(u8, out.items, "missing 'path'") == null); + try testing.expect(std.mem.indexOf(u8, out.items, "received keys: []") == null); +} + + +test "issue-424-D: received-keys diagnostic hints at inline-args workaround when empty" { + // When a sub-op fails with truly-empty args, the diagnostic should + // point users at the inline-args fallback so a broken client wrapper + // can be routed around without a server change. 
+ var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + try explorer.indexFile("src/main.zig", "pub fn main() void {}\n"); + + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + const bundle_json = + \\{"ops":[{"tool":"codedb_outline","arguments":{}}]} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, bundle_json, .{}); + defer parsed.deinit(); + + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_bundle, &parsed.value.object, &out, &store, &explorer, &agents); + + // Original error stays. + try testing.expect(std.mem.indexOf(u8, out.items, "missing 'path'") != null); + // The diagnostic should fire (received-keys line present) and surface + // the inline-shape hint, since no real sub-op args were observed. + try testing.expect(std.mem.indexOf(u8, out.items, "received keys:") != null); + try testing.expect(std.mem.indexOf(u8, out.items, "inline shape") != null); +} + + +test "issue-424-A: bundle envelope errors carry the 'error:' prefix consistently" { + // Pre-fix the bundle dispatcher emits 'op must be an object' and + // 'missing 'tool' field' WITHOUT the 'error:' prefix that per-tool + // handlers and TTY-summary parsing both expect. Normalize. 
+ var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + // Op is a string, not an object. + const bad_shape = + \\{"ops":["not-an-object"]} + ; + const parsed1 = try std.json.parseFromSlice(std.json.Value, testing.allocator, bad_shape, .{}); + defer parsed1.deinit(); + var out1: std.ArrayList(u8) = .empty; + defer out1.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_bundle, &parsed1.value.object, &out1, &store, &explorer, &agents); + try testing.expect(std.mem.indexOf(u8, out1.items, "error: op must be an object") != null); + + // Op missing 'tool' field. + const no_tool = + \\{"ops":[{"arguments":{}}]} + ; + const parsed2 = try std.json.parseFromSlice(std.json.Value, testing.allocator, no_tool, .{}); + defer parsed2.deinit(); + var out2: std.ArrayList(u8) = .empty; + defer out2.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_bundle, &parsed2.value.object, &out2, &store, &explorer, &agents); + try testing.expect(std.mem.indexOf(u8, out2.items, "error: missing 'tool'") != null); +} + + +test "issue-441: bundle rejects codedb_projects sub-op" { + // codedb_projects lists every indexed project on the machine, which is a + // global directory enumeration unrelated to whatever repo the agent is + // working on. When a planner sees a previous bundle that called + // codedb_projects, it tends to replay the same shape — re-emitting 5x + // codedb_projects ops as if that were the canonical "what do I do here" + // call. 
Block it at the dispatcher, mirroring the existing rejections of + // codedb_bundle (recursive) and codedb_edit (write op). + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + const bundle_json = + \\{"ops":[{"tool":"codedb_projects","arguments":{}}]} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, bundle_json, .{}); + defer parsed.deinit(); + + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_bundle, &parsed.value.object, &out, &store, &explorer, &agents); + + // The op must be rejected with an explicit error, not silently dispatched. + try testing.expect(std.mem.indexOf(u8, out.items, "error: codedb_projects not allowed in bundle") != null); +} + + +test "issue-441: codedb_projects branch is excluded from augmented oneOf" { + // Mirror of the dispatcher rejection at the schema level — when the + // discriminated oneOf is opted into via CODEDB_DISCRIMINATED_SCHEMA=1, + // there must not be a oneOf branch advertising codedb_projects as a + // valid sub-tool, since the bundle dispatcher rejects it at runtime. 
+ const augmented = try mcp_mod.buildAugmentedToolsList(testing.allocator); + defer testing.allocator.free(augmented); + + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, augmented, .{}); + defer parsed.deinit(); + + const tools = parsed.value.object.get("tools").?.array; + var bundle_items: ?std.json.Value = null; + for (tools.items) |t| { + if (std.mem.eql(u8, t.object.get("name").?.string, "codedb_bundle")) { + bundle_items = t.object.get("inputSchema").?.object.get("properties").?.object.get("ops").?.object.get("items").?; + break; + } + } + const one_of = bundle_items.?.object.get("oneOf").?.array; + + for (one_of.items) |branch| { + const props = branch.object.get("properties").?.object; + const tool_v = props.get("tool").?; + const tool_const = tool_v.object.get("const") orelse continue; + try testing.expect(!std.mem.eql(u8, tool_const.string, "codedb_projects")); + } +} + + +test "issue-443: codedb_bundle is omitted from default tools/list response" { + // The codedb_bundle tool has been a footgun across multiple stages: + // #434 — schema permitted empty arguments (Stage 1 fix: required arguments) + // #437 — Stage 2 oneOf augmentation broke OpenAI strict-mode (#440 hotfix) + // #441 — codedb_projects sub-op replay loop in planners + // Even with all of the above, OpenAI clients still emit + // {"tool":"codedb_*","arguments":{}} because the default schema's + // arguments field is a bare {type:"object"} with no inner shape, and + // the discriminated oneOf is opt-in only. + // + // Disable codedb_bundle entirely until the schema can be reworked to + // bind sub-tool arguments inline (no `arguments` wrapper), removing + // the empty-args footgun structurally. The dispatcher-side handler + // stays so clients with cached schemas don't crash, but the runtime + // tools/list response no longer advertises it. CODEDB_BUNDLE_ENABLED=1 + // re-enables advertisement for callers that want to re-engage it. 
+ const response = try mcp_mod.buildToolsListResponse(testing.allocator, .{ + .bundle_enabled = false, + .discriminated_opt_in = false, + }); + defer testing.allocator.free(response); + + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, response, .{}); + defer parsed.deinit(); + + const tools = parsed.value.object.get("tools").?.array; + for (tools.items) |t| { + const name = t.object.get("name").?.string; + try testing.expect(!std.mem.eql(u8, name, "codedb_bundle")); + } + + // Sanity: legitimate tools still advertised. + var saw_search = false; + var saw_outline = false; + for (tools.items) |t| { + const name = t.object.get("name").?.string; + if (std.mem.eql(u8, name, "codedb_search")) saw_search = true; + if (std.mem.eql(u8, name, "codedb_outline")) saw_outline = true; + } + try testing.expect(saw_search); + try testing.expect(saw_outline); +} + + +test "issue-443: codedb_bundle is advertised when CODEDB_BUNDLE_ENABLED=1" { + // Re-enable path. When bundle_enabled is true the runtime response + // includes codedb_bundle, exactly as it did before this gate. + const response = try mcp_mod.buildToolsListResponse(testing.allocator, .{ + .bundle_enabled = true, + .discriminated_opt_in = false, + }); + defer testing.allocator.free(response); + + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, response, .{}); + defer parsed.deinit(); + + const tools = parsed.value.object.get("tools").?.array; + var saw_bundle = false; + for (tools.items) |t| { + if (std.mem.eql(u8, t.object.get("name").?.string, "codedb_bundle")) saw_bundle = true; + } + try testing.expect(saw_bundle); +} + + +test "issue-434: codedb_bundle ops items schema requires arguments field" { + // The codedb_bundle inputSchema in tools_list advertises ops items as + // {required: ["tool"]} with arguments as a bare {type: "object"} that + // permits {}. 
Function-calling LLMs read the schema as authoritative and + // emit the minimum-valid payload — {tool: "...", arguments: {}} — which + // misroutes through the inline-args fallback and surfaces as + // "received keys: [tool, arguments]" from each sub-tool. Stage 1 fix: + // add "arguments" to the items.required array so models are forced to + // populate it. (Stage 2 — discriminated oneOf over tool — is a follow-up.) + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, mcp_mod.tools_list, .{}); + defer parsed.deinit(); + + const tools = parsed.value.object.get("tools").?.array; + var bundle_schema: ?std.json.Value = null; + for (tools.items) |t| { + const name = t.object.get("name").?.string; + if (std.mem.eql(u8, name, "codedb_bundle")) { + bundle_schema = t.object.get("inputSchema").?; + break; + } + } + try testing.expect(bundle_schema != null); + + const ops = bundle_schema.?.object.get("properties").?.object.get("ops").?; + const items = ops.object.get("items").?; + const required = items.object.get("required").?.array; + + var has_tool = false; + var has_arguments = false; + for (required.items) |r| { + if (std.mem.eql(u8, r.string, "tool")) has_tool = true; + if (std.mem.eql(u8, r.string, "arguments")) has_arguments = true; + } + try testing.expect(has_tool); + try testing.expect(has_arguments); +} + + +test "issue-437: codedb_bundle ops items schema has discriminated oneOf per sub-tool" { + // Stage 2 of the bundle-schema fix. Stage 1 (#434) made `arguments` + // required but left it as a bare {type: "object"} — so a schema-greedy + // model can still emit `arguments: {}` to satisfy the required check + // without populating real keys. Stage 2 binds the *contents* of + // arguments to each sub-tool's actual inputSchema via a discriminated + // oneOf on `tool` (const) → `arguments` (sub-tool inputSchema). 
+ // + // The augmented schema is built at runtime from the per-sub-tool + // schemas already advertised in tools_list, so there is no + // hand-maintained duplication. + const augmented = try mcp_mod.buildAugmentedToolsList(testing.allocator); + defer testing.allocator.free(augmented); + + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, augmented, .{}); + defer parsed.deinit(); + + const tools = parsed.value.object.get("tools").?.array; + var bundle_items: ?std.json.Value = null; + for (tools.items) |t| { + const name = t.object.get("name").?.string; + if (std.mem.eql(u8, name, "codedb_bundle")) { + bundle_items = t.object.get("inputSchema").?.object.get("properties").?.object.get("ops").?.object.get("items").?; + break; + } + } + try testing.expect(bundle_items != null); + + // `oneOf` array must exist on items. + const one_of_val = bundle_items.?.object.get("oneOf"); + try testing.expect(one_of_val != null); + const one_of = one_of_val.?.array; + + // Must have at least one branch per dispatchable codedb_* sub-tool. + // codedb_bundle (recursive) and codedb_edit (write op) are explicitly + // rejected by handleBundle, so they are excluded. + try testing.expect(one_of.items.len >= 10); + + // Find the codedb_outline branch and verify it pins tool to a const + // and binds arguments to a populated schema (with `path` property). + var found_outline = false; + for (one_of.items) |branch| { + const props = branch.object.get("properties").?.object; + const tool_v = props.get("tool").?; + const tool_const = tool_v.object.get("const"); + if (tool_const == null) continue; + if (!std.mem.eql(u8, tool_const.?.string, "codedb_outline")) continue; + found_outline = true; + + const args_schema = props.get("arguments").?; + const args_props = args_schema.object.get("properties").?.object; + try testing.expect(args_props.get("path") != null); + // codedb_outline requires `path` — preserved by the augmentation. 
+ const args_required = args_schema.object.get("required").?.array; + var path_required = false; + for (args_required.items) |r| { + if (std.mem.eql(u8, r.string, "path")) path_required = true; + } + try testing.expect(path_required); + break; + } + try testing.expect(found_outline); + + // No branch should be for the recursive codedb_bundle or the write-op codedb_edit. + for (one_of.items) |branch| { + const props = branch.object.get("properties").?.object; + const tool_v = props.get("tool").?; + const tool_const = tool_v.object.get("const") orelse continue; + try testing.expect(!std.mem.eql(u8, tool_const.string, "codedb_bundle")); + try testing.expect(!std.mem.eql(u8, tool_const.string, "codedb_edit")); + } +} + diff --git a/src/test_parser.zig b/src/test_parser.zig new file mode 100644 index 0000000..4a0ffde --- /dev/null +++ b/src/test_parser.zig @@ -0,0 +1,1654 @@ +const std = @import("std"); +const cio = @import("cio.zig"); +const testing = std.testing; +const io = std.testing.io; +const explore = @import("explore.zig"); +const Explorer = explore.Explorer; +const Language = explore.Language; +const SymbolKind = explore.SymbolKind; +const DependencyGraph = explore.DependencyGraph; +const Store = @import("store.zig").Store; + + +fn expectOutlineSymbol(outline: *const explore.FileOutline, name: []const u8, kind: SymbolKind) !void { + for (outline.symbols.items) |sym| { + if (std.mem.eql(u8, sym.name, name) and sym.kind == kind) return; + } + return error.TestUnexpectedResult; +} + + +fn expectOutlineImport(outline: *const explore.FileOutline, import_path: []const u8) !void { + for (outline.imports.items) |imp| { + if (std.mem.eql(u8, imp, import_path)) return; + } + return error.TestUnexpectedResult; +} + + +test "issue-301: Dart / Flutter parser" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try 
explorer.indexFile("lib/home_screen.dart", + \\import 'package:flutter/material.dart'; + \\export 'src/helpers.dart'; + \\part 'home_screen.g.dart'; + \\ + \\typedef ItemBuilder = Widget Function(BuildContext context); + \\ + \\abstract class HomeScreen extends StatelessWidget { + \\ @override + \\ Widget build(BuildContext context) { + \\ return const Placeholder(); + \\ } + \\} + \\ + \\mixin Loader on State { + \\ Future loadData() async {} + \\} + \\ + \\extension ContextX on BuildContext { + \\ ThemeData get theme => Theme.of(this); + \\} + \\ + \\enum LoadState { idle, loading } + \\ + \\const String appTitle = 'codedb'; + ); + + var outline = (try explorer.getOutline("lib/home_screen.dart", testing.allocator)) orelse return error.TestUnexpectedResult; + defer outline.deinit(); + + try testing.expectEqual(Language.dart, outline.language); + try testing.expectEqual(@as(usize, 3), outline.imports.items.len); + + var found_typedef = false; + var found_class = false; + var found_mixin = false; + var found_extension = false; + var found_enum = false; + var found_build = false; + var found_load = false; + var found_const = false; + for (outline.symbols.items) |sym| { + if (sym.kind == .type_alias and std.mem.eql(u8, sym.name, "ItemBuilder")) found_typedef = true; + if (sym.kind == .class_def and std.mem.eql(u8, sym.name, "HomeScreen")) found_class = true; + if (sym.kind == .trait_def and std.mem.eql(u8, sym.name, "Loader")) found_mixin = true; + if (sym.kind == .impl_block and std.mem.eql(u8, sym.name, "ContextX")) found_extension = true; + if (sym.kind == .enum_def and std.mem.eql(u8, sym.name, "LoadState")) found_enum = true; + if (sym.kind == .function and std.mem.eql(u8, sym.name, "build")) found_build = true; + if (sym.kind == .function and std.mem.eql(u8, sym.name, "loadData")) found_load = true; + if (sym.kind == .constant and std.mem.eql(u8, sym.name, "appTitle")) found_const = true; + } + try testing.expect(found_typedef); + try 
testing.expect(found_class); + try testing.expect(found_mixin); + try testing.expect(found_extension); + try testing.expect(found_enum); + try testing.expect(found_build); + try testing.expect(found_load); + try testing.expect(found_const); + + const tree = try explorer.getTree(testing.allocator, false); + defer testing.allocator.free(tree); + try testing.expect(std.mem.indexOf(u8, tree, "home_screen.dart dart") != null); +} + + +test "issue-php-1: PHP class definition herkend" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("app/Models/Candidate.php", + \\ 0 + \\} + ); + + var outline = (try explorer.getOutline("main.go", testing.allocator)) orelse return error.TestUnexpectedResult; + defer outline.deinit(); + var func_count: usize = 0; + var struct_count: usize = 0; + for (outline.symbols.items) |sym| { + if (sym.kind == .function) func_count += 1; + if (sym.kind == .struct_def) struct_count += 1; + } + try testing.expect(func_count == 2); // main + Validate + try testing.expect(struct_count == 2); // Config + Handler + try testing.expect(outline.imports.items.len == 1); // "fmt" +} + + +test "issue-151: Ruby class, module, and def" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("app.rb", + \\require "json" + \\require_relative "./helpers" + \\ + \\module Authentication + \\ class User + \\ def initialize(name) + \\ @name = name + \\ end + \\ + \\ def greet + \\ puts "hello" + \\ end + \\ end + \\end + ); + + var outline = (try explorer.getOutline("app.rb", testing.allocator)) orelse return error.TestUnexpectedResult; + defer outline.deinit(); + var func_count: usize = 0; + var struct_count: usize = 0; + for (outline.symbols.items) |sym| { + if 
(sym.kind == .function) func_count += 1; + if (sym.kind == .struct_def) struct_count += 1; + } + try testing.expect(func_count == 2); // initialize + greet + try testing.expect(struct_count == 2); // Authentication + User + try testing.expect(outline.imports.items.len == 2); // json + ./helpers +} + + +test "issue-151: Ruby =begin/=end comments skipped" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("commented.rb", + \\def real_method + \\ true + \\end + \\=begin + \\def fake_method + \\ false + \\end + \\=end + ); + + var outline = (try explorer.getOutline("commented.rb", testing.allocator)) orelse return error.TestUnexpectedResult; + defer outline.deinit(); + var func_count: usize = 0; + for (outline.symbols.items) |sym| { + if (sym.kind == .function) func_count += 1; + } + try testing.expect(func_count == 1); // only real_method +} + + +test "issue-151: Go block comments skipped" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("commented.go", + \\package main + \\ + \\func realFunc() {} + \\/* + \\func fakeFunc() {} + \\*/ + ); + + var outline = (try explorer.getOutline("commented.go", testing.allocator)) orelse return error.TestUnexpectedResult; + defer outline.deinit(); + var func_count: usize = 0; + for (outline.symbols.items) |sym| { + if (sym.kind == .function) func_count += 1; + } + try testing.expect(func_count == 1); // only realFunc +} + + +test "issue-301: Dart block comments skipped" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("commented.dart", + \\class RealWidget {} + \\/* 
+ \\class FakeWidget {} + \\void fakeHelper() {} + \\*/ + ); + + var outline = (try explorer.getOutline("commented.dart", testing.allocator)) orelse return error.TestUnexpectedResult; + defer outline.deinit(); + + var class_count: usize = 0; + var func_count: usize = 0; + for (outline.symbols.items) |sym| { + if (sym.kind == .class_def) class_count += 1; + if (sym.kind == .function) func_count += 1; + } + try testing.expectEqual(@as(usize, 1), class_count); + try testing.expectEqual(@as(usize, 0), func_count); +} + + +test "issue-179: block comment does not produce phantom symbols" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + try explorer.indexFile("test.zig", "/* commented out\npub fn fake_func() void {}\n*/\npub fn real_func() void {}\n"); + + const outline = (try explorer.getOutline("test.zig", testing.allocator)).?; + defer { + var o = outline; + o.deinit(); + } + var found_real = false; + var found_fake = false; + for (outline.symbols.items) |sym| { + if (std.mem.indexOf(u8, sym.name, "real_func") != null) found_real = true; + if (std.mem.indexOf(u8, sym.name, "fake_func") != null) found_fake = true; + } + try testing.expect(found_real); + try testing.expect(!found_fake); +} + + +test "issue-179: code after single-line /* */ comment is parsed" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + try explorer.indexFile("test.zig", "/* skip this */ pub fn visible() void {}\n"); + + const outline = (try explorer.getOutline("test.zig", testing.allocator)).?; + defer { + var o = outline; + o.deinit(); + } + var found = false; + for (outline.symbols.items) |sym| { + if (std.mem.indexOf(u8, sym.name, "visible") != null) found = true; + } + try testing.expect(found); +} + + +test "issue-179: Python docstring with text does not leak symbols" { + var explorer = Explorer.init(testing.allocator, 
Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + try explorer.indexFile("test.py", "def real():\n \"\"\"This is a docstring.\n def fake():\n pass\n \"\"\"\n pass\n"); + + const outline = (try explorer.getOutline("test.py", testing.allocator)).?; + defer { + var o = outline; + o.deinit(); + } + var found_real = false; + var found_fake = false; + for (outline.symbols.items) |sym| { + if (std.mem.indexOf(u8, sym.name, "real") != null) found_real = true; + if (std.mem.indexOf(u8, sym.name, "fake") != null) found_fake = true; + } + try testing.expect(found_real); + try testing.expect(!found_fake); +} + + +test "issue-108: HCL resource block parsed" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + var explorer = Explorer.init(alloc, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + try explorer.indexFile("main.tf", + \\resource "aws_instance" "web" { + \\ ami = "abc-123" + \\} + ); + const results = try explorer.findAllSymbols("web", alloc); + defer alloc.free(results); + try testing.expect(results.len == 1); + try testing.expectEqual(SymbolKind.struct_def, results[0].symbol.kind); +} + + +test "issue-108: HCL variable and output parsed" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + var explorer = Explorer.init(alloc, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + try explorer.indexFile("vars.tf", + \\variable "region" { + \\ default = "us-east-1" + \\} + \\output "ip" { + \\ value = aws_instance.web.public_ip + \\} + ); + const vars = try explorer.findAllSymbols("region", alloc); + defer alloc.free(vars); + try testing.expect(vars.len == 1); + try testing.expectEqual(SymbolKind.variable, vars[0].symbol.kind); + const outs = try explorer.findAllSymbols("ip", alloc); + defer alloc.free(outs); + try testing.expect(outs.len == 1); + try testing.expectEqual(SymbolKind.constant, outs[0].symbol.kind); +} + + 
+test "issue-108: HCL module and provider parsed" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + var explorer = Explorer.init(alloc, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + try explorer.indexFile("main.tf", + \\provider "aws" { + \\ region = "us-east-1" + \\} + \\module "vpc" { + \\ source = "./modules/vpc" + \\} + ); + const providers = try explorer.findAllSymbols("aws", alloc); + defer alloc.free(providers); + try testing.expect(providers.len == 1); + const mods = try explorer.findAllSymbols("vpc", alloc); + defer alloc.free(mods); + try testing.expect(mods.len == 1); +} + + +test "issue-108: HCL comment lines skipped" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + var explorer = Explorer.init(alloc, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + try explorer.indexFile("main.tf", + \\# This is a comment + \\// Another comment + \\variable "name" {} + ); + const results = try explorer.findAllSymbols("name", alloc); + defer alloc.free(results); + try testing.expect(results.len == 1); +} + + +test "issue-215: R function assignment parsed" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + var explorer = Explorer.init(alloc, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + try explorer.indexFile("analysis.R", + \\greet <- function(name) { + \\ paste("Hello", name) + \\} + ); + const results = try explorer.findAllSymbols("greet", alloc); + defer alloc.free(results); + try testing.expect(results.len == 1); + try testing.expectEqual(SymbolKind.function, results[0].symbol.kind); +} + + +test "issue-215: R library import parsed" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + var explorer = Explorer.init(alloc, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + try 
explorer.indexFile("script.r", + \\library(dplyr) + \\require(ggplot2) + ); + const outline = try explorer.getOutline("script.r", alloc) orelse return error.TestUnexpectedResult; + try testing.expectEqual(@as(usize, 2), outline.imports.items.len); +} + + +test "issue-215: R setClass parsed" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + var explorer = Explorer.init(alloc, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + try explorer.indexFile("classes.R", + \\setClass("Person") + \\setRefClass("Animal") + ); + const p = try explorer.findAllSymbols("Person", alloc); + defer alloc.free(p); + try testing.expect(p.len == 1); + try testing.expectEqual(SymbolKind.class_def, p[0].symbol.kind); + const a2 = try explorer.findAllSymbols("Animal", alloc); + defer alloc.free(a2); + try testing.expect(a2.len == 1); +} + + +test "issue-319: C parser extracts includes macros types and functions" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + var explorer = Explorer.init(alloc, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("src/core.c", + \\#include + \\#include "local.h" + \\#define MAX_SIZE 64 + \\#define SQUARE(x) ((x) * (x)) + \\struct Worker { + \\ int id; + \\}; + \\enum Mode { + \\ MODE_A, + \\}; + \\union Value { + \\ int i; + \\}; + \\typedef unsigned long size_alias_t; + \\static inline const char *worker_name(const struct Worker *worker) { + \\ return "worker"; + \\} + \\void *alloc_item(size_t size) + \\{ + \\ return malloc(size); + \\} + ); + + const outline = try explorer.getOutline("src/core.c", alloc) orelse return error.TestUnexpectedResult; + try testing.expectEqual(Language.c, outline.language); + try testing.expectEqual(@as(usize, 2), outline.imports.items.len); + try testing.expectEqualStrings("stdio.h", outline.imports.items[0]); + try testing.expectEqualStrings("local.h", 
outline.imports.items[1]); + + const max_size = try explorer.findAllSymbols("MAX_SIZE", alloc); + defer alloc.free(max_size); + try testing.expectEqual(@as(usize, 1), max_size.len); + try testing.expectEqual(SymbolKind.macro_def, max_size[0].symbol.kind); + + const square = try explorer.findAllSymbols("SQUARE", alloc); + defer alloc.free(square); + try testing.expectEqual(@as(usize, 1), square.len); + try testing.expectEqual(SymbolKind.macro_def, square[0].symbol.kind); + + const worker = try explorer.findAllSymbols("Worker", alloc); + defer alloc.free(worker); + try testing.expectEqual(@as(usize, 1), worker.len); + try testing.expectEqual(SymbolKind.struct_def, worker[0].symbol.kind); + + const mode = try explorer.findAllSymbols("Mode", alloc); + defer alloc.free(mode); + try testing.expectEqual(@as(usize, 1), mode.len); + try testing.expectEqual(SymbolKind.enum_def, mode[0].symbol.kind); + + const value = try explorer.findAllSymbols("Value", alloc); + defer alloc.free(value); + try testing.expectEqual(@as(usize, 1), value.len); + try testing.expectEqual(SymbolKind.union_def, value[0].symbol.kind); + + const alias = try explorer.findAllSymbols("size_alias_t", alloc); + defer alloc.free(alias); + try testing.expectEqual(@as(usize, 1), alias.len); + try testing.expectEqual(SymbolKind.type_alias, alias[0].symbol.kind); + + const worker_name = try explorer.findAllSymbols("worker_name", alloc); + defer alloc.free(worker_name); + try testing.expectEqual(@as(usize, 1), worker_name.len); + try testing.expectEqual(SymbolKind.function, worker_name[0].symbol.kind); + + const alloc_item = try explorer.findAllSymbols("alloc_item", alloc); + defer alloc.free(alloc_item); + try testing.expectEqual(@as(usize, 1), alloc_item.len); + try testing.expectEqual(SymbolKind.function, alloc_item[0].symbol.kind); +} + + +test "issue-319: C parser avoids comments strings prototypes and macro calls" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + 
const alloc = arena.allocator(); + var explorer = Explorer.init(alloc, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("src/noise.c", + \\// int fake_comment(void) { + \\/* int fake_block(void) { */ + \\const char *s = "int fake_string(void) {"; + \\typedef int (*handler_fn)(int); + \\int prototype_only(void); + \\EXPORT_SYMBOL(real_function); + \\if (real_function()) { + \\} + \\int real_function(void) { + \\ return 1; + \\} + ); + + const real = try explorer.findAllSymbols("real_function", alloc); + defer alloc.free(real); + try testing.expectEqual(@as(usize, 1), real.len); + try testing.expectEqual(SymbolKind.function, real[0].symbol.kind); + + const fake_comment = try explorer.findAllSymbols("fake_comment", alloc); + defer alloc.free(fake_comment); + try testing.expectEqual(@as(usize, 0), fake_comment.len); + + const fake_block = try explorer.findAllSymbols("fake_block", alloc); + defer alloc.free(fake_block); + try testing.expectEqual(@as(usize, 0), fake_block.len); + + const fake_string = try explorer.findAllSymbols("fake_string", alloc); + defer alloc.free(fake_string); + try testing.expectEqual(@as(usize, 0), fake_string.len); + + const prototype = try explorer.findAllSymbols("prototype_only", alloc); + defer alloc.free(prototype); + try testing.expectEqual(@as(usize, 0), prototype.len); + + const handler = try explorer.findAllSymbols("handler_fn", alloc); + defer alloc.free(handler); + try testing.expectEqual(@as(usize, 0), handler.len); +} + + +test "issue-321: common detected extensions produce outlines" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + var explorer = Explorer.init(alloc, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("src/math.cc", + \\#include + \\class Calculator { + \\public: + \\ int add(int a, int b) { + \\ return a + b; + \\ } + \\}; + \\int free_add(int a, int b) { + \\ return a + b; + \\} + ); + try 
explorer.indexFile("src/Bridge.mm", + \\#import "Bridge.h" + \\@interface BrowserController + \\- (void)loadPage:(NSString *)url; + \\@end + \\@implementation BrowserController + \\- (void)loadPage:(NSString *)url { } + \\@end + \\class BrowserBridge { + \\}; + \\int bridge_main(void) { + \\ return 0; + \\} + ); + try explorer.indexFile("src/App.java", + \\package demo; + \\import java.util.List; + \\public class Worker { + \\ public void run() {} + \\} + \\interface RunnableThing {} + \\enum Mode { A } + \\record Pair(int left, int right) {} + ); + try explorer.indexFile("src/App.kt", + \\package demo + \\import kotlinx.coroutines.runBlocking + \\data class User(val name: String) + \\interface Repo + \\enum class KotlinMode { A } + \\fun loadUser(): User = User("a") + \\val answer = 42 + ); + try explorer.indexFile("src/Widget.svelte", + \\ + \\.card { color: red; } + ); + try explorer.indexFile("src/View.vue", + \\ + ); + try explorer.indexFile("src/Page.astro", + \\--- + \\import Layout from '../layouts/Layout.astro'; + \\const title = 'Home'; + \\--- + ); + try explorer.indexFile("scripts/build.sh", + \\source ./env.sh + \\function build_app() { + \\} + \\deploy_app() { + \\} + \\BUILD_MODE=release + ); + try explorer.indexFile("styles/app.css", + \\:root { + \\ --brand: red; + \\} + \\.button { + \\ color: var(--brand); + \\} + \\@keyframes fade {} + ); + try explorer.indexFile("styles/app.scss", + \\$gap: 8px; + \\@mixin center {} + \\.panel {} + ); + try explorer.indexFile("db/schema.sql", + \\CREATE TABLE users (id integer); + \\CREATE OR REPLACE FUNCTION do_thing() RETURNS void AS $$ SELECT 1; $$ LANGUAGE sql; + \\CREATE INDEX idx_users_id ON users(id); + ); + try explorer.indexFile("api/service.proto", + \\syntax = "proto3"; + \\import "google/protobuf/timestamp.proto"; + \\message User {} + \\enum Status { STATUS_OK = 0; } + \\service UserService { + \\ rpc GetUser (User) returns (User); + \\} + ); + try explorer.indexFile("math/solver.f90", + \\module 
solver + \\use mathlib + \\type :: Particle + \\end type + \\subroutine step() + \\end subroutine + \\function energy() + \\end function + ); + try explorer.indexFile("ir/module.ll", + \\%Pair = type { i32, i32 } + \\@global_value = global i32 0 + \\define i32 @main() { + \\ ret i32 0 + \\} + ); + try explorer.indexFile("ir/dialect.mlir", + \\module @kernel_mod { + \\ func.func @kernel() { + \\ return + \\ } + \\} + ); + try explorer.indexFile("llvm/records.td", + \\include "Base.td" + \\class Register; + \\multiclass Pat; + \\def R0 : Register<"r0">; + \\defm ADD : Pat<"add">; + \\let Namespace = "Toy"; + ); + + const cc_outline = try explorer.getOutline("src/math.cc", alloc) orelse return error.TestUnexpectedResult; + try testing.expectEqual(Language.cpp, cc_outline.language); + try expectOutlineImport(&cc_outline, "vector"); + try expectOutlineSymbol(&cc_outline, "Calculator", .class_def); + try expectOutlineSymbol(&cc_outline, "add", .function); + try expectOutlineSymbol(&cc_outline, "free_add", .function); + + const mm_outline = try explorer.getOutline("src/Bridge.mm", alloc) orelse return error.TestUnexpectedResult; + try testing.expectEqual(Language.cpp, mm_outline.language); + try expectOutlineImport(&mm_outline, "Bridge.h"); + try expectOutlineSymbol(&mm_outline, "BrowserController", .class_def); + try expectOutlineSymbol(&mm_outline, "loadPage", .method); + try expectOutlineSymbol(&mm_outline, "BrowserBridge", .class_def); + try expectOutlineSymbol(&mm_outline, "bridge_main", .function); + + const java_outline = try explorer.getOutline("src/App.java", alloc) orelse return error.TestUnexpectedResult; + try testing.expectEqual(Language.java, java_outline.language); + try expectOutlineImport(&java_outline, "java.util.List"); + try expectOutlineSymbol(&java_outline, "Worker", .class_def); + try expectOutlineSymbol(&java_outline, "run", .method); + try expectOutlineSymbol(&java_outline, "RunnableThing", .interface_def); + try expectOutlineSymbol(&java_outline, 
"Mode", .enum_def); + try expectOutlineSymbol(&java_outline, "Pair", .class_def); + + const kt_outline = try explorer.getOutline("src/App.kt", alloc) orelse return error.TestUnexpectedResult; + try testing.expectEqual(Language.kotlin, kt_outline.language); + try expectOutlineImport(&kt_outline, "kotlinx.coroutines.runBlocking"); + try expectOutlineSymbol(&kt_outline, "User", .class_def); + try expectOutlineSymbol(&kt_outline, "Repo", .interface_def); + try expectOutlineSymbol(&kt_outline, "KotlinMode", .enum_def); + try expectOutlineSymbol(&kt_outline, "loadUser", .function); + try expectOutlineSymbol(&kt_outline, "answer", .constant); + + const svelte_outline = try explorer.getOutline("src/Widget.svelte", alloc) orelse return error.TestUnexpectedResult; + try testing.expectEqual(Language.svelte, svelte_outline.language); + try expectOutlineImport(&svelte_outline, "./Thing.svelte"); + try expectOutlineSymbol(&svelte_outline, "title", .constant); + try expectOutlineSymbol(&svelte_outline, "renderTitle", .function); + try expectOutlineSymbol(&svelte_outline, ".card", .class_def); + + const vue_outline = try explorer.getOutline("src/View.vue", alloc) orelse return error.TestUnexpectedResult; + try testing.expectEqual(Language.vue, vue_outline.language); + try expectOutlineImport(&vue_outline, "./Child.vue"); + try expectOutlineSymbol(&vue_outline, "count", .constant); + try expectOutlineSymbol(&vue_outline, "inc", .function); + + const astro_outline = try explorer.getOutline("src/Page.astro", alloc) orelse return error.TestUnexpectedResult; + try testing.expectEqual(Language.astro, astro_outline.language); + try expectOutlineImport(&astro_outline, "../layouts/Layout.astro"); + try expectOutlineSymbol(&astro_outline, "title", .constant); + + const shell_outline = try explorer.getOutline("scripts/build.sh", alloc) orelse return error.TestUnexpectedResult; + try testing.expectEqual(Language.shell, shell_outline.language); + try expectOutlineImport(&shell_outline, 
"./env.sh"); + try expectOutlineSymbol(&shell_outline, "build_app", .function); + try expectOutlineSymbol(&shell_outline, "deploy_app", .function); + try expectOutlineSymbol(&shell_outline, "BUILD_MODE", .variable); + + const css_outline = try explorer.getOutline("styles/app.css", alloc) orelse return error.TestUnexpectedResult; + try testing.expectEqual(Language.css, css_outline.language); + try expectOutlineSymbol(&css_outline, "--brand", .constant); + try expectOutlineSymbol(&css_outline, ".button", .class_def); + try expectOutlineSymbol(&css_outline, "fade", .function); + + const scss_outline = try explorer.getOutline("styles/app.scss", alloc) orelse return error.TestUnexpectedResult; + try testing.expectEqual(Language.scss, scss_outline.language); + try expectOutlineSymbol(&scss_outline, "$gap", .constant); + try expectOutlineSymbol(&scss_outline, "center", .function); + try expectOutlineSymbol(&scss_outline, ".panel", .class_def); + + const sql_outline = try explorer.getOutline("db/schema.sql", alloc) orelse return error.TestUnexpectedResult; + try testing.expectEqual(Language.sql, sql_outline.language); + try expectOutlineSymbol(&sql_outline, "users", .struct_def); + try expectOutlineSymbol(&sql_outline, "do_thing", .function); + try expectOutlineSymbol(&sql_outline, "idx_users_id", .constant); + + const proto_outline = try explorer.getOutline("api/service.proto", alloc) orelse return error.TestUnexpectedResult; + try testing.expectEqual(Language.protobuf, proto_outline.language); + try expectOutlineImport(&proto_outline, "google/protobuf/timestamp.proto"); + try expectOutlineSymbol(&proto_outline, "User", .struct_def); + try expectOutlineSymbol(&proto_outline, "Status", .enum_def); + try expectOutlineSymbol(&proto_outline, "UserService", .interface_def); + try expectOutlineSymbol(&proto_outline, "GetUser", .method); + + const fortran_outline = try explorer.getOutline("math/solver.f90", alloc) orelse return error.TestUnexpectedResult; + try 
testing.expectEqual(Language.fortran, fortran_outline.language); + try expectOutlineImport(&fortran_outline, "mathlib"); + try expectOutlineSymbol(&fortran_outline, "solver", .class_def); + try expectOutlineSymbol(&fortran_outline, "Particle", .struct_def); + try expectOutlineSymbol(&fortran_outline, "step", .function); + try expectOutlineSymbol(&fortran_outline, "energy", .function); + + const llvm_outline = try explorer.getOutline("ir/module.ll", alloc) orelse return error.TestUnexpectedResult; + try testing.expectEqual(Language.llvm_ir, llvm_outline.language); + try expectOutlineSymbol(&llvm_outline, "Pair", .type_alias); + try expectOutlineSymbol(&llvm_outline, "global_value", .variable); + try expectOutlineSymbol(&llvm_outline, "main", .function); + + const mlir_outline = try explorer.getOutline("ir/dialect.mlir", alloc) orelse return error.TestUnexpectedResult; + try testing.expectEqual(Language.mlir, mlir_outline.language); + try expectOutlineSymbol(&mlir_outline, "kernel_mod", .class_def); + try expectOutlineSymbol(&mlir_outline, "kernel", .function); + + const td_outline = try explorer.getOutline("llvm/records.td", alloc) orelse return error.TestUnexpectedResult; + try testing.expectEqual(Language.tablegen, td_outline.language); + try expectOutlineImport(&td_outline, "Base.td"); + try expectOutlineSymbol(&td_outline, "Register", .class_def); + try expectOutlineSymbol(&td_outline, "Pat", .class_def); + try expectOutlineSymbol(&td_outline, "R0", .constant); + try expectOutlineSymbol(&td_outline, "ADD", .constant); + try expectOutlineSymbol(&td_outline, "Namespace", .variable); + + const worker = try explorer.findAllSymbols("Worker", alloc); + defer alloc.free(worker); + try testing.expectEqual(@as(usize, 1), worker.len); + try testing.expectEqual(SymbolKind.class_def, worker[0].symbol.kind); + + const run = try explorer.findAllSymbols("run", alloc); + defer alloc.free(run); + try testing.expectEqual(@as(usize, 1), run.len); + try 
testing.expectEqual(SymbolKind.method, run[0].symbol.kind); + + const user = try explorer.findAllSymbols("User", alloc); + defer alloc.free(user); + try testing.expect(user.len >= 2); + + const load_user = try explorer.findAllSymbols("loadUser", alloc); + defer alloc.free(load_user); + try testing.expectEqual(@as(usize, 1), load_user.len); + try testing.expectEqual(SymbolKind.function, load_user[0].symbol.kind); + + const title = try explorer.findAllSymbols("title", alloc); + defer alloc.free(title); + try testing.expect(title.len >= 2); + + const build_app = try explorer.findAllSymbols("build_app", alloc); + defer alloc.free(build_app); + try testing.expectEqual(@as(usize, 1), build_app.len); + try testing.expectEqual(SymbolKind.function, build_app[0].symbol.kind); + + const button = try explorer.findAllSymbols(".button", alloc); + defer alloc.free(button); + try testing.expectEqual(@as(usize, 1), button.len); + + const users = try explorer.findAllSymbols("users", alloc); + defer alloc.free(users); + try testing.expectEqual(@as(usize, 1), users.len); + try testing.expectEqual(SymbolKind.struct_def, users[0].symbol.kind); + + const user_service = try explorer.findAllSymbols("UserService", alloc); + defer alloc.free(user_service); + try testing.expectEqual(@as(usize, 1), user_service.len); + try testing.expectEqual(SymbolKind.interface_def, user_service[0].symbol.kind); + + const particle = try explorer.findAllSymbols("Particle", alloc); + defer alloc.free(particle); + try testing.expectEqual(@as(usize, 1), particle.len); + try testing.expectEqual(SymbolKind.struct_def, particle[0].symbol.kind); + + const main_sym = try explorer.findAllSymbols("main", alloc); + defer alloc.free(main_sym); + try testing.expectEqual(@as(usize, 1), main_sym.len); + try testing.expectEqual(SymbolKind.function, main_sym[0].symbol.kind); + + const kernel = try explorer.findAllSymbols("kernel", alloc); + defer alloc.free(kernel); + try testing.expectEqual(@as(usize, 1), kernel.len); + try 
testing.expectEqual(SymbolKind.function, kernel[0].symbol.kind); + + const r0 = try explorer.findAllSymbols("R0", alloc); + defer alloc.free(r0); + try testing.expectEqual(@as(usize, 1), r0.len); +} + + +test "issue-179: Python inline docstring does not leak symbols" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + var explorer = Explorer.init(alloc, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("mod.py", + \\def real_func(): + \\ """This docstring contains def fake(): pass""" + \\ return 1 + ); + + const real = try explorer.findAllSymbols("real_func", alloc); + defer alloc.free(real); + try testing.expect(real.len == 1); + + const fake = try explorer.findAllSymbols("fake", alloc); + defer alloc.free(fake); + try testing.expectEqual(@as(usize, 0), fake.len); +} + + +test "issue-179: Python multi-line docstring with def inside" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + var explorer = Explorer.init(alloc, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("doc.py", + \\def outer(): + \\ """ + \\ Example: + \\ def inner_example(): + \\ pass + \\ """ + \\ return True + ); + + const outer = try explorer.findAllSymbols("outer", alloc); + defer alloc.free(outer); + try testing.expect(outer.len == 1); + + const inner = try explorer.findAllSymbols("inner_example", alloc); + defer alloc.free(inner); + try testing.expectEqual(@as(usize, 0), inner.len); +} + + +test "issue-331: C parser does not index indented call sites as functions" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + var explorer = Explorer.init(a, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("test.c", + \\void real_func(int x) { + \\ fprintf(stderr, "curl_easy_perform() failed: %s\n", + \\ curl_easy_strerror(res)); + 
\\ curl_easy_perform(curl); + \\ if (SSL_get_options(ctx)) + \\ return; + \\} + ); + + const syms = explorer.outlines.get("test.c").?.symbols.items; + var found_false = false; + for (syms) |sym| { + if (sym.kind == .function) { + if (std.mem.eql(u8, sym.name, "fprintf") or + std.mem.eql(u8, sym.name, "curl_easy_perform") or + std.mem.eql(u8, sym.name, "curl_easy_strerror") or + std.mem.eql(u8, sym.name, "SSL_get_options")) + { + found_false = true; + } + } + } + try testing.expect(!found_false); + var found_real = false; + for (syms) |sym| { + if (sym.kind == .function and std.mem.eql(u8, sym.name, "real_func")) + found_real = true; + } + try testing.expect(found_real); +} + + +test "issue-331: C parser finds nginx-style split-line definitions" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + var explorer = Explorer.init(a, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("ngx_http_request.c", + \\ngx_int_t + \\ngx_http_init_connection(ngx_connection_t *c) + \\{ + \\ ngx_http_connection_t *hc; + \\} + \\ + \\static ngx_int_t + \\ngx_http_create_request(ngx_http_request_t *r) + \\{ + \\ return NGX_OK; + \\} + ); + + const syms = explorer.outlines.get("ngx_http_request.c").?.symbols.items; + var found_init = false; + var found_create = false; + for (syms) |sym| { + if (sym.kind == .function) { + if (std.mem.eql(u8, sym.name, "ngx_http_init_connection")) found_init = true; + if (std.mem.eql(u8, sym.name, "ngx_http_create_request")) found_create = true; + } + } + try testing.expect(found_init); + try testing.expect(found_create); +} + + +test "issue-392: Swift parser" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("Sources/App/Greeter.swift", + \\import Foundation + \\import UIKit + \\ + \\public struct Greeter { + \\ 
let name: String + \\ + \\ public func greet() -> String { + \\ return "Hello, \(name)" + \\ } + \\} + \\ + \\public class HomeViewController: UIViewController { + \\ public override func viewDidLoad() { + \\ super.viewDidLoad() + \\ } + \\} + \\ + \\public protocol Reloadable { + \\ func reload() + \\} + \\ + \\public enum LoadState { + \\ case idle + \\ case loading + \\} + \\ + \\public func topLevel() -> Int { return 42 } + ); + + var outline = (try explorer.getOutline("Sources/App/Greeter.swift", testing.allocator)) orelse return error.TestUnexpectedResult; + defer outline.deinit(); + + // Detected language must surface as "swift" — main has no Language.swift, + // so the file falls into .unknown and no parser runs. + try testing.expectEqualStrings("swift", @tagName(outline.language)); + + var found_struct = false; + var found_class = false; + var found_protocol = false; + var found_enum = false; + var found_top_fn = false; + var found_method = false; + for (outline.symbols.items) |sym| { + if (std.mem.eql(u8, sym.name, "Greeter")) found_struct = true; + if (std.mem.eql(u8, sym.name, "HomeViewController")) found_class = true; + if (std.mem.eql(u8, sym.name, "Reloadable")) found_protocol = true; + if (std.mem.eql(u8, sym.name, "LoadState")) found_enum = true; + if (std.mem.eql(u8, sym.name, "topLevel")) found_top_fn = true; + if (std.mem.eql(u8, sym.name, "greet")) found_method = true; + } + try testing.expect(found_struct); + try testing.expect(found_class); + try testing.expect(found_protocol); + try testing.expect(found_enum); + try testing.expect(found_top_fn); + try testing.expect(found_method); +} + diff --git a/src/test_query.zig b/src/test_query.zig new file mode 100644 index 0000000..a784c73 --- /dev/null +++ b/src/test_query.zig @@ -0,0 +1,1232 @@ +const std = @import("std"); +const cio = @import("cio.zig"); +const testing = std.testing; +const io = std.testing.io; +const Store = @import("store.zig").Store; +const Explorer = 
@import("explore.zig").Explorer; +const SearchResult = @import("explore.zig").SearchResult; +const WordIndex = @import("index.zig").WordIndex; +const TrigramIndex = @import("index.zig").TrigramIndex; +const SparseNgramIndex = @import("index.zig").SparseNgramIndex; +const explore = @import("explore.zig"); +const Language = explore.Language; +const SymbolKind = explore.SymbolKind; +const mcp_mod = @import("mcp.zig"); + + +const fuzzyScore = @import("explore.zig").fuzzyScore; +const AgentRegistry = @import("agent.zig").AgentRegistry; +const edit_mod = @import("edit.zig"); + + +test "issue-360: edit rejects mismatched if_hash and leaves file untouched" { + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + + const rel_path = try std.fmt.allocPrint(testing.allocator, ".zig-cache/tmp/{s}/edit-if-hash.txt", .{tmp.sub_path}); + defer testing.allocator.free(rel_path); + + const original = "line 1\nline 2\nline 3\n"; + var file = try tmp.dir.createFile(io, "edit-if-hash.txt", .{}); + defer file.close(io); + try file.writeStreamingAll(io, original); + + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + const agent_id = try agents.register("issue-360-agent"); + + // A hash value that cannot match any real file content (caller saw a stale read) + try testing.expectError(error.HashMismatch, edit_mod.applyEdit(io, testing.allocator, &store, &agents, null, .{ + .path = rel_path, + .agent_id = agent_id, + .op = .replace, + .range = .{ 1, 1 }, + .content = "stale-line edit", + .if_hash = "deadbeef", + })); + + // File on disk must be unchanged after the rejected edit + const after_bytes = try std.Io.Dir.cwd().readFileAlloc(io, rel_path, testing.allocator, .limited(10 * 1024)); + defer testing.allocator.free(after_bytes); + try testing.expectEqualStrings(original, after_bytes); +} + + +test "issue-360: edit response reports hex hash matching codedb_read" { + var tmp = 
testing.tmpDir(.{}); + defer tmp.cleanup(); + + const rel_path = try std.fmt.allocPrint(testing.allocator, ".zig-cache/tmp/{s}/edit-hex.txt", .{tmp.sub_path}); + defer testing.allocator.free(rel_path); + + const original = "alpha\nbeta\ngamma\n"; + var file = try tmp.dir.createFile(io, "edit-hex.txt", .{}); + defer file.close(io); + try file.writeStreamingAll(io, original); + + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + const agent_id = try agents.register("issue-360-hex-agent"); + + const result = try edit_mod.applyEdit(io, testing.allocator, &store, &agents, null, .{ + .path = rel_path, + .agent_id = agent_id, + .op = .replace, + .range = .{ 2, 2 }, + .content = "BETA", + }); + + // Hash returned matches Wyhash of the new content, hex-formatted same as codedb_read + const new_bytes = try std.Io.Dir.cwd().readFileAlloc(io, rel_path, testing.allocator, .limited(10 * 1024)); + defer testing.allocator.free(new_bytes); + const expected_hash = std.hash.Wyhash.hash(0, new_bytes); + try testing.expectEqual(expected_hash, result.new_hash); +} + + +test "issue-360: edit dry_run returns diff preview and leaves file untouched" { + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + + const rel_path = try std.fmt.allocPrint(testing.allocator, ".zig-cache/tmp/{s}/edit-dry.txt", .{tmp.sub_path}); + defer testing.allocator.free(rel_path); + + const original = "alpha\nbeta\ngamma\n"; + var file = try tmp.dir.createFile(io, "edit-dry.txt", .{}); + defer file.close(io); + try file.writeStreamingAll(io, original); + + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + const agent_id = try agents.register("issue-360-dry-agent"); + + const result = try edit_mod.applyEdit(io, testing.allocator, &store, &agents, null, .{ + .path = rel_path, + .agent_id = agent_id, + .op = .replace, + 
.range = .{ 2, 2 }, + .content = "BETA", + .dry_run = true, + }); + defer if (result.preview) |p| testing.allocator.free(p); + + // File on disk is untouched. + const after_bytes = try std.Io.Dir.cwd().readFileAlloc(io, rel_path, testing.allocator, .limited(10 * 1024)); + defer testing.allocator.free(after_bytes); + try testing.expectEqualStrings(original, after_bytes); + + // Store unchanged. + try testing.expectEqual(@as(u64, 0), store.currentSeq()); + + // seq=0 indicates not committed; new_hash is the would-be hash. + try testing.expectEqual(@as(u64, 0), result.seq); + + // Preview shows both the removed and the added line. + try testing.expect(result.preview != null); + const preview = result.preview.?; + try testing.expect(std.mem.indexOf(u8, preview, "-beta") != null); + try testing.expect(std.mem.indexOf(u8, preview, "+BETA") != null); +} + + +test "issue-163: fuzzy exact match scores highest" { + const exact = fuzzyScore("main.zig", "src/main.zig"); + const partial = fuzzyScore("main.zig", "src/main_helper.zig"); + try testing.expect(exact != null); + try testing.expect(partial != null); + try testing.expect(exact.? > partial.?); +} + + +test "issue-163: fuzzy subsequence match works" { + const score = fuzzyScore("authmid", "src/auth_middleware.py"); + try testing.expect(score != null); + try testing.expect(score.? > 0); +} + + +test "issue-163: fuzzy typo-tolerant (missing char)" { + // "auth_midlware" missing the 'd' in middleware — should still match via subsequence + const score = fuzzyScore("auth_midlware", "src/auth_middleware.py"); + try testing.expect(score != null); +} + + +test "issue-163: fuzzy word boundary bonus" { + // "auth" at word boundary should score higher than "auth" buried in a word + const boundary = fuzzyScore("auth", "src/auth_handler.py"); + const buried = fuzzyScore("auth", "src/xauthyhandle.py"); + try testing.expect(boundary != null); + try testing.expect(buried != null); + try testing.expect(boundary.? 
> buried.?); +} + + +test "issue-163: fuzzy filename ranks above directory" { + // "test" in filename portion should score higher than "test" only in directory + const in_name = fuzzyScore("test", "src/test_auth.py"); + const in_dir = fuzzyScore("test", "testdir/deep/nested/xyzfile.py"); + try testing.expect(in_name != null); + try testing.expect(in_dir != null); + try testing.expect(in_name.? > in_dir.?); +} + + +test "issue-163: fuzzy no match returns null" { + const score = fuzzyScore("zzzzxyz", "src/main.zig"); + try testing.expect(score == null); +} + + +test "issue-163: fuzzyFindFiles via Explorer" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + try explorer.indexFile("src/auth_middleware.py", "def check_auth(): pass"); + try explorer.indexFile("src/middleware/auth.py", "class Auth: pass"); + try explorer.indexFile("tests/test_auth.py", "def test_auth(): pass"); + try explorer.indexFile("src/utils.py", "def format_str(): pass"); + + const results = try explorer.fuzzyFindFiles("authmid", testing.allocator, 10); + defer testing.allocator.free(results); + + try testing.expect(results.len >= 1); + // auth_middleware.py should be top result + try testing.expect(std.mem.indexOf(u8, results[0].path, "auth_middleware") != null); +} + + +test "issue-163: multi-part query matches both parts" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + try explorer.indexFile("src/auth_middleware.py", "def check(): pass"); + try explorer.indexFile("src/auth_handler.py", "def handle(): pass"); + try explorer.indexFile("src/utils.py", "def util(): pass"); + + // "auth middle" should match auth_middleware but not utils + const results = try explorer.fuzzyFindFiles("auth middle", testing.allocator, 10); + defer testing.allocator.free(results); + + try testing.expect(results.len >= 1); + try testing.expect(std.mem.indexOf(u8, 
results[0].path, "middleware") != null); +} + + +test "issue-163: extension constraint filters results" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + try explorer.indexFile("src/auth.py", "def check(): pass"); + try explorer.indexFile("src/auth.ts", "function check() {}"); + try explorer.indexFile("src/auth.zig", "fn check() void {}"); + + // "auth *.py" should only return the .py file + const results = try explorer.fuzzyFindFiles("auth *.py", testing.allocator, 10); + defer testing.allocator.free(results); + + try testing.expect(results.len >= 1); + for (results) |r| { + try testing.expect(std.mem.endsWith(u8, r.path, ".py")); + } +} + + +test "issue-163: special entry point files get bonus" { + const score_main = fuzzyScore("main", "src/main.zig"); + const score_regular = fuzzyScore("main", "src/maintain.zig"); + try testing.expect(score_main != null); + try testing.expect(score_regular != null); + // main.zig is a special entry point — should score higher than maintain.zig + try testing.expect(score_main.? 
> score_regular.?); +} + + +test "issue-163: transpositions handled by Smith-Waterman" { + // These all failed with the old subsequence matcher + try testing.expect(fuzzyScore("mpc", "src/mcp.zig") != null); + try testing.expect(fuzzyScore("mian", "src/main.zig") != null); + try testing.expect(fuzzyScore("agnet", "src/agent.zig") != null); + try testing.expect(fuzzyScore("indxe", "src/index.zig") != null); +} + + +test "issue-168: query pipeline find → limit produces file set" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + try explorer.indexFile("src/auth.py", "def check_auth(): pass"); + try explorer.indexFile("src/auth_handler.py", "def handle(): pass"); + try explorer.indexFile("src/utils.py", "def util(): pass"); + try explorer.indexFile("src/config.py", "DEBUG = True"); + + // Pipeline: find "auth" → should return auth files + const results = try explorer.fuzzyFindFiles("auth", testing.allocator, 10); + defer testing.allocator.free(results); + + try testing.expect(results.len >= 2); + // Both auth files should be in results + var found_auth = false; + var found_handler = false; + for (results) |r| { + if (std.mem.indexOf(u8, r.path, "auth.py") != null) found_auth = true; + if (std.mem.indexOf(u8, r.path, "auth_handler") != null) found_handler = true; + } + try testing.expect(found_auth); + try testing.expect(found_handler); +} + + +test "issue-168: query pipeline search returns matching lines" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + try explorer.indexFile("src/main.zig", "pub fn main() void {\n const x = 42;\n}\n"); + try explorer.indexFile("src/lib.zig", "pub fn init() void {}\n"); + + const results = try explorer.searchContent("main", testing.allocator, 10); + defer { + for (results) |r| { + testing.allocator.free(r.line_text); + testing.allocator.free(r.path); + } + 
testing.allocator.free(results); + } + + try testing.expect(results.len >= 1); + try testing.expect(std.mem.indexOf(u8, results[0].path, "main.zig") != null); +} + + +test "issue-168: query pipeline filter by extension" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + try explorer.indexFile("src/auth.py", "def check(): pass"); + try explorer.indexFile("src/auth.ts", "function check() {}"); + try explorer.indexFile("src/auth.zig", "fn check() void {}"); + + // fuzzyFindFiles with extension constraint + const results = try explorer.fuzzyFindFiles("auth *.py", testing.allocator, 10); + defer testing.allocator.free(results); + + try testing.expect(results.len >= 1); + for (results) |r| { + try testing.expect(std.mem.endsWith(u8, r.path, ".py")); + } +} + + +test "issue-168: query pipeline outline returns symbols" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + try explorer.indexFile("src/main.zig", "pub fn main() void {}\npub fn helper() void {}\n"); + + var outline = (try explorer.getOutline("src/main.zig", testing.allocator)).?; + defer outline.deinit(); + try testing.expect(outline.symbols.items.len >= 2); +} + + +test "issue-168: query pipeline chained find → filter narrows results" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + try explorer.indexFile("src/auth.py", "def check(): pass"); + try explorer.indexFile("src/auth.ts", "function check() {}"); + try explorer.indexFile("src/utils.py", "def util(): pass"); + try explorer.indexFile("docs/auth.md", "# Auth docs"); + + // find "auth" returns all auth files, then *.py filter narrows to python + const all = try explorer.fuzzyFindFiles("auth", testing.allocator, 10); + defer testing.allocator.free(all); + try testing.expect(all.len >= 3); // auth.py, auth.ts, auth.md + + const py_only = 
try explorer.fuzzyFindFiles("auth *.py", testing.allocator, 10); + defer testing.allocator.free(py_only); + try testing.expect(py_only.len >= 1); + try testing.expect(py_only.len < all.len); // filtered set is smaller +} + + +test "issue-168: query pipeline handles empty results gracefully" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + try explorer.indexFile("src/main.zig", "pub fn main() void {}"); + + // Search for something that doesn't exist + const results = try explorer.fuzzyFindFiles("zzzznonexistent", testing.allocator, 10); + defer testing.allocator.free(results); + try testing.expectEqual(@as(usize, 0), results.len); +} + + +test "issue-168: recall — find + filter preserves only matching extension" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + try explorer.indexFile("src/auth.py", "def check(): pass"); + try explorer.indexFile("src/auth.ts", "function check() {}"); + try explorer.indexFile("src/auth.zig", "fn check() void {}"); + try explorer.indexFile("src/auth.rs", "fn check() {}"); + try explorer.indexFile("src/auth_test.py", "def test_check(): pass"); + + // find "auth" should get all 5, then *.py should narrow to exactly 2 + const all = try explorer.fuzzyFindFiles("auth", testing.allocator, 20); + defer testing.allocator.free(all); + try testing.expect(all.len == 5); + + const py = try explorer.fuzzyFindFiles("auth *.py", testing.allocator, 20); + defer testing.allocator.free(py); + try testing.expect(py.len == 2); + for (py) |r| try testing.expect(std.mem.endsWith(u8, r.path, ".py")); +} + + +test "issue-168: recall — search finds content across multiple files" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + try explorer.indexFile("src/a.zig", "pub fn handleRequest() void {}"); + try explorer.indexFile("src/b.zig", 
"pub fn handleResponse() void {}"); + try explorer.indexFile("src/c.zig", "pub fn processData() void {}"); + + const results = try explorer.searchContent("handle", testing.allocator, 10); + defer { + for (results) |r| { + testing.allocator.free(r.line_text); + testing.allocator.free(r.path); + } + testing.allocator.free(results); + } + + // Should find "handle" in a.zig and b.zig but not c.zig + try testing.expect(results.len >= 2); + var found_a = false; + var found_b = false; + var found_c = false; + for (results) |r| { + if (std.mem.indexOf(u8, r.path, "a.zig") != null) found_a = true; + if (std.mem.indexOf(u8, r.path, "b.zig") != null) found_b = true; + if (std.mem.indexOf(u8, r.path, "c.zig") != null) found_c = true; + } + try testing.expect(found_a); + try testing.expect(found_b); + try testing.expect(!found_c); +} + + +test "issue-168: recall — fuzzy find ranks exact matches highest" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + try explorer.indexFile("src/auth.zig", "fn auth() void {}"); + try explorer.indexFile("src/authorization.zig", "fn authorize() void {}"); + try explorer.indexFile("src/authenticate.zig", "fn authenticate() void {}"); + + const results = try explorer.fuzzyFindFiles("auth.zig", testing.allocator, 10); + defer testing.allocator.free(results); + + try testing.expect(results.len >= 1); + // Exact match "auth.zig" should be ranked first + try testing.expect(std.mem.eql(u8, results[0].path, "src/auth.zig")); + // Score should decrease for less exact matches + if (results.len >= 2) { + try testing.expect(results[0].score > results[1].score); + } +} + + +test "issue-168: recall — multi-part query intersection" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + try explorer.indexFile("src/auth_controller.py", "class AuthController: pass"); + try explorer.indexFile("src/auth_model.py", "class 
AuthModel: pass"); + try explorer.indexFile("src/user_controller.py", "class UserController: pass"); + try explorer.indexFile("src/user_model.py", "class UserModel: pass"); + + // "auth controller" should match auth_controller but not user_controller or auth_model + const results = try explorer.fuzzyFindFiles("auth controller", testing.allocator, 10); + defer testing.allocator.free(results); + + try testing.expect(results.len >= 1); + try testing.expect(std.mem.indexOf(u8, results[0].path, "auth_controller") != null); +} + + +test "issue-168: recall — transposition tolerance in pipeline" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + try explorer.indexFile("src/middleware.zig", "fn process() void {}"); + try explorer.indexFile("src/controller.zig", "fn handle() void {}"); + try explorer.indexFile("src/service.zig", "fn serve() void {}"); + + // "midleware" (missing 'd') should still find middleware via Smith-Waterman + const results = try explorer.fuzzyFindFiles("midleware", testing.allocator, 5); + defer testing.allocator.free(results); + + try testing.expect(results.len >= 1); + try testing.expect(std.mem.indexOf(u8, results[0].path, "middleware") != null); +} + + +test "auto-retry: delimiter stripping finds results" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + try explorer.indexFile("src/auth_middleware.py", "def check(): pass"); + + // "authmiddleware" without delimiters should still find auth_middleware + const results = try explorer.fuzzyFindFiles("authmiddleware", testing.allocator, 10); + defer testing.allocator.free(results); + try testing.expect(results.len >= 1); + try testing.expect(std.mem.indexOf(u8, results[0].path, "auth_middleware") != null); +} + + +test "per-file truncation: max 5 matches per file in output" { + var explorer = Explorer.init(testing.allocator, 
Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + // Create a file with 10 lines all matching "const" + var content: [500]u8 = undefined; + var pos: usize = 0; + for (0..10) |i| { + const line = std.fmt.bufPrint(content[pos..], "const val{d} = {d};\n", .{ i, i }) catch break; + pos += line.len; + } + try explorer.indexFile("src/many_consts.zig", content[0..pos]); + + // Search — explorer returns all 10, but MCP handler would truncate to 5 + const results = try explorer.searchContent("const", testing.allocator, 50); + defer { + for (results) |r| { + testing.allocator.free(r.line_text); + testing.allocator.free(r.path); + } + testing.allocator.free(results); + } + // At the explorer level all 10 should be found + try testing.expect(results.len >= 10); +} + + +test "issue-359: globPaths matches files by glob pattern" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + try explorer.indexFile("src/mcp.zig", "pub fn a() void {}"); + try explorer.indexFile("src/explore.zig", "pub fn b() void {}"); + try explorer.indexFile("src/sub/inner.zig", "pub fn c() void {}"); + try explorer.indexFile("tests/test_main.py", "def t(): pass"); + try explorer.indexFile("README.md", "# readme"); + + // ** matches across path separators + const zigs = try explorer.globPaths(testing.allocator, "src/**/*.zig", 100); + defer testing.allocator.free(zigs); + try testing.expectEqual(@as(usize, 3), zigs.len); + + // single * does not cross path separators + const top_zigs = try explorer.globPaths(testing.allocator, "src/*.zig", 100); + defer testing.allocator.free(top_zigs); + try testing.expectEqual(@as(usize, 2), top_zigs.len); + + // top-level extension match + const md = try explorer.globPaths(testing.allocator, "*.md", 100); + defer testing.allocator.free(md); + try testing.expectEqual(@as(usize, 1), md.len); + try testing.expectEqualStrings("README.md", md[0]); + + // results are sorted + const 
all_zigs = try explorer.globPaths(testing.allocator, "**/*.zig", 100); + defer testing.allocator.free(all_zigs); + try testing.expect(all_zigs.len >= 2); + var i: usize = 1; + while (i < all_zigs.len) : (i += 1) { + try testing.expect(std.mem.order(u8, all_zigs[i - 1], all_zigs[i]) == .lt); + } +} + + +test "issue-359: lsDir returns immediate children with file metadata" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + try explorer.indexFile("src/mcp.zig", "pub fn a() void {}"); + try explorer.indexFile("src/explore.zig", "pub fn b() void {}"); + try explorer.indexFile("src/sub/inner.zig", "pub fn c() void {}"); + try explorer.indexFile("tests/test_main.py", "def t(): pass"); + try explorer.indexFile("README.md", "# readme"); + + // Top-level: 1 file (README.md) + 2 dirs (src/, tests/) + const top = try explorer.lsDir(testing.allocator, ""); + defer testing.allocator.free(top); + try testing.expectEqual(@as(usize, 3), top.len); + + var saw_readme = false; + var saw_src_dir = false; + var saw_tests_dir = false; + for (top) |e| { + if (std.mem.eql(u8, e.name, "README.md")) { + try testing.expect(!e.is_dir); + saw_readme = true; + } + if (std.mem.eql(u8, e.name, "src")) { + try testing.expect(e.is_dir); + saw_src_dir = true; + } + if (std.mem.eql(u8, e.name, "tests")) { + try testing.expect(e.is_dir); + saw_tests_dir = true; + } + } + try testing.expect(saw_readme and saw_src_dir and saw_tests_dir); + + // Inside src/: 2 files (mcp.zig, explore.zig) + 1 dir (sub/) + const src_children = try explorer.lsDir(testing.allocator, "src"); + defer testing.allocator.free(src_children); + try testing.expectEqual(@as(usize, 3), src_children.len); + + var saw_sub_dir = false; + var file_count: usize = 0; + for (src_children) |e| { + if (e.is_dir) { + if (std.mem.eql(u8, e.name, "sub")) saw_sub_dir = true; + } else { + file_count += 1; + try testing.expect(e.line_count >= 1); + } + } + try 
testing.expect(saw_sub_dir); + try testing.expectEqual(@as(usize, 2), file_count); +} + + +test "issue-359: mcp.globMatch backtracks across **/* boundary" { + // Pipeline filter (codedb_query) calls mcp.globMatch on each path. The + // iterative version forgot the outer ** position when it entered the + // inner *.zig star, so paths like src/sub/inner.zig were rejected by + // src/**/*.zig even though they should match. + try testing.expect(mcp_mod.globMatch("src/**/*.zig", "src/sub/inner.zig")); + try testing.expect(mcp_mod.globMatch("src/**/*.zig", "src/a/b/c.zig")); + + // Single * still must not cross /. + try testing.expect(!mcp_mod.globMatch("src/*.zig", "src/sub/inner.zig")); + + // Plain prefix matches still work. + try testing.expect(mcp_mod.globMatch("src/*.zig", "src/mcp.zig")); + try testing.expect(!mcp_mod.globMatch("docs/*.md", "src/mcp.zig")); +} + + +test "issue-359: globPaths recall — every matching path survives at every depth" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + // Plant files at varying depths under src/, plus a few outside it. + const planted = [_][]const u8{ + "src/a.zig", + "src/b.zig", + "src/sub/c.zig", + "src/sub/d.zig", + "src/sub/deep/e.zig", + "src/sub/deep/f.zig", + "src/sub/deep/deeper/g.zig", + "tests/h.zig", + "src/notes.md", + "src/sub/notes.md", + }; + for (planted) |p| try explorer.indexFile(p, "pub fn x() void {}"); + + // src/**/*.zig must reach every depth — this is the case the old + // iterative matcher silently dropped (single star slot lost the + // outer ** position when the inner *.zig star ran). + const all_src_zigs = try explorer.globPaths(testing.allocator, "src/**/*.zig", 100); + defer testing.allocator.free(all_src_zigs); + try testing.expectEqual(@as(usize, 7), all_src_zigs.len); + + // Single * does not cross /: only the two top-level src zigs. 
+ const top = try explorer.globPaths(testing.allocator, "src/*.zig", 100); + defer testing.allocator.free(top); + try testing.expectEqual(@as(usize, 2), top.len); + + // **/*.md should find both markdown files no matter their depth. + const md = try explorer.globPaths(testing.allocator, "**/*.md", 100); + defer testing.allocator.free(md); + try testing.expectEqual(@as(usize, 2), md.len); + + // Anchored deep match: src/**/g.zig must find the deepest one only. + const g = try explorer.globPaths(testing.allocator, "src/**/g.zig", 100); + defer testing.allocator.free(g); + try testing.expectEqual(@as(usize, 1), g.len); + try testing.expectEqualStrings("src/sub/deep/deeper/g.zig", g[0]); + + // Pipeline filter must agree path-for-path with globPaths, since it + // now routes through the same matcher. Spot-check a few. + try testing.expect(mcp_mod.globMatch("src/**/*.zig", "src/sub/deep/deeper/g.zig")); + try testing.expect(mcp_mod.globMatch("**/*.md", "src/sub/notes.md")); + try testing.expect(!mcp_mod.globMatch("src/**/*.zig", "tests/h.zig")); +} + + +test "issue-359/360: retrieval recall — search/word/symbol/fuzzy/glob/deps all return ground truth" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + // Flat paths so dep_graph keys (raw import strings) line up with file paths. 
+ try explorer.indexFile( + "auth.zig", + \\const std = @import("std"); + \\ + \\pub fn authenticate(token: []const u8) bool { + \\ _ = token; + \\ return true; + \\} + \\pub fn validateToken(token: []const u8) bool { + \\ return authenticate(token); + \\} + , + ); + try explorer.indexFile( + "handler.zig", + \\const auth = @import("auth.zig"); + \\ + \\pub fn handleLogin() void { + \\ if (auth.authenticate("x")) return; + \\} + , + ); + try explorer.indexFile( + "auth_test.zig", + \\const auth = @import("auth.zig"); + \\ + \\test "auth round-trip" { + \\ _ = auth.authenticate("x"); + \\} + , + ); + try explorer.indexFile( + "unrelated.zig", + \\pub fn formatNumber(n: i64) []const u8 { + \\ _ = n; + \\ return "0"; + \\} + , + ); + try explorer.indexFile("README.md", "# project\nauthenticate description here"); + + // 1. Full-text search: every file containing `authenticate` must appear. + { + const expected = [_][]const u8{ "auth.zig", "handler.zig", "auth_test.zig", "README.md" }; + const results = try explorer.searchContent("authenticate", testing.allocator, 50); + defer { + for (results) |r| { + testing.allocator.free(r.line_text); + testing.allocator.free(r.path); + } + testing.allocator.free(results); + } + var seen = std.StringHashMap(void).init(testing.allocator); + defer seen.deinit(); + for (results) |r| try seen.put(r.path, {}); + for (expected) |e| try testing.expect(seen.contains(e)); + try testing.expect(!seen.contains("unrelated.zig")); + } + + // 2. Word index: exact token `authenticate` must reach the same 4 files. 
+ { + const hits = try explorer.searchWord("authenticate", testing.allocator); + defer testing.allocator.free(hits); + var seen = std.StringHashMap(void).init(testing.allocator); + defer seen.deinit(); + explorer.mu.lockShared(); + defer explorer.mu.unlockShared(); + for (hits) |h| try seen.put(explorer.word_index.hitPath(h), {}); + const expected = [_][]const u8{ "auth.zig", "handler.zig", "auth_test.zig", "README.md" }; + for (expected) |e| try testing.expect(seen.contains(e)); + } + + // 3. Symbol index: `authenticate` is defined once, in auth.zig. + { + const results = try explorer.findAllSymbols("authenticate", testing.allocator); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.symbol.name); + if (r.symbol.detail) |d| testing.allocator.free(d); + } + testing.allocator.free(results); + } + try testing.expect(results.len >= 1); + var found_def = false; + for (results) |r| { + if (std.mem.eql(u8, r.path, "auth.zig")) found_def = true; + } + try testing.expect(found_def); + } + + // 4. Fuzzy file find: query "auth" must reach both auth.zig and auth_test.zig. + { + const results = try explorer.fuzzyFindFiles("auth", testing.allocator, 50); + defer testing.allocator.free(results); + var seen = std.StringHashMap(void).init(testing.allocator); + defer seen.deinit(); + for (results) |r| try seen.put(r.path, {}); + try testing.expect(seen.contains("auth.zig")); + try testing.expect(seen.contains("auth_test.zig")); + } + + // 5. Glob: `auth*.zig` must include auth.zig and auth_test.zig only. 
+ { + const matches = try explorer.globPaths(testing.allocator, "auth*.zig", 50); + defer testing.allocator.free(matches); + var found_auth = false; + var found_test = false; + for (matches) |m| { + if (std.mem.eql(u8, m, "auth.zig")) found_auth = true; + if (std.mem.eql(u8, m, "auth_test.zig")) found_test = true; + try testing.expect(!std.mem.eql(u8, m, "unrelated.zig")); + try testing.expect(!std.mem.eql(u8, m, "handler.zig")); + } + try testing.expect(found_auth); + try testing.expect(found_test); + } + + // 6. Dependency graph: handler.zig and auth_test.zig both import auth.zig. + { + const importers = try explorer.getImportedBy("auth.zig", testing.allocator); + defer { + for (importers) |p| testing.allocator.free(p); + testing.allocator.free(importers); + } + var saw_handler = false; + var saw_test = false; + for (importers) |p| { + if (std.mem.eql(u8, p, "handler.zig")) saw_handler = true; + if (std.mem.eql(u8, p, "auth_test.zig")) saw_test = true; + } + try testing.expect(saw_handler); + try testing.expect(saw_test); + } +} + + +test "issue-356-1: codedb_query returns partial results when a step fails" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + try explorer.indexFile("src/main.zig", "pub fn main() void {}\n"); + try explorer.indexFile("src/lib.zig", "pub fn helper() void {}\n"); + + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + // Pipeline: step 0 (find) succeeds, step 1 (search) is missing 'query'. + // Pre-fix: bails on step 1, dropping step 0's output entirely. + // Post-fix: returns step 0's matched files + a "--- partial ---" tail + // naming the failing step. 
+ const pipe_json = + \\{"pipeline":[ + \\ {"op":"find","query":"main"}, + \\ {"op":"search"} + \\]} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, pipe_json, .{}); + defer parsed.deinit(); + + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_query, &parsed.value.object, &out, &store, &explorer, &agents); + + // Step 0's output (file matches) must survive even though step 1 failed. + try testing.expect(std.mem.indexOf(u8, out.items, "src/main.zig") != null); + // The partial-results tail must name the failing step so callers can + // recover instead of guessing what went wrong. + try testing.expect(std.mem.indexOf(u8, out.items, "--- partial ---") != null); + try testing.expect(std.mem.indexOf(u8, out.items, "failed_at: 1") != null); +} + + +test "issue-356-2: codedb_outline suggests fuzzy alternatives for non-indexed paths" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + try explorer.indexFile("src/main.zig", "pub fn main() void {}\n"); + try explorer.indexFile("src/mcp.zig", "pub fn mcp() void {}\n"); + try explorer.indexFile("src/explore.zig", "pub fn explore() void {}\n"); + + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + // Outline a path that doesn't index — typo on 'main.zig'. 
+ const args_json = + \\{"path":"src/man.zig"} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{}); + defer parsed.deinit(); + + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_outline, &parsed.value.object, &out, &store, &explorer, &agents); + + // Pre-fix: bare 'error: file not indexed' with no recovery hint. + // Post-fix: append fuzzy suggestions so the agent can self-correct. + try testing.expect(std.mem.indexOf(u8, out.items, "did you mean") != null); + // src/main.zig is the closest fuzzy match for src/man.zig. + try testing.expect(std.mem.indexOf(u8, out.items, "src/main.zig") != null); +} + + +test "issue-356-3: codedb_query surfaces received keys on missing-arg errors" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + try explorer.indexFile("src/main.zig", "pub fn main() void {}\n"); + + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + // Single-step pipeline: search step missing 'query' but provided 'q' + // (common typo). The error should name the keys actually received so + // the caller can self-diagnose, mirroring the #357 bundle diagnostic. 
+ const pipe_json = + \\{"pipeline":[{"op":"search","q":"main"}]} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, pipe_json, .{}); + defer parsed.deinit(); + + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_query, &parsed.value.object, &out, &store, &explorer, &agents); + + // The legitimate missing-arg error must still appear. + try testing.expect(std.mem.indexOf(u8, out.items, "search needs 'query'") != null); + // And the diagnostic must surface what the step actually contained. + try testing.expect(std.mem.indexOf(u8, out.items, "received keys") != null); + try testing.expect(std.mem.indexOf(u8, out.items, "q") != null); +} + + +test "issue-356-p2: codedb_outline missing path surfaces received keys" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + try explorer.indexFile("src/main.zig", "pub fn main() void {}\n"); + + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + const args_json = + \\{"file_path":"src/main.zig"} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{}); + defer parsed.deinit(); + + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_outline, &parsed.value.object, &out, &store, &explorer, &agents); + + try testing.expect(std.mem.indexOf(u8, out.items, "missing 'path'") != null); + try testing.expect(std.mem.indexOf(u8, out.items, "received keys") != null); + try testing.expect(std.mem.indexOf(u8, out.items, "file_path") != null); +} + + +test 
"issue-356-p2: codedb_symbol missing name surfaces received keys" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + try explorer.indexFile("src/main.zig", "pub fn main() void {}\n"); + + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + const args_json = + \\{"symbol":"main"} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{}); + defer parsed.deinit(); + + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_symbol, &parsed.value.object, &out, &store, &explorer, &agents); + + try testing.expect(std.mem.indexOf(u8, out.items, "missing 'name'") != null); + try testing.expect(std.mem.indexOf(u8, out.items, "received keys") != null); + try testing.expect(std.mem.indexOf(u8, out.items, "symbol") != null); +} + + +test "issue-356-p2: codedb_search missing query surfaces received keys" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + try explorer.indexFile("src/main.zig", "pub fn main() void {}\n"); + + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + const args_json = + \\{"q":"main"} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{}); + defer parsed.deinit(); + + var out: std.ArrayList(u8) = 
.empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_search, &parsed.value.object, &out, &store, &explorer, &agents); + + try testing.expect(std.mem.indexOf(u8, out.items, "missing 'query'") != null); + try testing.expect(std.mem.indexOf(u8, out.items, "received keys") != null); +} + + +test "issue-356-p2: codedb_word missing word surfaces received keys" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + try explorer.indexFile("src/main.zig", "pub fn main() void {}\n"); + + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + const args_json = + \\{"w":"main"} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{}); + defer parsed.deinit(); + + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_word, &parsed.value.object, &out, &store, &explorer, &agents); + + try testing.expect(std.mem.indexOf(u8, out.items, "missing 'word'") != null); + try testing.expect(std.mem.indexOf(u8, out.items, "received keys") != null); +} + + +test "issue-356-p2: codedb_read missing path surfaces received keys" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + try explorer.indexFile("src/main.zig", "pub fn main() void {}\n"); + + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", 
Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + const args_json = + \\{"file":"src/main.zig"} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{}); + defer parsed.deinit(); + + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_read, &parsed.value.object, &out, &store, &explorer, &agents); + + try testing.expect(std.mem.indexOf(u8, out.items, "missing 'path'") != null); + try testing.expect(std.mem.indexOf(u8, out.items, "received keys") != null); +} + + +test "issue-356-p2: codedb_deps missing path surfaces received keys" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + try explorer.indexFile("src/main.zig", "pub fn main() void {}\n"); + + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + const args_json = + \\{"target":"src/main.zig"} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{}); + defer parsed.deinit(); + + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_deps, &parsed.value.object, &out, &store, &explorer, &agents); + + try testing.expect(std.mem.indexOf(u8, out.items, "missing 'path'") != null); + try testing.expect(std.mem.indexOf(u8, out.items, "received keys") != null); +} + + +test "issue-356-p3: codedb_query emits per-stage summary tail on success" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + try explorer.indexFile("src/main.zig", "pub fn 
main() void {}\n"); + try explorer.indexFile("src/lib.zig", "pub fn helper() void {}\n"); + + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + // Two-step pipeline that succeeds. Phase 3 emits a summary tail so + // callers can see which step did what without re-parsing the + // unstructured per-step output above it. + const pipe_json = + \\{"pipeline":[ + \\ {"op":"find","query":"main"}, + \\ {"op":"sort","by":"path"} + \\]} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, pipe_json, .{}); + defer parsed.deinit(); + + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_query, &parsed.value.object, &out, &store, &explorer, &agents); + + // Stage summary appears at the end of a successful pipeline. + try testing.expect(std.mem.indexOf(u8, out.items, "--- stages ---") != null); + // Lists each step with op and outgoing file count. 
+ try testing.expect(std.mem.indexOf(u8, out.items, "0: find") != null); + try testing.expect(std.mem.indexOf(u8, out.items, "1: sort") != null); +} + + +test "issue-356-p3: codedb_outline includes actionable hint when parser fails" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + try explorer.indexFile("src/main.zig", "pub fn main() void {}\n"); + + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + // Outline a path that's NOT indexed (no setRoot, so disk read won't + // help either). The "file not indexed" error already gets fuzzy + // suggestions from phase 1. This test pins that the hint format is + // actionable — specifically that a 'try codedb_index' suggestion + // appears so users know how to recover from a stale index. + const args_json = + \\{"path":"src/notindexed.zig"} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{}); + defer parsed.deinit(); + + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_outline, &parsed.value.object, &out, &store, &explorer, &agents); + + try testing.expect(std.mem.indexOf(u8, out.items, "file not indexed") != null); + // Phase 3 adds a 'codedb_index' hint so callers know how to recover + // from a stale index in addition to the 'did you mean' suggestions. 
+ try testing.expect(std.mem.indexOf(u8, out.items, "codedb_index") != null); +} + + +test "issue-356-p3: codedb_read appends fuzzy suggestions when path is unreadable" { + const tmp_io = testing.io; + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + + try tmp.dir.createDirPath(tmp_io, "src"); + try tmp.dir.writeFile(tmp_io, .{ + .sub_path = "src/main.zig", + .data = "pub fn main() void {}\n", + }); + + var project_path_buf: [std.fs.max_path_bytes]u8 = undefined; + const project_path_len = try tmp.dir.realPathFile(tmp_io, ".", &project_path_buf); + const project_path = project_path_buf[0..project_path_len]; + + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + explorer.setRoot(tmp_io, project_path); + try explorer.indexFile("src/main.zig", "pub fn main() void {}\n"); + try explorer.indexFile("src/lib.zig", "pub fn helper() void {}\n"); + + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, project_path, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + // Read a non-indexed, non-existent path. Pre-fix: bare 'failed to read file'. + // Post-fix: append fuzzy suggestions like outline already does. 
+ const args_json = + \\{"path":"src/man.zig"} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{}); + defer parsed.deinit(); + + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_read, &parsed.value.object, &out, &store, &explorer, &agents); + + try testing.expect(std.mem.indexOf(u8, out.items, "failed to read file") != null); + try testing.expect(std.mem.indexOf(u8, out.items, "did you mean") != null); + try testing.expect(std.mem.indexOf(u8, out.items, "src/main.zig") != null); +} + diff --git a/src/test_search.zig b/src/test_search.zig new file mode 100644 index 0000000..afc194e --- /dev/null +++ b/src/test_search.zig @@ -0,0 +1,1598 @@ +const std = @import("std"); +const cio = @import("cio.zig"); +const testing = std.testing; +const io = std.testing.io; +const Store = @import("store.zig").Store; +const Explorer = @import("explore.zig").Explorer; +const SearchResult = @import("explore.zig").SearchResult; +const WordIndex = @import("index.zig").WordIndex; +const TrigramIndex = @import("index.zig").TrigramIndex; +const SparseNgramIndex = @import("index.zig").SparseNgramIndex; +const explore = @import("explore.zig"); +const Language = explore.Language; +const SymbolKind = explore.SymbolKind; +const DependencyGraph = explore.DependencyGraph; +const SymbolLocation = explore.SymbolLocation; +const mcp_mod = @import("mcp.zig"); +const AgentRegistry = @import("agent.zig").AgentRegistry; + + +test "issue-264: early exit at max_results misses valid matches in remaining candidates" { + // searchContent stops as soon as result_list.items.len >= max_results. + // The first-indexed file is iterated first (doc_id order). If it has + // many matches it fills the quota alone, and later files are never checked. 
+ var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + // Index noisy file FIRST — it will be the first trigram candidate. + try explorer.indexFile("noisy.zig", + \\fn target_token() void {} + \\fn target_token_v2() void {} + \\const target_token_ptr = undefined; + \\var target_token_state = 0; + \\test "target_token works" {} + \\// calls target_token internally + ); + + // Index quiet file SECOND — it will be a later candidate. + try explorer.indexFile("quiet.zig", "fn target_token() void {}"); + + // max_results=5: noisy.zig has 6 matches, fills the quota. + const results = try explorer.searchContent("target_token", testing.allocator, 5); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + + // quiet.zig must be represented in results even though noisy.zig + // has enough matches to fill max_results by itself. + var found_quiet = false; + for (results) |r| { + if (std.mem.eql(u8, r.path, "quiet.zig")) found_quiet = true; + } + try testing.expect(found_quiet); +} + + +test "search: line numbers correct with incremental counting" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + // File with target on specific lines + const content = "line1\nline2\ntarget_here\nline4\nline5\ntarget_here\nline7\n"; + try explorer.indexFile("test.zig", content); + + const results = try explorer.searchContent("target_here", testing.allocator, 10); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + + try testing.expectEqual(@as(usize, 2), results.len); + try testing.expectEqual(@as(u32, 3), results[0].line_num); + try testing.expectEqual(@as(u32, 6), results[1].line_num); +} + + +test 
"issue-290: searchContent with hyphen query does not crash" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + try explorer.indexFile("a.zig", "const x = \"test-case\";\n"); + const results = try explorer.searchContent("test-case", testing.allocator, 10); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } +} + + +test "issue-292: searchContent with pipe query does not crash" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + try explorer.indexFile("a.zig", "const x = \"timestamp|activity|filter\";\n"); + const results = try explorer.searchContent("timestamp|activity|filter", testing.allocator, 5); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } +} + + +test "issue-292: codedb_search guidance hints regex=true on metachar query" { + const args_json = "{\"query\":\"timestamp|activity|filter\"}"; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{}); + defer parsed.deinit(); + var buf: std.ArrayList(u8) = .empty; + defer buf.deinit(testing.allocator); + mcp_mod.mcpGenerateGuidance(testing.allocator, "codedb_search", &parsed.value.object, "", false, &buf); + try testing.expect(std.mem.indexOf(u8, buf.items, "regex=true") != null); +} + + +test "issue-292: codedb_search guidance does not warn when regex=true is set" { + const args_json = "{\"query\":\"timestamp|activity\",\"regex\":true}"; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{}); + defer parsed.deinit(); + var buf: std.ArrayList(u8) = .empty; + defer 
buf.deinit(testing.allocator); + mcp_mod.mcpGenerateGuidance(testing.allocator, "codedb_search", &parsed.value.object, "", false, &buf); + try testing.expect(std.mem.indexOf(u8, buf.items, "regex=true") == null); +} + + +test "issue-290: codedb_search guidance does not warn on plain hyphen" { + const args_json = "{\"query\":\"test-case\"}"; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{}); + defer parsed.deinit(); + var buf: std.ArrayList(u8) = .empty; + defer buf.deinit(testing.allocator); + mcp_mod.mcpGenerateGuidance(testing.allocator, "codedb_search", &parsed.value.object, "", false, &buf); + try testing.expect(std.mem.indexOf(u8, buf.items, "regex=true") == null); +} + + +test "issue-363b: fuzzyFindFiles ranks exact basename match above unrelated lib.rs" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + // Reproducer from #363: indexing the codegraff workspace, querying 'cli.rs' + // returned four `lib.rs` files before the actual `crates/forge_main/src/cli.rs`. + // Path layout matches the user's report. + try explorer.indexFile("crates/forge_ci/src/lib.rs", "pub fn ci() {}\n"); + try explorer.indexFile("crates/forge_fs/src/lib.rs", "pub fn fs() {}\n"); + try explorer.indexFile("crates/forge_app/src/lib.rs", "pub fn app_lib() {}\n"); + try explorer.indexFile("crates/forge_api/src/lib.rs", "pub fn api() {}\n"); + try explorer.indexFile( + "crates/forge_main/src/cli.rs", + "pub fn parse_args() -> Args {\n Args {}\n}\n", + ); + + const matches = try explorer.fuzzyFindFiles("cli.rs", testing.allocator, 5); + defer testing.allocator.free(matches); + + try testing.expect(matches.len > 0); + // Exact-basename match should be #1, not buried below unrelated lib.rs files. 
+ try testing.expectEqualStrings("crates/forge_main/src/cli.rs", matches[0].path); +} + + +test "issue-363a: searchContent surfaces source-file matches even when doc files dominate the word index" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + // To hit Tier 0 of searchContent (explore.zig:1511-1535) the gate + // `word_hits.len <= max_results * 2` must hold. We pick small numbers: + // 4 docs × 4 mentions = 16 hits, then 2 source-file hits = 18 total, with + // max_results=10 → 18 ≤ 20 ✓ → Tier 0 runs. + var path_buf: [64]u8 = undefined; + var content_buf: [1024]u8 = undefined; + var i: usize = 0; + while (i < 4) : (i += 1) { + const path = try std.fmt.bufPrint(&path_buf, "docs/notes_{d}.md", .{i}); + const content = try std.fmt.bufPrint(&content_buf, + "## Notes {d}\n\n" ++ + "The searchContent function is documented here.\n" ++ + "We discuss searchContent at length.\n" ++ + "Note that searchContent is multi-tier.\n" ++ + "Performance: searchContent is fast.\n", + .{i}, + ); + try explorer.indexFile(path, content); + } + + // Index the source file LAST so its word-index hits land at the END of + // the posting list. Pre-fix, Tier 0 fills the result_list with doc hits + // and returns before reaching source-file hits. 
+ try explorer.indexFile( + "src/explore.zig", + "pub fn searchContent(self: *Explorer, query: []const u8) !void {\n" ++ + " // searchContent is the multi-tier text search entrypoint.\n" ++ + " _ = self;\n" ++ + " _ = query;\n" ++ + "}\n", + ); + + const results = try explorer.searchContent("searchContent", testing.allocator, 10); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + + var found_source = false; + for (results) |r| { + if (std.mem.eql(u8, r.path, "src/explore.zig")) { + found_source = true; + break; + } + } + // The source file MUST appear — it's the canonical match for the + // identifier. Pre-fix, doc-file hits saturated the 10-result quota in + // Tier 0 and src/explore.zig was dropped. + try testing.expect(found_source); +} + + +test "issue-recall: codedb_search supports path_glob filter" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + try explorer.indexFile("src/main.zig", "received keys foo\n"); + try explorer.indexFile("CHANGELOG.md", "received keys diagnostic\n"); + + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + const args_json = + \\{"query":"received keys","path_glob":"*.zig"} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{}); + defer parsed.deinit(); + + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_search, &parsed.value.object, &out, &store, &explorer, &agents); + + try testing.expect(std.mem.indexOf(u8, out.items, "src/main.zig") != null); + try 
testing.expect(std.mem.indexOf(u8, out.items, "CHANGELOG.md") == null); +} + + +test "issue-422: search header count must reflect post-filter visible results" { + // From the issue: a query whose ONLY match would be displayed instead + // shows `1 results` then `(0 shown, 1 truncated)` — every match hidden + // behind a misleading header. Root cause: the header reports the + // unfiltered `results.len` from the explorer, but path_glob/compact + // filters can drop items before they reach the renderer, so a "result" + // that was filtered is mis-labeled as "truncated". + // + // Repro shape mirrors the reporter's call: scope=true, compact=true, + // path_glob limited to a subtree. The match ITSELF is in-glob and not a + // comment — the bug is purely in the bookkeeping. + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + // Two files: one in the path_glob subtree (the real match), one outside + // it (a decoy that the explorer would also return for the substring). + // Without the fix the header counts both, then the renderer drops the + // out-of-glob one and (because of unrelated bookkeeping) reports the + // in-glob one as "truncated" too. + try explorer.indexFile( + "crates/forge_api/src/forge_api.rs", + "// header\n// header\n// header\n// header\n// header\n// header\n// header\n// header\n// header\n// header\n// header\n// header\n// header\n// header\n// header\n// header\n// header\n// header\n// header\n// header\n// header\n// header\n// header\npub struct ForgeAPI {\n", + ); + // Decoy match outside the glob — explorer will return it, the renderer + // must NOT count it toward "truncated". 
+ try explorer.indexFile("docs/forge_api.md", "struct ForgeAPI is documented here\n"); + + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + const args_json = + \\{"query":"struct ForgeAPI","max_results":20,"scope":true,"compact":true,"regex":false,"path_glob":"crates/**/*.rs"} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{}); + defer parsed.deinit(); + + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_search, &parsed.value.object, &out, &store, &explorer, &agents); + + // The actionable hit must be visible (path + line number). + try testing.expect(std.mem.indexOf(u8, out.items, "crates/forge_api/src/forge_api.rs") != null); + try testing.expect(std.mem.indexOf(u8, out.items, ":24:") != null); + // Out-of-glob decoy must be excluded from the rendered output. + try testing.expect(std.mem.indexOf(u8, out.items, "docs/forge_api.md") == null); + // The misleading "(N shown, M truncated)" footer must NOT fire when M + // is just the count of glob-filtered or compact-filtered items. Those + // weren't truncated — they were filtered out, and saying "truncated" + // implies the user could recover them by raising max_results. + try testing.expect(std.mem.indexOf(u8, out.items, " truncated)") == null); + // Header count must reflect post-filter visible matches (1), not the + // raw explorer count (2). Otherwise users see a misleading "2 results" + // when only 1 matched their glob. 
+ try testing.expect(std.mem.indexOf(u8, out.items, "1 results for 'struct ForgeAPI'") != null); +} + + +test "issue-390: codedb_search scope=true caps matches per file" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + // Build a "dominant" file with 20 matches plus several files with 1 match + // each. Without a per-file cap on the scope=true path, the dominant file + // alone drowns the response. The plain/regex branches already enforce + // max_per_file=5 (mcp.zig:1141, 1198), but the scope=true branch does not. + var dominant_buf: std.ArrayList(u8) = .empty; + defer dominant_buf.deinit(testing.allocator); + try dominant_buf.appendSlice(testing.allocator, "pub fn dominant() void {\n"); + for (0..20) |_| try dominant_buf.appendSlice(testing.allocator, " // FROBNICATE token\n"); + try dominant_buf.appendSlice(testing.allocator, "}\n"); + try explorer.indexFile("src/dominant.zig", dominant_buf.items); + try explorer.indexFile("src/a.zig", "// FROBNICATE here\npub fn a() void {}\n"); + try explorer.indexFile("src/b.zig", "// FROBNICATE here\npub fn b() void {}\n"); + try explorer.indexFile("src/c.zig", "// FROBNICATE here\npub fn c() void {}\n"); + + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + const args_json = + \\{"query":"FROBNICATE","scope":true,"max_results":100} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{}); + defer parsed.deinit(); + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_search, &parsed.value.object, &out, &store, &explorer, &agents); + + // Count 
"src/dominant.zig:" occurrences (one per emitted match line). + var dominant_lines: usize = 0; + var i: usize = 0; + while (std.mem.indexOfPos(u8, out.items, i, "src/dominant.zig:")) |pos| { + dominant_lines += 1; + i = pos + 1; + } + // The plain-search per-file cap is 5; scope=true should match. Without + // any cap, all 20 matches surface and starve the smaller files. + try testing.expect(dominant_lines <= 5); + // The other files still surface — the cap shouldn't tank recall, just + // bound the dominant file's share. + try testing.expect(std.mem.indexOf(u8, out.items, "src/a.zig:") != null); + try testing.expect(std.mem.indexOf(u8, out.items, "src/b.zig:") != null); + try testing.expect(std.mem.indexOf(u8, out.items, "src/c.zig:") != null); +} + + +test "issue-391: codedb_callers tool exists" { + // codedb_callers is the proposed reverse-callgraph tool: given a symbol + // name, return the call sites across the index. It fuses the existing + // word index with outline scopes, replacing the multi-step + // "codedb_word → eyeball → codedb_outline per file" workflow. + // + // The minimum surface contract: the Tool enum exposes a codedb_callers + // variant so dispatch can route to it. Today it does not, so the + // workflow has to be assembled by hand on the client side. 
+ try testing.expect(@hasField(mcp_mod.Tool, "codedb_callers")); +} + + +test "issue-391: codedb_callers returns call sites with scope" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + try explorer.indexFile("def.zig", "pub fn fooBar() void {}\n"); + try explorer.indexFile("a.zig", "pub fn callerA() void {\n fooBar();\n}\n"); + try explorer.indexFile("b.zig", "pub fn callerB() void {\n fooBar();\n}\n"); + + const args_json = + \\{"name":"fooBar"} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{}); + defer parsed.deinit(); + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_callers, &parsed.value.object, &out, &store, &explorer, &agents); + + try testing.expect(std.mem.indexOf(u8, out.items, "2 call sites for 'fooBar'") != null); + try testing.expect(std.mem.indexOf(u8, out.items, "a.zig:2") != null); + try testing.expect(std.mem.indexOf(u8, out.items, "b.zig:2") != null); + try testing.expect(std.mem.indexOf(u8, out.items, "callerA") != null); + try testing.expect(std.mem.indexOf(u8, out.items, "callerB") != null); + try testing.expect(std.mem.indexOf(u8, out.items, "def.zig:1") == null); +} + + +test "issue-391: codedb_callers rejects missing name" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = 
AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + const args_json = + \\{} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{}); + defer parsed.deinit(); + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_callers, &parsed.value.object, &out, &store, &explorer, &agents); + + try testing.expect(std.mem.startsWith(u8, out.items, "error:")); + try testing.expect(std.mem.indexOf(u8, out.items, "name") != null); +} + + +test "issue-393: BM25 ranking surfaces high-density file before single-mention file" { + // Multi-term content queries today return matches in scan order with only + // a per-line occurrence count tiebreaker (explore.zig:1674-1688). On a + // large repo this dumps every match with no notion of which *file* is the + // most relevant — a file that mentions every query term many times ranks + // identically to one that mentions a single term once. + // + // BM25 over the existing trigram + word index would score documents by + // (per-term tf * idf) with length normalization, so the file densely + // covering both terms surfaces above the noise file. + // + // Minimum surface contract: Explorer exposes `searchContentRanked` which + // takes a multi-term query and returns results ordered by descending + // BM25 score across files (highest-scoring document's match comes first). + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + // dense.zig: hits both query terms many times across many lines. 
+ try explorer.indexFile("src/dense.zig", + \\pub fn parseTokenStream() void { + \\ const token = nextToken(); + \\ parseToken(token); + \\ parseToken(token); + \\ parseToken(token); + \\ const stream = parseTokenStream(); + \\ parseTokenStream(); + \\ _ = token; + \\ _ = stream; + \\} + ); + // sparse.zig: mentions one term once, in passing. + try explorer.indexFile("src/sparse.zig", + \\pub fn unrelated() void { + \\ // a passing mention of parse here + \\ return; + \\} + ); + // Noise files dilute df-based scoring; BM25 must still rank dense first. + try explorer.indexFile("src/noise_a.zig", "pub fn a() void {}\n"); + try explorer.indexFile("src/noise_b.zig", "pub fn b() void {}\n"); + try explorer.indexFile("src/noise_c.zig", "pub fn c() void {}\n"); + + try testing.expect(@hasDecl(Explorer, "searchContentRanked")); + + const results = try explorer.searchContentRanked("parse Token", testing.allocator, 16); + defer { + for (results) |r| { + testing.allocator.free(r.line_text); + testing.allocator.free(r.path); + } + testing.allocator.free(results); + } + + try testing.expect(results.len > 0); + // Top-ranked result must come from the dense file. + try testing.expectEqualStrings("src/dense.zig", results[0].path); + // Score must be populated and strictly positive when ranking is on. + try testing.expect(results[0].score > 0.0); + // Results must be sorted by score descending across distinct documents: + // the first dense.zig score must exceed the first sparse.zig score. 
+ var dense_score: f32 = -1.0; + var sparse_score: f32 = -1.0; + for (results) |r| { + if (dense_score < 0 and std.mem.eql(u8, r.path, "src/dense.zig")) dense_score = r.score; + if (sparse_score < 0 and std.mem.eql(u8, r.path, "src/sparse.zig")) sparse_score = r.score; + } + if (sparse_score >= 0) { + try testing.expect(dense_score > sparse_score); + } +} + + +test "issue-400: BM25 ranks both-terms file above single-term files" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("both.zig", + \\pub fn parseToken() void { + \\ parseToken(); + \\ parseToken(); + \\} + ); + try explorer.indexFile("only_parse.zig", + \\pub fn parseFoo() void { + \\ parse(); + \\} + ); + try explorer.indexFile("only_token.zig", + \\pub fn tokenStream() void { + \\ token(); + \\} + ); + + const results = try explorer.searchContentRanked("parse Token", testing.allocator, 8); + defer { + for (results) |r| { + testing.allocator.free(r.line_text); + testing.allocator.free(r.path); + } + testing.allocator.free(results); + } + try testing.expect(results.len > 0); + try testing.expectEqualStrings("both.zig", results[0].path); + try testing.expect(results[0].score > 0.0); +} + + +test "issue-400-bug1: searchContentRanked returns ranked results when skip_file_words=true" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + explorer.word_index.skip_file_words = true; + try explorer.indexFile("a.zig", "apple banana\n"); + try explorer.indexFile("b.zig", "apple\n"); + const results = try explorer.searchContentRanked("apple", testing.allocator, 10); + defer { + for (results) |r| { + testing.allocator.free(r.line_text); + testing.allocator.free(r.path); + } + testing.allocator.free(results); + } + try testing.expect(results.len > 0); +} + + +test "issue-400-bug2: total_tokens 
stays consistent across re-index when skip_file_words=true" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + explorer.word_index.skip_file_words = true; + try explorer.indexFile("a.zig", "one two three four\n"); + try explorer.indexFile("a.zig", "five six seven\n"); + try explorer.indexFile("a.zig", "eight\n"); + try testing.expectEqual(@as(u64, 1), explorer.word_index.total_tokens); +} + + +test "bm25-recall-a: single-term tf ordering" { + // 3 docs with identical length but "apple" on different numbers of lines. + // The index deduplicates per (doc, line), so tf = number of lines with the term. + // Equal doc lengths mean length normalization is constant; higher tf must rank higher. + // Each doc has exactly 10 tokens (5 lines x 2 tokens each). + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + // doc1: apple on 1 of 5 lines + try explorer.indexFile("doc1.txt", "apple filler\nfiller filler\nfiller filler\nfiller filler\nfiller filler"); + // doc2: apple on 5 of 5 lines (max tf) + try explorer.indexFile("doc2.txt", "apple filler\napple filler\napple filler\napple filler\napple filler"); + // doc3: apple on 2 of 5 lines + try explorer.indexFile("doc3.txt", "apple filler\napple filler\nfiller filler\nfiller filler\nfiller filler"); + + const results = try explorer.searchContentRanked("apple", testing.allocator, 10); + defer { + for (results) |r| { + testing.allocator.free(r.line_text); + testing.allocator.free(r.path); + } + testing.allocator.free(results); + } + + try testing.expectEqual(@as(usize, 3), results.len); + try testing.expectEqualStrings("doc2.txt", results[0].path); + try testing.expectEqualStrings("doc3.txt", results[1].path); + try testing.expectEqualStrings("doc1.txt", results[2].path); + try testing.expect(results[0].score > results[1].score); + 
try testing.expect(results[1].score > results[2].score); +} + + +test "bm25-recall-b: both-terms doc beats high-tf single-term doc" { + // doc1 has apple+banana (both query terms, one occurrence each). + // doc2 has only apple, but repeated 3x (high tf). + // doc3 has only banana, once. + // BM25 sums idf*tf_norm per term: doc1 accumulates two idf contributions + // while doc2 only gets one -- doc1 must rank first. + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("doc1.txt", "apple banana cherry"); + try explorer.indexFile("doc2.txt", "apple apple apple"); + try explorer.indexFile("doc3.txt", "banana date elderberry"); + + const results = try explorer.searchContentRanked("apple banana", testing.allocator, 10); + defer { + for (results) |r| { + testing.allocator.free(r.line_text); + testing.allocator.free(r.path); + } + testing.allocator.free(results); + } + + try testing.expect(results.len >= 2); + try testing.expectEqualStrings("doc1.txt", results[0].path); + try testing.expect(results[0].score > 0.0); + var doc2_score: f32 = -1.0; + for (results) |r| { + if (std.mem.eql(u8, r.path, "doc2.txt")) { + doc2_score = r.score; + break; + } + } + if (doc2_score >= 0.0) { + try testing.expect(results[0].score > doc2_score); + } +} + + +test "bm25-recall-c: df-saturation -- ubiquitous term has near-zero idf" { + // "the" appears in all 11 docs -> idf near zero, barely contributes. + // "unique_marker" appears only in special.txt -> high idf, special.txt ranks first. 
+ var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("d1.txt", "the quick brown fox"); + try explorer.indexFile("d2.txt", "the lazy dog jumps"); + try explorer.indexFile("d3.txt", "the sun rises east"); + try explorer.indexFile("d4.txt", "the moon shines bright"); + try explorer.indexFile("d5.txt", "the rain in spain"); + try explorer.indexFile("d6.txt", "the cat sat mat"); + try explorer.indexFile("d7.txt", "the wind blows cold"); + try explorer.indexFile("d8.txt", "the tide comes in"); + try explorer.indexFile("d9.txt", "the stars align now"); + try explorer.indexFile("d10.txt", "the clock ticks forward"); + try explorer.indexFile("special.txt", "the unique_marker is here"); + + const results = try explorer.searchContentRanked("the unique_marker", testing.allocator, 20); + defer { + for (results) |r| { + testing.allocator.free(r.line_text); + testing.allocator.free(r.path); + } + testing.allocator.free(results); + } + + try testing.expect(results.len > 0); + try testing.expectEqualStrings("special.txt", results[0].path); + if (results.len > 1) { + try testing.expect(results[0].score > results[1].score); + } +} + + +test "bm25-recall-d: length normalization favors shorter doc" { + // short.txt: 5 tokens, one "needle". + // long.txt: ~50 tokens, one "needle". + // BM25 with b=0.75 penalizes longer docs; short.txt must rank higher. 
+ var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("short.txt", "needle alpha beta gamma delta"); + try explorer.indexFile("long.txt", + "aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss tt uu vv ww xx yy zz " ++ + "aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss tt uu vv ww xx needle yy zz" + ); + + const results = try explorer.searchContentRanked("needle", testing.allocator, 10); + defer { + for (results) |r| { + testing.allocator.free(r.line_text); + testing.allocator.free(r.path); + } + testing.allocator.free(results); + } + + try testing.expectEqual(@as(usize, 2), results.len); + try testing.expectEqualStrings("short.txt", results[0].path); + try testing.expect(results[0].score > results[1].score); +} + + +test "bm25-recall-e: empty and pathological queries return empty without crash" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("file.txt", "some content here"); + + { + const r = try explorer.searchContentRanked("", testing.allocator, 10); + defer testing.allocator.free(r); + try testing.expectEqual(@as(usize, 0), r.len); + } + { + const r = try explorer.searchContentRanked(" ", testing.allocator, 10); + defer testing.allocator.free(r); + try testing.expectEqual(@as(usize, 0), r.len); + } + { + const r = try explorer.searchContentRanked("nonexistent_xyz_term_99", testing.allocator, 10); + defer testing.allocator.free(r); + try testing.expectEqual(@as(usize, 0), r.len); + } +} + + +test "bm25-stress: 1000-doc index, common token, max_results cap honored" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + var path_buf: [64]u8 = undefined; + var content_buf: 
[256]u8 = undefined; + for (0..1000) |i| { + const path = std.fmt.bufPrint(&path_buf, "stress/doc{d}.txt", .{i}) catch unreachable; + const content = std.fmt.bufPrint(&content_buf, + "common token alpha beta gamma doc{d} extra filler words here now", .{i} + ) catch unreachable; + try explorer.indexFile(path, content); + } + + const cap = 25; + const results = try explorer.searchContentRanked("common", testing.allocator, cap); + defer { + for (results) |r| { + testing.allocator.free(r.line_text); + testing.allocator.free(r.path); + } + testing.allocator.free(results); + } + + try testing.expect(results.len <= cap); + try testing.expect(results.len > 0); + for (results) |r| { + try testing.expect(r.score > 0.0); + } + for (1..results.len) |i| { + try testing.expect(results[i - 1].score >= results[i].score); + } +} + + +test "bm25-state-sync: re-index and remove update total_tokens correctly" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + + try explorer.indexFile("sync.txt", "alpha beta gamma delta epsilon"); + try testing.expectEqual(@as(u64, 5), explorer.word_index.total_tokens); + + try explorer.indexFile("sync.txt", "alpha beta"); + try testing.expectEqual(@as(u64, 2), explorer.word_index.total_tokens); + + explorer.removeFile("sync.txt"); + try testing.expectEqual(@as(u64, 0), explorer.word_index.total_tokens); +} + + +test "issue-425: codedb_callers excludes substring matches in unrelated identifiers" { + // handleCallers (mcp.zig:1339) currently calls searchContentWithScope(name) + // which is a *substring* full-text search. The only de-dup it performs is + // dropping lines that match the canonical definition of `name` itself. + // That means a search for "fooBar" returns lines mentioning the unrelated + // identifier "fooBarExtended" — both its definition site and any reference + // — as if they were call sites. 
The fix is a whole-word check on the hit + // line so substring matches in longer identifiers are excluded. + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + try explorer.indexFile("def.zig", "pub fn fooBar() void {}\n"); + // A different symbol whose name contains "fooBar" as a substring. + try explorer.indexFile("other.zig", "pub fn fooBarExtended() void {}\n"); + // A genuine call site. + try explorer.indexFile("a.zig", "pub fn callerA() void {\n fooBar();\n}\n"); + + const args_json = + \\{"name":"fooBar"} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{}); + defer parsed.deinit(); + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_callers, &parsed.value.object, &out, &store, &explorer, &agents); + + // Real call site must still appear. + try testing.expect(std.mem.indexOf(u8, out.items, "a.zig:2") != null); + // Substring-only matches in unrelated identifiers must NOT. + try testing.expect(std.mem.indexOf(u8, out.items, "other.zig") == null); + try testing.expect(std.mem.indexOf(u8, out.items, "fooBarExtended") == null); + // Header reports the real count (1), not the inflated count (2). + try testing.expect(std.mem.indexOf(u8, out.items, "1 call sites for 'fooBar'") != null); +} + + +test "issue-426: codedb_callers excludes non-code files (markdown, docs)" { + // handleCallers (mcp.zig:1339) feeds searchContentWithScope across every + // indexed file regardless of language. 
Markdown and other documentation + // files that mention the symbol in prose surface as if they were call + // sites. The fix is a language gate: skip results from non-code files. + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + try explorer.indexFile("def.zig", "pub fn fooBar() void {}\n"); + try explorer.indexFile("a.zig", "pub fn callerA() void {\n fooBar();\n}\n"); + // Prose mention in a docs file — the identifier appears as a whole + // word, so this is independent of the substring-match bug (#425): + // even a perfect whole-word match on a markdown file is still not a + // call site. + try explorer.indexFile( + "docs/notes.md", + "# Notes\n\nThe fooBar helper is documented here for posterity.\n", + ); + + const args_json = + \\{"name":"fooBar"} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{}); + defer parsed.deinit(); + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_callers, &parsed.value.object, &out, &store, &explorer, &agents); + + // Real call site present. + try testing.expect(std.mem.indexOf(u8, out.items, "a.zig:2") != null); + // Markdown mention must NOT appear as a call site. + try testing.expect(std.mem.indexOf(u8, out.items, "docs/notes.md") == null); + // Header reflects the real count. 
+ try testing.expect(std.mem.indexOf(u8, out.items, "1 call sites for 'fooBar'") != null); +} + + +test "issue-427: searchContent Tier 1 sort starves the definition-dense file" { + // searchContent's Tier 1 (explore.zig:1590-1598) sorts trigram candidates + // by file content length ASCENDING and then applies a per-file cap of + // max(1, max_results / estimated_total). When several small unrelated + // files match the query, they each contribute one hit and saturate the + // result quota before the canonical (large, definition-dense) file is + // ever scanned — so the file with the most occurrences of the term is + // missing from the output. + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + // 8 small files. Each contains one occurrence of the term as a whole + // word. They sort first under the length-ascending Tier 1 order. + const small_count: usize = 8; + var i: usize = 0; + while (i < small_count) : (i += 1) { + var path_buf: [32]u8 = undefined; + const path = try std.fmt.bufPrint(&path_buf, "small_{d}.zig", .{i}); + try explorer.indexFile(path, "fn s() void { _ = widgetX; }\n"); + } + + // Canonical file: many lines mentioning widgetX, padded so its content + // length is larger than every small file (sort key: content length). + const canonical_content = + "fn canonical() void {\n" ++ + " _ = widgetX;\n" ++ + " _ = widgetX;\n" ++ + " _ = widgetX;\n" ++ + " _ = widgetX;\n" ++ + " // padding line for content length, to push this file to the\n" ++ + " // tail of the length-ascending sort. The reranker should still\n" ++ + " // surface it because it has the most occurrences of the term.\n" ++ + " _ = 0;\n" ++ + "}\n"; + try explorer.indexFile("canonical.zig", canonical_content); + + // max_results small enough that 8 small files can saturate the quota. + // word_hits.len = small_count (8) + canonical occurrences (4) = 12. 
+ // max_results * 2 = 10. 12 > 10 → Tier 0 gate fails → Tier 1 fires. + const results = try explorer.searchContent("widgetX", testing.allocator, 5); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + + // The canonical file MUST appear in the result set. Pre-fix it does not: + // small files fill all 5 slots first under length-asc order, and the + // early-return at result_list.len >= max_results returns before the + // canonical file is ever read. + var found_canonical = false; + for (results) |r| { + if (std.mem.eql(u8, r.path, "canonical.zig")) { + found_canonical = true; + break; + } + } + try testing.expect(found_canonical); +} + + +test "issue-429-a: searchContent rerank boosts files whose basename matches the query" { + // Two files, same hit count, same content length. The current rerank + // (explore.zig:1700-1712) sorts ties by path-asc, so a file named + // "unrelated.zig" outranks "widgetX.zig" even though the latter's + // basename matches the query exactly. The basename match is a strong + // intent signal — the developer is asking about that file's subject. 
+ var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("src/unrelated.zig", "pub fn process() void { _ = widgetX; }\n"); + try explorer.indexFile("src/widgetX.zig", "pub fn process() void { _ = widgetX; }\n"); + + const results = try explorer.searchContent("widgetX", testing.allocator, 10); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + try testing.expect(results.len >= 2); + try testing.expectEqualStrings("src/widgetX.zig", results[0].path); +} + + +test "issue-429-b: searchContent rerank penalizes test/vendor/examples paths" { + // Two files, same hit count, same content. Pre-fix the path-asc + // tiebreaker promotes "examples/sample.zig" (e < s) above + // "src/sample.zig". Post-fix path priors push code roots above + // example/test/vendor directories so the source-of-truth lands first. + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("examples/sample.zig", "pub fn x() void { _ = someTerm; }\n"); + try explorer.indexFile("src/sample.zig", "pub fn x() void { _ = someTerm; }\n"); + + const results = try explorer.searchContent("someTerm", testing.allocator, 10); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + try testing.expect(results.len >= 2); + try testing.expectEqualStrings("src/sample.zig", results[0].path); +} + + +test "issue-429-c: searchContent rerank boosts lines that are symbol definitions" { + // Two files. "aaa.zig" has a passing comment mention of `fooSym`. The + // alphabetically-later "zzz_def.zig" has the actual definition. 
Both + // tie on per-line occurrence count. Pre-fix the path-asc tiebreaker + // promotes the comment mention ("aaa" < "zzz"). Post-fix the rerank + // recognises that the line in zzz_def.zig is a symbol definition and + // ranks it first. + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("aaa.zig", "// fooSym is referenced here in a comment\n"); + try explorer.indexFile("zzz_def.zig", "pub fn fooSym() void {}\n"); + + const results = try explorer.searchContent("fooSym", testing.allocator, 10); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + try testing.expect(results.len >= 2); + try testing.expectEqualStrings("zzz_def.zig", results[0].path); +} + + +test "issue-430: Tier 0 markdown dominance starves canonical source file" { + // Tier 0 of searchContent (explore.zig:1525-1554) iterates the word + // index posting list in insertion order with a per-file cap of + // max(1, max_results/5). When a handful of markdown documents + // (CHANGELOG.md, benchmarks/*.md, design docs) each mention the query + // many times AND happen to appear earlier in the posting list than the + // canonical source file, they saturate result_list before the source + // file is reached. The existing #363a fix asserted *presence* with a + // small corpus; this is the high-density regime where presence still + // fails because Tier 0 hits max_results before the source file's + // posting-list entries are processed. + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + // 5 markdown files each with 10 mentions of fooBar — indexed FIRST so + // they land at the head of the posting list. 
With max_results=50 and + // per-file cap=10, these 5 files alone fill all 50 slots. + const md_block = "fooBar mentioned here.\nfooBar mentioned here.\n" ++ + "fooBar mentioned here.\nfooBar mentioned here.\n" ++ + "fooBar mentioned here.\nfooBar mentioned here.\n" ++ + "fooBar mentioned here.\nfooBar mentioned here.\n" ++ + "fooBar mentioned here.\nfooBar mentioned here.\n"; + var i: usize = 0; + while (i < 5) : (i += 1) { + var path_buf: [64]u8 = undefined; + const path = try std.fmt.bufPrint(&path_buf, "docs/notes_{d}.md", .{i}); + try explorer.indexFile(path, md_block); + } + + // Source file with the canonical definition + several real call sites, + // indexed LAST so its posting-list entries come after the markdown noise. + try explorer.indexFile("src/foo.zig", + "pub fn fooBar() void {}\n" ++ + "pub fn caller1() void { fooBar(); }\n" ++ + "pub fn caller2() void { fooBar(); }\n" ++ + "pub fn caller3() void { fooBar(); }\n"); + + const results = try explorer.searchContent("fooBar", testing.allocator, 50); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + + var found_source = false; + for (results) |r| { + if (std.mem.eql(u8, r.path, "src/foo.zig")) { + found_source = true; + break; + } + } + // The canonical source file MUST appear in the results. Pre-fix it does + // not: 5 markdown files × 10 hits = 50 entries fill result_list before + // the source file is reached, then Tier 0 returns at max_results. + try testing.expect(found_source); +} + + +test "issue-431: searchContent does not crash when query is longer than content" { + // searchInContent (explore.zig:3881) computes + // const end = content.len - query.len + 1; + // without checking that query.len <= content.len. When the query is + // longer than the file content, the subtraction underflows in usize + // and the binary panics with integer overflow (or aborts with SIGBUS + // in ReleaseFast). 
Reproducer: index a tiny file, search for a query + // longer than the file's content. + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("a.zig", "fn x() void {}\n"); + + var q_buf: [256]u8 = undefined; + @memset(&q_buf, 'a'); + const q = q_buf[0..256]; + + const results = try explorer.searchContent(q, testing.allocator, 5); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + try testing.expect(results.len == 0); +} + + +test "issue-429-d: searchContent rerank boosts path-segment match" { + // Two files, same hit count, same content. The query "parser" appears + // as a directory segment of one path. Pre-fix the alphabetic tiebreak + // promotes "src/handlers/foo.zig" (h < p). Post-fix the path-segment + // match boost surfaces "src/parser/foo.zig" first. + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("src/handlers/foo.zig", "// parser is mentioned here\n"); + try explorer.indexFile("src/parser/foo.zig", "// parser is mentioned here\n"); + + const results = try explorer.searchContent("parser", testing.allocator, 10); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + try testing.expect(results.len >= 2); + try testing.expectEqualStrings("src/parser/foo.zig", results[0].path); +} + + +test "issue-429-e: searchContent rerank penalises doc-language files so code beats markdown noise" { + // CHANGELOG.md and benchmark docs often mention an identifier many times + // in a single line, which under per-line frequency outscores any single + // code call site. 
The reranker now halves doc-language scores so a code + // call site with one occurrence still wins. + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + // Doc file with the identifier mentioned four times on one line — + // pre-fix this scores 4 on per-line frequency. + try explorer.indexFile( + "CHANGELOG.md", + "# Changelog\n\nfooBar — fooBar fooBar fooBar in the changelog.\n", + ); + // Code call site with the identifier mentioned once. + try explorer.indexFile( + "src/caller.zig", + "pub fn caller() void {\n fooBar();\n}\n", + ); + + const results = try explorer.searchContent("fooBar", testing.allocator, 10); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + try testing.expect(results.len >= 2); + try testing.expectEqualStrings("src/caller.zig", results[0].path); +} + + +test "issue-448-a: rerank boosts basename when query contains stem" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("src/aaa.zig", "// Explorer is mentioned here\n"); + try explorer.indexFile("src/explore.zig", "// Explorer is mentioned here\n"); + + const results = try explorer.searchContent("Explorer", testing.allocator, 10); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + + try testing.expect(results.len >= 2); + try testing.expectEqualStrings("src/explore.zig", results[0].path); +} + + +test "issue-448-b: rerank symbol definition boost is case-insensitive" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), 
Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("aaa.zig", "// store is mentioned here\n"); + try explorer.indexFile("zzz.zig", "pub const Store = struct {};\n"); + + const results = try explorer.searchContent("store", testing.allocator, 10); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + + try testing.expect(results.len >= 2); + try testing.expectEqualStrings("zzz.zig", results[0].path); +} + + +test "issue-449: popular markdown should not disable Tier 0 code-first behavior" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + const md_block = + "fooBar mentioned here.\n" ++ + "fooBar mentioned here.\n" ++ + "fooBar mentioned here.\n" ++ + "fooBar mentioned here.\n" ++ + "fooBar mentioned here.\n"; + + var i: usize = 0; + while (i < 10) : (i += 1) { + var path_buf: [64]u8 = undefined; + const path = try std.fmt.bufPrint(&path_buf, "docs/notes_{d}.md", .{i}); + try explorer.indexFile(path, md_block); + } + + try explorer.indexFile("src/foo.zig", + "pub fn fooBar() void {}\n" ++ + "pub fn caller1() void { fooBar(); }\n" ++ + "pub fn caller2() void { fooBar(); }\n" ++ + "pub fn caller3() void { fooBar(); }\n"); + + const results = try explorer.searchContent("fooBar", testing.allocator, 10); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + + var found_source = false; + for (results) |r| { + if (std.mem.eql(u8, r.path, "src/foo.zig")) found_source = true; + } + try testing.expect(found_source); +} + + +test "issue-450: prefix tier respects max_results" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), 
Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("a.zig", "const abcx = 1;\n"); + try explorer.indexFile("b.zig", "const abcy = 1;\n"); + try explorer.indexFile("c.zig", "const zzabczz = 1;\n"); + + const results = try explorer.searchContent("abc", testing.allocator, 2); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + + try testing.expect(results.len <= 2); +} + + +test "rerank-trace: appends one JSON line per searchContent when enabled" { + const tmp_io = testing.io; + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const tmp_path_len = try tmp.dir.realPathFile(tmp_io, ".", &path_buf); + const tmp_path = path_buf[0..tmp_path_len]; + + const trace_path = try std.fmt.allocPrint(testing.allocator, "{s}/rerank-traces.jsonl", .{tmp_path}); + defer testing.allocator.free(trace_path); + + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + explorer.io = tmp_io; + explorer.rerank_trace_path = trace_path; + + try explorer.indexFile("src/widgetX.zig", "pub fn process() void { _ = widgetX; }\n"); + try explorer.indexFile("src/unrelated.zig", "pub fn process() void { _ = widgetX; }\n"); + + const results = try explorer.searchContent("widgetX", testing.allocator, 10); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + try testing.expect(results.len >= 2); + + const f = try std.Io.Dir.cwd().openFile(tmp_io, trace_path, .{}); + defer f.close(tmp_io); + const size = try f.length(tmp_io); + try testing.expect(size > 0); + + const data = try testing.allocator.alloc(u8, @intCast(size)); + defer testing.allocator.free(data); + _ = try f.readPositionalAll(tmp_io, data, 0); + + 
try testing.expectEqual(@as(u8, '\n'), data[data.len - 1]); + var nl_count: usize = 0; + for (data) |c| if (c == '\n') { + nl_count += 1; + }; + try testing.expectEqual(@as(usize, 1), nl_count); + + try testing.expect(std.mem.indexOf(u8, data, "\"query\":\"widgetX\"") != null); + try testing.expect(std.mem.indexOf(u8, data, "src/widgetX.zig") != null); + try testing.expect(std.mem.indexOf(u8, data, "\"results\":[") != null); +} + + +test "rerank-trace: disabled by default — no file is created" { + const tmp_io = testing.io; + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const tmp_path_len = try tmp.dir.realPathFile(tmp_io, ".", &path_buf); + const tmp_path = path_buf[0..tmp_path_len]; + + const probe_path = try std.fmt.allocPrint(testing.allocator, "{s}/should-not-exist.jsonl", .{tmp_path}); + defer testing.allocator.free(probe_path); + + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + explorer.io = tmp_io; + // rerank_trace_path stays null — opt-in only. 
+ + try explorer.indexFile("a.zig", "pub fn t() void { _ = sym; }\n"); + try explorer.indexFile("b.zig", "pub fn t() void { _ = sym; }\n"); + + const results = try explorer.searchContent("sym", testing.allocator, 10); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + try testing.expect(results.len >= 1); + + const open_err = std.Io.Dir.cwd().openFile(tmp_io, probe_path, .{}); + try testing.expectError(error.FileNotFound, open_err); +} + + +test "rerank-trace: clobbers when file exceeds size limit" { + const tmp_io = testing.io; + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const tmp_path_len = try tmp.dir.realPathFile(tmp_io, ".", &path_buf); + const tmp_path = path_buf[0..tmp_path_len]; + + const trace_path = try std.fmt.allocPrint(testing.allocator, "{s}/big.jsonl", .{tmp_path}); + defer testing.allocator.free(trace_path); + + { + const f = try std.Io.Dir.cwd().createFile(tmp_io, trace_path, .{ .truncate = true }); + defer f.close(tmp_io); + const target_size: u64 = 11 * 1024 * 1024; + var chunk: [4096]u8 = undefined; + @memset(&chunk, 'x'); + var written: u64 = 0; + while (written < target_size) : (written += chunk.len) { + try f.writePositionalAll(tmp_io, &chunk, written); + } + } + + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + explorer.io = tmp_io; + explorer.rerank_trace_path = trace_path; + + try explorer.indexFile("a.zig", "pub fn t() void { _ = sym; }\n"); + try explorer.indexFile("b.zig", "pub fn t() void { _ = sym; }\n"); + + const results = try explorer.searchContent("sym", testing.allocator, 10); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + + const f 
= try std.Io.Dir.cwd().openFile(tmp_io, trace_path, .{}); + defer f.close(tmp_io); + const new_size = try f.length(tmp_io); + try testing.expect(new_size > 0); + try testing.expect(new_size < 16 * 1024); +} + + +test "rerank-trace: single-result query records non-zero rerank score" { + // Pre-fix: rerankAndFinalize only scored when items.len > 1, so a + // single-result trace logged score=0.0 — misleading for offline analysis + // because it looked identical to a zero-confidence match. The fix runs + // scoring unconditionally and only sorts when there's more than one item. + const tmp_io = testing.io; + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const tmp_path_len = try tmp.dir.realPathFile(tmp_io, ".", &path_buf); + const tmp_path = path_buf[0..tmp_path_len]; + + const trace_path = try std.fmt.allocPrint(testing.allocator, "{s}/single.jsonl", .{tmp_path}); + defer testing.allocator.free(trace_path); + + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + explorer.io = tmp_io; + explorer.rerank_trace_path = trace_path; + + // Only one file mentions the query — guarantees results.len == 1. + try explorer.indexFile("src/loneSym.zig", "pub fn loneSym() void {}\n"); + try explorer.indexFile("src/other.zig", "pub fn unrelated() void {}\n"); + + const results = try explorer.searchContent("loneSym", testing.allocator, 10); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + try testing.expectEqual(@as(usize, 1), results.len); + // Symbol-def boost (+5) + basename-substring boost (+8) + per-line freq + // means score is well above zero — verifies scoring actually ran. 
+ try testing.expect(results[0].score > 1.0); + + const f = try std.Io.Dir.cwd().openFile(tmp_io, trace_path, .{}); + defer f.close(tmp_io); + const size = try f.length(tmp_io); + const data = try testing.allocator.alloc(u8, @intCast(size)); + defer testing.allocator.free(data); + _ = try f.readPositionalAll(tmp_io, data, 0); + + try testing.expect(std.mem.indexOf(u8, data, "\"score\":0.0000") == null); + try testing.expect(std.mem.indexOf(u8, data, "src/loneSym.zig") != null); +} + + +test "issue-negq: negative-query search short-circuits Tier 5 full scan" { + // When a query contains trigrams that no indexed file contains (a + // definitively-negative query), searchContent should return [] without + // running the Tier 5 full-scan fallback. On the buggy path Tier 5 fires + // anyway, scanning every outline — measurable as 100ms+ p50 on real + // codebases (see benchmarks/search-shootout, react corpus). + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + // Index enough files that Tier 5 would be observably wasteful if it ran. + var i: usize = 0; + while (i < 50) : (i += 1) { + var buf: [32]u8 = undefined; + const path = try std.fmt.bufPrint(&buf, "file_{d}.zig", .{i}); + try explorer.indexFile(path, "fn process() void { _ = thing; }\n"); + } + + // 'zzqqxxnopematch' — trigrams 'zzq','zqq','qqx',... none of which appear + // in any indexed file. The trigram index can definitively rule this out + // without any content scan. + const results = try explorer.searchContent("zzqqxxnopematch", testing.allocator, 10); + defer { + for (results) |r| { + testing.allocator.free(r.line_text); + testing.allocator.free(r.path); + } + testing.allocator.free(results); + } + + try testing.expectEqual(@as(usize, 0), results.len); + // The fix: Tier 5 must NOT fire when the trigram index has already + // ruled out a match. 
On main this expectation fails (count == 1). + try testing.expectEqual(@as(u64, 0), explorer.search_tier5_count); +} + + +test "issue-471a: codedb_find accepts query/name/path/pattern/q aliases" { + // Real-user telemetry (24h) showed 71% of codedb_find calls failing with + // "missing 'query'" because agents passed the search term under `name`, + // `path`, `pattern`, or `q` (misled by the "FILE-NAME search" framing in + // the tool description). Regression: every common alias must succeed. + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + try explorer.indexFile("src/main.zig", "pub fn main() void {}\n"); + try explorer.indexFile("src/auth_middleware.go", "package auth\n"); + + var store = Store.init(testing.allocator); + defer store.deinit(); + + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + const aliases = [_][]const u8{ "query", "name", "path", "pattern", "q" }; + for (aliases) |key| { + const bundle_json = try std.fmt.allocPrint( + testing.allocator, + "{{\"ops\":[{{\"tool\":\"codedb_find\",\"arguments\":{{\"{s}\":\"main\"}}}}]}}", + .{key}, + ); + defer testing.allocator.free(bundle_json); + + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, bundle_json, .{}); + defer parsed.deinit(); + + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_bundle, &parsed.value.object, &out, &store, &explorer, &agents); + + // Every alias must succeed: no "missing" error, and the matching + // file must appear in the response. 
+ if (std.mem.indexOf(u8, out.items, "missing 'query'") != null) { + std.debug.print("alias '{s}' failed with: {s}\n", .{ key, out.items }); + return error.AliasRejected; + } + try testing.expect(std.mem.indexOf(u8, out.items, "main.zig") != null); + } +} + + +test "issue-471b: codedb_find error message enumerates accepted aliases" { + // If an agent calls codedb_find with no recognized key, the error message + // must enumerate the accepted aliases so the agent can self-correct on + // the next call instead of repeating the same broken call. + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + try explorer.indexFile("src/main.zig", "pub fn main() void {}\n"); + + var store = Store.init(testing.allocator); + defer store.deinit(); + + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + const bundle_json = + \\{"ops":[{"tool":"codedb_find","arguments":{"bogus":"main"}}]} + ; + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, bundle_json, .{}); + defer parsed.deinit(); + + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_bundle, &parsed.value.object, &out, &store, &explorer, &agents); + + // Error must enumerate the alias list so the agent can self-correct. 
+ try testing.expect(std.mem.indexOf(u8, out.items, "missing 'query'") != null); + try testing.expect(std.mem.indexOf(u8, out.items, "name") != null); + try testing.expect(std.mem.indexOf(u8, out.items, "path") != null); + try testing.expect(std.mem.indexOf(u8, out.items, "pattern") != null); +} + diff --git a/src/test_snapshot.zig b/src/test_snapshot.zig new file mode 100644 index 0000000..e12b22a --- /dev/null +++ b/src/test_snapshot.zig @@ -0,0 +1,714 @@ +const std = @import("std"); +const cio = @import("cio.zig"); +const testing = std.testing; +const io = std.testing.io; +const Store = @import("store.zig").Store; +const Explorer = @import("explore.zig").Explorer; +const explore = @import("explore.zig"); +const Language = explore.Language; +const SymbolKind = explore.SymbolKind; +const WordIndex = @import("index.zig").WordIndex; +const TrigramIndex = @import("index.zig").TrigramIndex; +const SparseNgramIndex = @import("index.zig").SparseNgramIndex; +const snapshot_mod = @import("snapshot.zig"); +const snapshot_json = @import("snapshot_json.zig"); +const watcher = @import("watcher.zig"); +const git_mod = @import("git.zig"); +const AgentRegistry = @import("agent.zig").AgentRegistry; +const edit_mod = @import("edit.zig"); + + +test "issue-35: edits immediately update explorer and snapshot output" { + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + + const rel_path = try std.fmt.allocPrint(testing.allocator, ".zig-cache/tmp/{s}/edit-live-sync.zig", .{tmp.sub_path}); + defer testing.allocator.free(rel_path); + + var file = try tmp.dir.createFile(io, "edit-live-sync.zig", .{}); + defer file.close(io); + try file.writeStreamingAll(io, "pub fn oldName() void {}\n"); + + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var explorer = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + try explorer.indexFile(rel_path, "pub fn oldName() void {}\n"); + + var store = Store.init(testing.allocator); + defer 
store.deinit(); + _ = try store.recordSnapshot(rel_path, "pub fn oldName() void {}\n".len, std.hash.Wyhash.hash(0, "pub fn oldName() void {}\n")); + + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + const agent_id = try agents.register("issue-35-agent"); + + const before_snap = try snapshot_json.buildSnapshot(&explorer, &store, testing.allocator); + defer testing.allocator.free(before_snap); + try testing.expect(std.mem.indexOf(u8, before_snap, "oldName") != null); + + _ = try edit_mod.applyEdit(io, testing.allocator, &store, &agents, &explorer, .{ + .path = rel_path, + .agent_id = agent_id, + .op = .replace, + .range = .{ 1, 1 }, + .content = "pub fn newName() void {}", + }); + + const new_results = try explorer.searchContent("newName", testing.allocator, 10); + defer { + for (new_results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(new_results); + } + try testing.expect(new_results.len == 1); + + const old_results = try explorer.searchContent("oldName", testing.allocator, 10); + defer { + for (old_results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(old_results); + } + try testing.expect(old_results.len == 0); + + const after_snap = try snapshot_json.buildSnapshot(&explorer, &store, testing.allocator); + defer testing.allocator.free(after_snap); + try testing.expect(std.mem.indexOf(u8, after_snap, "newName") != null); + try testing.expect(std.mem.indexOf(u8, after_snap, "oldName") == null); +} + + +test "snapshot_json: snapshot builds and is valid JSON" { + // Explorer uses arena for internal data + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var explorer = Explorer.init(alloc, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + try explorer.indexFile("src/main.zig", "pub fn main() void {}"); + try explorer.indexFile("src/lib.zig", "pub
const version = 1;"); + + var store = Store.init(alloc); + defer store.deinit(); + _ = try store.recordSnapshot("src/main.zig", 100, 0xABC); + + const snap = try snapshot_json.buildSnapshot(&explorer, &store, testing.allocator); + defer testing.allocator.free(snap); + + // Must be valid JSON + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, snap, .{}); + defer parsed.deinit(); + + // Must have expected top-level keys (matches buildSnapshot output) + try testing.expect(parsed.value.object.contains("seq")); + try testing.expect(parsed.value.object.contains("tree")); + try testing.expect(parsed.value.object.contains("outlines")); + try testing.expect(parsed.value.object.contains("symbol_index")); + try testing.expect(parsed.value.object.contains("dep_graph")); + + const tree = parsed.value.object.get("tree").?.string; + try testing.expect(std.mem.indexOf(u8, tree, "src/") != null); + try testing.expect(std.mem.indexOf(u8, tree, "main.zig") != null); + + const symbol_index = parsed.value.object.get("symbol_index").?.object; + try testing.expect(symbol_index.contains("main")); + try testing.expect(symbol_index.contains("version")); +} + + +test "issue-44: snapshot stale after working tree changes cause stale query results" { + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp.dir.realPathFile(io, ".", &path_buf); + const dir_path = path_buf[0..dir_path_len]; + + const snap_path = try std.fmt.allocPrint(testing.allocator, "{s}/test.snapshot", .{dir_path}); + defer testing.allocator.free(snap_path); + const file_abs = try std.fmt.allocPrint(testing.allocator, "{s}/stale.zig", .{dir_path}); + defer testing.allocator.free(file_abs); + + // Step 1: write file with old content, index it, write snapshot.
+ try tmp.dir.writeFile(io, .{ .sub_path = "stale.zig", .data = "pub fn oldFunc() void {}" }); + { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var exp = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + try exp.indexFile(file_abs, "pub fn oldFunc() void {}"); + try snapshot_mod.writeSnapshot(io, &exp, ".", snap_path, arena.allocator()); + } + + // Step 2: modify file AFTER snapshot creation (simulating uncommitted working tree change). + // Sleep 10ms so the file mtime is strictly greater than the snapshot's indexed_at timestamp. + cio.sleepMs(10); + try tmp.dir.writeFile(io, .{ .sub_path = "stale.zig", .data = "pub fn newFunc() void {}" }); + + // Step 3: load snapshot into a fresh explorer (what MCP startup does). + // scan_done is set to true immediately; watcher then builds known-FileMap + // from current disk mtimes, recording the already-modified file's mtime as + // the baseline. It will never be re-indexed unless changed a second time. + var arena2 = std.heap.ArenaAllocator.init(testing.allocator); + defer arena2.deinit(); + var exp2 = Explorer.init(arena2.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + var store2 = Store.init(testing.allocator); + defer store2.deinit(); + + const loaded = snapshot_mod.loadSnapshot(io, snap_path, &exp2, &store2, arena2.allocator()); + try testing.expect(loaded); + + // Step 4: after the fix, loadSnapshot should detect that the disk file's + // mtime > snapshot indexed_at and re-index it from disk, making "newFunc" + // visible. Currently no such path exists. + // Expected (after fix): results.len == 1 + // Current (bug): results.len == 0 — stale snapshot content is never evicted. 
+ const results = try exp2.searchContent("newFunc", testing.allocator, 10); + defer { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); + } + try testing.expect(results.len == 1); +} + + +test "issue-46: empty-repo snapshot rejected on load" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var exp = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp.dir.realPathFile(io, ".", &path_buf); + const dir_path = path_buf[0..dir_path_len]; + + const snap_path = try std.fmt.allocPrint(testing.allocator, "{s}/test.codedb", .{dir_path}); + defer testing.allocator.free(snap_path); + + try snapshot_mod.writeSnapshot(io, &exp, dir_path, snap_path, testing.allocator); + + var arena2 = std.heap.ArenaAllocator.init(testing.allocator); + defer arena2.deinit(); + var exp2 = Explorer.init(arena2.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + var store = Store.init(testing.allocator); + defer store.deinit(); + + const loaded = snapshot_mod.loadSnapshot(io, snap_path, &exp2, &store, testing.allocator); + try testing.expect(!loaded); + try testing.expect(exp2.outlines.count() == 0); +} + + +test "issue-220: snapshot fast load restores outlines and lazily rebuilds word index" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const aa = arena.allocator(); + + var exp = Explorer.init(aa, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + try exp.indexFile("src/store.zig", "pub const Store = struct {};\n"); + try exp.indexFile("src/main.zig", "const Store = @import(\"store.zig\").Store;\npub fn main() void {}\n"); + + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try 
tmp.dir.realPathFile(io, ".", &path_buf); + const dir_path = path_buf[0..dir_path_len]; + + const snap_path = try std.fmt.allocPrint(testing.allocator, "{s}/fast.codedb", .{dir_path}); + defer testing.allocator.free(snap_path); + try snapshot_mod.writeSnapshot(io, &exp, dir_path, snap_path, testing.allocator); + + var arena2 = std.heap.ArenaAllocator.init(testing.allocator); + defer arena2.deinit(); + var exp2 = Explorer.init(arena2.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + var store = Store.init(testing.allocator); + defer store.deinit(); + + const loaded = snapshot_mod.loadSnapshot(io, snap_path, &exp2, &store, arena2.allocator()); + try testing.expect(loaded); + try testing.expectEqual(@as(usize, 2), exp2.outlines.count()); + try testing.expectEqual(@as(u32, 0), exp2.trigram_index.fileCount()); + try testing.expectEqual(@as(usize, 0), exp2.word_index.index.count()); + try testing.expect(exp2.wordIndexCanLoadFromDisk()); + try testing.expect(!exp2.wordIndexIsComplete()); + try testing.expect(!exp2.wordIndexNeedsPersist()); + + const deps = try exp2.getImportedBy("src/store.zig", testing.allocator); + defer { + for (deps) |dep| testing.allocator.free(dep); + testing.allocator.free(deps); + } + try testing.expectEqual(@as(usize, 1), deps.len); + try testing.expect(std.mem.eql(u8, deps[0], "src/main.zig")); + + const hits = try exp2.searchWord("Store", testing.allocator); + defer testing.allocator.free(hits); + try testing.expect(hits.len >= 1); + try testing.expect(exp2.word_index.index.count() > 0); + try testing.expect(exp2.wordIndexIsComplete()); + try testing.expect(exp2.wordIndexNeedsPersist()); +} + + +test "snapshot: writer streams uncached file contents for large repos" { + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + try tmp.dir.createDirPath(io, "src"); + + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp.dir.realPathFile(io, ".", &path_buf); + const dir_path = path_buf[0..dir_path_len]; + + var 
exp = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer exp.deinit(); + + var rel_buf: [64]u8 = undefined; + var content_buf: [128]u8 = undefined; + for (0..1002) |i| { + const rel = try std.fmt.bufPrint(&rel_buf, "src/file_{d}.zig", .{i}); + const content = try std.fmt.bufPrint(&content_buf, "pub fn func_{d}() usize {{ return {d}; }}\n", .{ i, i }); + try tmp.dir.writeFile(io, .{ .sub_path = rel, .data = content }); + try exp.indexFileOutlineOnly(rel, content); + } + + try testing.expectEqual(@as(usize, 1002), exp.outlines.count()); + // With CLOCK eviction (#208) the ContentCache holds up to 16384 entries — all 1002 fit. + try testing.expectEqual(@as(u32, 1002), exp.contents.count()); + + const snap_path = try std.fmt.allocPrint(testing.allocator, "{s}/large.codedb", .{dir_path}); + defer testing.allocator.free(snap_path); + try snapshot_mod.writeSnapshot(io, &exp, dir_path, snap_path, testing.allocator); + + var loaded_without_root = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer loaded_without_root.deinit(); + var store_without_root = Store.init(testing.allocator); + defer store_without_root.deinit(); + + try testing.expect(snapshot_mod.loadSnapshot(io, snap_path, &loaded_without_root, &store_without_root, testing.allocator)); + try testing.expectEqual(@as(usize, 1002), loaded_without_root.outlines.count()); + // CLOCK cache holds all 1002 — word index can be rebuilt from memory without root dir. 
+ const hits_no_root = try loaded_without_root.searchWord("func_1001", testing.allocator); + defer testing.allocator.free(hits_no_root); + try testing.expectEqual(@as(usize, 1), hits_no_root.len); + + var loaded = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + loaded.setRoot(io, dir_path); + defer loaded.deinit(); + var store = Store.init(testing.allocator); + defer store.deinit(); + + try testing.expect(snapshot_mod.loadSnapshot(io, snap_path, &loaded, &store, testing.allocator)); + try testing.expectEqual(@as(usize, 1002), loaded.outlines.count()); + + const hits = try loaded.searchWord("func_1001", testing.allocator); + defer testing.allocator.free(hits); + try testing.expectEqual(@as(usize, 1), hits.len); + try testing.expectEqualStrings("src/file_1001.zig", loaded.word_index.hitPath(hits[0])); + try testing.expect(loaded.wordIndexIsComplete()); +} + + +test "issue-220: partial word index state rebuilds before search" { + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp.dir.realPathFile(io, ".", &path_buf); + const dir_path = path_buf[0..dir_path_len]; + + var exp = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer exp.deinit(); + try exp.indexFile("src/a.zig", "pub const Alpha = 1;\n"); + try exp.indexFile("src/b.zig", "pub const Beta = 2;\n"); + + const snap_path = try std.fmt.allocPrint(testing.allocator, "{s}/partial.codedb", .{dir_path}); + defer testing.allocator.free(snap_path); + try snapshot_mod.writeSnapshot(io, &exp, dir_path, snap_path, testing.allocator); + + var exp2 = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer exp2.deinit(); + var store = Store.init(testing.allocator); + defer store.deinit(); + + try testing.expect(snapshot_mod.loadSnapshot(io, snap_path, &exp2, &store, testing.allocator)); + try testing.expect(exp2.wordIndexCanLoadFromDisk()); + try 
testing.expect(!exp2.wordIndexIsComplete()); + + try exp2.indexFileSkipTrigram("src/b.zig", "pub const Gamma = 3;\n"); + try testing.expect(!exp2.wordIndexCanLoadFromDisk()); + try testing.expect(!exp2.wordIndexIsComplete()); + + const alpha_hits = try exp2.searchWord("Alpha", testing.allocator); + defer testing.allocator.free(alpha_hits); + try testing.expectEqual(@as(usize, 1), alpha_hits.len); + try testing.expect(std.mem.eql(u8, exp2.word_index.hitPath(alpha_hits[0]), "src/a.zig")); + + const gamma_hits = try exp2.searchWord("Gamma", testing.allocator); + defer testing.allocator.free(gamma_hits); + try testing.expectEqual(@as(usize, 1), gamma_hits.len); + try testing.expect(std.mem.eql(u8, exp2.word_index.hitPath(gamma_hits[0]), "src/b.zig")); + try testing.expect(exp2.wordIndexIsComplete()); + try testing.expect(exp2.wordIndexNeedsPersist()); +} + + +test "issue-220: word index persistence tracking skips redundant rewrites" { + var exp = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer exp.deinit(); + + try exp.indexFile("src/a.zig", "pub const Alpha = 1;\n"); + try testing.expect(exp.wordIndexIsComplete()); + try testing.expect(exp.wordIndexNeedsPersist()); + + const first_gen = exp.wordIndexGenerationToPersist() orelse return error.TestUnexpectedResult; + exp.markWordIndexPersisted(first_gen); + try testing.expect(!exp.wordIndexNeedsPersist()); + try testing.expect(exp.wordIndexGenerationToPersist() == null); + + try exp.indexFile("src/a.zig", "pub const Beta = 2;\n"); + try testing.expect(exp.wordIndexNeedsPersist()); + + const second_gen = exp.wordIndexGenerationToPersist() orelse return error.TestUnexpectedResult; + try testing.expect(second_gen != first_gen); + exp.markWordIndexPersisted(first_gen); + try testing.expect(exp.wordIndexNeedsPersist()); + exp.markWordIndexPersisted(second_gen); + try testing.expect(!exp.wordIndexNeedsPersist()); +} + + +test "issue-45: snapshot written in non-git directory cannot be loaded" 
{ + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const aa = arena.allocator(); + + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp.dir.realPathFile(io, ".", &path_buf); + const dir_path = path_buf[0..dir_path_len]; + + var exp = Explorer.init(aa, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + try exp.indexFile("dummy.zig", "const x = 1;"); + + const snap_path = try std.fs.path.join(aa, &.{ dir_path, "test.codedb" }); + + // Write snapshot with a non-git root_path — git_head will be all-zeros + try snapshot_mod.writeSnapshot(io, &exp, "/tmp", snap_path, aa); + + // Snapshot file was created + std.Io.Dir.cwd().access(io, snap_path, .{}) catch { + return error.TestUnexpectedResult; + }; + + // readSnapshotGitHead returns null for non-git dirs (all-zero sentinel). + // The snapshot loading logic in main.zig handles this by checking if the + // current project also has no git — if so, it loads the snapshot. + const snap_head = snapshot_mod.readSnapshotGitHead(io, snap_path); + try testing.expect(snap_head == null); +} + + +test "issue-47: concurrent snapshot writes from parallel instances corrupt file" { + // BUG: Two codedb instances indexing the same repo write codedb.snapshot + // concurrently with no file locking. The second writer can overwrite a + // partially-written snapshot, producing a corrupt file that loadSnapshot + // rejects or — worse — reads garbage section offsets from. + // + // Simulate: two threads write snapshots to the same path concurrently, + // then verify the final file is still loadable. 
+ var arena1 = std.heap.ArenaAllocator.init(testing.allocator); + defer arena1.deinit(); + var arena2 = std.heap.ArenaAllocator.init(testing.allocator); + defer arena2.deinit(); + + var exp1 = Explorer.init(arena1.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + try exp1.indexFile("a.zig", "pub fn alpha() void {}"); + var exp2 = Explorer.init(arena2.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + try exp2.indexFile("b.zig", "pub fn beta() void {}"); + + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp.dir.realPathFile(io, ".", &path_buf); + const dir_path = path_buf[0..dir_path_len]; + const snap_path = try std.fmt.allocPrint(testing.allocator, "{s}/concurrent.codedb", .{dir_path}); + defer testing.allocator.free(snap_path); + + const WriterCtx = struct { + exp: *Explorer, + path: []const u8, + dir: []const u8, + alloc: std.mem.Allocator, + failed: std.atomic.Value(bool) = std.atomic.Value(bool).init(false), + + fn run(ctx: *@This()) void { + for (0..10) |_| { + snapshot_mod.writeSnapshot(io, ctx.exp, ctx.dir, ctx.path, ctx.alloc) catch { + ctx.failed.store(true, .release); + return; + }; + } + } + }; + + var ctx1 = WriterCtx{ .exp = &exp1, .path = snap_path, .dir = dir_path, .alloc = arena1.allocator() }; + var ctx2 = WriterCtx{ .exp = &exp2, .path = snap_path, .dir = dir_path, .alloc = arena2.allocator() }; + + const t1 = try std.Thread.spawn(.{}, WriterCtx.run, .{&ctx1}); + const t2 = try std.Thread.spawn(.{}, WriterCtx.run, .{&ctx2}); + t1.join(); + t2.join(); + + // Neither writer should have errored + try testing.expect(!ctx1.failed.load(.acquire)); + try testing.expect(!ctx2.failed.load(.acquire)); + + // The final snapshot must be loadable (not corrupt) + var arena3 = std.heap.ArenaAllocator.init(testing.allocator); + defer arena3.deinit(); + var exp3 = Explorer.init(arena3.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + var store3 = 
Store.init(testing.allocator); + defer store3.deinit(); + const loaded = snapshot_mod.loadSnapshot(io, snap_path, &exp3, &store3, arena3.allocator()); + + // Expected: loaded == true (snapshot is valid, written atomically) + // Current (bug): may be false — last writer's rename can land mid-write of + // the first writer's tmp file, or both rename the same .tmp path. + try testing.expect(loaded); +} + + +test "issue-42: scan thread is joined before allocator-backed state is freed" { + var gpa = std.heap.DebugAllocator(.{}){}; + const allocator = gpa.allocator(); + + const data_dir = try allocator.dupe(u8, "/tmp/codedb_test_issue42"); + + const SharedCtx = struct { + data_dir: []const u8, + done: std.atomic.Value(bool) = std.atomic.Value(bool).init(false), + ok: std.atomic.Value(bool) = std.atomic.Value(bool).init(false), + + fn run(ctx: *@This()) void { + cio.sleepMs(10); + if (ctx.data_dir.len > 0) { + _ = ctx.data_dir[0]; + ctx.ok.store(true, .release); + } + ctx.done.store(true, .release); + } + }; + + var ctx = SharedCtx{ .data_dir = data_dir }; + const t = try std.Thread.spawn(.{}, SharedCtx.run, .{&ctx}); + t.join(); + + try testing.expect(ctx.done.load(.acquire)); + try testing.expect(ctx.ok.load(.acquire)); + allocator.free(data_dir); + try testing.expect(gpa.deinit() == .ok); // assert the leak check instead of discarding its result +} + + +test "issue-40: truncated snapshot silently loads partial data" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var exp = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try exp.indexFile("src/a.zig", "const a = 1;\n"); + try exp.indexFile("src/b.zig", "const b = 2;\n"); + try exp.indexFile("src/c.zig", "const c = 3;\n"); + + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp.dir.realPathFile(io, ".", &path_buf); + const dir_path = path_buf[0..dir_path_len]; + + const snap_path = try std.fmt.allocPrint(testing.allocator, "{s}/test.codedb",
.{dir_path}); + defer testing.allocator.free(snap_path); + + try snapshot_mod.writeSnapshot(io, &exp, dir_path, snap_path, testing.allocator); + + const trunc_path = try std.fmt.allocPrint(testing.allocator, "{s}/trunc.codedb", .{dir_path}); + defer testing.allocator.free(trunc_path); + { + const orig = try std.Io.Dir.cwd().readFileAlloc(io, snap_path, testing.allocator, .limited(1024 * 1024)); + defer testing.allocator.free(orig); + const trunc_file = try std.Io.Dir.cwd().createFile(io, trunc_path, .{}); + defer trunc_file.close(io); + // Keep only header (256 bytes) — content section data will be missing + try trunc_file.writeStreamingAll(io, orig[0..@min(256, orig.len)]); + } + + var arena2 = std.heap.ArenaAllocator.init(std.heap.page_allocator); + defer arena2.deinit(); + var exp2 = Explorer.init(arena2.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + var store = Store.init(arena2.allocator()); + + const loaded = snapshot_mod.loadSnapshot(io, trunc_path, &exp2, &store, arena2.allocator()); + try testing.expect(!loaded); +} + + +test "issue-41: snapshot not validated against repo identity allows cross-project loading" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var exp = Explorer.init(arena.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try exp.indexFile("src/projectA.zig", "const project = \"A\";\n"); + + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp.dir.realPathFile(io, ".", &path_buf); + const dir_path = path_buf[0..dir_path_len]; + + const snap_path = try std.fmt.allocPrint(testing.allocator, "{s}/test.codedb", .{dir_path}); + defer testing.allocator.free(snap_path); + + try snapshot_mod.writeSnapshot(io, &exp, dir_path, snap_path, testing.allocator); + + var arena2 = std.heap.ArenaAllocator.init(testing.allocator); + defer arena2.deinit(); + var exp2 = Explorer.init(arena2.allocator(), 
Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + var store = Store.init(testing.allocator); + defer store.deinit(); + + const loaded = snapshot_mod.loadSnapshotValidated(io, snap_path, "/some/other/project", &exp2, &store, testing.allocator); + try testing.expect(!loaded); +} + + +test "snapshot: symbol detail longer than 4096 bytes survives round-trip" { + // Regression for readSectionString rejecting names/details > 4096 bytes. + // Before the fix max_len was 4096; any detail longer than that triggered + // error.InvalidData and loadSnapshot returned false. + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const aa = arena.allocator(); + + // Build a Zig source whose first function line exceeds 4 096 characters. + var src: std.ArrayList(u8) = .empty; + defer src.deinit(testing.allocator); + try src.appendSlice(testing.allocator, "pub fn bigSig("); + var param_i: usize = 0; + while (src.items.len < 5000) : (param_i += 1) { + var pb: [20]u8 = undefined; + const ps = std.fmt.bufPrint(&pb, "p{d}: u8, ", .{param_i}) catch break; + try src.appendSlice(testing.allocator, ps); + } + try src.appendSlice(testing.allocator, ") void {}\n"); + try testing.expect(src.items.len > 4096); // guard: ensure we actually generated a long line + var exp = Explorer.init(aa, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + try exp.indexFile("src/big.zig", src.items); + + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp.dir.realPathFile(io, ".", &path_buf); + const dir_path = path_buf[0..dir_path_len]; + const snap_path = try std.fmt.allocPrint(testing.allocator, "{s}/big.codedb", .{dir_path}); + defer testing.allocator.free(snap_path); + try snapshot_mod.writeSnapshot(io, &exp, dir_path, snap_path, testing.allocator); + + var exp2 = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer exp2.deinit(); + var store2 = 
Store.init(testing.allocator); + defer store2.deinit(); + + const loaded = snapshot_mod.loadSnapshot(io, snap_path, &exp2, &store2, testing.allocator); + try testing.expect(loaded); // must survive long detail + + var sym_arena = std.heap.ArenaAllocator.init(testing.allocator); + defer sym_arena.deinit(); + const results = try exp2.findAllSymbols("bigSig", sym_arena.allocator()); + try testing.expect(results.len >= 1); +} + + +test "snapshot: corrupted OUTLINE_STATE section falls back to CONTENT load" { + // Regression for the codedb 0.2.56 writer u16 overflow bug: when OUTLINE_STATE + // contains a detail that overflows u16 the section cursor de-syncs, making + // subsequent file records parse as garbage and loadOutlineStateMap throws. + // The catch fallback must produce an empty map so loadSnapshotFast falls + // through to indexFileOutlineOnly for every file in CONTENT. + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const aa = arena.allocator(); + + var exp = Explorer.init(aa, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + try exp.indexFile("src/a.zig", "pub fn aFunc() void {}\n"); + try exp.indexFile("src/b.zig", "pub fn bFunc() void {}\n"); + + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp.dir.realPathFile(io, ".", &path_buf); + const dir_path = path_buf[0..dir_path_len]; + const snap_path = try std.fmt.allocPrint(testing.allocator, "{s}/corrupt.codedb", .{dir_path}); + defer testing.allocator.free(snap_path); + try snapshot_mod.writeSnapshot(io, &exp, dir_path, snap_path, testing.allocator); + + // Overwrite the first 16 bytes of OUTLINE_STATE data with 0xFF. + // This makes the file_count field read as 0xFFFFFFFF — far more records + // than the data contains — causing readSectionString to eventually fail + // with error.InvalidData (runs off the end of the bytes slice).
+ { + var sections = (try snapshot_mod.readSections(io, snap_path, testing.allocator)) orelse return error.TestUnexpectedResult; // a null result fails this test instead of panicking on `.?` + defer sections.deinit(); + const ols = sections.get(@intFromEnum(snapshot_mod.SectionId.outline_state)) orelse return error.SkipZigTest; // no section to corrupt: report a skip rather than a vacuous pass + const f = try std.Io.Dir.cwd().openFile(io, snap_path, .{ .mode = .read_write }); + defer f.close(io); + try f.writePositionalAll(io, &([_]u8{0xFF} ** 16), ols.offset); + } + + var exp2 = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer exp2.deinit(); + var store2 = Store.init(testing.allocator); + defer store2.deinit(); + + const loaded = snapshot_mod.loadSnapshot(io, snap_path, &exp2, &store2, testing.allocator); + try testing.expect(loaded); // must survive OUTLINE_STATE corruption + + // Symbols must still be found — re-indexed from CONTENT + var sym_arena = std.heap.ArenaAllocator.init(testing.allocator); + defer sym_arena.deinit(); + const results = try exp2.findAllSymbols("aFunc", sym_arena.allocator()); + try testing.expect(results.len >= 1); +} + + +test "issue-379: snapshot loader returns true with zero outlines for empty-explorer snapshot" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const aa = arena.allocator(); + + var exp = Explorer.init(aa, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp.dir.realPathFile(io, ".", &path_buf); + const dir_path = path_buf[0..dir_path_len]; + const snap_path = try std.fmt.allocPrint(testing.allocator, "{s}/empty.codedb", .{dir_path}); + defer testing.allocator.free(snap_path); + + try snapshot_mod.writeSnapshot(io, &exp, dir_path, snap_path, testing.allocator); + + var exp2 = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer exp2.deinit(); + var store2 = Store.init(testing.allocator); + defer store2.deinit(); + + const loaded = snapshot_mod.loadSnapshot(io,
snap_path, &exp2, &store2, testing.allocator); + if (loaded) { + try testing.expect(exp2.outlines.count() > 0); + } +} + diff --git a/src/watcher.zig b/src/watcher.zig index c1aa6b6..1633380 100644 --- a/src/watcher.zig +++ b/src/watcher.zig @@ -1145,7 +1145,7 @@ pub fn isSensitivePath(path: []const u8) bool { } // .env, .env.; do NOT match .envoy, .envrc, .environment, etc. if (basename.len >= 4 and std.mem.eql(u8, basename[0..4], ".env") and - (basename.len == 4 or basename[4] == '.')) return true; + (basename.len == 4 or basename[4] == '.' or basename[4] == '-' or basename[4] == '_')) return true; // Exact matches const sensitive_names = [_][]const u8{ ".dev.vars", ".npmrc", ".pypirc", ".netrc",