justrach · justrach · May 25, 2026 · May 24, 2026 · May 24, 2026 · May 24, 2026
diff --git a/build.zig b/build.zig
@@ -53,28 +53,44 @@ pub fn build(b: *std.Build) void {
     const run_step = b.step("run", "Run codedb daemon");
     run_step.dependOn(&run_cmd.step);
 
-    // ── Tests ──
+    // ── Tests (split into independent binaries for faster compilation) ──
     const test_filter = b.option([]const u8, "test-filter", "Only run tests whose name contains this substring");
-    const tests = b.addTest(.{
-        .root_module = b.createModule(.{
-            .root_source_file = b.path("src/tests.zig"),
-            .target = target,
-            .optimize = optimize,
-            .link_libc = true,
-        }),
-    });
-    tests.root_module.addImport("mcp", mcp_dep.module("mcp"));
-    tests.root_module.addImport("nanoregex", nanoregex_dep.module("nanoregex"));
-    if (test_filter) |f| {
-        const filters = b.allocator.alloc([]const u8, 1) catch @panic("oom");
-        filters[0] = f;
-        tests.filters = filters;
+    const test_step = b.step("test", "Run all tests");
+
+    const test_files = [_]struct { name: []const u8, path: []const u8, needs_mcp: bool, needs_nanoregex: bool }{
+        .{ .name = "test-core",     .path = "src/test_core.zig",     .needs_mcp = false, .needs_nanoregex = false },
+        .{ .name = "test-explore",  .path = "src/test_explore.zig",  .needs_mcp = false, .needs_nanoregex = true },
+        .{ .name = "test-index",    .path = "src/test_index.zig",    .needs_mcp = true,  .needs_nanoregex = true },
+        .{ .name = "test-parser",   .path = "src/test_parser.zig",   .needs_mcp = false, .needs_nanoregex = true },
+        .{ .name = "test-search",   .path = "src/test_search.zig",   .needs_mcp = true,  .needs_nanoregex = true },
+        .{ .name = "test-snapshot", .path = "src/test_snapshot.zig", .needs_mcp = false, .needs_nanoregex = true },
+        .{ .name = "test-mcp",      .path = "src/test_mcp.zig",      .needs_mcp = true,  .needs_nanoregex = true },
+        .{ .name = "test-query",    .path = "src/test_query.zig",    .needs_mcp = true,  .needs_nanoregex = true },
+    };
+
+    for (test_files) |entry| {
+        // Each entry gets its own root module so the test binaries compile independently.
+        const test_module = b.createModule(.{
+            .root_source_file = b.path(entry.path),
+            .target = target,
+            .optimize = optimize,
+            .link_libc = true,
+        });
+        if (entry.needs_mcp) test_module.addImport("mcp", mcp_dep.module("mcp"));
+        if (entry.needs_nanoregex) test_module.addImport("nanoregex", nanoregex_dep.module("nanoregex"));
+        const test_exe = b.addTest(.{ .root_module = test_module });
+        if (test_filter) |needle| {
+            // `filters` is a slice, so the single substring is wrapped in a one-element allocation.
+            const one = b.allocator.alloc([]const u8, 1) catch @panic("oom");
+            one[0] = needle;
+            test_exe.filters = one;
+        }
+        const run_test = b.addRunArtifact(test_exe);
+        // The same run step backs both the aggregate `test` step and the per-binary step.
+        test_step.dependOn(&run_test.step);
+        b.step(entry.name, b.fmt("Run {s}", .{entry.name})).dependOn(&run_test.step);
+    }
 
-    const test_step = b.step("test", "Run tests");
-    const tests_run = b.addRunArtifact(tests);
-    test_step.dependOn(&tests_run.step);
-
 
     // ── Library tests (verify the module root compiles) ──
     const lib_tests = b.addTest(.{

diff --git a/codedb.snapshot b/codedb.snapshot
diff --git a/src/cio.zig b/src/cio.zig
@@ -16,6 +16,19 @@ extern "c" fn clock_gettime(id: c_int, ts: *std.c.timespec) c_int;
 extern "c" fn pipe(fds: *[2]c_int) c_int;
 extern "c" fn close(fd: c_int) c_int;
 
+/// Install `SIG.IGN` as the process's handler for `SIGPIPE`.
+/// The previous disposition is not captured (the old-action argument is `null`).
+pub fn ignoreSigpipe() void {
+    // `sigset_t` is an integer on some targets and an array on others, so
+    // build the empty mask with `std.mem.zeroes` rather than the literal `0`.
+    const act: std.posix.Sigaction = .{
+        .handler = .{ .handler = std.posix.SIG.IGN },
+        .mask = std.mem.zeroes(std.posix.sigset_t),
+        .flags = 0,
+    };
+    std.posix.sigaction(std.posix.SIG.PIPE, &act, null);
+}
+
 const CLOCK_REALTIME: c_int = 0;
 const CLOCK_MONOTONIC: c_int = if (builtin.os.tag == .macos) 6 else 1;
 

diff --git a/src/explore.zig b/src/explore.zig
@@ -181,6 +181,20 @@ pub const SearchResult = struct {
     score: f32 = 0.0,
 };
 
+pub const SearchBreakdown = struct { // Per-call timing and counters recorded by the search path; all fields default to zero.
+    tier0_ns: i128 = 0, // elapsed ns (difference of two cio.nanoTimestamp() readings) for tier 0, the word-index lookup
+    tier05_ns: i128 = 0, // elapsed ns for tier 0.5, the word-index prefix expansion
+    tier1_ns: i128 = 0, // elapsed ns for tier 1, the trigram-candidate pass
+    tier2_ns: i128 = 0, // elapsed ns for tier 2, the sparse n-gram candidate pass
+    tier3_ns: i128 = 0, // elapsed ns for tier 3, the skip_trigram_files pass
+    tier4_ns: i128 = 0, // elapsed ns for tier 4, the second word-index pass
+    tier5_ns: i128 = 0, // elapsed ns for tier 5, the last phase before reranking
+    rerank_ns: i128 = 0, // elapsed ns spent inside rerankAndFinalize
+    tier_reached: u8 = 0, // numeric tier code chosen by the search routine; NOTE(review): encoding is not defined here — confirm with the writer
+    candidate_count: u32 = 0, // length of the trigram candidate list, when one was returned
+    result_count: u32 = 0, // number of collected results, captured before reranking
+};
+
 pub const DependencyGraph = struct {
     forward: std.StringHashMap(std.ArrayList([]const u8)),
     reverse: std.StringHashMap(std.StringHashMap(void)),
@@ -522,6 +536,7 @@ pub const Explorer = struct {
     /// assert the short-circuit holds (issue: negative-query slow path).
     /// Production code does not read this field.
     search_tier5_count: u64 = 0,
+    last_search_breakdown: SearchBreakdown = .{},
 
     pub const DEFAULT_CONTENT_CACHE_CAPACITY: u32 = 16384;
 
@@ -646,8 +661,8 @@ pub const Explorer = struct {
 
         persistent_outline.path = stable_path;
 
+        const prior_content = self.contents.get(stable_path);
         try self.contents.put(stable_path, content);
-        const prior_content: ?[]const u8 = null;
 
         if (full_index) {
             if (!self.word_index_complete) {
@@ -1520,6 +1535,9 @@ pub const Explorer = struct {
 
         if (max_results == 0) return try allocator.alloc(SearchResult, 0);
 
+        var breakdown: SearchBreakdown = .{};
+        defer self.last_search_breakdown = breakdown;
+
         var result_list: std.ArrayList(SearchResult) = .empty;
         errdefer result_list.deinit(allocator);
 
@@ -1533,6 +1551,7 @@ pub const Explorer = struct {
         // docs, and files with more exact word hits are considered first so
         // popular identifiers and skip-trigram canonical files are not hidden
         // behind earlier low-signal posting-list entries.
+        const t0_start = cio.nanoTimestamp();
         const word_hits = self.word_index.search(query);
         if (word_hits.len > 0) {
             const Tier0File = struct {
@@ -1587,13 +1606,19 @@ pub const Explorer = struct {
                 searched.put(stats.path, {}) catch {};
                 try searchInContent(stats.path, ref.data, query, allocator, tier0_per_file_cap, max_results, &result_list);
             }
-            if (result_list.items.len >= max_results)
-                return self.rerankAndFinalize(&result_list, query, allocator);
+            if (result_list.items.len >= max_results) {
+                breakdown.tier0_ns = cio.nanoTimestamp() - t0_start;
+                breakdown.tier_reached = 0;
+                breakdown.result_count = @intCast(result_list.items.len);
+                const t_rerank = cio.nanoTimestamp();
+                const res = self.rerankAndFinalize(&result_list, query, allocator);
+                breakdown.rerank_ns = cio.nanoTimestamp() - t_rerank;
+                return res;
+            }
         }
+        breakdown.tier0_ns = cio.nanoTimestamp() - t0_start;
 
-        // Tier 0.5: prefix expansion — find all indexed keys that begin with the query.
-        // Activates when Tier 0 found nothing and query is ≥3 chars, catching partial
-        // identifier queries like "searchC" that match "searchContent" in the word index.
+        const t05_start = cio.nanoTimestamp();
         if (result_list.items.len == 0 and query.len >= 3) {
             const prefix_hits = try self.word_index.searchPrefix(query, allocator, max_results);
             defer allocator.free(prefix_hits);
@@ -1616,14 +1641,23 @@ pub const Explorer = struct {
                 searched.put(hit_path, {}) catch {};
                 if (result_list.items.len >= max_results) break;
             }
-            if (result_list.items.len >= max_results)
-                return self.rerankAndFinalize(&result_list, query, allocator);
+            if (result_list.items.len >= max_results) {
+                breakdown.tier05_ns = cio.nanoTimestamp() - t05_start;
+                breakdown.tier_reached = 1;
+                breakdown.result_count = @intCast(result_list.items.len);
+                const t_rerank = cio.nanoTimestamp();
+                const res = self.rerankAndFinalize(&result_list, query, allocator);
+                breakdown.rerank_ns = cio.nanoTimestamp() - t_rerank;
+                return res;
+            }
         }
+        breakdown.tier05_ns = cio.nanoTimestamp() - t05_start;
 
+        const t1_start = cio.nanoTimestamp();
         const candidate_paths = self.trigram_index.candidates(query, allocator);
         defer if (candidate_paths) |cp| allocator.free(cp);
+        if (candidate_paths) |cp| breakdown.candidate_count = @intCast(cp.len);
 
-        // Tier 1: trigram candidates — fast path, skips files already found by Tier 0.
         if (candidate_paths) |cp| {
             if (cp.len > 0) {
                 // Issue #427: rank candidates by per-file word-index hit count
@@ -1662,18 +1696,25 @@ pub const Explorer = struct {
                     const ref = self.readContentForSearch(path, allocator) orelse continue;
                     defer ref.deinit();
                     try searchInContent(path, ref.data, query, allocator, max_per_file, max_results, &result_list);
-                    if (result_list.items.len >= max_results)
-                        return self.rerankAndFinalize(&result_list, query, allocator);
+                    if (result_list.items.len >= max_results) {
+                        breakdown.tier1_ns = cio.nanoTimestamp() - t1_start;
+                        breakdown.tier_reached = 2;
+                        breakdown.result_count = @intCast(result_list.items.len);
+                        const t_rerank = cio.nanoTimestamp();
+                        const res = self.rerankAndFinalize(&result_list, query, allocator);
+                        breakdown.rerank_ns = cio.nanoTimestamp() - t_rerank;
+                        return res;
+                    }
                 }
             }
         }
 
-        // Mark all Tier 1 candidates as searched.
         if (candidate_paths) |cp| {
             for (cp) |p| searched.put(p, {}) catch {};
         }
+        breakdown.tier1_ns = cio.nanoTimestamp() - t1_start;
 
-        // Tier 2: sparse candidates — LAZY, only computed when Tier 1 found nothing.
+        const t2_start = cio.nanoTimestamp();
         if (result_list.items.len == 0) {
             const sparse_paths = self.sparse_ngram_index.candidates(query, allocator);
             defer if (sparse_paths) |sp| allocator.free(sp);
@@ -1688,8 +1729,9 @@ pub const Explorer = struct {
                 }
             }
         }
+        breakdown.tier2_ns = cio.nanoTimestamp() - t2_start;
 
-        // Tier 3: skip_trigram_files not already searched.
+        const t3_start = cio.nanoTimestamp();
         if (result_list.items.len < max_results) {
             var skip_iter = self.skip_trigram_files.keyIterator();
             while (skip_iter.next()) |key_ptr| {
@@ -1701,8 +1743,9 @@ pub const Explorer = struct {
                 if (result_list.items.len >= max_results) break;
             }
         }
+        breakdown.tier3_ns = cio.nanoTimestamp() - t3_start;
 
-        // Tier 4: word index scan — for files not yet searched.
+        const t4_start = cio.nanoTimestamp();
         if (result_list.items.len < max_results) {
             const tier4_hits = self.word_index.search(query);
             if (tier4_hits.len > 0) {
@@ -1720,21 +1763,9 @@ pub const Explorer = struct {
                 }
             }
         }
+        breakdown.tier4_ns = cio.nanoTimestamp() - t4_start;
 
-        // Tier 5: full scan fallback — only when NO results from any tier.
-        // Avoids 100ms+ scans on large repos when indices already found matches.
-        //
-        // Short-circuit Tier 5 whenever the trigram index was consulted with
-        // a query long enough to fully cover it (query.len >= 3). The trigram
-        // filter returns a SUPERSET of files containing the substring (every
-        // file containing the substring necessarily contains all its
-        // trigrams). If Tier 1 scanned that superset and found 0 results, no
-        // other trigram-indexed file can match either; skip_trigram_files
-        // were handled separately by Tier 3. Tier 5 would otherwise re-scan
-        // every indexed file for nothing — a measurable 2–3 ms p50 cost on
-        // queries whose constituent trigrams are common-but-not-co-occurring
-        // syllables (e.g. `Suspense` on a Rust corpus). The cp.len == 0
-        // sub-case of this was already short-circuited before this change.
+        const t5_start = cio.nanoTimestamp();
         const trigram_ruled_out = if (candidate_paths) |_|
             (query.len >= 3)
         else
@@ -1750,7 +1781,23 @@ pub const Explorer = struct {
                 if (result_list.items.len >= max_results) break;
             }
         }
-        return self.rerankAndFinalize(&result_list, query, allocator);
+        breakdown.tier5_ns = cio.nanoTimestamp() - t5_start;
+        // NOTE(review): each tierN_ns is assigned unconditionally on this path, so the `> 0` tests below pass whenever the clock advanced, even if that tier's body was skipped — confirm tier_reached means what readers expect.
+        if (result_list.items.len > 0) {
+            breakdown.tier_reached = if (breakdown.tier5_ns > 0 and result_list.items.len > 0) 7 // the length re-check is redundant inside this `if`
+                else if (breakdown.tier4_ns > 0 and result_list.items.len > 0) 6
+                else if (breakdown.tier3_ns > 0) 5
+                else if (breakdown.tier2_ns > 0) 4
+                else if (breakdown.tier1_ns > 0) 3 // NOTE(review): the tier-1 early return records 2, not 3 — confirm the intended encoding
+                else if (breakdown.tier05_ns > 0) 1
+                else 0;
+        }
+        breakdown.result_count = @intCast(result_list.items.len);
+        // rerank_ns brackets only the rerankAndFinalize call; result_count above was taken before it runs.
+        const t_rerank = cio.nanoTimestamp();
+        const res = self.rerankAndFinalize(&result_list, query, allocator);
+        breakdown.rerank_ns = cio.nanoTimestamp() - t_rerank;
+        return res;
     }
 
     /// Run the multi-signal rerank in place, then transfer ownership of
@@ -1770,7 +1817,9 @@ pub const Explorer = struct {
         if (result_list.items.len > 1) {
             std.sort.block(SearchResult, result_list.items, {}, struct {
                 pub fn lessThan(_: void, a: SearchResult, b: SearchResult) bool {
-                    if (a.score != b.score) return a.score > b.score;
+                    const sa = if (a.score == a.score) a.score else 0;
+                    const sb = if (b.score == b.score) b.score else 0;
+                    if (sa != sb) return sa > sb;
                     const ord = std.mem.order(u8, a.path, b.path);
                     if (ord != .eq) return ord == .lt;
                     return a.line_num < b.line_num;

diff --git a/src/index.zig b/src/index.zig
@@ -351,7 +351,12 @@ pub const WordIndex = struct {
     pub fn avgDocLength(self: *const WordIndex) f32 {
         const n = self.doc_lengths.count();
         if (n == 0) return 1.0;
-        return @as(f32, @floatFromInt(self.total_tokens)) / @as(f32, @floatFromInt(n));
+        const token_total: f32 = @floatFromInt(self.total_tokens);
+        const doc_total: f32 = @floatFromInt(n);
+        const mean = token_total / doc_total;
+        // A zero (or NaN) mean fails `mean > 0`, so it is replaced by 1.0.
+        if (mean > 0) return mean;
+        return 1.0;
     }
 
     /// Shrink all hit lists and per-file word sets to release excess capacity.

diff --git a/src/main.zig b/src/main.zig
@@ -80,6 +80,7 @@ fn mainInner() void {
 fn mainImpl() !void {
     // Use c_allocator (libc malloc) — better page reclamation than GPA
     const allocator = std.heap.c_allocator;
+    cio.ignoreSigpipe();
 
     // 0.16: single Threaded I/O instance passed down through every subsystem
     // that touches fs/subprocess. See issue #282. `io` flows into mcp.run,
@@ -1048,7 +1049,7 @@ fn mainImpl() !void {
 
         std.log.info("codedb mcp: root={s} files={d} data={s} scan={s}", .{ abs_root, store.currentSeq(), data_dir, mcp_server.getScanState().name() });
 
-        mcp_server.run(io, allocator, &store, &explorer, &agents, abs_root, cfg.max_cached, &telem, maybe_deferred);
+        mcp_server.run(io, allocator, &store, &explorer, &agents, abs_root, cfg.max_cached, &telem, maybe_deferred, &shutdown);
 
         shutdown.store(true, .release);
         if (scan_thread) |st| st.join();