Skip to content

Commit 68bb629

Browse files
dcramer and codex authored
build(vitest)!: Drop 3.x and improve eval reporter (#38)
Drop Vitest 3.x support and switch the eval reporter to Vitest 4's verbose reporter path.

The reporter change streams eval cases as they finish instead of flushing them when the test module ends, while still keeping the eval score suffix.

That implementation depends on the Vitest 4 reporter surface in `vitest/node`. Vitest 3 uses a materially different verbose reporter path, so keeping 3.x compatibility here would mean carrying a second internal code path just for reporter behavior.

This narrows the Vitest peer range to 4.x, removes the deprecated `vitest/reporters` import, tightens the reporter tests to use real Vitest state values, explicitly covers failed eval output, and adds a clarifying comment around the `DefaultReporter` bookkeeping handoff used for eval cases.

Validated with `pnpm run test -- src/reporter.test.ts` and `pnpm build`.

---------

Co-authored-by: Codex <[email protected]>
1 parent 538160a commit 68bb629

3 files changed

Lines changed: 172 additions & 22 deletions

File tree

package.json

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -5,9 +5,7 @@
55
"types": "./dist/index.d.ts",
66
"main": "./dist/index.js",
77
"module": "./dist/index.mjs",
8-
"files": [
9-
"dist"
10-
],
8+
"files": ["dist"],
119
"exports": {
1210
".": {
1311
"types": "./dist/index.d.ts",
@@ -59,7 +57,7 @@
5957
"peerDependencies": {
6058
"ai": ">=4 <7",
6159
"tinyrainbow": ">=2 <4",
62-
"vitest": ">=3 <5",
60+
"vitest": ">=4 <5",
6361
"zod": ">=3 <5"
6462
},
6563
"peerDependenciesMeta": {

src/reporter.test.ts

Lines changed: 122 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,122 @@
1+
import { stripVTControlCharacters } from "node:util";
2+
import { describe, expect, test, vi } from "vitest";
3+
import DefaultEvalReporter from "./reporter";
4+
5+
function createReporter() {
6+
const logger = {
7+
log: vi.fn(),
8+
error: vi.fn(),
9+
printBanner: vi.fn(),
10+
printNoTestFound: vi.fn(),
11+
};
12+
13+
const reporter = new DefaultEvalReporter({ isTTY: false });
14+
reporter.onInit({
15+
logger,
16+
config: {
17+
hideSkippedTests: false,
18+
slowTestThreshold: 300,
19+
root: process.cwd(),
20+
},
21+
} as any);
22+
23+
return { reporter, logger };
24+
}
25+
26+
function createTestCase({
27+
avgScore,
28+
state = "passed",
29+
}: {
30+
avgScore?: number;
31+
state?: "passed" | "failed";
32+
}) {
33+
return {
34+
task: {
35+
name: "streams eval progress",
36+
type: "test",
37+
mode: "run",
38+
file: {
39+
name: "fixtures/reporter.eval.test.ts",
40+
},
41+
result: {
42+
state,
43+
duration: 42,
44+
},
45+
},
46+
module: {
47+
task: {
48+
name: "fixtures/reporter.eval.test.ts",
49+
},
50+
project: {
51+
name: "",
52+
},
53+
},
54+
project: {
55+
name: "",
56+
},
57+
options: {},
58+
location: {
59+
line: 12,
60+
column: 3,
61+
},
62+
meta: () => (avgScore == null ? {} : { eval: { avgScore } }),
63+
result: () => ({
64+
state,
65+
errors: state === "failed" ? [{ message: "threshold not met" }] : [],
66+
}),
67+
diagnostic: () => ({
68+
duration: 42,
69+
}),
70+
annotations: () => [],
71+
};
72+
}
73+
74+
describe("DefaultEvalReporter", () => {
75+
test("streams eval test cases with scores and avoids a file-end flush", () => {
76+
const { reporter, logger } = createReporter();
77+
const testCase = createTestCase({ avgScore: 0.82 });
78+
79+
reporter.onTestCaseResult(testCase as any);
80+
81+
expect(logger.log).toHaveBeenCalledTimes(1);
82+
expect(stripVTControlCharacters(logger.log.mock.calls[0][0])).toContain(
83+
"fixtures/reporter.eval.test.ts:12:3 > streams eval progress [0.82] 42ms",
84+
);
85+
86+
reporter.onTestModuleEnd({
87+
state: () => "passed",
88+
task: {},
89+
} as any);
90+
91+
expect(logger.log).toHaveBeenCalledTimes(1);
92+
});
93+
94+
test("falls back to verbose output for non-eval tests", () => {
95+
const { reporter, logger } = createReporter();
96+
97+
reporter.onTestCaseResult(createTestCase({}) as any);
98+
99+
expect(stripVTControlCharacters(logger.log.mock.calls[0][0])).toContain(
100+
"fixtures/reporter.eval.test.ts:12:3 > streams eval progress 42ms",
101+
);
102+
expect(stripVTControlCharacters(logger.log.mock.calls[0][0])).not.toContain(
103+
"[0.",
104+
);
105+
});
106+
107+
test("logs failed eval test details with the score suffix", () => {
108+
const { reporter, logger } = createReporter();
109+
110+
reporter.onTestCaseResult(
111+
createTestCase({ avgScore: 0.2, state: "failed" }) as any,
112+
);
113+
114+
expect(logger.log).toHaveBeenCalledTimes(2);
115+
expect(stripVTControlCharacters(logger.log.mock.calls[0][0])).toContain(
116+
"fixtures/reporter.eval.test.ts:12:3 > streams eval progress [0.20] 42ms",
117+
);
118+
expect(stripVTControlCharacters(logger.log.mock.calls[1][0])).toContain(
119+
"threshold not met",
120+
);
121+
});
122+
});

src/reporter.ts

Lines changed: 48 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -1,28 +1,58 @@
1-
// import type { RunnerTask, RunnerTestFile } from "vitest";
2-
// TODO: Switch to "vitest/node" when we drop Vitest 3 support.
3-
import { DefaultReporter } from "vitest/reporters";
1+
import { DefaultReporter, VerboseReporter } from "vitest/node";
42
import c from "tinyrainbow";
53

6-
export default class DefaultEvalReporter extends DefaultReporter {
7-
protected override printTestCase(moduleState: any, test: any): void {
8-
const meta = test.meta();
9-
const testResult = test.result();
4+
const TEST_NAME_SEPARATOR = c.dim(" > ");
105

6+
export default class DefaultEvalReporter extends VerboseReporter {
7+
override onTestCaseResult(test: any): void {
8+
const meta = test.meta();
119
if (!meta.eval) {
12-
super.printTestCase(moduleState, test);
10+
super.onTestCaseResult(test);
1311
return;
1412
}
1513

16-
const padding = this.getTestIndentation(test.task);
17-
const icon = testResult.state === "failed" ? c.red("✗ ") : " ";
14+
// Preserve DefaultReporter's bookkeeping without letting VerboseReporter
15+
// print the stock per-test line; eval cases need custom score output.
16+
DefaultReporter.prototype.onTestCaseResult.call(this, test);
17+
18+
const testResult = test.result();
19+
if (
20+
this.ctx.config.hideSkippedTests &&
21+
testResult.state === "skipped" &&
22+
test.options?.mode !== "todo"
23+
) {
24+
return;
25+
}
26+
27+
this.logEvalTestCase(test, meta.eval.avgScore);
28+
29+
if (testResult.state === "failed") {
30+
for (const error of testResult.errors) {
31+
this.log(c.red(` → ${error.message}`));
32+
}
33+
}
34+
35+
if (test.annotations().length) {
36+
this.log();
37+
this.printAnnotations(test, "log", 3);
38+
this.log();
39+
}
40+
}
41+
42+
private logEvalTestCase(test: any, avgScore: number): void {
1843
const colorFn =
19-
meta.eval.avgScore < 0.5
20-
? c.red
21-
: meta.eval.avgScore < 0.75
22-
? c.yellow
23-
: c.green;
24-
this.log(
25-
`${padding}${icon}${this.getTestName(test.task, c.dim(" > "))} [${colorFn(meta.eval.avgScore.toFixed(2))}]`,
26-
);
44+
avgScore < 0.5 ? c.red : avgScore < 0.75 ? c.yellow : c.green;
45+
46+
let title = ` ${this.getEntityPrefix(test)} `;
47+
title += test.module.task.name;
48+
if (test.location) {
49+
title += c.dim(`:${test.location.line}:${test.location.column}`);
50+
}
51+
title += TEST_NAME_SEPARATOR;
52+
title += this.getTestName(test.task, TEST_NAME_SEPARATOR);
53+
title += ` [${colorFn(avgScore.toFixed(2))}]`;
54+
title += this.getTestCaseSuffix(test);
55+
56+
this.log(title);
2757
}
2858
}

0 commit comments

Comments
 (0)