Skip to content

Commit 8920564

Browse files
Merge pull request #172 from ember-tooling/copilot/improve-bench-compare-reliability
Switch to mitata for benchmark comparison with built-in boxplots and summaries
2 parents 5d1e26c + 08bfd13 commit 8920564

7 files changed

Lines changed: 336 additions & 382 deletions

File tree

.github/workflows/bench-compare.yml

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ concurrency:
1010

1111
jobs:
1212
bench-compare:
13-
name: "Benchmark Comparison"
13+
name: 'Benchmark Comparison'
1414
runs-on: ubuntu-latest
1515
permissions:
1616
pull-requests: write
@@ -19,25 +19,18 @@ jobs:
1919
steps:
2020
- uses: actions/checkout@v6
2121
with:
22-
# Full history so the script can checkout main for baseline benchmarks.
22+
# Full history so the script can git-archive the base branch.
2323
fetch-depth: 0
24-
# Check out as a named branch (not detached HEAD) so the script can
25-
# read the branch name and restore it after switching to main.
2624
ref: ${{ github.head_ref }}
2725

2826
- uses: wyvox/action-setup-pnpm@v3
2927

3028
- name: Run benchmark comparison
31-
# Strip ANSI color codes so the output is readable in a PR comment.
32-
# Write to RUNNER_TEMP (outside the repo) so the file is not seen as an
33-
# untracked change by git — bench-compare.mjs stashes untracked files
34-
# before switching branches, which would stash (and later restore) an
35-
# empty copy of the file, discarding the real content.
3629
env:
3730
BENCH_JSON_OUTPUT: ${{ runner.temp }}/bench-results.json
3831
run: |
3932
set -o pipefail
40-
pnpm bench:compare 2>&1 | sed 's/\x1b\[[0-9;]*m//g' > "$RUNNER_TEMP/bench-output.txt"
33+
pnpm bench:compare | sed 's/\x1b\[[0-9;]*m//g' > "$RUNNER_TEMP/bench-output.txt"
4134
4235
- name: Format PR comment
4336
if: always()

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
"lint:js": "eslint . --max-warnings=0",
2525
"lint:js:fix": "eslint . --fix --max-warnings=0",
2626
"lint:package": "pnpm publint",
27-
"bench": "vitest bench",
27+
"bench": "node --expose-gc tests/parser.bench.mjs",
2828
"bench:compare": "node scripts/bench-compare.mjs",
2929
"test": "vitest run"
3030
},
@@ -53,6 +53,7 @@
5353
"eslint-plugin-promise": "^6.0.0",
5454
"execa": "^8.0.1",
5555
"fs-extra": "^11.2.0",
56+
"mitata": "1.0.34",
5657
"prettier": "^3.2.5",
5758
"publint": "^0.2.6",
5859
"release-plan": "^0.17.4",

pnpm-lock.yaml

Lines changed: 12 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

scripts/bench-compare.mjs

Lines changed: 78 additions & 155 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
/* eslint-disable n/no-process-exit */
22
/**
3-
* Benchmark comparison script.
3+
* Benchmark comparison script using mitata.
44
*
5-
* Runs `pnpm bench` on the current branch and on `main`, then prints a
6-
* side-by-side table showing the hz delta for every benchmark.
5+
* Copies the base branch's source to a temp directory, installs its
6+
* dependencies, then runs the mitata bench script with --control-dir so that
7+
* both control (base) and experiment (current) parsers are benchmarked in the
8+
* same process — giving mitata a fair, head-to-head comparison with built-in
9+
* summary tables and boxplots.
710
*
811
* Usage:
912
* node scripts/bench-compare.mjs [--base <branch>]
@@ -13,10 +16,9 @@
1316
*/
1417

1518
import { execSync, spawnSync } from 'node:child_process';
16-
import { readFileSync, writeFileSync, unlinkSync, existsSync } from 'node:fs';
19+
import { existsSync, mkdirSync, rmSync } from 'node:fs';
1720
import { tmpdir } from 'node:os';
1821
import { join } from 'node:path';
19-
import { styleText } from 'node:util';
2022

2123
// ---------------------------------------------------------------------------
2224
// CLI args
@@ -34,180 +36,101 @@ function run(cmd, opts = {}) {
3436
return execSync(cmd, { stdio: 'inherit', ...opts });
3537
}
3638

37-
function currentBranch() {
38-
return execSync('git rev-parse --abbrev-ref HEAD', { encoding: 'utf8' }).trim();
39-
}
40-
41-
function hasUncommittedChanges() {
42-
const result = execSync('git status --porcelain', { encoding: 'utf8' });
43-
return result.trim().length > 0;
44-
}
45-
46-
function runBench(outputFile) {
47-
const result = spawnSync('pnpm', ['vitest', 'bench', '--outputJson', outputFile, '--run'], {
48-
stdio: 'inherit',
49-
});
50-
if (result.status !== 0) {
51-
console.error('\n❌ Benchmark run failed.');
52-
process.exit(1);
53-
}
54-
}
55-
56-
function loadResults(file) {
57-
const raw = JSON.parse(readFileSync(file, 'utf8'));
58-
// Build a map of "Suite > name" → benchmark entry.
59-
// fullName is e.g. "tests/parser.bench.js > gts parser"; strip the file prefix.
60-
const map = new Map();
61-
for (const suite of raw.files ?? []) {
62-
for (const group of suite.groups ?? []) {
63-
const suiteName = (group.fullName ?? '').replace(/^.*?>\s*/, '');
64-
for (const bench of group.benchmarks ?? []) {
65-
const key = `${suiteName} > ${bench.name}`;
66-
map.set(key, bench);
67-
}
68-
}
39+
/**
40+
* Resolve a branch name to a commit SHA. Tries `origin/<branch>` first (for CI
41+
* where only the PR branch is checked out locally), then falls back to `<branch>`.
42+
*/
43+
function resolveRef(branch) {
44+
for (const candidate of [`origin/${branch}`, branch]) {
45+
const result = spawnSync('git', ['rev-parse', '--verify', candidate], {
46+
encoding: 'utf8',
47+
stdio: ['pipe', 'pipe', 'pipe'],
48+
});
49+
if (result.status === 0) return result.stdout.trim();
6950
}
70-
return map;
71-
}
72-
73-
function fmt(n) {
74-
return n.toLocaleString('en-US', { maximumFractionDigits: 2 });
75-
}
76-
77-
function delta(current, base) {
78-
const pct = ((current - base) / base) * 100;
79-
const sign = pct >= 0 ? '+' : '';
80-
return `${sign}${pct.toFixed(1)}%`;
81-
}
82-
83-
function colorize(pct) {
84-
const num = Number.parseFloat(pct);
85-
if (num >= 5) return styleText('green', pct);
86-
if (num <= -5) return styleText('red', pct);
87-
return styleText('yellow', pct);
51+
throw new Error(`Could not resolve ref for branch "${branch}". Is it fetched?`);
8852
}
8953

9054
// ---------------------------------------------------------------------------
9155
// Main
9256
// ---------------------------------------------------------------------------
9357

94-
const CURRENT_BRANCH = currentBranch();
95-
96-
if (CURRENT_BRANCH === BASE_BRANCH) {
97-
console.error(`❌ Already on '${BASE_BRANCH}'. Check out your feature branch first.`);
98-
process.exit(1);
99-
}
58+
const ROOT = process.cwd();
59+
const CONTROL_DIR = join(tmpdir(), `bench-control-${BASE_BRANCH}-${Date.now()}`);
10060

101-
const stashed = hasUncommittedChanges();
102-
if (stashed) {
103-
console.log('📦 Stashing uncommitted changes…');
104-
run('git stash --include-untracked');
105-
}
61+
console.error(`\n🔧 Setting up control (${BASE_BRANCH}) in ${CONTROL_DIR}\n`);
10662

107-
const tmpCurrent = join(tmpdir(), 'bench-current.json');
108-
const tmpBase = join(tmpdir(), `bench-${BASE_BRANCH}.json`);
63+
const BASE_REF = resolveRef(BASE_BRANCH);
64+
console.error(` Resolved ${BASE_BRANCH}${BASE_REF.slice(0, 10)}\n`);
10965

110-
// Clean up temp files on exit
66+
// Clean up temp dir on exit
11167
function cleanup() {
112-
for (const f of [tmpCurrent, tmpBase]) {
113-
if (existsSync(f)) unlinkSync(f);
68+
if (existsSync(CONTROL_DIR)) {
69+
try {
70+
rmSync(CONTROL_DIR, { recursive: true, force: true });
71+
} catch {
72+
// best-effort cleanup
73+
}
11474
}
11575
}
11676
process.on('exit', cleanup);
11777
process.on('SIGINT', () => process.exit(130));
11878
process.on('SIGTERM', () => process.exit(143));
11979

12080
try {
121-
// ── 1. Benchmark current branch ──────────────────────────────────────────
122-
console.log(`\n🔧 Benchmarking current branch: \x1b[36m${CURRENT_BRANCH}\x1b[0m\n`);
123-
runBench(tmpCurrent);
124-
125-
// ── 2. Switch to base branch ──────────────────────────────────────────────
126-
console.log(`\n🔀 Switching to base branch: \x1b[36m${BASE_BRANCH}\x1b[0m\n`);
127-
run(`git checkout ${BASE_BRANCH}`);
128-
run('pnpm install --frozen-lockfile');
129-
130-
// ── 3. Benchmark base branch ──────────────────────────────────────────────
131-
console.log(`\n🔧 Benchmarking base branch: \x1b[36m${BASE_BRANCH}\x1b[0m\n`);
132-
runBench(tmpBase);
133-
} finally {
134-
// ── 4. Restore original branch ────────────────────────────────────────────
135-
console.log(`\n🔀 Restoring branch: \x1b[36m${CURRENT_BRANCH}\x1b[0m\n`);
136-
run(`git checkout ${CURRENT_BRANCH}`);
137-
run('pnpm install --frozen-lockfile');
138-
139-
if (stashed) {
140-
console.log('📦 Restoring stash…');
141-
run('git stash pop');
142-
}
143-
}
144-
145-
// ── 5. Compare ───────────────────────────────────────────────────────────────
146-
147-
const currentResults = loadResults(tmpCurrent);
148-
const baseResults = loadResults(tmpBase);
149-
150-
const allKeys = new Set([...currentResults.keys(), ...baseResults.keys()]);
151-
152-
const COL = { name: 44, base: 18, current: 18, delta: 12 };
153-
const line = (name, base, cur, diff) =>
154-
name.padEnd(COL.name) +
155-
base.padStart(COL.base) +
156-
cur.padStart(COL.current) +
157-
diff.padStart(COL.delta);
158-
159-
const ruler = '─'.repeat(COL.name + COL.base + COL.current + COL.delta);
81+
// ── 1. Export base branch source to temp dir ─────────────────────────────
82+
mkdirSync(CONTROL_DIR, { recursive: true });
16083

161-
console.log(`\n${'─'.repeat(ruler.length)}`);
162-
console.log(
163-
` Benchmark comparison: ${styleText('cyan', CURRENT_BRANCH)} vs ${styleText('cyan', BASE_BRANCH)}`
164-
);
165-
console.log(`${'─'.repeat(ruler.length)}`);
166-
console.log(
167-
styleText('bold', line('Benchmark', `${BASE_BRANCH} (hz)`, `${CURRENT_BRANCH} (hz)`, 'Δ'))
168-
);
169-
console.log(ruler);
84+
// Copy package manifests and source (use resolved SHA for reliability)
85+
run(
86+
`git archive ${BASE_REF} -- package.json pnpm-lock.yaml pnpm-workspace.yaml src/ | tar -x -C "${CONTROL_DIR}"`
87+
);
17088

171-
const benchResults = [];
89+
// ── 2. Install dependencies in control dir ───────────────────────────────
90+
console.error(`\n📦 Installing dependencies for control (${BASE_BRANCH})…\n`);
91+
run('pnpm install --frozen-lockfile', {
92+
cwd: CONTROL_DIR,
93+
stdio: ['inherit', 'pipe', 'inherit'],
94+
});
17295

173-
let lastSuite = '';
174-
for (const key of [...allKeys].sort()) {
175-
const [suite] = key.split(' > ');
176-
if (suite !== lastSuite) {
177-
if (lastSuite) console.log('');
178-
lastSuite = suite;
96+
// ── 3. Run mitata bench with --control-dir ───────────────────────────────
97+
console.error(`\n🏎️ Running benchmarks (experiment vs control)…\n`);
98+
99+
const benchScript = join(ROOT, 'tests/parser.bench.mjs');
100+
const benchArgs = [
101+
'--expose-gc',
102+
'--max-old-space-size=4096',
103+
benchScript,
104+
'--control-dir',
105+
CONTROL_DIR,
106+
];
107+
108+
// CPU pinning on Linux to reduce cross-core migration variance
109+
const IS_LINUX = process.platform === 'linux';
110+
const HAS_TASKSET = IS_LINUX && spawnSync('which', ['taskset'], { stdio: 'pipe' }).status === 0;
111+
112+
let cmd = 'node';
113+
let fullArgs = benchArgs;
114+
115+
if (HAS_TASKSET) {
116+
cmd = 'taskset';
117+
fullArgs = ['-c', '0', 'node', ...benchArgs];
118+
console.error('📌 CPU pinning enabled (taskset -c 0)\n');
179119
}
180120

181-
const b = baseResults.get(key);
182-
const c = currentResults.get(key);
121+
const result = spawnSync(cmd, fullArgs, {
122+
stdio: 'inherit',
123+
cwd: ROOT,
124+
env: { ...process.env },
125+
});
183126

184-
if (!b || !c) {
185-
const note = !b ? '(missing in base)' : '(missing in current)';
186-
console.log(line(` ${key}`, '-', '-', note));
187-
benchResults.push({ key, baseHz: null, currentHz: null, delta: null, note });
188-
continue;
127+
if (result.status !== 0) {
128+
console.error('\n❌ Benchmark run failed.');
129+
process.exit(1);
189130
}
190131

191-
const pct = delta(c.hz, b.hz);
192-
console.log(line(` ${key}`, fmt(b.hz), fmt(c.hz), colorize(pct)));
193-
benchResults.push({ key, baseHz: b.hz, currentHz: c.hz, delta: pct, note: null });
194-
}
195-
196-
console.log(ruler);
197-
console.log(
198-
'\n ' +
199-
styleText('green', '■') +
200-
' ≥ +5% faster ' +
201-
styleText('red', '■') +
202-
' ≤ −5% slower ' +
203-
styleText('yellow', '■') +
204-
' within ±5% similar\n'
205-
);
206-
207-
const jsonOutputPath = process.env.BENCH_JSON_OUTPUT;
208-
if (jsonOutputPath) {
209-
writeFileSync(
210-
jsonOutputPath,
211-
JSON.stringify({ branch: CURRENT_BRANCH, base: BASE_BRANCH, results: benchResults }, null, 2)
212-
);
132+
console.error('\n✅ Benchmark comparison complete.\n');
133+
} catch (e) {
134+
console.error('❌ Error:', e.message);
135+
process.exit(1);
213136
}

0 commit comments

Comments
 (0)