Skip to content

Commit 23fa308

Browse files
NullVoxPopuli and claude committed
DRY up bench scripts, remove sudo logic, add reliability tips
- Extract shared formatTime, deltaEmoji, parsePairs into bench-utils.mjs
- Simplify run-bench.sh to just CPU pinning (no sudo/nice complexity)
- Simplify bench-compare.mjs to just CPU pinning
- Replace interactive sudo prompt in local-bench-summary.sh with a "tips for more reliable results" section printed after results

Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>
1 parent ba324f7 commit 23fa308

6 files changed

Lines changed: 112 additions & 206 deletions

File tree

scripts/bench-compare.mjs

Lines changed: 2 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -105,37 +105,19 @@ try {
105105
CONTROL_DIR,
106106
];
107107

108-
// CPU pinning + high priority on Linux for lower variance
108+
// CPU pinning on Linux to reduce cross-core migration variance
109109
const IS_LINUX = process.platform === 'linux';
110110
const HAS_TASKSET = IS_LINUX && spawnSync('which', ['taskset'], { stdio: 'pipe' }).status === 0;
111-
const HAS_NICE = IS_LINUX && spawnSync('which', ['nice'], { stdio: 'pipe' }).status === 0;
112111

113112
let cmd = 'node';
114113
let fullArgs = benchArgs;
115114

116115
if (HAS_TASKSET) {
117116
cmd = 'taskset';
118117
fullArgs = ['-c', '0', 'node', ...benchArgs];
119-
console.error('📌 CPU pinning enabled (taskset -c 0)');
118+
console.error('📌 CPU pinning enabled (taskset -c 0)\n');
120119
}
121120

122-
const IS_ROOT = process.getuid?.() === 0;
123-
const HAS_SUDO = IS_LINUX && spawnSync('sudo', ['-n', 'true'], { stdio: 'pipe' }).status === 0;
124-
125-
if (HAS_NICE && IS_ROOT) {
126-
fullArgs = ['-n', '-20', cmd, ...fullArgs];
127-
cmd = 'nice';
128-
console.error('⚡ High priority enabled (nice -n -20)');
129-
} else if (HAS_NICE && HAS_SUDO) {
130-
fullArgs = ['nice', '-n', '-20', cmd, ...fullArgs];
131-
cmd = 'sudo';
132-
console.error('⚡ High priority enabled (sudo nice -n -20)');
133-
} else if (HAS_NICE) {
134-
console.error('💡 High priority unavailable (needs passwordless sudo)');
135-
}
136-
137-
console.error('');
138-
139121
const result = spawnSync(cmd, fullArgs, {
140122
stdio: 'inherit',
141123
cwd: ROOT,

scripts/bench-utils.mjs

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
/**
2+
* Shared utilities for benchmark formatting scripts.
3+
*/
4+
5+
import { readFileSync } from 'node:fs';
6+
7+
/**
 * Render a duration given in nanoseconds as a short human-readable string.
 *
 * @param {number} ns - Duration in nanoseconds.
 * @returns {string} The value with two decimals, expressed in `ms` when it is
 *   at least 1e6 ns, in `µs` when it is at least 1e3 ns, and in `ns` otherwise.
 */
export function formatTime(ns) {
  let scaled = ns;
  let unit = 'ns';

  if (ns >= 1e6) {
    scaled = ns / 1e6;
    unit = 'ms';
  } else if (ns >= 1e3) {
    scaled = ns / 1e3;
    unit = 'µs';
  }

  return `${scaled.toFixed(2)} ${unit}`;
}
12+
13+
/**
 * Pick the status emoji for a percentage delta between experiment and control.
 *
 * A negative delta means the experiment took less time than the control.
 *
 * @param {number} pct - Percentage change of the experiment relative to control.
 * @returns {string} '⚪' when the magnitude is below 2, '🟢' for any other
 *   negative value, '🔴' for 5 or more, and '🟠' for everything else.
 */
export function deltaEmoji(pct) {
  // Changes smaller than 2% in either direction are treated as neutral.
  if (Math.abs(pct) < 2) return '⚪';

  // Any remaining negative delta is an improvement, regardless of magnitude.
  if (pct < 0) return '🟢';

  return pct >= 5 ? '🔴' : '🟠';
}
21+
22+
/**
 * Parse benchmark JSON results into control/experiment pairs with deltas.
 *
 * Runs are matched by name: "<key> (control)" and "<key> (experiment)" form
 * one pair under "<key>". The compared value is p50 (median), which is more
 * robust to outliers than avg; avg is used only when p50 is absent.
 *
 * Entries that cannot produce a meaningful percentage are skipped instead of
 * yielding NaN/Infinity deltas: runs without stats or a string name, pairs
 * missing one side, non-finite timings, and a zero control value.
 *
 * @param {{ benchmarks?: Array<{ runs?: Array<{ name?: string, stats?: object }> }> }} json
 *   Parsed benchmark results document.
 * @returns {Array<{ name: string, control: number, experiment: number, delta: number }>}
 *   One row per complete pair, in first-seen order; `delta` is the percentage
 *   change of experiment relative to control (negative = experiment is lower).
 */
export function parsePairs(json) {
  const pairs = new Map();

  for (const trial of json?.benchmarks || []) {
    for (const r of trial?.runs || []) {
      // A run without stats or without a string name cannot be paired.
      if (!r?.stats || typeof r.name !== 'string') continue;

      const m = r.name.match(/^(.+)\s+\((control|experiment)\)$/);
      if (!m) continue;

      const [, key, role] = m;
      if (!pairs.has(key)) pairs.set(key, {});
      pairs.get(key)[role] = r.stats;
    }
  }

  const rows = [];

  for (const [name, { control, experiment }] of pairs) {
    if (!control || !experiment) continue;

    const ctrlVal = control.p50 ?? control.avg;
    const expVal = experiment.p50 ?? experiment.avg;

    // A relative delta needs two finite timings and a non-zero baseline;
    // otherwise the division below would produce NaN or ±Infinity.
    if (!Number.isFinite(ctrlVal) || !Number.isFinite(expVal) || ctrlVal === 0) continue;

    const delta = ((expVal - ctrlVal) / ctrlVal) * 100;
    rows.push({ name, control: ctrlVal, experiment: expVal, delta });
  }

  return rows;
}
51+
52+
/**
 * Load a benchmark results file from disk and decode it as JSON.
 *
 * @param {string} path - Location of the JSON results file.
 * @returns {*} The parsed JSON value.
 * @throws If the file cannot be read or its contents are not valid JSON.
 */
export function readBenchJSON(path) {
  const raw = readFileSync(path, 'utf8');

  return JSON.parse(raw);
}

scripts/format-bench-cli.mjs

Lines changed: 3 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* BENCH_JSON_OUTPUT - Path to the JSON bench results
77
*/
88

9-
import { readFileSync } from 'node:fs';
9+
import { formatTime, deltaEmoji, parsePairs, readBenchJSON } from './bench-utils.mjs';
1010

1111
const jsonPath = process.env.BENCH_JSON_OUTPUT;
1212

@@ -18,67 +18,13 @@ if (!jsonPath) {
1818
let json;
1919

2020
try {
21-
json = JSON.parse(readFileSync(jsonPath, 'utf8'));
21+
json = readBenchJSON(jsonPath);
2222
} catch (e) {
2323
console.error(`Could not read ${jsonPath}: ${e.message}`);
2424
process.exit(1);
2525
}
2626

27-
function formatTime(ns) {
28-
if (ns >= 1e6) return `${(ns / 1e6).toFixed(2)} ms`;
29-
if (ns >= 1e3) return `${(ns / 1e3).toFixed(2)} µs`;
30-
31-
return `${ns.toFixed(2)} ns`;
32-
}
33-
34-
function deltaEmoji(pct) {
35-
const abs = Math.abs(pct);
36-
37-
if (abs < 2) return '⚪';
38-
if (pct <= -5) return '🟢';
39-
if (pct >= 5) return '🔴';
40-
if (pct < 0) return '🟢';
41-
42-
return '🟠';
43-
}
44-
45-
// Group control/experiment pairs
46-
const pairs = new Map();
47-
48-
for (const trial of json.benchmarks || []) {
49-
for (const r of trial.runs || []) {
50-
if (!r.stats) continue;
51-
52-
const m = r.name.match(/^(.+)\s+\((control|experiment)\)$/);
53-
54-
if (!m) continue;
55-
56-
const [, key, role] = m;
57-
58-
if (!pairs.has(key)) pairs.set(key, {});
59-
60-
pairs.get(key)[role] = r.stats;
61-
}
62-
}
63-
64-
if (pairs.size === 0) {
65-
console.log('No comparison data found.');
66-
process.exit(0);
67-
}
68-
69-
// Build rows — use median (p50) which is far more robust to outliers from
70-
// CPU frequency scaling, GC pauses, and other system noise than the mean.
71-
const rows = [];
72-
73-
for (const [name, { control, experiment }] of pairs) {
74-
if (!control || !experiment) continue;
75-
76-
const ctrlVal = control.p50 ?? control.avg;
77-
const expVal = experiment.p50 ?? experiment.avg;
78-
const delta = ((expVal - ctrlVal) / ctrlVal) * 100;
79-
80-
rows.push({ name, control: ctrlVal, experiment: expVal, delta });
81-
}
27+
const rows = parsePairs(json);
8228

8329
if (rows.length === 0) {
8430
console.log('No comparison data found.');
@@ -96,7 +42,6 @@ const deltaW = Math.max(
9642
'Δ'.length,
9743
...rows.map((r) => {
9844
const sign = r.delta > 0 ? '+' : '';
99-
10045
return `${sign}${r.delta.toFixed(1)}%`.length;
10146
})
10247
);

scripts/format-bench-comment.mjs

Lines changed: 20 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
*/
1414

1515
import { readFileSync } from 'node:fs';
16+
import { formatTime, deltaEmoji, parsePairs, readBenchJSON } from './bench-utils.mjs';
1617

1718
const marker = '<!-- bench-compare -->';
1819

@@ -43,76 +44,30 @@ const jsonPath = process.env.BENCH_JSON_OUTPUT;
4344

4445
if (jsonPath) {
4546
try {
46-
const json = JSON.parse(readFileSync(jsonPath, 'utf8'));
47-
summarySection = buildSummary(json);
47+
const rows = parsePairs(readBenchJSON(jsonPath));
48+
49+
if (rows.length > 0) {
50+
const tableRows = rows.map(({ name, control, experiment, delta }) => {
51+
const emoji = deltaEmoji(delta);
52+
const sign = delta > 0 ? '+' : '';
53+
return `| ${emoji} | ${name} | ${formatTime(control)} | ${formatTime(experiment)} | ${sign}${delta.toFixed(1)}% |`;
54+
});
55+
56+
summarySection = [
57+
'',
58+
'| | Benchmark | Control (p50) | Experiment (p50) | Δ |',
59+
'|---|---|---:|---:|---:|',
60+
...tableRows,
61+
'',
62+
'> 🟢 faster · 🔴 slower · 🟠 slightly slower · ⚪ within 2%',
63+
'',
64+
].join('\n');
65+
}
4866
} catch {
4967
// JSON not available or malformed — skip summary
5068
}
5169
}
5270

53-
function formatTime(ns) {
54-
if (ns >= 1e6) return `${(ns / 1e6).toFixed(2)} ms`;
55-
if (ns >= 1e3) return `${(ns / 1e3).toFixed(2)} µs`;
56-
return `${ns.toFixed(2)} ns`;
57-
}
58-
59-
function deltaEmoji(pct) {
60-
const abs = Math.abs(pct);
61-
// negative pct means experiment is faster (lower time = better)
62-
if (abs < 2) return '⚪';
63-
if (pct <= -5) return '🟢';
64-
if (pct >= 5) return '🔴';
65-
if (pct < 0) return '🟢';
66-
return '🟠';
67-
}
68-
69-
function buildSummary(json) {
70-
const benchmarks = json.benchmarks || [];
71-
72-
// In comparison mode, benchmarks come in pairs inside summary groups.
73-
// Each benchmark alias is like "gts small (control)" / "gts small (experiment)".
74-
// Group them by stripping the suffix.
75-
const pairs = new Map();
76-
77-
for (const trial of benchmarks) {
78-
for (const r of trial.runs || []) {
79-
if (!r.stats) continue;
80-
const m = r.name.match(/^(.+)\s+\((control|experiment)\)$/);
81-
if (!m) continue;
82-
const [, key, role] = m;
83-
if (!pairs.has(key)) pairs.set(key, {});
84-
pairs.get(key)[role] = r.stats;
85-
}
86-
}
87-
88-
if (pairs.size === 0) return '';
89-
90-
const rows = [];
91-
for (const [name, { control, experiment }] of pairs) {
92-
if (!control || !experiment) continue;
93-
const ctrlVal = control.p50 ?? control.avg;
94-
const expVal = experiment.p50 ?? experiment.avg;
95-
const delta = ((expVal - ctrlVal) / ctrlVal) * 100;
96-
const emoji = deltaEmoji(delta);
97-
const sign = delta > 0 ? '+' : '';
98-
rows.push(
99-
`| ${emoji} | ${name} | ${formatTime(ctrlVal)} | ${formatTime(expVal)} | ${sign}${delta.toFixed(1)}% |`
100-
);
101-
}
102-
103-
if (rows.length === 0) return '';
104-
105-
return [
106-
'',
107-
'| | Benchmark | Control (p50) | Experiment (p50) | Δ |',
108-
'|---|---|---:|---:|---:|',
109-
...rows,
110-
'',
111-
'> 🟢 faster · 🔴 slower · 🟠 slightly slower · ⚪ within 2%',
112-
'',
113-
].join('\n');
114-
}
115-
11671
// ---------------------------------------------------------------------------
11772
// Assemble comment
11873
// ---------------------------------------------------------------------------

scripts/local-bench-summary.sh

Lines changed: 27 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,65 +1,48 @@
11
#!/usr/bin/env bash
22

3-
# Check CPU tuning on Linux — poor settings cause massive variance
4-
hw_warnings=""
5-
hw_fixes=()
3+
export BENCH_JSON_OUTPUT=./bench-results.json
4+
5+
pnpm bench:compare
6+
7+
echo ""
8+
echo "━━━ Summary ━━━"
9+
node scripts/format-bench-cli.mjs
10+
11+
# Print tips for reducing variance
12+
echo "━━━ Tips for more reliable results ━━━"
13+
echo ""
14+
15+
tips=()
616

717
if [ -f /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor ]; then
818
gov=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor)
919
if [ "$gov" != "performance" ]; then
10-
hw_warnings+="⚠️ CPU governor is '$gov' — benchmark results will be noisy.
11-
"
12-
hw_fixes+=("sudo cpupower frequency-set -g performance")
20+
tips+=("CPU governor is '$gov' — set to 'performance' for fixed frequency:")
21+
tips+=(" sudo cpupower frequency-set -g performance")
22+
tips+=("")
1323
fi
1424
fi
1525

1626
if [ -f /sys/devices/system/cpu/cpufreq/boost ]; then
1727
boost=$(cat /sys/devices/system/cpu/cpufreq/boost)
1828
if [ "$boost" = "1" ]; then
19-
hw_warnings+="⚠️ CPU boost is enabled — frequency varies with thermals.
20-
"
21-
hw_fixes+=("echo 0 | sudo tee /sys/devices/system/cpu/cpufreq/boost")
29+
tips+=("CPU boost is enabled — disable to prevent thermal-dependent frequency:")
30+
tips+=(" echo 0 | sudo tee /sys/devices/system/cpu/cpufreq/boost")
31+
tips+=("")
2232
fi
2333
elif [ -f /sys/devices/system/cpu/intel_pstate/no_turbo ]; then
2434
no_turbo=$(cat /sys/devices/system/cpu/intel_pstate/no_turbo)
2535
if [ "$no_turbo" = "0" ]; then
26-
hw_warnings+="⚠️ Intel Turbo Boost is enabled — frequency varies with thermals.
27-
"
28-
hw_fixes+=("echo 1 | sudo tee /sys/devices/system/cpu/intel_pstate/no_turbo")
36+
tips+=("Intel Turbo Boost is enabled — disable to prevent thermal-dependent frequency:")
37+
tips+=(" echo 1 | sudo tee /sys/devices/system/cpu/intel_pstate/no_turbo")
38+
tips+=("")
2939
fi
3040
fi
3141

32-
if [ -n "$hw_warnings" ]; then
33-
echo ""
34-
echo "$hw_warnings"
35-
36-
if [ ${#hw_fixes[@]} -gt 0 ] && [ -t 0 ]; then
37-
echo "Fix with:"
38-
for fix in "${hw_fixes[@]}"; do
39-
echo " $fix"
40-
done
41-
echo ""
42-
read -rp "Apply these fixes now? [y/N] " answer
43-
if [[ "$answer" =~ ^[Yy]$ ]]; then
44-
for fix in "${hw_fixes[@]}"; do
45-
echo "$fix"
46-
eval "$fix"
47-
done
48-
echo ""
49-
echo "✅ CPU tuning applied."
50-
echo ""
51-
fi
52-
fi
53-
fi
54-
55-
export BENCH_JSON_OUTPUT=./bench-results.json
56-
57-
pnpm bench:compare
42+
tips+=("Close other applications to reduce CPU contention")
43+
tips+=("Run multiple times — if deltas flip sign between runs, they're noise")
5844

45+
for tip in "${tips[@]}"; do
46+
echo " $tip"
47+
done
5948
echo ""
60-
echo "━━━ Summary ━━━"
61-
node scripts/format-bench-cli.mjs
62-
63-
if [ -n "$hw_warnings" ]; then
64-
echo "$hw_warnings"
65-
fi

0 commit comments

Comments
 (0)