Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
"lint:js": "eslint . --max-warnings=0",
"lint:js:fix": "eslint . --fix --max-warnings=0",
"lint:package": "publint",
"bench": "node --expose-gc tests/parser.bench.mjs",
"bench": "./scripts/run-bench.sh tests/parser.bench.mjs",
"bench:compare": "node scripts/bench-compare.mjs",
"bench:summary": "./scripts/local-bench-summary.sh",
"test": "vitest run"
Expand Down
57 changes: 57 additions & 0 deletions scripts/bench-utils.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/**
* Shared utilities for benchmark formatting scripts.
*/

import { readFileSync } from 'node:fs';

/**
 * Format a duration given in nanoseconds as a human-readable string.
 *
 * Values of at least 1e6 are shown in milliseconds, values of at least 1e3 in
 * microseconds, and everything else in nanoseconds — always with two decimals.
 *
 * @param {number} ns - Duration in nanoseconds.
 * @returns {string} The scaled value followed by a space and its unit.
 */
export function formatTime(ns) {
  // Largest unit first so the first matching threshold wins.
  const scales = [
    [1e6, 'ms'],
    [1e3, 'µs'],
  ];

  for (const [factor, unit] of scales) {
    if (ns >= factor) return `${(ns / factor).toFixed(2)} ${unit}`;
  }

  return `${ns.toFixed(2)} ns`;
}

/**
 * Pick the status emoji for a percentage delta between experiment and control.
 *
 * - '⚪' when the absolute delta is below 2
 * - '🟢' for any other negative delta
 * - '🔴' for a delta of 5 or more
 * - '🟠' otherwise
 *
 * @param {number} pct - Delta in percent.
 * @returns {string} One of '⚪', '🟢', '🔴', '🟠'.
 */
export function deltaEmoji(pct) {
  if (Math.abs(pct) < 2) return '⚪';
  if (pct < 0) return '🟢';

  return pct >= 5 ? '🔴' : '🟠';
}

/**
 * Parse benchmark JSON results into control/experiment pairs with deltas.
 * Uses p50 (median) which is more robust to outliers than avg; falls back to
 * avg when a run's stats carry no p50.
 *
 * Runs are read from `json.benchmarks[].runs[]` and paired by name: a run named
 * "<key> (control)" is matched with the run named "<key> (experiment)".
 *
 * Entries that cannot produce a meaningful row are skipped instead of yielding
 * NaN/Infinity deltas or undefined values that callers cannot format:
 * - runs without `stats` or without a string `name`
 * - keys missing either the control or the experiment run
 * - pairs whose values are not finite numbers, or whose control value is 0
 *
 * @param {object} json - Parsed benchmark results.
 * @returns {{ name: string, control: number, experiment: number, delta: number }[]}
 *   One row per complete pair, in first-seen order; `delta` is the change of
 *   experiment relative to control, in percent.
 */
export function parsePairs(json) {
  const pairs = new Map();

  for (const trial of json?.benchmarks || []) {
    for (const r of trial?.runs || []) {
      // A run needs both stats and a string name to be assigned to a pair.
      if (!r?.stats || typeof r.name !== 'string') continue;
      const m = r.name.match(/^(.+)\s+\((control|experiment)\)$/);
      if (!m) continue;
      const [, key, role] = m;
      if (!pairs.has(key)) pairs.set(key, {});
      pairs.get(key)[role] = r.stats;
    }
  }

  const rows = [];
  for (const [name, { control, experiment }] of pairs) {
    if (!control || !experiment) continue;
    const ctrlVal = control.p50 ?? control.avg;
    const expVal = experiment.p50 ?? experiment.avg;
    // Dividing by a zero or non-numeric control gives NaN/Infinity; drop the row.
    if (!Number.isFinite(ctrlVal) || !Number.isFinite(expVal) || ctrlVal === 0) continue;
    const delta = ((expVal - ctrlVal) / ctrlVal) * 100;
    rows.push({ name, control: ctrlVal, experiment: expVal, delta });
  }

  return rows;
}

/**
 * Load the benchmark results file and return its parsed JSON content.
 *
 * The file is read synchronously as UTF-8. Read and parse errors are not
 * caught here; they propagate to the caller.
 *
 * @param {string} path - Location of the JSON results file.
 * @returns {*} The value produced by parsing the file's text as JSON.
 */
export function readBenchJSON(path) {
  const text = readFileSync(path, 'utf8');

  return JSON.parse(text);
}
61 changes: 3 additions & 58 deletions scripts/format-bench-cli.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
* BENCH_JSON_OUTPUT - Path to the JSON bench results
*/

import { readFileSync } from 'node:fs';
import { formatTime, deltaEmoji, parsePairs, readBenchJSON } from './bench-utils.mjs';

const jsonPath = process.env.BENCH_JSON_OUTPUT;

Expand All @@ -18,67 +18,13 @@ if (!jsonPath) {
let json;

try {
json = JSON.parse(readFileSync(jsonPath, 'utf8'));
json = readBenchJSON(jsonPath);
} catch (e) {
console.error(`Could not read ${jsonPath}: ${e.message}`);
process.exit(1);
}

/**
 * Turn a nanosecond duration into a two-decimal string in ms, µs or ns,
 * choosing the unit by magnitude (>= 1e6 → ms, >= 1e3 → µs, else ns).
 *
 * @param {number} ns - Duration in nanoseconds.
 * @returns {string} Formatted duration, e.g. "1.50 µs".
 */
function formatTime(ns) {
  let scaled = ns;
  let unit = 'ns';

  if (ns >= 1e6) {
    scaled = ns / 1e6;
    unit = 'ms';
  } else if (ns >= 1e3) {
    scaled = ns / 1e3;
    unit = 'µs';
  }

  return `${scaled.toFixed(2)} ${unit}`;
}

/**
 * Map a percentage delta to a status emoji: '⚪' within ±2, '🟢' for other
 * negative deltas, '🔴' from +5 upwards, and '🟠' for everything else.
 *
 * @param {number} pct - Delta in percent.
 * @returns {string} The emoji for that delta.
 */
function deltaEmoji(pct) {
  const withinNoise = Math.abs(pct) < 2;
  const negative = pct < 0;
  const atLeastFive = pct >= 5;

  // Later assignments take precedence, mirroring the order of the checks:
  // noise band first, then direction, then magnitude.
  let emoji = '🟠';

  if (atLeastFive) emoji = '🔴';
  if (negative) emoji = '🟢';
  if (withinNoise) emoji = '⚪';

  return emoji;
}

// Group control/experiment pairs
const pairs = new Map();

for (const trial of json.benchmarks || []) {
for (const r of trial.runs || []) {
if (!r.stats) continue;

const m = r.name.match(/^(.+)\s+\((control|experiment)\)$/);

if (!m) continue;

const [, key, role] = m;

if (!pairs.has(key)) pairs.set(key, {});

pairs.get(key)[role] = r.stats;
}
}

if (pairs.size === 0) {
console.log('No comparison data found.');
process.exit(0);
}

// Build rows — use median (p50) which is far more robust to outliers from
// CPU frequency scaling, GC pauses, and other system noise than the mean.
const rows = [];

for (const [name, { control, experiment }] of pairs) {
if (!control || !experiment) continue;

const ctrlVal = control.p50 ?? control.avg;
const expVal = experiment.p50 ?? experiment.avg;
const delta = ((expVal - ctrlVal) / ctrlVal) * 100;

rows.push({ name, control: ctrlVal, experiment: expVal, delta });
}
const rows = parsePairs(json);

if (rows.length === 0) {
console.log('No comparison data found.');
Expand All @@ -96,7 +42,6 @@ const deltaW = Math.max(
'Δ'.length,
...rows.map((r) => {
const sign = r.delta > 0 ? '+' : '';

return `${sign}${r.delta.toFixed(1)}%`.length;
})
);
Expand Down
83 changes: 20 additions & 63 deletions scripts/format-bench-comment.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
*/

import { readFileSync } from 'node:fs';
import { formatTime, deltaEmoji, parsePairs, readBenchJSON } from './bench-utils.mjs';

const marker = '<!-- bench-compare -->';

Expand Down Expand Up @@ -43,74 +44,30 @@ const jsonPath = process.env.BENCH_JSON_OUTPUT;

if (jsonPath) {
try {
const json = JSON.parse(readFileSync(jsonPath, 'utf8'));
summarySection = buildSummary(json);
const rows = parsePairs(readBenchJSON(jsonPath));

if (rows.length > 0) {
const tableRows = rows.map(({ name, control, experiment, delta }) => {
const emoji = deltaEmoji(delta);
const sign = delta > 0 ? '+' : '';
return `| ${emoji} | ${name} | ${formatTime(control)} | ${formatTime(experiment)} | ${sign}${delta.toFixed(1)}% |`;
});

summarySection = [
'',
'| | Benchmark | Control (p50) | Experiment (p50) | Δ |',
'|---|---|---:|---:|---:|',
...tableRows,
'',
'> 🟢 faster · 🔴 slower · 🟠 slightly slower · ⚪ within 2%',
'',
].join('\n');
}
} catch {
// JSON not available or malformed — skip summary
}
}

/**
 * Format nanoseconds with two decimals, scaled to ms (>= 1e6), µs (>= 1e3),
 * or left in ns.
 *
 * @param {number} ns - Duration in nanoseconds.
 * @returns {string} Value and unit separated by a space.
 */
function formatTime(ns) {
  return ns >= 1e6
    ? `${(ns / 1e6).toFixed(2)} ms`
    : ns >= 1e3
      ? `${(ns / 1e3).toFixed(2)} µs`
      : `${ns.toFixed(2)} ns`;
}

/**
 * Choose the emoji shown next to a benchmark delta.
 * A negative pct means the experiment took less time than the control.
 *
 * @param {number} pct - Delta in percent.
 * @returns {string} '⚪' (|pct| < 2), '🟢' (negative), '🔴' (>= 5) or '🟠'.
 */
function deltaEmoji(pct) {
  switch (true) {
    case Math.abs(pct) < 2:
      return '⚪';
    case pct < 0:
      return '🟢';
    case pct >= 5:
      return '🔴';
    default:
      return '🟠';
  }
}

/**
 * Build the markdown summary table comparing control and experiment runs.
 *
 * Runs are taken from `json.benchmarks[].runs[]`. A run named
 * "<key> (control)" is paired with the run named "<key> (experiment)", and
 * each complete pair becomes one table row based on the runs' `stats.avg`.
 *
 * @param {object} json - Parsed benchmark results.
 * @returns {string} The table surrounded by blank lines and followed by a
 *   legend, or '' when there is no complete control/experiment pair.
 */
function buildSummary(json) {
  const suffixPattern = /^(.+)\s+\((control|experiment)\)$/;
  const statsByName = new Map();

  // Collect stats under the benchmark name with the role suffix stripped.
  for (const trial of json.benchmarks || []) {
    for (const run of trial.runs || []) {
      if (!run.stats) continue;

      const matched = run.name.match(suffixPattern);

      if (!matched) continue;

      const entry = statsByName.get(matched[1]) ?? {};

      entry[matched[2]] = run.stats;
      statsByName.set(matched[1], entry);
    }
  }

  // Only names that have both roles yield a row.
  const lines = [...statsByName]
    .filter(([, { control, experiment }]) => control && experiment)
    .map(([name, { control, experiment }]) => {
      const pct = ((experiment.avg - control.avg) / control.avg) * 100;
      const prefix = pct > 0 ? '+' : '';

      return `| ${deltaEmoji(pct)} | ${name} | ${formatTime(control.avg)} | ${formatTime(experiment.avg)} | ${prefix}${pct.toFixed(1)}% |`;
    });

  if (lines.length === 0) return '';

  const head = [
    '',
    '| | Benchmark | Control (avg) | Experiment (avg) | Δ |',
    '|---|---|---:|---:|---:|',
  ];
  const tail = [
    '',
    '> 🟢 faster · 🔴 slower · 🟠 slightly slower · ⚪ within 2%',
    '',
  ];

  return head.concat(lines, tail).join('\n');
}

// ---------------------------------------------------------------------------
// Assemble comment
// ---------------------------------------------------------------------------
Expand Down
53 changes: 31 additions & 22 deletions scripts/local-bench-summary.sh
Original file line number Diff line number Diff line change
@@ -1,39 +1,48 @@
#!/usr/bin/env bash

# Check CPU tuning on Linux — poor settings cause massive variance
hw_warnings=""
export BENCH_JSON_OUTPUT=./bench-results.json

pnpm bench:compare

echo ""
echo "━━━ Summary ━━━"
node scripts/format-bench-cli.mjs

# Print tips for reducing variance
echo "━━━ Tips for more reliable results ━━━"
echo ""

tips=()

if [ -f /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor ]; then
gov=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor)
if [ "$gov" != "performance" ]; then
hw_warnings+="⚠️ CPU governor is '$gov' — benchmark results will be noisy.
Fix with: sudo cpupower frequency-set -g performance
"
tips+=("CPU governor is '$gov' — set to 'performance' for fixed frequency:")
tips+=(" sudo cpupower frequency-set -g performance")
tips+=("")
fi
fi

if [ -f /sys/devices/system/cpu/cpufreq/boost ]; then
boost=$(cat /sys/devices/system/cpu/cpufreq/boost)
if [ "$boost" = "1" ]; then
hw_warnings+="⚠️ CPU boost is enabled — frequency varies with thermals.
Fix with: echo 0 | sudo tee /sys/devices/system/cpu/cpufreq/boost
"
tips+=("CPU boost is enabled — disable to prevent thermal-dependent frequency:")
tips+=(" echo 0 | sudo tee /sys/devices/system/cpu/cpufreq/boost")
tips+=("")
fi
elif [ -f /sys/devices/system/cpu/intel_pstate/no_turbo ]; then
no_turbo=$(cat /sys/devices/system/cpu/intel_pstate/no_turbo)
if [ "$no_turbo" = "0" ]; then
tips+=("Intel Turbo Boost is enabled — disable to prevent thermal-dependent frequency:")
tips+=(" echo 1 | sudo tee /sys/devices/system/cpu/intel_pstate/no_turbo")
tips+=("")
fi
fi

if [ -n "$hw_warnings" ]; then
echo ""
echo "$hw_warnings"
fi

export BENCH_JSON_OUTPUT=./bench-results.json

pnpm bench:compare
tips+=("Close other applications to reduce CPU contention")
tips+=("Run multiple times — if deltas flip sign between runs, they're noise")

for tip in "${tips[@]}"; do
echo " $tip"
done
echo ""
echo "━━━ Summary ━━━"
node scripts/format-bench-cli.mjs

if [ -n "$hw_warnings" ]; then
echo "$hw_warnings"
fi
18 changes: 18 additions & 0 deletions scripts/run-bench.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env bash
#
# Wrapper that runs a node command with CPU pinning when available.
#
# Usage: ./scripts/run-bench.sh <node args...>
#
# Environment:
#   BENCH_CPU - CPU index to pin the process to (default: 0)

set -euo pipefail

CMD=(node --expose-gc "$@")
BENCH_CPU="${BENCH_CPU:-0}"

# CPU pinning on Linux — keep the process on a single core.
# Probe with a no-op first: taskset fails when the requested CPU is outside the
# process's allowed affinity set (e.g. a restricted cpuset), and in that case
# the benchmark should run unpinned instead of failing to start.
if command -v taskset &>/dev/null && taskset -c "$BENCH_CPU" true &>/dev/null; then
  CMD=(taskset -c "$BENCH_CPU" "${CMD[@]}")
  echo "📌 CPU pinning enabled (taskset -c $BENCH_CPU)" >&2
  echo "" >&2
fi

exec "${CMD[@]}"
20 changes: 16 additions & 4 deletions tests/parser.bench.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import { createRequire } from 'node:module';
import { readFileSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
import { resolve } from 'node:path';
import { run, bench, boxplot, summary } from 'mitata';
import { run, bench, boxplot, summary, do_not_optimize } from 'mitata';

// ---------------------------------------------------------------------------
// CLI args
Expand Down Expand Up @@ -111,10 +111,16 @@ for (const { type, ext, experimentParse, controlParse } of PARSERS) {

globalThis.gc?.();

// More iterations per sample → individual GC spikes get diluted, reducing
// variance on noisy CI runners. Scale down for larger fixtures so each
// sample doesn't take too long (mitata needs many samples for stable stats).
const BENCH_ITERS = { small: 2000, medium: 500, large: 100 };
Comment thread
NullVoxPopuli marked this conversation as resolved.
Outdated

for (const { type, ext, experimentParse, controlParse } of PARSERS) {
for (const size of SIZES) {
const code = FIXTURES[type][size];
const opts = { ...PARSE_OPTIONS, filePath: `${size}${ext}` };
const iters = BENCH_ITERS[size];

// Force a full GC before each benchmark group to reduce GC-triggered variance
globalThis.gc?.();
Expand All @@ -123,13 +129,19 @@ for (const { type, ext, experimentParse, controlParse } of PARSERS) {
// Side-by-side comparison with boxplots
boxplot(() => {
summary(() => {
bench(`${type} ${size} (control)`, () => controlParse(code, opts));
bench(`${type} ${size} (experiment)`, () => experimentParse(code, opts));
bench(`${type} ${size} (control)`, () => {
for (let i = 0; i < iters; i++) do_not_optimize(controlParse(code, opts));
});
bench(`${type} ${size} (experiment)`, () => {
for (let i = 0; i < iters; i++) do_not_optimize(experimentParse(code, opts));
});
});
});
} else {
// Standalone mode — just benchmark the local parsers
bench(`${type} ${size}`, () => experimentParse(code, opts));
bench(`${type} ${size}`, () => {
for (let i = 0; i < iters; i++) do_not_optimize(experimentParse(code, opts));
});
}
}
}
Expand Down
Loading