Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
"lint:js": "eslint . --max-warnings=0",
"lint:js:fix": "eslint . --fix --max-warnings=0",
"lint:package": "publint",
"bench": "node --expose-gc tests/parser.bench.mjs",
"bench": "./scripts/run-bench.sh tests/parser.bench.mjs",
"bench:compare": "node scripts/bench-compare.mjs",
"bench:summary": "./scripts/local-bench-summary.sh",
"test": "vitest run"
Expand Down
57 changes: 57 additions & 0 deletions scripts/bench-utils.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/**
* Shared utilities for benchmark formatting scripts.
*/

import { readFileSync } from 'node:fs';

/**
 * Render a duration (given in nanoseconds) as a human-readable string,
 * auto-selecting the largest fitting unit: ms, µs, or ns.
 *
 * @param {number} ns - Duration in nanoseconds.
 * @returns {string} e.g. "2.50 ms", "1.50 µs", "999.00 ns".
 */
export function formatTime(ns) {
  // Ordered largest-first so the first matching scale wins.
  const scales = [
    [1e6, 'ms'],
    [1e3, 'µs'],
  ];
  for (const [scale, unit] of scales) {
    if (ns >= scale) return `${(ns / scale).toFixed(2)} ${unit}`;
  }
  return `${ns.toFixed(2)} ns`;
}

/**
 * Pick a status emoji for a percentage delta (negative = faster/better).
 *
 * ⚪ within ±2% (noise) · 🟢 any improvement beyond noise ·
 * 🔴 regression of 5%+ · 🟠 small regression (2–5%).
 *
 * @param {number} pct - Percentage change, experiment vs control.
 * @returns {string} Emoji classifying the delta.
 */
export function deltaEmoji(pct) {
  // Anything inside the ±2% band is treated as measurement noise.
  if (Math.abs(pct) < 2) return '⚪';
  // Improvements (negative) are green regardless of magnitude.
  if (pct < 0) return '🟢';
  // Regressions: red at 5%+, orange for the 2–5% gray zone.
  return pct >= 5 ? '🔴' : '🟠';
}

/**
* Parse benchmark JSON results into control/experiment pairs with deltas.
* Uses p50 (median) which is more robust to outliers than avg.
*/
/**
 * Parse benchmark JSON results into control/experiment pairs with deltas.
 *
 * Run names are expected to look like "name (control)" / "name (experiment)";
 * only names with both roles present produce a row. Uses p50 (median) when
 * available, which is more robust to outliers than avg.
 *
 * @param {object} json - Parsed benchmark results (`{ benchmarks: [...] }`).
 * @returns {Array<{name: string, control: number, experiment: number, delta: number}>}
 *   One row per complete pair; `delta` is the percentage change vs control.
 */
export function parsePairs(json) {
  const ROLE_RE = /^(.+)\s+\((control|experiment)\)$/;
  const byName = new Map();

  for (const trial of json.benchmarks || []) {
    for (const run of trial.runs || []) {
      // Runs without stats (e.g. skipped/failed) carry no timing data.
      if (!run.stats) continue;
      const match = ROLE_RE.exec(run.name);
      if (!match) continue;
      const [, benchName, role] = match;
      const entry = byName.get(benchName) ?? {};
      entry[role] = run.stats;
      byName.set(benchName, entry);
    }
  }

  // Keep only complete pairs, preferring the median over the mean.
  return [...byName]
    .filter(([, pair]) => pair.control && pair.experiment)
    .map(([name, { control, experiment }]) => {
      const controlVal = control.p50 ?? control.avg;
      const experimentVal = experiment.p50 ?? experiment.avg;
      const delta = ((experimentVal - controlVal) / controlVal) * 100;
      return { name, control: controlVal, experiment: experimentVal, delta };
    });
}

/**
* Read and parse the benchmark JSON results file.
*/
/**
 * Read and parse the benchmark JSON results file.
 *
 * @param {string} path - Filesystem path to the JSON results file.
 * @returns {object} The parsed JSON content.
 * @throws If the file cannot be read or contains invalid JSON.
 */
export function readBenchJSON(path) {
  const raw = readFileSync(path, 'utf8');
  return JSON.parse(raw);
}
61 changes: 3 additions & 58 deletions scripts/format-bench-cli.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
* BENCH_JSON_OUTPUT - Path to the JSON bench results
*/

import { readFileSync } from 'node:fs';
import { formatTime, deltaEmoji, parsePairs, readBenchJSON } from './bench-utils.mjs';

const jsonPath = process.env.BENCH_JSON_OUTPUT;

Expand All @@ -18,67 +18,13 @@ if (!jsonPath) {
let json;

try {
json = JSON.parse(readFileSync(jsonPath, 'utf8'));
json = readBenchJSON(jsonPath);
} catch (e) {
console.error(`Could not read ${jsonPath}: ${e.message}`);
process.exit(1);
}

function formatTime(ns) {
if (ns >= 1e6) return `${(ns / 1e6).toFixed(2)} ms`;
if (ns >= 1e3) return `${(ns / 1e3).toFixed(2)} µs`;

return `${ns.toFixed(2)} ns`;
}

function deltaEmoji(pct) {
const abs = Math.abs(pct);

if (abs < 2) return '⚪';
if (pct <= -5) return '🟢';
if (pct >= 5) return '🔴';
if (pct < 0) return '🟢';

return '🟠';
}

// Group control/experiment pairs
const pairs = new Map();

for (const trial of json.benchmarks || []) {
for (const r of trial.runs || []) {
if (!r.stats) continue;

const m = r.name.match(/^(.+)\s+\((control|experiment)\)$/);

if (!m) continue;

const [, key, role] = m;

if (!pairs.has(key)) pairs.set(key, {});

pairs.get(key)[role] = r.stats;
}
}

if (pairs.size === 0) {
console.log('No comparison data found.');
process.exit(0);
}

// Build rows — use median (p50) which is far more robust to outliers from
// CPU frequency scaling, GC pauses, and other system noise than the mean.
const rows = [];

for (const [name, { control, experiment }] of pairs) {
if (!control || !experiment) continue;

const ctrlVal = control.p50 ?? control.avg;
const expVal = experiment.p50 ?? experiment.avg;
const delta = ((expVal - ctrlVal) / ctrlVal) * 100;

rows.push({ name, control: ctrlVal, experiment: expVal, delta });
}
const rows = parsePairs(json);

if (rows.length === 0) {
console.log('No comparison data found.');
Expand All @@ -96,7 +42,6 @@ const deltaW = Math.max(
'Δ'.length,
...rows.map((r) => {
const sign = r.delta > 0 ? '+' : '';

return `${sign}${r.delta.toFixed(1)}%`.length;
})
);
Expand Down
83 changes: 20 additions & 63 deletions scripts/format-bench-comment.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
*/

import { readFileSync } from 'node:fs';
import { formatTime, deltaEmoji, parsePairs, readBenchJSON } from './bench-utils.mjs';

const marker = '<!-- bench-compare -->';

Expand Down Expand Up @@ -43,74 +44,30 @@ const jsonPath = process.env.BENCH_JSON_OUTPUT;

if (jsonPath) {
try {
const json = JSON.parse(readFileSync(jsonPath, 'utf8'));
summarySection = buildSummary(json);
const rows = parsePairs(readBenchJSON(jsonPath));

if (rows.length > 0) {
const tableRows = rows.map(({ name, control, experiment, delta }) => {
const emoji = deltaEmoji(delta);
const sign = delta > 0 ? '+' : '';
return `| ${emoji} | ${name} | ${formatTime(control)} | ${formatTime(experiment)} | ${sign}${delta.toFixed(1)}% |`;
});

summarySection = [
'',
'| | Benchmark | Control (p50) | Experiment (p50) | Δ |',
'|---|---|---:|---:|---:|',
...tableRows,
'',
'> 🟢 faster · 🔴 slower · 🟠 slightly slower · ⚪ within 2%',
'',
].join('\n');
}
} catch {
// JSON not available or malformed — skip summary
}
}

function formatTime(ns) {
if (ns >= 1e6) return `${(ns / 1e6).toFixed(2)} ms`;
if (ns >= 1e3) return `${(ns / 1e3).toFixed(2)} µs`;
return `${ns.toFixed(2)} ns`;
}

function deltaEmoji(pct) {
const abs = Math.abs(pct);
// negative pct means experiment is faster (lower time = better)
if (abs < 2) return '⚪';
if (pct <= -5) return '🟢';
if (pct >= 5) return '🔴';
if (pct < 0) return '🟢';
return '🟠';
}

function buildSummary(json) {
const benchmarks = json.benchmarks || [];

// In comparison mode, benchmarks come in pairs inside summary groups.
// Each benchmark alias is like "gts small (control)" / "gts small (experiment)".
// Group them by stripping the suffix.
const pairs = new Map();

for (const trial of benchmarks) {
for (const r of trial.runs || []) {
if (!r.stats) continue;
const m = r.name.match(/^(.+)\s+\((control|experiment)\)$/);
if (!m) continue;
const [, key, role] = m;
if (!pairs.has(key)) pairs.set(key, {});
pairs.get(key)[role] = r.stats;
}
}

if (pairs.size === 0) return '';

const rows = [];
for (const [name, { control, experiment }] of pairs) {
if (!control || !experiment) continue;
const delta = ((experiment.avg - control.avg) / control.avg) * 100;
const emoji = deltaEmoji(delta);
const sign = delta > 0 ? '+' : '';
rows.push(
`| ${emoji} | ${name} | ${formatTime(control.avg)} | ${formatTime(experiment.avg)} | ${sign}${delta.toFixed(1)}% |`
);
}

if (rows.length === 0) return '';

return [
'',
'| | Benchmark | Control (avg) | Experiment (avg) | Δ |',
'|---|---|---:|---:|---:|',
...rows,
'',
'> 🟢 faster · 🔴 slower · 🟠 slightly slower · ⚪ within 2%',
'',
].join('\n');
}

// ---------------------------------------------------------------------------
// Assemble comment
// ---------------------------------------------------------------------------
Expand Down
53 changes: 31 additions & 22 deletions scripts/local-bench-summary.sh
Original file line number Diff line number Diff line change
@@ -1,39 +1,48 @@
#!/usr/bin/env bash

# Check CPU tuning on Linux — poor settings cause massive variance
hw_warnings=""
export BENCH_JSON_OUTPUT=./bench-results.json

pnpm bench:compare

echo ""
echo "━━━ Summary ━━━"
node scripts/format-bench-cli.mjs

# Print tips for reducing variance
echo "━━━ Tips for more reliable results ━━━"
echo ""

tips=()

if [ -f /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor ]; then
gov=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor)
if [ "$gov" != "performance" ]; then
hw_warnings+="⚠️ CPU governor is '$gov' — benchmark results will be noisy.
Fix with: sudo cpupower frequency-set -g performance
"
tips+=("CPU governor is '$gov' — set to 'performance' for fixed frequency:")
tips+=(" sudo cpupower frequency-set -g performance")
tips+=("")
fi
fi

if [ -f /sys/devices/system/cpu/cpufreq/boost ]; then
boost=$(cat /sys/devices/system/cpu/cpufreq/boost)
if [ "$boost" = "1" ]; then
hw_warnings+="⚠️ CPU boost is enabled — frequency varies with thermals.
Fix with: echo 0 | sudo tee /sys/devices/system/cpu/cpufreq/boost
"
tips+=("CPU boost is enabled — disable to prevent thermal-dependent frequency:")
tips+=(" echo 0 | sudo tee /sys/devices/system/cpu/cpufreq/boost")
tips+=("")
fi
elif [ -f /sys/devices/system/cpu/intel_pstate/no_turbo ]; then
no_turbo=$(cat /sys/devices/system/cpu/intel_pstate/no_turbo)
if [ "$no_turbo" = "0" ]; then
tips+=("Intel Turbo Boost is enabled — disable to prevent thermal-dependent frequency:")
tips+=(" echo 1 | sudo tee /sys/devices/system/cpu/intel_pstate/no_turbo")
tips+=("")
fi
fi

if [ -n "$hw_warnings" ]; then
echo ""
echo "$hw_warnings"
fi

export BENCH_JSON_OUTPUT=./bench-results.json

pnpm bench:compare
tips+=("Close other applications to reduce CPU contention")
tips+=("Run multiple times — if deltas flip sign between runs, they're noise")

for tip in "${tips[@]}"; do
echo " $tip"
done
echo ""
echo "━━━ Summary ━━━"
node scripts/format-bench-cli.mjs

if [ -n "$hw_warnings" ]; then
echo "$hw_warnings"
fi
18 changes: 18 additions & 0 deletions scripts/run-bench.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env bash
#
# Wrapper that runs a node command with CPU pinning when available.
#
# Usage: ./scripts/run-bench.sh <node args...>
#
# All arguments are forwarded to `node --expose-gc`; --expose-gc makes
# globalThis.gc available so benchmark scripts can force collections.

set -euo pipefail

# Base command: node with GC exposed plus the caller's script/arguments.
CMD=(node --expose-gc "$@")

# CPU pinning on Linux — keep the process on a single core
# (taskset is Linux-only; where it's absent, the command runs unpinned).
if command -v taskset &>/dev/null; then
CMD=(taskset -c 0 "${CMD[@]}")
# Status messages go to stderr so stdout stays clean for result output.
echo "📌 CPU pinning enabled (taskset -c 0)" >&2
echo "" >&2
fi

# exec replaces this shell so signals and exit status pass straight through.
exec "${CMD[@]}"
20 changes: 16 additions & 4 deletions tests/parser.bench.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import { createRequire } from 'node:module';
import { readFileSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
import { resolve } from 'node:path';
import { run, bench, boxplot, summary } from 'mitata';
import { run, bench, boxplot, summary, do_not_optimize as doNotOptimize } from 'mitata';

// ---------------------------------------------------------------------------
// CLI args
Expand Down Expand Up @@ -111,10 +111,16 @@ for (const { type, ext, experimentParse, controlParse } of PARSERS) {

globalThis.gc?.();

// More iterations per sample → individual GC spikes get diluted, reducing
// variance on noisy CI runners. Scale down for larger fixtures so each
// sample doesn't take too long (mitata needs many samples for stable stats).
const BENCH_ITERS = { small: 1000, medium: 500, large: 100 };

for (const { type, ext, experimentParse, controlParse } of PARSERS) {
for (const size of SIZES) {
const code = FIXTURES[type][size];
const opts = { ...PARSE_OPTIONS, filePath: `${size}${ext}` };
const iters = BENCH_ITERS[size];

// Force a full GC before each benchmark group to reduce GC-triggered variance
globalThis.gc?.();
Expand All @@ -123,13 +129,19 @@ for (const { type, ext, experimentParse, controlParse } of PARSERS) {
// Side-by-side comparison with boxplots
boxplot(() => {
summary(() => {
bench(`${type} ${size} (control)`, () => controlParse(code, opts));
bench(`${type} ${size} (experiment)`, () => experimentParse(code, opts));
bench(`${type} ${size} (control)`, () => {
for (let i = 0; i < iters; i++) doNotOptimize(controlParse(code, opts));
});
bench(`${type} ${size} (experiment)`, () => {
for (let i = 0; i < iters; i++) doNotOptimize(experimentParse(code, opts));
});
});
});
} else {
// Standalone mode — just benchmark the local parsers
bench(`${type} ${size}`, () => experimentParse(code, opts));
bench(`${type} ${size}`, () => {
for (let i = 0; i < iters; i++) doNotOptimize(experimentParse(code, opts));
});
}
}
}
Expand Down
Loading