diff --git a/package.json b/package.json index d00ab0a..c584149 100644 --- a/package.json +++ b/package.json @@ -25,7 +25,7 @@ "lint:js": "eslint . --max-warnings=0", "lint:js:fix": "eslint . --fix --max-warnings=0", "lint:package": "publint", - "bench": "node --expose-gc tests/parser.bench.mjs", + "bench": "./scripts/run-bench.sh tests/parser.bench.mjs", "bench:compare": "node scripts/bench-compare.mjs", "bench:summary": "./scripts/local-bench-summary.sh", "test": "vitest run" diff --git a/scripts/bench-utils.mjs b/scripts/bench-utils.mjs new file mode 100644 index 0000000..995e882 --- /dev/null +++ b/scripts/bench-utils.mjs @@ -0,0 +1,57 @@ +/** + * Shared utilities for benchmark formatting scripts. + */ + +import { readFileSync } from 'node:fs'; + +export function formatTime(ns) { + if (ns >= 1e6) return `${(ns / 1e6).toFixed(2)} ms`; + if (ns >= 1e3) return `${(ns / 1e3).toFixed(2)} µs`; + return `${ns.toFixed(2)} ns`; +} + +export function deltaEmoji(pct) { + const abs = Math.abs(pct); + if (abs < 2) return '⚪'; + if (pct <= -5) return '🟢'; + if (pct >= 5) return '🔴'; + if (pct < 0) return '🟢'; + return '🟠'; +} + +/** + * Parse benchmark JSON results into control/experiment pairs with deltas. + * Uses p50 (median) which is more robust to outliers than avg. + */ +export function parsePairs(json) { + const pairs = new Map(); + + for (const trial of json.benchmarks || []) { + for (const r of trial.runs || []) { + if (!r.stats) continue; + const m = r.name.match(/^(.+)\s+\((control|experiment)\)$/); + if (!m) continue; + const [, key, role] = m; + if (!pairs.has(key)) pairs.set(key, {}); + pairs.get(key)[role] = r.stats; + } + } + + const rows = []; + for (const [name, { control, experiment }] of pairs) { + if (!control || !experiment) continue; + const ctrlVal = control.p50 ?? control.avg; + const expVal = experiment.p50 ?? 
experiment.avg; + const delta = ((expVal - ctrlVal) / ctrlVal) * 100; + rows.push({ name, control: ctrlVal, experiment: expVal, delta }); + } + + return rows; +} + +/** + * Read and parse the benchmark JSON results file. + */ +export function readBenchJSON(path) { + return JSON.parse(readFileSync(path, 'utf8')); +} diff --git a/scripts/format-bench-cli.mjs b/scripts/format-bench-cli.mjs index 516d5c3..3289af5 100644 --- a/scripts/format-bench-cli.mjs +++ b/scripts/format-bench-cli.mjs @@ -6,7 +6,7 @@ * BENCH_JSON_OUTPUT - Path to the JSON bench results */ -import { readFileSync } from 'node:fs'; +import { formatTime, deltaEmoji, parsePairs, readBenchJSON } from './bench-utils.mjs'; const jsonPath = process.env.BENCH_JSON_OUTPUT; @@ -18,67 +18,13 @@ if (!jsonPath) { let json; try { - json = JSON.parse(readFileSync(jsonPath, 'utf8')); + json = readBenchJSON(jsonPath); } catch (e) { console.error(`Could not read ${jsonPath}: ${e.message}`); process.exit(1); } -function formatTime(ns) { - if (ns >= 1e6) return `${(ns / 1e6).toFixed(2)} ms`; - if (ns >= 1e3) return `${(ns / 1e3).toFixed(2)} µs`; - - return `${ns.toFixed(2)} ns`; -} - -function deltaEmoji(pct) { - const abs = Math.abs(pct); - - if (abs < 2) return '⚪'; - if (pct <= -5) return '🟢'; - if (pct >= 5) return '🔴'; - if (pct < 0) return '🟢'; - - return '🟠'; -} - -// Group control/experiment pairs -const pairs = new Map(); - -for (const trial of json.benchmarks || []) { - for (const r of trial.runs || []) { - if (!r.stats) continue; - - const m = r.name.match(/^(.+)\s+\((control|experiment)\)$/); - - if (!m) continue; - - const [, key, role] = m; - - if (!pairs.has(key)) pairs.set(key, {}); - - pairs.get(key)[role] = r.stats; - } -} - -if (pairs.size === 0) { - console.log('No comparison data found.'); - process.exit(0); -} - -// Build rows — use median (p50) which is far more robust to outliers from -// CPU frequency scaling, GC pauses, and other system noise than the mean. 
-const rows = []; - -for (const [name, { control, experiment }] of pairs) { - if (!control || !experiment) continue; - - const ctrlVal = control.p50 ?? control.avg; - const expVal = experiment.p50 ?? experiment.avg; - const delta = ((expVal - ctrlVal) / ctrlVal) * 100; - - rows.push({ name, control: ctrlVal, experiment: expVal, delta }); -} +const rows = parsePairs(json); if (rows.length === 0) { console.log('No comparison data found.'); @@ -96,7 +42,6 @@ const deltaW = Math.max( 'Δ'.length, ...rows.map((r) => { const sign = r.delta > 0 ? '+' : ''; - return `${sign}${r.delta.toFixed(1)}%`.length; }) ); diff --git a/scripts/format-bench-comment.mjs b/scripts/format-bench-comment.mjs index 2be4eb0..89ab9e9 100644 --- a/scripts/format-bench-comment.mjs +++ b/scripts/format-bench-comment.mjs @@ -13,6 +13,7 @@ */ import { readFileSync } from 'node:fs'; +import { formatTime, deltaEmoji, parsePairs, readBenchJSON } from './bench-utils.mjs'; const marker = ''; @@ -43,74 +44,30 @@ const jsonPath = process.env.BENCH_JSON_OUTPUT; if (jsonPath) { try { - const json = JSON.parse(readFileSync(jsonPath, 'utf8')); - summarySection = buildSummary(json); + const rows = parsePairs(readBenchJSON(jsonPath)); + + if (rows.length > 0) { + const tableRows = rows.map(({ name, control, experiment, delta }) => { + const emoji = deltaEmoji(delta); + const sign = delta > 0 ? 
'+' : ''; + return `| ${emoji} | ${name} | ${formatTime(control)} | ${formatTime(experiment)} | ${sign}${delta.toFixed(1)}% |`; + }); + + summarySection = [ + '', + '| | Benchmark | Control (p50) | Experiment (p50) | Δ |', + '|---|---|---:|---:|---:|', + ...tableRows, + '', + '> 🟢 faster · 🔴 slower · 🟠 slightly slower · ⚪ within 2%', + '', + ].join('\n'); + } } catch { // JSON not available or malformed — skip summary } } -function formatTime(ns) { - if (ns >= 1e6) return `${(ns / 1e6).toFixed(2)} ms`; - if (ns >= 1e3) return `${(ns / 1e3).toFixed(2)} µs`; - return `${ns.toFixed(2)} ns`; -} - -function deltaEmoji(pct) { - const abs = Math.abs(pct); - // negative pct means experiment is faster (lower time = better) - if (abs < 2) return '⚪'; - if (pct <= -5) return '🟢'; - if (pct >= 5) return '🔴'; - if (pct < 0) return '🟢'; - return '🟠'; -} - -function buildSummary(json) { - const benchmarks = json.benchmarks || []; - - // In comparison mode, benchmarks come in pairs inside summary groups. - // Each benchmark alias is like "gts small (control)" / "gts small (experiment)". - // Group them by stripping the suffix. - const pairs = new Map(); - - for (const trial of benchmarks) { - for (const r of trial.runs || []) { - if (!r.stats) continue; - const m = r.name.match(/^(.+)\s+\((control|experiment)\)$/); - if (!m) continue; - const [, key, role] = m; - if (!pairs.has(key)) pairs.set(key, {}); - pairs.get(key)[role] = r.stats; - } - } - - if (pairs.size === 0) return ''; - - const rows = []; - for (const [name, { control, experiment }] of pairs) { - if (!control || !experiment) continue; - const delta = ((experiment.avg - control.avg) / control.avg) * 100; - const emoji = deltaEmoji(delta); - const sign = delta > 0 ? 
'+' : ''; - rows.push( - `| ${emoji} | ${name} | ${formatTime(control.avg)} | ${formatTime(experiment.avg)} | ${sign}${delta.toFixed(1)}% |` - ); - } - - if (rows.length === 0) return ''; - - return [ - '', - '| | Benchmark | Control (avg) | Experiment (avg) | Δ |', - '|---|---|---:|---:|---:|', - ...rows, - '', - '> 🟢 faster · 🔴 slower · 🟠 slightly slower · ⚪ within 2%', - '', - ].join('\n'); -} - // --------------------------------------------------------------------------- // Assemble comment // --------------------------------------------------------------------------- diff --git a/scripts/local-bench-summary.sh b/scripts/local-bench-summary.sh index 2fc4d7f..b2c6915 100755 --- a/scripts/local-bench-summary.sh +++ b/scripts/local-bench-summary.sh @@ -1,39 +1,48 @@ #!/usr/bin/env bash -# Check CPU tuning on Linux — poor settings cause massive variance -hw_warnings="" +export BENCH_JSON_OUTPUT=./bench-results.json + +pnpm bench:compare + +echo "" +echo "━━━ Summary ━━━" +node scripts/format-bench-cli.mjs + +# Print tips for reducing variance +echo "━━━ Tips for more reliable results ━━━" +echo "" + +tips=() if [ -f /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor ]; then gov=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor) if [ "$gov" != "performance" ]; then - hw_warnings+="⚠️ CPU governor is '$gov' — benchmark results will be noisy. - Fix with: sudo cpupower frequency-set -g performance -" + tips+=("CPU governor is '$gov' — set to 'performance' for fixed frequency:") + tips+=(" sudo cpupower frequency-set -g performance") + tips+=("") fi fi if [ -f /sys/devices/system/cpu/cpufreq/boost ]; then boost=$(cat /sys/devices/system/cpu/cpufreq/boost) if [ "$boost" = "1" ]; then - hw_warnings+="⚠️ CPU boost is enabled — frequency varies with thermals. 
- Fix with: echo 0 | sudo tee /sys/devices/system/cpu/cpufreq/boost -" + tips+=("CPU boost is enabled — disable to prevent thermal-dependent frequency:") + tips+=(" echo 0 | sudo tee /sys/devices/system/cpu/cpufreq/boost") + tips+=("") + fi +elif [ -f /sys/devices/system/cpu/intel_pstate/no_turbo ]; then + no_turbo=$(cat /sys/devices/system/cpu/intel_pstate/no_turbo) + if [ "$no_turbo" = "0" ]; then + tips+=("Intel Turbo Boost is enabled — disable to prevent thermal-dependent frequency:") + tips+=(" echo 1 | sudo tee /sys/devices/system/cpu/intel_pstate/no_turbo") + tips+=("") fi fi -if [ -n "$hw_warnings" ]; then - echo "" - echo "$hw_warnings" -fi - -export BENCH_JSON_OUTPUT=./bench-results.json - -pnpm bench:compare +tips+=("Close other applications to reduce CPU contention") +tips+=("Run multiple times — if deltas flip sign between runs, they're noise") +for tip in "${tips[@]}"; do + echo " $tip" +done echo "" -echo "━━━ Summary ━━━" -node scripts/format-bench-cli.mjs - -if [ -n "$hw_warnings" ]; then - echo "$hw_warnings" -fi diff --git a/scripts/run-bench.sh b/scripts/run-bench.sh new file mode 100755 index 0000000..fcf066c --- /dev/null +++ b/scripts/run-bench.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +# +# Wrapper that runs a node command with CPU pinning when available. 
+# +# Usage: ./scripts/run-bench.sh + +set -euo pipefail + +CMD=(node --expose-gc "$@") + +# CPU pinning on Linux — keep the process on a single core +if command -v taskset &>/dev/null; then + CMD=(taskset -c 0 "${CMD[@]}") + echo "📌 CPU pinning enabled (taskset -c 0)" >&2 + echo "" >&2 +fi + +exec "${CMD[@]}" diff --git a/tests/parser.bench.mjs b/tests/parser.bench.mjs index c1d4520..bc4a7ae 100644 --- a/tests/parser.bench.mjs +++ b/tests/parser.bench.mjs @@ -15,7 +15,7 @@ import { createRequire } from 'node:module'; import { readFileSync } from 'node:fs'; import { fileURLToPath } from 'node:url'; import { resolve } from 'node:path'; -import { run, bench, boxplot, summary } from 'mitata'; +import { run, bench, boxplot, summary, do_not_optimize as doNotOptimize } from 'mitata'; // --------------------------------------------------------------------------- // CLI args @@ -111,10 +111,16 @@ for (const { type, ext, experimentParse, controlParse } of PARSERS) { globalThis.gc?.(); +// More iterations per sample → individual GC spikes get diluted, reducing +// variance on noisy CI runners. Scale down for larger fixtures so each +// sample doesn't take too long (mitata needs many samples for stable stats). 
+const BENCH_ITERS = { small: 1000, medium: 500, large: 100 }; + for (const { type, ext, experimentParse, controlParse } of PARSERS) { for (const size of SIZES) { const code = FIXTURES[type][size]; const opts = { ...PARSE_OPTIONS, filePath: `${size}${ext}` }; + const iters = BENCH_ITERS[size]; // Force a full GC before each benchmark group to reduce GC-triggered variance globalThis.gc?.(); @@ -123,13 +129,19 @@ for (const { type, ext, experimentParse, controlParse } of PARSERS) { // Side-by-side comparison with boxplots boxplot(() => { summary(() => { - bench(`${type} ${size} (control)`, () => controlParse(code, opts)); - bench(`${type} ${size} (experiment)`, () => experimentParse(code, opts)); + bench(`${type} ${size} (control)`, () => { + for (let i = 0; i < iters; i++) doNotOptimize(controlParse(code, opts)); + }); + bench(`${type} ${size} (experiment)`, () => { + for (let i = 0; i < iters; i++) doNotOptimize(experimentParse(code, opts)); + }); }); }); } else { // Standalone mode — just benchmark the local parsers - bench(`${type} ${size}`, () => experimentParse(code, opts)); + bench(`${type} ${size}`, () => { + for (let i = 0; i < iters; i++) doNotOptimize(experimentParse(code, opts)); + }); } } }