From 7db6dcde57c55a5803f55d44c31f480400ddd330 Mon Sep 17 00:00:00 2001 From: NullVoxPopuli <199018+NullVoxPopuli@users.noreply.github.com> Date: Thu, 19 Mar 2026 23:14:31 -0400 Subject: [PATCH 01/10] Increase benchmark sampling 5x for more stable CI results Uses a Node.js module loader hook to intercept mitata's lib.mjs at load time and increase k_min_cpu_time and k_min_samples by 5x, without modifying node_modules on disk. Co-Authored-By: Claude Opus 4.6 (1M context) --- package.json | 2 +- scripts/bench-compare.mjs | 3 +++ tests/bench-sampler-hooks.mjs | 31 +++++++++++++++++++++++++++++++ tests/bench-sampler.mjs | 13 +++++++++++++ 4 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 tests/bench-sampler-hooks.mjs create mode 100644 tests/bench-sampler.mjs diff --git a/package.json b/package.json index d00ab0a..e1d3576 100644 --- a/package.json +++ b/package.json @@ -25,7 +25,7 @@ "lint:js": "eslint . --max-warnings=0", "lint:js:fix": "eslint . --fix --max-warnings=0", "lint:package": "publint", - "bench": "node --expose-gc tests/parser.bench.mjs", + "bench": "node --expose-gc --import ./tests/bench-sampler.mjs tests/parser.bench.mjs", "bench:compare": "node scripts/bench-compare.mjs", "bench:summary": "./scripts/local-bench-summary.sh", "test": "vitest run" diff --git a/scripts/bench-compare.mjs b/scripts/bench-compare.mjs index 2d8339f..a20cdca 100644 --- a/scripts/bench-compare.mjs +++ b/scripts/bench-compare.mjs @@ -97,9 +97,12 @@ try { console.error(`\n๐ŸŽ๏ธ Running benchmarks (experiment vs control)โ€ฆ\n`); const benchScript = join(ROOT, 'tests/parser.bench.mjs'); + const samplerHook = join(ROOT, 'tests/bench-sampler.mjs'); const benchArgs = [ '--expose-gc', '--max-old-space-size=4096', + '--import', + samplerHook, benchScript, '--control-dir', CONTROL_DIR, diff --git a/tests/bench-sampler-hooks.mjs b/tests/bench-sampler-hooks.mjs new file mode 100644 index 0000000..9d69870 --- /dev/null +++ b/tests/bench-sampler-hooks.mjs @@ -0,0 +1,31 @@ 
+/** + * Module loader hooks for increasing mitata sampling. + * + * Intercepts mitata's lib.mjs at load time and rewrites the + * k_min_cpu_time and k_min_samples constants to 5ร— their defaults. + */ + +const SAMPLE_MULTIPLIER = 5; + +export async function load(url, context, nextLoad) { + if (url.includes('mitata') && url.endsWith('lib.mjs')) { + const result = await nextLoad(url, context); + let source = typeof result.source === 'string' + ? result.source + : new TextDecoder().decode(result.source); + + source = source + .replace( + /export const k_min_cpu_time = .+/, + `export const k_min_cpu_time = ${SAMPLE_MULTIPLIER} * 642 * 1e6;`, + ) + .replace( + /export const k_min_samples = .+/, + `export const k_min_samples = ${SAMPLE_MULTIPLIER} * 12;`, + ); + + return { ...result, source, shortCircuit: true }; + } + + return nextLoad(url, context); +} diff --git a/tests/bench-sampler.mjs b/tests/bench-sampler.mjs new file mode 100644 index 0000000..c676c26 --- /dev/null +++ b/tests/bench-sampler.mjs @@ -0,0 +1,13 @@ +/** + * Node.js module-resolution hook that increases mitata's sampling constants. + * + * Register via: node --import ./tests/bench-sampler.mjs ... + * + * This uses Node's module customization API to intercept mitata's lib.mjs + * at load time and replace the min_cpu_time / min_samples constants with + * higher values (5ร—) for more stable results on CI. 
+ */ + +import { register } from 'node:module'; + +register(new URL('./bench-sampler-hooks.mjs', import.meta.url)); From 18d2ead6e708b314c276610eae1dfb9974f27d53 Mon Sep 17 00:00:00 2001 From: NullVoxPopuli <199018+NullVoxPopuli@users.noreply.github.com> Date: Thu, 19 Mar 2026 23:20:43 -0400 Subject: [PATCH 02/10] Use mitata gc('inner') and p50 for stable CI benchmarks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Enable .gc('inner') on all bench calls — forces GC between iterations and subtracts GC time from measurements, eliminating GC-spike noise - Switch PR comment summary from avg to p50 (median), matching the CLI formatter — median is far more robust to outliers from CPU scaling and system noise on shared CI runners Co-Authored-By: Claude Opus 4.6 (1M context) --- scripts/format-bench-comment.mjs | 8 +++++--- tests/parser.bench.mjs | 6 +++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/scripts/format-bench-comment.mjs b/scripts/format-bench-comment.mjs index 2be4eb0..f59e453 100644 --- a/scripts/format-bench-comment.mjs +++ b/scripts/format-bench-comment.mjs @@ -90,11 +90,13 @@ function buildSummary(json) { const rows = []; for (const [name, { control, experiment }] of pairs) { if (!control || !experiment) continue; - const delta = ((experiment.avg - control.avg) / control.avg) * 100; + const ctrlVal = control.p50 ?? control.avg; + const expVal = experiment.p50 ?? experiment.avg; + const delta = ((expVal - ctrlVal) / ctrlVal) * 100; const emoji = deltaEmoji(delta); const sign = delta > 0 ? 
'+' : ''; rows.push( - `| ${emoji} | ${name} | ${formatTime(control.avg)} | ${formatTime(experiment.avg)} | ${sign}${delta.toFixed(1)}% |` + `| ${emoji} | ${name} | ${formatTime(ctrlVal)} | ${formatTime(expVal)} | ${sign}${delta.toFixed(1)}% |` ); } @@ -102,7 +104,7 @@ function buildSummary(json) { return [ '', - '| | Benchmark | Control (avg) | Experiment (avg) | ฮ” |', + '| | Benchmark | Control (p50) | Experiment (p50) | ฮ” |', '|---|---|---:|---:|---:|', ...rows, '', diff --git a/tests/parser.bench.mjs b/tests/parser.bench.mjs index c1d4520..f7e46f7 100644 --- a/tests/parser.bench.mjs +++ b/tests/parser.bench.mjs @@ -123,13 +123,13 @@ for (const { type, ext, experimentParse, controlParse } of PARSERS) { // Side-by-side comparison with boxplots boxplot(() => { summary(() => { - bench(`${type} ${size} (control)`, () => controlParse(code, opts)); - bench(`${type} ${size} (experiment)`, () => experimentParse(code, opts)); + bench(`${type} ${size} (control)`, () => controlParse(code, opts)).gc('inner'); + bench(`${type} ${size} (experiment)`, () => experimentParse(code, opts)).gc('inner'); }); }); } else { // Standalone mode โ€” just benchmark the local parsers - bench(`${type} ${size}`, () => experimentParse(code, opts)); + bench(`${type} ${size}`, () => experimentParse(code, opts)).gc('inner'); } } } From e20879844f9f7ee4556942b08ddd910c6971de7e Mon Sep 17 00:00:00 2001 From: NullVoxPopuli <199018+NullVoxPopuli@users.noreply.github.com> Date: Thu, 19 Mar 2026 23:32:01 -0400 Subject: [PATCH 03/10] Replace loader hook with iteration loops for CI stability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of patching mitata internals via ESM loader hooks, run each parse call in a loop (1000/200/100 for small/medium/large) so that individual GC spikes get diluted within each sample. This is simpler and more effective than increasing mitata's sampling constants. 
Also reverts gc('inner') which was counterproductive — the 44-71ms GC between iterations trashed CPU caches and increased variance. Co-Authored-By: Claude Opus 4.6 (1M context) --- package.json | 2 +- scripts/bench-compare.mjs | 3 --- tests/bench-sampler-hooks.mjs | 31 ------------------------------- tests/bench-sampler.mjs | 13 ------------- tests/parser.bench.mjs | 18 +++++++++++++++--- 5 files changed, 16 insertions(+), 51 deletions(-) delete mode 100644 tests/bench-sampler-hooks.mjs delete mode 100644 tests/bench-sampler.mjs diff --git a/package.json b/package.json index e1d3576..d00ab0a 100644 --- a/package.json +++ b/package.json @@ -25,7 +25,7 @@ "lint:js": "eslint . --max-warnings=0", "lint:js:fix": "eslint . --fix --max-warnings=0", "lint:package": "publint", - "bench": "node --expose-gc --import ./tests/bench-sampler.mjs tests/parser.bench.mjs", + "bench": "node --expose-gc tests/parser.bench.mjs", "bench:compare": "node scripts/bench-compare.mjs", "bench:summary": "./scripts/local-bench-summary.sh", "test": "vitest run" diff --git a/scripts/bench-compare.mjs b/scripts/bench-compare.mjs index a20cdca..2d8339f 100644 --- a/scripts/bench-compare.mjs +++ b/scripts/bench-compare.mjs @@ -97,12 +97,9 @@ try { console.error(`\n๐ŸŽ๏ธ Running benchmarks (experiment vs control)โ€ฆ\n`); const benchScript = join(ROOT, 'tests/parser.bench.mjs'); - const samplerHook = join(ROOT, 'tests/bench-sampler.mjs'); const benchArgs = [ '--expose-gc', '--max-old-space-size=4096', - '--import', - samplerHook, benchScript, '--control-dir', CONTROL_DIR, diff --git a/tests/bench-sampler-hooks.mjs b/tests/bench-sampler-hooks.mjs deleted file mode 100644 index 9d69870..0000000 --- a/tests/bench-sampler-hooks.mjs +++ /dev/null @@ -1,31 +0,0 @@ -/** - * Module loader hooks for increasing mitata sampling. - * - * Intercepts mitata's lib.mjs at load time and rewrites the - * k_min_cpu_time and k_min_samples constants to 5× their defaults. 
- */ - -const SAMPLE_MULTIPLIER = 5; - -export async function load(url, context, nextLoad) { - if (url.includes('mitata') && url.endsWith('lib.mjs')) { - const result = await nextLoad(url, context); - let source = typeof result.source === 'string' - ? result.source - : new TextDecoder().decode(result.source); - - source = source - .replace( - /export const k_min_cpu_time = .+/, - `export const k_min_cpu_time = ${SAMPLE_MULTIPLIER} * 642 * 1e6;`, - ) - .replace( - /export const k_min_samples = .+/, - `export const k_min_samples = ${SAMPLE_MULTIPLIER} * 12;`, - ); - - return { ...result, source, shortCircuit: true }; - } - - return nextLoad(url, context); -} diff --git a/tests/bench-sampler.mjs b/tests/bench-sampler.mjs deleted file mode 100644 index c676c26..0000000 --- a/tests/bench-sampler.mjs +++ /dev/null @@ -1,13 +0,0 @@ -/** - * Node.js module-resolution hook that increases mitata's sampling constants. - * - * Register via: node --import ./tests/bench-sampler.mjs ... - * - * This uses Node's module customization API to intercept mitata's lib.mjs - * at load time and replace the min_cpu_time / min_samples constants with - * higher values (5ร—) for more stable results on CI. - */ - -import { register } from 'node:module'; - -register(new URL('./bench-sampler-hooks.mjs', import.meta.url)); diff --git a/tests/parser.bench.mjs b/tests/parser.bench.mjs index f7e46f7..6f37423 100644 --- a/tests/parser.bench.mjs +++ b/tests/parser.bench.mjs @@ -111,10 +111,16 @@ for (const { type, ext, experimentParse, controlParse } of PARSERS) { globalThis.gc?.(); +// More iterations per sample โ†’ individual GC spikes get diluted, reducing +// variance on noisy CI runners. Scale down for larger fixtures so each +// sample doesn't take too long (mitata needs many samples for stable stats). 
+const BENCH_ITERS = { small: 1000, medium: 200, large: 100 }; + for (const { type, ext, experimentParse, controlParse } of PARSERS) { for (const size of SIZES) { const code = FIXTURES[type][size]; const opts = { ...PARSE_OPTIONS, filePath: `${size}${ext}` }; + const iters = BENCH_ITERS[size]; // Force a full GC before each benchmark group to reduce GC-triggered variance globalThis.gc?.(); @@ -123,13 +129,19 @@ for (const { type, ext, experimentParse, controlParse } of PARSERS) { // Side-by-side comparison with boxplots boxplot(() => { summary(() => { - bench(`${type} ${size} (control)`, () => controlParse(code, opts)).gc('inner'); - bench(`${type} ${size} (experiment)`, () => experimentParse(code, opts)).gc('inner'); + bench(`${type} ${size} (control)`, () => { + for (let i = 0; i < iters; i++) controlParse(code, opts); + }); + bench(`${type} ${size} (experiment)`, () => { + for (let i = 0; i < iters; i++) experimentParse(code, opts); + }); }); }); } else { // Standalone mode — just benchmark the local parsers - bench(`${type} ${size}`, () => experimentParse(code, opts)).gc('inner'); + bench(`${type} ${size}`, () => { + for (let i = 0; i < iters; i++) experimentParse(code, opts); + }); } } } From 87c663ccd4245106b5cba3c3279be9c3581881f6 Mon Sep 17 00:00:00 2001 From: NullVoxPopuli <199018+NullVoxPopuli@users.noreply.github.com> Date: Thu, 19 Mar 2026 23:38:59 -0400 Subject: [PATCH 04/10] Increase iteration counts for small/medium benchmarks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Small: 1000→2000, medium: 200→500. The previous run showed gts small still drifting at -3.4% while medium/large were within 1%. More iterations per sample further dilutes GC and scheduling noise. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/parser.bench.mjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/parser.bench.mjs b/tests/parser.bench.mjs index 6f37423..4d6f9ce 100644 --- a/tests/parser.bench.mjs +++ b/tests/parser.bench.mjs @@ -114,7 +114,7 @@ globalThis.gc?.(); // More iterations per sample โ†’ individual GC spikes get diluted, reducing // variance on noisy CI runners. Scale down for larger fixtures so each // sample doesn't take too long (mitata needs many samples for stable stats). -const BENCH_ITERS = { small: 1000, medium: 200, large: 100 }; +const BENCH_ITERS = { small: 2000, medium: 500, large: 100 }; for (const { type, ext, experimentParse, controlParse } of PARSERS) { for (const size of SIZES) { From c864d12690c463136aae35f0b8182f559ed76ba6 Mon Sep 17 00:00:00 2001 From: NullVoxPopuli <199018+NullVoxPopuli@users.noreply.github.com> Date: Fri, 20 Mar 2026 10:10:59 -0400 Subject: [PATCH 05/10] Add CPU pinning and priority for local benchmarks - New run-bench.sh wrapper: applies taskset -c 0 (CPU pinning) and nice -n -20 (high priority, when running as root) for pnpm bench - bench-compare.mjs: also uses nice -n -20 when running as root - local-bench-summary.sh: offers to apply CPU governor and boost fixes interactively instead of just warning Co-Authored-By: Claude Opus 4.6 (1M context) --- package.json | 2 +- scripts/bench-compare.mjs | 15 +++++++++++++-- scripts/local-bench-summary.sh | 30 ++++++++++++++++++++++++++++-- scripts/run-bench.sh | 31 +++++++++++++++++++++++++++++++ 4 files changed, 73 insertions(+), 5 deletions(-) create mode 100755 scripts/run-bench.sh diff --git a/package.json b/package.json index d00ab0a..c584149 100644 --- a/package.json +++ b/package.json @@ -25,7 +25,7 @@ "lint:js": "eslint . --max-warnings=0", "lint:js:fix": "eslint . 
--fix --max-warnings=0", "lint:package": "publint", - "bench": "node --expose-gc tests/parser.bench.mjs", + "bench": "./scripts/run-bench.sh tests/parser.bench.mjs", "bench:compare": "node scripts/bench-compare.mjs", "bench:summary": "./scripts/local-bench-summary.sh", "test": "vitest run" diff --git a/scripts/bench-compare.mjs b/scripts/bench-compare.mjs index 2d8339f..5703d04 100644 --- a/scripts/bench-compare.mjs +++ b/scripts/bench-compare.mjs @@ -105,9 +105,10 @@ try { CONTROL_DIR, ]; - // CPU pinning on Linux to reduce cross-core migration variance + // CPU pinning + high priority on Linux for lower variance const IS_LINUX = process.platform === 'linux'; const HAS_TASKSET = IS_LINUX && spawnSync('which', ['taskset'], { stdio: 'pipe' }).status === 0; + const HAS_NICE = IS_LINUX && spawnSync('which', ['nice'], { stdio: 'pipe' }).status === 0; let cmd = 'node'; let fullArgs = benchArgs; @@ -115,9 +116,19 @@ try { if (HAS_TASKSET) { cmd = 'taskset'; fullArgs = ['-c', '0', 'node', ...benchArgs]; - console.error('๐Ÿ“Œ CPU pinning enabled (taskset -c 0)\n'); + console.error('๐Ÿ“Œ CPU pinning enabled (taskset -c 0)'); } + if (HAS_NICE && process.getuid?.() === 0) { + fullArgs = ['-n', '-20', cmd, ...fullArgs]; + cmd = 'nice'; + console.error('โšก High priority enabled (nice -n -20)'); + } else if (HAS_NICE) { + console.error('๐Ÿ’ก Run with sudo for high scheduling priority (nice -n -20)'); + } + + console.error(''); + const result = spawnSync(cmd, fullArgs, { stdio: 'inherit', cwd: ROOT, diff --git a/scripts/local-bench-summary.sh b/scripts/local-bench-summary.sh index 2fc4d7f..6a26d26 100755 --- a/scripts/local-bench-summary.sh +++ b/scripts/local-bench-summary.sh @@ -2,13 +2,14 @@ # Check CPU tuning on Linux โ€” poor settings cause massive variance hw_warnings="" +hw_fixes=() if [ -f /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor ]; then gov=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor) if [ "$gov" != "performance" ]; then hw_warnings+="โš ๏ธ 
CPU governor is '$gov' โ€” benchmark results will be noisy. - Fix with: sudo cpupower frequency-set -g performance " + hw_fixes+=("sudo cpupower frequency-set -g performance") fi fi @@ -16,14 +17,39 @@ if [ -f /sys/devices/system/cpu/cpufreq/boost ]; then boost=$(cat /sys/devices/system/cpu/cpufreq/boost) if [ "$boost" = "1" ]; then hw_warnings+="โš ๏ธ CPU boost is enabled โ€” frequency varies with thermals. - Fix with: echo 0 | sudo tee /sys/devices/system/cpu/cpufreq/boost " + hw_fixes+=("echo 0 | sudo tee /sys/devices/system/cpu/cpufreq/boost") + fi +elif [ -f /sys/devices/system/cpu/intel_pstate/no_turbo ]; then + no_turbo=$(cat /sys/devices/system/cpu/intel_pstate/no_turbo) + if [ "$no_turbo" = "0" ]; then + hw_warnings+="โš ๏ธ Intel Turbo Boost is enabled โ€” frequency varies with thermals. +" + hw_fixes+=("echo 1 | sudo tee /sys/devices/system/cpu/intel_pstate/no_turbo") fi fi if [ -n "$hw_warnings" ]; then echo "" echo "$hw_warnings" + + if [ ${#hw_fixes[@]} -gt 0 ] && [ -t 0 ]; then + echo "Fix with:" + for fix in "${hw_fixes[@]}"; do + echo " $fix" + done + echo "" + read -rp "Apply these fixes now? [y/N] " answer + if [[ "$answer" =~ ^[Yy]$ ]]; then + for fix in "${hw_fixes[@]}"; do + echo "โ†’ $fix" + eval "$fix" + done + echo "" + echo "โœ… CPU tuning applied." + echo "" + fi + fi fi export BENCH_JSON_OUTPUT=./bench-results.json diff --git a/scripts/run-bench.sh b/scripts/run-bench.sh new file mode 100755 index 0000000..acfd9fd --- /dev/null +++ b/scripts/run-bench.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +# +# Wrapper that runs a node command with CPU pinning and high priority +# when available, for lower-variance benchmarks. 
+# +# Usage: ./scripts/run-bench.sh + +set -euo pipefail + +NODE_ARGS=("--expose-gc" "$@") + +CMD=(node "${NODE_ARGS[@]}") + +# CPU pinning on Linux โ€” keep the process on a single core +if command -v taskset &>/dev/null; then + CMD=(taskset -c 0 "${CMD[@]}") + echo "๐Ÿ“Œ CPU pinning enabled (taskset -c 0)" >&2 +fi + +# High scheduling priority (best-effort, needs root for negative values) +if command -v nice &>/dev/null; then + if [ "$(id -u)" = "0" ]; then + CMD=(nice -n -20 "${CMD[@]}") + echo "โšก High priority enabled (nice -n -20)" >&2 + else + echo "๐Ÿ’ก Run with sudo for high scheduling priority (nice -n -20)" >&2 + fi +fi + +echo "" >&2 +exec "${CMD[@]}" From 75fea38554b9c7b83f7ae84497f026132822f496 Mon Sep 17 00:00:00 2001 From: NullVoxPopuli <199018+NullVoxPopuli@users.noreply.github.com> Date: Fri, 20 Mar 2026 10:31:01 -0400 Subject: [PATCH 06/10] Use do_not_optimize to prevent V8 DCE on parse results Wraps each parse call with mitata's do_not_optimize() to ensure V8 doesn't dead-code eliminate any part of the return value. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/parser.bench.mjs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/parser.bench.mjs b/tests/parser.bench.mjs index 4d6f9ce..200314b 100644 --- a/tests/parser.bench.mjs +++ b/tests/parser.bench.mjs @@ -15,7 +15,7 @@ import { createRequire } from 'node:module'; import { readFileSync } from 'node:fs'; import { fileURLToPath } from 'node:url'; import { resolve } from 'node:path'; -import { run, bench, boxplot, summary } from 'mitata'; +import { run, bench, boxplot, summary, do_not_optimize } from 'mitata'; // --------------------------------------------------------------------------- // CLI args @@ -130,17 +130,17 @@ for (const { type, ext, experimentParse, controlParse } of PARSERS) { boxplot(() => { summary(() => { bench(`${type} ${size} (control)`, () => { - for (let i = 0; i < iters; i++) controlParse(code, opts); + for (let i = 0; i < iters; i++) do_not_optimize(controlParse(code, opts)); }); bench(`${type} ${size} (experiment)`, () => { - for (let i = 0; i < iters; i++) experimentParse(code, opts); + for (let i = 0; i < iters; i++) do_not_optimize(experimentParse(code, opts)); }); }); }); } else { // Standalone mode โ€” just benchmark the local parsers bench(`${type} ${size}`, () => { - for (let i = 0; i < iters; i++) experimentParse(code, opts); + for (let i = 0; i < iters; i++) do_not_optimize(experimentParse(code, opts)); }); } } From ba324f712330070bb8584588a5d2496289e8c040 Mon Sep 17 00:00:00 2001 From: NullVoxPopuli <199018+NullVoxPopuli@users.noreply.github.com> Date: Fri, 20 Mar 2026 11:50:28 -0400 Subject: [PATCH 07/10] Use sudo nice for high priority without requiring root shell Instead of requiring the user to run the whole command as root (which loses PATH and can't find node/pnpm), use 'sudo nice -n -20 node ...' so only the nice call is elevated. Activates automatically when passwordless sudo is available. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- scripts/bench-compare.mjs | 11 +++++++++-- scripts/run-bench.sh | 7 +++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/scripts/bench-compare.mjs b/scripts/bench-compare.mjs index 5703d04..c987794 100644 --- a/scripts/bench-compare.mjs +++ b/scripts/bench-compare.mjs @@ -119,12 +119,19 @@ try { console.error('๐Ÿ“Œ CPU pinning enabled (taskset -c 0)'); } - if (HAS_NICE && process.getuid?.() === 0) { + const IS_ROOT = process.getuid?.() === 0; + const HAS_SUDO = IS_LINUX && spawnSync('sudo', ['-n', 'true'], { stdio: 'pipe' }).status === 0; + + if (HAS_NICE && IS_ROOT) { fullArgs = ['-n', '-20', cmd, ...fullArgs]; cmd = 'nice'; console.error('โšก High priority enabled (nice -n -20)'); + } else if (HAS_NICE && HAS_SUDO) { + fullArgs = ['nice', '-n', '-20', cmd, ...fullArgs]; + cmd = 'sudo'; + console.error('โšก High priority enabled (sudo nice -n -20)'); } else if (HAS_NICE) { - console.error('๐Ÿ’ก Run with sudo for high scheduling priority (nice -n -20)'); + console.error('๐Ÿ’ก High priority unavailable (needs passwordless sudo)'); } console.error(''); diff --git a/scripts/run-bench.sh b/scripts/run-bench.sh index acfd9fd..04de72b 100755 --- a/scripts/run-bench.sh +++ b/scripts/run-bench.sh @@ -17,13 +17,16 @@ if command -v taskset &>/dev/null; then echo "๐Ÿ“Œ CPU pinning enabled (taskset -c 0)" >&2 fi -# High scheduling priority (best-effort, needs root for negative values) +# High scheduling priority (best-effort, negative nice needs root) if command -v nice &>/dev/null; then if [ "$(id -u)" = "0" ]; then CMD=(nice -n -20 "${CMD[@]}") echo "โšก High priority enabled (nice -n -20)" >&2 + elif command -v sudo &>/dev/null && sudo -n true 2>/dev/null; then + CMD=(sudo nice -n -20 "${CMD[@]}") + echo "โšก High priority enabled (sudo nice -n -20)" >&2 else - echo "๐Ÿ’ก Run with sudo for high scheduling priority (nice -n -20)" >&2 + echo "๐Ÿ’ก High priority unavailable (needs passwordless sudo)" >&2 fi fi 
From 23fa308dfbd01445a05e7177b5bc5ab7f14d2326 Mon Sep 17 00:00:00 2001 From: NullVoxPopuli <199018+NullVoxPopuli@users.noreply.github.com> Date: Fri, 20 Mar 2026 11:58:39 -0400 Subject: [PATCH 08/10] DRY up bench scripts, remove sudo logic, add reliability tips - Extract shared formatTime, deltaEmoji, parsePairs into bench-utils.mjs - Simplify run-bench.sh to just CPU pinning (no sudo/nice complexity) - Simplify bench-compare.mjs to just CPU pinning - Replace interactive sudo prompt in local-bench-summary.sh with a "tips for more reliable results" section printed after results Co-Authored-By: Claude Opus 4.6 (1M context) --- scripts/bench-compare.mjs | 22 +-------- scripts/bench-utils.mjs | 57 +++++++++++++++++++++ scripts/format-bench-cli.mjs | 61 ++--------------------- scripts/format-bench-comment.mjs | 85 ++++++++------------------------ scripts/local-bench-summary.sh | 71 ++++++++++---------------- scripts/run-bench.sh | 22 ++------- 6 files changed, 112 insertions(+), 206 deletions(-) create mode 100644 scripts/bench-utils.mjs diff --git a/scripts/bench-compare.mjs b/scripts/bench-compare.mjs index c987794..2d8339f 100644 --- a/scripts/bench-compare.mjs +++ b/scripts/bench-compare.mjs @@ -105,10 +105,9 @@ try { CONTROL_DIR, ]; - // CPU pinning + high priority on Linux for lower variance + // CPU pinning on Linux to reduce cross-core migration variance const IS_LINUX = process.platform === 'linux'; const HAS_TASKSET = IS_LINUX && spawnSync('which', ['taskset'], { stdio: 'pipe' }).status === 0; - const HAS_NICE = IS_LINUX && spawnSync('which', ['nice'], { stdio: 'pipe' }).status === 0; let cmd = 'node'; let fullArgs = benchArgs; @@ -116,26 +115,9 @@ try { if (HAS_TASKSET) { cmd = 'taskset'; fullArgs = ['-c', '0', 'node', ...benchArgs]; - console.error('๐Ÿ“Œ CPU pinning enabled (taskset -c 0)'); + console.error('๐Ÿ“Œ CPU pinning enabled (taskset -c 0)\n'); } - const IS_ROOT = process.getuid?.() === 0; - const HAS_SUDO = IS_LINUX && spawnSync('sudo', ['-n', 
'true'], { stdio: 'pipe' }).status === 0; - - if (HAS_NICE && IS_ROOT) { - fullArgs = ['-n', '-20', cmd, ...fullArgs]; - cmd = 'nice'; - console.error('โšก High priority enabled (nice -n -20)'); - } else if (HAS_NICE && HAS_SUDO) { - fullArgs = ['nice', '-n', '-20', cmd, ...fullArgs]; - cmd = 'sudo'; - console.error('โšก High priority enabled (sudo nice -n -20)'); - } else if (HAS_NICE) { - console.error('๐Ÿ’ก High priority unavailable (needs passwordless sudo)'); - } - - console.error(''); - const result = spawnSync(cmd, fullArgs, { stdio: 'inherit', cwd: ROOT, diff --git a/scripts/bench-utils.mjs b/scripts/bench-utils.mjs new file mode 100644 index 0000000..995e882 --- /dev/null +++ b/scripts/bench-utils.mjs @@ -0,0 +1,57 @@ +/** + * Shared utilities for benchmark formatting scripts. + */ + +import { readFileSync } from 'node:fs'; + +export function formatTime(ns) { + if (ns >= 1e6) return `${(ns / 1e6).toFixed(2)} ms`; + if (ns >= 1e3) return `${(ns / 1e3).toFixed(2)} ยตs`; + return `${ns.toFixed(2)} ns`; +} + +export function deltaEmoji(pct) { + const abs = Math.abs(pct); + if (abs < 2) return 'โšช'; + if (pct <= -5) return '๐ŸŸข'; + if (pct >= 5) return '๐Ÿ”ด'; + if (pct < 0) return '๐ŸŸข'; + return '๐ŸŸ '; +} + +/** + * Parse benchmark JSON results into control/experiment pairs with deltas. + * Uses p50 (median) which is more robust to outliers than avg. + */ +export function parsePairs(json) { + const pairs = new Map(); + + for (const trial of json.benchmarks || []) { + for (const r of trial.runs || []) { + if (!r.stats) continue; + const m = r.name.match(/^(.+)\s+\((control|experiment)\)$/); + if (!m) continue; + const [, key, role] = m; + if (!pairs.has(key)) pairs.set(key, {}); + pairs.get(key)[role] = r.stats; + } + } + + const rows = []; + for (const [name, { control, experiment }] of pairs) { + if (!control || !experiment) continue; + const ctrlVal = control.p50 ?? control.avg; + const expVal = experiment.p50 ?? 
experiment.avg; + const delta = ((expVal - ctrlVal) / ctrlVal) * 100; + rows.push({ name, control: ctrlVal, experiment: expVal, delta }); + } + + return rows; +} + +/** + * Read and parse the benchmark JSON results file. + */ +export function readBenchJSON(path) { + return JSON.parse(readFileSync(path, 'utf8')); +} diff --git a/scripts/format-bench-cli.mjs b/scripts/format-bench-cli.mjs index 516d5c3..3289af5 100644 --- a/scripts/format-bench-cli.mjs +++ b/scripts/format-bench-cli.mjs @@ -6,7 +6,7 @@ * BENCH_JSON_OUTPUT - Path to the JSON bench results */ -import { readFileSync } from 'node:fs'; +import { formatTime, deltaEmoji, parsePairs, readBenchJSON } from './bench-utils.mjs'; const jsonPath = process.env.BENCH_JSON_OUTPUT; @@ -18,67 +18,13 @@ if (!jsonPath) { let json; try { - json = JSON.parse(readFileSync(jsonPath, 'utf8')); + json = readBenchJSON(jsonPath); } catch (e) { console.error(`Could not read ${jsonPath}: ${e.message}`); process.exit(1); } -function formatTime(ns) { - if (ns >= 1e6) return `${(ns / 1e6).toFixed(2)} ms`; - if (ns >= 1e3) return `${(ns / 1e3).toFixed(2)} ยตs`; - - return `${ns.toFixed(2)} ns`; -} - -function deltaEmoji(pct) { - const abs = Math.abs(pct); - - if (abs < 2) return 'โšช'; - if (pct <= -5) return '๐ŸŸข'; - if (pct >= 5) return '๐Ÿ”ด'; - if (pct < 0) return '๐ŸŸข'; - - return '๐ŸŸ '; -} - -// Group control/experiment pairs -const pairs = new Map(); - -for (const trial of json.benchmarks || []) { - for (const r of trial.runs || []) { - if (!r.stats) continue; - - const m = r.name.match(/^(.+)\s+\((control|experiment)\)$/); - - if (!m) continue; - - const [, key, role] = m; - - if (!pairs.has(key)) pairs.set(key, {}); - - pairs.get(key)[role] = r.stats; - } -} - -if (pairs.size === 0) { - console.log('No comparison data found.'); - process.exit(0); -} - -// Build rows โ€” use median (p50) which is far more robust to outliers from -// CPU frequency scaling, GC pauses, and other system noise than the mean. 
-const rows = []; - -for (const [name, { control, experiment }] of pairs) { - if (!control || !experiment) continue; - - const ctrlVal = control.p50 ?? control.avg; - const expVal = experiment.p50 ?? experiment.avg; - const delta = ((expVal - ctrlVal) / ctrlVal) * 100; - - rows.push({ name, control: ctrlVal, experiment: expVal, delta }); -} +const rows = parsePairs(json); if (rows.length === 0) { console.log('No comparison data found.'); @@ -96,7 +42,6 @@ const deltaW = Math.max( 'ฮ”'.length, ...rows.map((r) => { const sign = r.delta > 0 ? '+' : ''; - return `${sign}${r.delta.toFixed(1)}%`.length; }) ); diff --git a/scripts/format-bench-comment.mjs b/scripts/format-bench-comment.mjs index f59e453..89ab9e9 100644 --- a/scripts/format-bench-comment.mjs +++ b/scripts/format-bench-comment.mjs @@ -13,6 +13,7 @@ */ import { readFileSync } from 'node:fs'; +import { formatTime, deltaEmoji, parsePairs, readBenchJSON } from './bench-utils.mjs'; const marker = ''; @@ -43,76 +44,30 @@ const jsonPath = process.env.BENCH_JSON_OUTPUT; if (jsonPath) { try { - const json = JSON.parse(readFileSync(jsonPath, 'utf8')); - summarySection = buildSummary(json); + const rows = parsePairs(readBenchJSON(jsonPath)); + + if (rows.length > 0) { + const tableRows = rows.map(({ name, control, experiment, delta }) => { + const emoji = deltaEmoji(delta); + const sign = delta > 0 ? 
'+' : ''; + return `| ${emoji} | ${name} | ${formatTime(control)} | ${formatTime(experiment)} | ${sign}${delta.toFixed(1)}% |`; + }); + + summarySection = [ + '', + '| | Benchmark | Control (p50) | Experiment (p50) | ฮ” |', + '|---|---|---:|---:|---:|', + ...tableRows, + '', + '> ๐ŸŸข faster ยท ๐Ÿ”ด slower ยท ๐ŸŸ  slightly slower ยท โšช within 2%', + '', + ].join('\n'); + } } catch { // JSON not available or malformed โ€” skip summary } } -function formatTime(ns) { - if (ns >= 1e6) return `${(ns / 1e6).toFixed(2)} ms`; - if (ns >= 1e3) return `${(ns / 1e3).toFixed(2)} ยตs`; - return `${ns.toFixed(2)} ns`; -} - -function deltaEmoji(pct) { - const abs = Math.abs(pct); - // negative pct means experiment is faster (lower time = better) - if (abs < 2) return 'โšช'; - if (pct <= -5) return '๐ŸŸข'; - if (pct >= 5) return '๐Ÿ”ด'; - if (pct < 0) return '๐ŸŸข'; - return '๐ŸŸ '; -} - -function buildSummary(json) { - const benchmarks = json.benchmarks || []; - - // In comparison mode, benchmarks come in pairs inside summary groups. - // Each benchmark alias is like "gts small (control)" / "gts small (experiment)". - // Group them by stripping the suffix. - const pairs = new Map(); - - for (const trial of benchmarks) { - for (const r of trial.runs || []) { - if (!r.stats) continue; - const m = r.name.match(/^(.+)\s+\((control|experiment)\)$/); - if (!m) continue; - const [, key, role] = m; - if (!pairs.has(key)) pairs.set(key, {}); - pairs.get(key)[role] = r.stats; - } - } - - if (pairs.size === 0) return ''; - - const rows = []; - for (const [name, { control, experiment }] of pairs) { - if (!control || !experiment) continue; - const ctrlVal = control.p50 ?? control.avg; - const expVal = experiment.p50 ?? experiment.avg; - const delta = ((expVal - ctrlVal) / ctrlVal) * 100; - const emoji = deltaEmoji(delta); - const sign = delta > 0 ? 
'+' : ''; - rows.push( - `| ${emoji} | ${name} | ${formatTime(ctrlVal)} | ${formatTime(expVal)} | ${sign}${delta.toFixed(1)}% |` - ); - } - - if (rows.length === 0) return ''; - - return [ - '', - '| | Benchmark | Control (p50) | Experiment (p50) | Δ |', - '|---|---|---:|---:|---:|', - ...rows, - '', - '> 🟢 faster · 🔴 slower · 🟠 slightly slower · ⚪ within 2%', - '', - ].join('\n'); -} - // --------------------------------------------------------------------------- // Assemble comment // --------------------------------------------------------------------------- diff --git a/scripts/local-bench-summary.sh b/scripts/local-bench-summary.sh index 6a26d26..b2c6915 100755 --- a/scripts/local-bench-summary.sh +++ b/scripts/local-bench-summary.sh @@ -1,65 +1,48 @@ #!/usr/bin/env bash -# Check CPU tuning on Linux — poor settings cause massive variance -hw_warnings="" -hw_fixes=() +export BENCH_JSON_OUTPUT=./bench-results.json + +pnpm bench:compare + +echo "" +echo "━━━ Summary ━━━" +node scripts/format-bench-cli.mjs + +# Print tips for reducing variance +echo "━━━ Tips for more reliable results ━━━" +echo "" + +tips=() if [ -f /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor ]; then gov=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor) if [ "$gov" != "performance" ]; then - hw_warnings+="⚠️ CPU governor is '$gov' — benchmark results will be noisy. -" - hw_fixes+=("sudo cpupower frequency-set -g performance") + tips+=("CPU governor is '$gov' — set to 'performance' for fixed frequency:") + tips+=(" sudo cpupower frequency-set -g performance") + tips+=("") fi fi if [ -f /sys/devices/system/cpu/cpufreq/boost ]; then boost=$(cat /sys/devices/system/cpu/cpufreq/boost) if [ "$boost" = "1" ]; then - hw_warnings+="⚠️ CPU boost is enabled — frequency varies with thermals. 
-" - hw_fixes+=("echo 0 | sudo tee /sys/devices/system/cpu/cpufreq/boost") + tips+=("CPU boost is enabled — disable to prevent thermal-dependent frequency:") + tips+=(" echo 0 | sudo tee /sys/devices/system/cpu/cpufreq/boost") + tips+=("") fi elif [ -f /sys/devices/system/cpu/intel_pstate/no_turbo ]; then no_turbo=$(cat /sys/devices/system/cpu/intel_pstate/no_turbo) if [ "$no_turbo" = "0" ]; then - hw_warnings+="⚠️ Intel Turbo Boost is enabled — frequency varies with thermals. -" - hw_fixes+=("echo 1 | sudo tee /sys/devices/system/cpu/intel_pstate/no_turbo") + tips+=("Intel Turbo Boost is enabled — disable to prevent thermal-dependent frequency:") + tips+=(" echo 1 | sudo tee /sys/devices/system/cpu/intel_pstate/no_turbo") + tips+=("") fi fi -if [ -n "$hw_warnings" ]; then - echo "" - echo "$hw_warnings" - - if [ ${#hw_fixes[@]} -gt 0 ] && [ -t 0 ]; then - echo "Fix with:" - for fix in "${hw_fixes[@]}"; do - echo " $fix" - done - echo "" - read -rp "Apply these fixes now? [y/N] " answer - if [[ "$answer" =~ ^[Yy]$ ]]; then - for fix in "${hw_fixes[@]}"; do - echo "→ $fix" - eval "$fix" - done - echo "" - echo "✅ CPU tuning applied." - echo "" - fi - fi -fi - -export BENCH_JSON_OUTPUT=./bench-results.json - -pnpm bench:compare +tips+=("Close other applications to reduce CPU contention") +tips+=("Run multiple times — if deltas flip sign between runs, they're noise") +for tip in "${tips[@]}"; do + echo " $tip" +done echo "" -echo "━━━ Summary ━━━" -node scripts/format-bench-cli.mjs - -if [ -n "$hw_warnings" ]; then - echo "$hw_warnings" -fi diff --git a/scripts/run-bench.sh b/scripts/run-bench.sh index 04de72b..fcf066c 100755 --- a/scripts/run-bench.sh +++ b/scripts/run-bench.sh @@ -1,34 +1,18 @@ #!/usr/bin/env bash # -# Wrapper that runs a node command with CPU pinning and high priority -# when available, for lower-variance benchmarks. +# Wrapper that runs a node command with CPU pinning when available. 
# # Usage: ./scripts/run-bench.sh set -euo pipefail -NODE_ARGS=("--expose-gc" "$@") - -CMD=(node "${NODE_ARGS[@]}") +CMD=(node --expose-gc "$@") # CPU pinning on Linux — keep the process on a single core if command -v taskset &>/dev/null; then CMD=(taskset -c 0 "${CMD[@]}") echo "📌 CPU pinning enabled (taskset -c 0)" >&2 + echo "" >&2 fi -# High scheduling priority (best-effort, negative nice needs root) -if command -v nice &>/dev/null; then - if [ "$(id -u)" = "0" ]; then - CMD=(nice -n -20 "${CMD[@]}") - echo "⚡ High priority enabled (nice -n -20)" >&2 - elif command -v sudo &>/dev/null && sudo -n true 2>/dev/null; then - CMD=(sudo nice -n -20 "${CMD[@]}") - echo "⚡ High priority enabled (sudo nice -n -20)" >&2 - else - echo "💡 High priority unavailable (needs passwordless sudo)" >&2 - fi -fi - -echo "" >&2 exec "${CMD[@]}" From 6271cdf558754aaa174f3cb97d8697fd2496dc7a Mon Sep 17 00:00:00 2001 From: NullVoxPopuli <199018+NullVoxPopuli@users.noreply.github.com> Date: Fri, 20 Mar 2026 12:16:02 -0400 Subject: [PATCH 09/10] Apply suggestion from @NullVoxPopuli --- tests/parser.bench.mjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/parser.bench.mjs b/tests/parser.bench.mjs index 200314b..c1467ca 100644 --- a/tests/parser.bench.mjs +++ b/tests/parser.bench.mjs @@ -114,7 +114,7 @@ globalThis.gc?.(); // More iterations per sample → individual GC spikes get diluted, reducing // variance on noisy CI runners. Scale down for larger fixtures so each // sample doesn't take too long (mitata needs many samples for stable stats). 
-const BENCH_ITERS = { small: 2000, medium: 500, large: 100 }; +const BENCH_ITERS = { small: 1000, medium: 500, large: 100 }; for (const { type, ext, experimentParse, controlParse } of PARSERS) { for (const size of SIZES) { From 00417d136d237b70cdfab9fca20ccf048e09fa42 Mon Sep 17 00:00:00 2001 From: NullVoxPopuli <199018+NullVoxPopuli@users.noreply.github.com> Date: Fri, 20 Mar 2026 12:18:22 -0400 Subject: [PATCH 10/10] Fix camelCase lint error for do_not_optimize import Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/parser.bench.mjs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/parser.bench.mjs b/tests/parser.bench.mjs index c1467ca..bc4a7ae 100644 --- a/tests/parser.bench.mjs +++ b/tests/parser.bench.mjs @@ -15,7 +15,7 @@ import { createRequire } from 'node:module'; import { readFileSync } from 'node:fs'; import { fileURLToPath } from 'node:url'; import { resolve } from 'node:path'; -import { run, bench, boxplot, summary, do_not_optimize } from 'mitata'; +import { run, bench, boxplot, summary, do_not_optimize as doNotOptimize } from 'mitata'; // --------------------------------------------------------------------------- // CLI args @@ -130,17 +130,17 @@ for (const { type, ext, experimentParse, controlParse } of PARSERS) { boxplot(() => { summary(() => { bench(`${type} ${size} (control)`, () => { - for (let i = 0; i < iters; i++) do_not_optimize(controlParse(code, opts)); + for (let i = 0; i < iters; i++) doNotOptimize(controlParse(code, opts)); }); bench(`${type} ${size} (experiment)`, () => { - for (let i = 0; i < iters; i++) do_not_optimize(experimentParse(code, opts)); + for (let i = 0; i < iters; i++) doNotOptimize(experimentParse(code, opts)); }); }); }); } else { // Standalone mode — just benchmark the local parsers bench(`${type} ${size}`, () => { - for (let i = 0; i < iters; i++) do_not_optimize(experimentParse(code, opts)); + for (let i = 0; i < iters; i++) doNotOptimize(experimentParse(code, opts)); }); } }