Merge pull request #177 from ember-tooling/nvp/bench-summary

NullVoxPopuli · web-flow · commit f5cfc763cc36 · 2026-03-15T18:18:24.000-04:00
Make it easier to get a summary of the bench results locally
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,6 @@
 node_modules/
 package-lock.json
+
+bench-output.txt
+bench-comment.md
+bench-results.json
diff --git a/package.json b/package.json
@@ -26,6 +26,7 @@
     "lint:package": "pnpm publint",
     "bench": "node --expose-gc tests/parser.bench.mjs",
     "bench:compare": "node scripts/bench-compare.mjs",
+    "bench:summary": "./scripts/local-bench-summary.sh",
     "test": "vitest run"
   },
   "dependencies": {
diff --git a/scripts/format-bench-cli.mjs b/scripts/format-bench-cli.mjs
@@ -0,0 +1,126 @@
+/* eslint-disable n/no-process-exit */
+/**
+ * Format benchmark JSON results as a CLI-friendly summary table.
+ *
+ * Environment variables:
+ *   BENCH_JSON_OUTPUT - Path to the JSON bench results
+ */
+
+import { readFileSync } from 'node:fs';
+
+const jsonPath = process.env.BENCH_JSON_OUTPUT; // path to the results file, supplied via the environment
+
+if (!jsonPath) { // unset or empty: there is nothing to read, so fail fast
+  console.error('BENCH_JSON_OUTPUT not set');
+  process.exit(1);
+}
+
+let json; // parsed bench results; assigned inside the try below
+
+try {
+  json = JSON.parse(readFileSync(jsonPath, 'utf8'));
+} catch (e) { // covers both a failed read and invalid JSON
+  console.error(`Could not read ${jsonPath}: ${e.message}`);
+  process.exit(1);
+}
+
+function formatTime(ns) { // format a nanosecond duration with the largest fitting unit (ms, µs or ns)
+  if (ns >= 1e6) return `${(ns / 1e6).toFixed(2)} ms`; // 1e6 ns and above → milliseconds
+  if (ns >= 1e3) return `${(ns / 1e3).toFixed(2)} µs`; // 1e3 ns and above → microseconds
+
+  return `${ns.toFixed(2)} ns`; // anything smaller stays in nanoseconds; always two decimals
+}
+
+function deltaEmoji(pct) { // choose the status marker for a percentage delta
+  const abs = Math.abs(pct); // magnitude, used for the "no meaningful change" check
+
+  if (abs < 1) return '⚪'; // under 1% in either direction
+  if (pct <= -5) return '🟢'; // at least 5% lower
+  if (pct >= 5) return '🔴'; // at least 5% higher
+
+  return '🟡'; // between 1% and 5% in either direction (NaN also lands here)
+}
+
+// Group control/experiment runs into pairs keyed by the shared benchmark name
+const pairs = new Map(); // name → { control?: stats, experiment?: stats }
+
+for (const trial of json.benchmarks || []) { // tolerate a missing `benchmarks` array
+  for (const r of trial.runs || []) { // likewise tolerate a trial without `runs`
+    if (!r.stats) continue; // skip runs that carry no stats
+
+    const m = r.name.match(/^(.+)\s+\((control|experiment)\)$/); // "<name> (control)" or "<name> (experiment)"
+
+    if (!m) continue; // name does not follow the comparison pattern
+
+    const [, key, role] = m; // key = name without the suffix, role = 'control' | 'experiment'
+
+    if (!pairs.has(key)) pairs.set(key, {});
+
+    pairs.get(key)[role] = r.stats; // a later run with the same name and role overwrites the earlier one
+  }
+}
+
+if (pairs.size === 0) { // nothing matched the naming pattern
+  console.log('No comparison data found.');
+  process.exit(0); // reported, but not treated as a failure
+}
+
+// Build rows — use median (p50) which is far more robust to outliers from
+// CPU frequency scaling, GC pauses, and other system noise than the mean.
+const rows = []; // one entry per benchmark that has both sides
+
+for (const [name, { control, experiment }] of pairs) {
+  if (!control || !experiment) continue; // both roles are needed to compare
+
+  const ctrlVal = control.p50 ?? control.avg; // falls back to `avg` when `p50` is null/undefined
+  const expVal = experiment.p50 ?? experiment.avg; // same fallback for the experiment side
+  const delta = ((expVal - ctrlVal) / ctrlVal) * 100; // % change vs control; NOTE(review): a control value of 0 gives Infinity/NaN
+
+  rows.push({ name, control: ctrlVal, experiment: expVal, delta });
+}
+
+if (rows.length === 0) { // pairs existed, but none had both a control and an experiment
+  console.log('No comparison data found.');
+  process.exit(0); // reported, but not treated as a failure
+}
+
+// Calculate column widths: the wider of the header label and the longest formatted cell
+const nameW = Math.max('Benchmark'.length, ...rows.map((r) => r.name.length));
+const ctrlW = Math.max('Control (p50)'.length, ...rows.map((r) => formatTime(r.control).length));
+const expW = Math.max(
+  'Experiment (p50)'.length,
+  ...rows.map((r) => formatTime(r.experiment).length)
+);
+const deltaW = Math.max(
+  'Δ'.length,
+  ...rows.map((r) => {
+    const sign = r.delta > 0 ? '+' : ''; // negative numbers already carry their own '-'
+
+    return `${sign}${r.delta.toFixed(1)}%`.length; // must match the string built in the print loop
+  })
+);
+
+// Print table: header, rule, one line per benchmark, then the legend
+const pad = (s, w, right) => (right ? s.padStart(w) : s.padEnd(w)); // right=true right-aligns (pads on the left)
+
+console.log(); // blank line before the table
+console.log( // header row; the leading spaces occupy the position of the emoji prefix on data rows
+  `   ${pad('Benchmark', nameW)}   ${pad('Control (p50)', ctrlW, true)}   ${pad('Experiment (p50)', expW, true)}   ${pad('Δ', deltaW, true)}`
+);
+console.log( // rule under the header, one run of '─' per column
+  `   ${'─'.repeat(nameW)}   ${'─'.repeat(ctrlW)}   ${'─'.repeat(expW)}   ${'─'.repeat(deltaW)}`
+);
+
+for (const row of rows) {
+  const emoji = deltaEmoji(row.delta);
+  const sign = row.delta > 0 ? '+' : ''; // explicit '+' for positive deltas only
+  const deltaStr = `${sign}${row.delta.toFixed(1)}%`; // same format the width calculation measured
+
+  console.log( // name is left-aligned; the numeric columns are right-aligned
+    `${emoji} ${pad(row.name, nameW)}   ${pad(formatTime(row.control), ctrlW, true)}   ${pad(formatTime(row.experiment), expW, true)}   ${pad(deltaStr, deltaW, true)}`
+  );
+}
+
+console.log();
+console.log('🟢 faster · 🔴 slower · 🟡 within 5% · ⚪ within 1%'); // legend for the markers returned by deltaEmoji
+console.log();
diff --git a/scripts/local-bench-summary.sh b/scripts/local-bench-summary.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+
+# Check CPU tuning on Linux — poor settings cause massive variance
+hw_warnings=""  # accumulated warning text; printed before the run and again after the summary
+
+if [ -f /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor ]; then  # check is skipped when the file is absent
+  gov=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor)  # only cpu0 is inspected
+  if [ "$gov" != "performance" ]; then
+    hw_warnings+="⚠️  CPU governor is '$gov' — benchmark results will be noisy.
+   Fix with: sudo cpupower frequency-set -g performance
+"
+  fi
+fi
+
+if [ -f /sys/devices/system/cpu/cpufreq/boost ]; then  # check is skipped when the file is absent
+  boost=$(cat /sys/devices/system/cpu/cpufreq/boost)
+  if [ "$boost" = "1" ]; then  # "1" is treated as boost enabled
+    hw_warnings+="⚠️  CPU boost is enabled — frequency varies with thermals.
+   Fix with: echo 0 | sudo tee /sys/devices/system/cpu/cpufreq/boost
+"
+  fi
+fi
+
+if [ -n "$hw_warnings" ]; then  # warn up front, before the benchmark output starts
+  echo ""
+  echo "$hw_warnings"
+fi
+
+export BENCH_JSON_OUTPUT=./bench-results.json  # read by scripts/format-bench-cli.mjs; presumably written by bench:compare — confirm
+
+pnpm bench:compare  # NOTE(review): exit status is not checked; the summary below runs regardless
+
+echo ""
+echo "━━━ Summary ━━━"
+node scripts/format-bench-cli.mjs  # path is relative to the current working directory
+
+if [ -n "$hw_warnings" ]; then  # repeat the warnings after the summary as well
+  echo "$hw_warnings"
+fi
diff --git a/tests/parser.bench.mjs b/tests/parser.bench.mjs
@@ -89,6 +89,28 @@ const PARSERS = [
 
 const SIZES = ['small', 'medium', 'large'];
 
+// ---------------------------------------------------------------------------
+// JIT warm-up — parse every fixture with both parsers so V8 compiles and
+// optimises the hot paths before any measurement begins.  Without this, the
+// first-to-run parser pays the JIT compilation cost, creating order bias.
+// ---------------------------------------------------------------------------
+
+const WARMUP_ROUNDS = 5; // warm-up passes per parser/fixture combination
+
+for (const { type, ext, experimentParse, controlParse } of PARSERS) {
+  for (const size of SIZES) {
+    const code = FIXTURES[type][size];
+    const opts = { ...PARSE_OPTIONS, filePath: `${size}${ext}` }; // shared options plus a per-fixture file path
+
+    for (let i = 0; i < WARMUP_ROUNDS; i++) {
+      experimentParse(code, opts); // result is discarded; the call exists only to exercise the parser
+      controlParse?.(code, opts); // optional call: skipped when the entry has no control parser
+    }
+  }
+}
+
+globalThis.gc?.(); // runs a GC when `gc` is exposed (the `bench` script passes --expose-gc); otherwise a no-op
+
 for (const { type, ext, experimentParse, controlParse } of PARSERS) {
   for (const size of SIZES) {
     const code = FIXTURES[type][size];