From fd621b11677841e342172bb55f58e4318dd82503 Mon Sep 17 00:00:00 2001
From: Peter Wielander
Date: Thu, 18 Jun 2026 14:28:01 -0700
Subject: [PATCH] perf(core): ship build-time V8 code cache for workflow bundles

On a cold serverless instance the first replay parses the entire
workflow bundle before it can reach the first step. The in-process
vm.Script cache (#2471) only helps subsequent replays in the same
process; the cold first one still pays a full parse, which grows
~30ms/MB and dominates time-to-first-step for large bundles.

Producer (builders/next): for bundles over a size threshold, compile the
bundle at build time and emit Script.createCachedData() as base64 in the
generated flow route, passed via workflowEntrypoint's cachedData option.
Skipped for small bundles (parse is ~1ms; the blob would just bloat the
artifact) and via WORKFLOW_DISABLE_BUNDLE_CODE_CACHE.

Consumer (core): getCachedWorkflowScript compiles with the cachedData so
V8 skips parsing on the first compile. The blob is filename-independent
so one build-time cache serves every per-workflow filename; V8 validates
it and falls back to a full parse on any version/source mismatch, so a
stale blob is never a correctness risk.

Measured fresh-process parse: 2MB 34->4.8ms, 8MB 147->21ms (~7x).

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 .changeset/workflow-vm-compile-cache.md   |  7 +++
 packages/builders/src/base-builder.ts     | 21 ++++---
 packages/builders/src/constants.test.ts   | 64 ++++++++++++++++++++
 packages/builders/src/constants.ts        | 72 +++++++++++++++++++++++
 packages/builders/src/index.ts            |  1 +
 packages/core/src/runtime.ts              | 13 +++-
 packages/core/src/vm/script-cache.test.ts | 57 ++++++++++++++++++
 packages/core/src/vm/script-cache.ts      | 35 +++++++++--
 packages/core/src/workflow.ts             | 13 +++-
 packages/next/src/builder-deferred.ts     |  7 ++-
 10 files changed, 270 insertions(+), 20 deletions(-)
 create mode 100644 .changeset/workflow-vm-compile-cache.md

diff --git a/.changeset/workflow-vm-compile-cache.md b/.changeset/workflow-vm-compile-cache.md
new file mode 100644
index 0000000000..35d79583ea
--- /dev/null
+++ b/.changeset/workflow-vm-compile-cache.md
@@ -0,0 +1,7 @@
+---
+'@workflow/core': patch
+'@workflow/builders': patch
+'@workflow/next': patch
+---
+
+Ship a build-time V8 code cache alongside large workflow bundles so a cold serverless instance skips parsing the bundle on its first replay, cutting time-to-first-step.
diff --git a/packages/builders/src/base-builder.ts b/packages/builders/src/base-builder.ts index a4ce3f703a..344fac89ab 100644 --- a/packages/builders/src/base-builder.ts +++ b/packages/builders/src/base-builder.ts @@ -14,7 +14,10 @@ import { applySwcTransform, type WorkflowManifest, } from './apply-swc-transform.js'; -import { createWorkflowEntrypointOptionsCode } from './constants.js'; +import { + createWorkflowEntrypointArgs, + createWorkflowEntrypointOptionsCode, +} from './constants.js'; import { createDiscoverEntriesPlugin } from './discover-entries-esbuild-plugin.js'; import { getEsbuildTsconfigOptions } from './esbuild-tsconfig.js'; import { @@ -1182,19 +1185,18 @@ export abstract class BaseBuilder { } } - const workflowEntrypointOptionsCode = - createWorkflowEntrypointOptionsCode(); - const bundleFinal = async (interimBundle: string) => { const workflowBundleCode = interimBundle; + const { cachedDataDecl, secondArg } = + createWorkflowEntrypointArgs(workflowBundleCode); const workflowFunctionCode = `// biome-ignore-all lint: generated file /* eslint-disable */ import { workflowEntrypoint } from 'workflow/runtime'; const workflowCode = \`${workflowBundleCode.replace(/[\\`$]/g, '\\$&')}\`; - -export const POST = workflowEntrypoint(workflowCode${workflowEntrypointOptionsCode});`; +${cachedDataDecl} +export const POST = workflowEntrypoint(workflowCode${secondArg});`; // we skip the final bundling step for Next.js so it can bundle itself if (!bundleFinalOutput) { @@ -1373,7 +1375,8 @@ export const POST = workflowEntrypoint(workflowCode${workflowEntrypointOptionsCo // 3. 
Generate combined route file const stepsRelativePath = './' + basename(stepsOutfile).replace(/\\/g, '/'); const escapedVMCode = workflowVMCode.replace(/[\\`$]/g, '\\$&'); - const workflowEntrypointOptionsCode = createWorkflowEntrypointOptionsCode(); + const { cachedDataDecl, secondArg } = + createWorkflowEntrypointArgs(workflowVMCode); const combinedFunctionCode = `// biome-ignore-all lint: generated file /* eslint-disable */ @@ -1384,8 +1387,8 @@ import { workflowEntrypoint } from 'workflow/runtime'; void __steps_registered; const workflowCode = \`${escapedVMCode}\`; - -export const POST = workflowEntrypoint(workflowCode${workflowEntrypointOptionsCode});`; +${cachedDataDecl} +export const POST = workflowEntrypoint(workflowCode${secondArg});`; if (!bundleFinalOutput) { // Write directly (Next.js will bundle) diff --git a/packages/builders/src/constants.test.ts b/packages/builders/src/constants.test.ts index 2474a7e1a5..00f2fd1d05 100644 --- a/packages/builders/src/constants.test.ts +++ b/packages/builders/src/constants.test.ts @@ -1,9 +1,22 @@ +import { Script } from 'node:vm'; import { afterEach, describe, expect, it } from 'vitest'; import { + createWorkflowEntrypointArgs, createWorkflowEntrypointOptionsCode, createWorkflowQueueTrigger, } from './constants.js'; +// A bundle comfortably over the 256KB code-cache threshold. 
+function largeBundle(): string { + const lines: string[] = ['globalThis.__private_workflows = new Map();']; + for (let i = 0; i < 8000; i++) { + lines.push( + `globalThis.__private_workflows.set('app/wf-${i}', async function wf${i}(a){ return a*${i}+${i}; });` + ); + } + return lines.join('\n'); +} + describe('createWorkflowQueueTrigger', () => { afterEach(() => { delete process.env.WORKFLOW_QUEUE_NAMESPACE; @@ -49,3 +62,54 @@ describe('createWorkflowEntrypointOptionsCode', () => { ); }); }); + +describe('createWorkflowEntrypointArgs', () => { + afterEach(() => { + delete process.env.WORKFLOW_QUEUE_NAMESPACE; + delete process.env.WORKFLOW_DISABLE_BUNDLE_CODE_CACHE; + }); + + it('omits the code cache for small bundles', () => { + const { cachedDataDecl, secondArg } = createWorkflowEntrypointArgs( + 'const workflowCode = 1;' + ); + expect(cachedDataDecl).toBe(''); + expect(secondArg).toBe(''); + }); + + it('emits a usable code cache for large bundles', () => { + const bundle = largeBundle(); + const { cachedDataDecl, secondArg } = createWorkflowEntrypointArgs(bundle); + + expect(cachedDataDecl).toMatch( + /^const __workflowCodeCachedData = ".+";\n$/ + ); + expect(secondArg).toBe(', { cachedData: __workflowCodeCachedData }'); + + // The emitted base64 must be a V8 code cache that the runtime accepts for + // the same source — proving the producer/consumer contract round-trips. + const match = cachedDataDecl.match(/"([^"]+)"/); + expect(match).not.toBeNull(); + const cachedData = Buffer.from(match?.[1] ?? 
'', 'base64');
+    const script = new Script(bundle, { filename: 'rt.js', cachedData });
+    expect(script.cachedDataRejected).toBe(false);
+  });
+
+  it('combines namespace and code cache', () => {
+    const { secondArg } = createWorkflowEntrypointArgs(largeBundle(), {
+      namespace: 'custom',
+    });
+    expect(secondArg).toBe(
+      ', { namespace: "custom", cachedData: __workflowCodeCachedData }'
+    );
+  });
+
+  it('respects WORKFLOW_DISABLE_BUNDLE_CODE_CACHE', () => {
+    process.env.WORKFLOW_DISABLE_BUNDLE_CODE_CACHE = '1';
+    const { cachedDataDecl, secondArg } = createWorkflowEntrypointArgs(
+      largeBundle()
+    );
+    expect(cachedDataDecl).toBe('');
+    expect(secondArg).toBe('');
+  });
+});
diff --git a/packages/builders/src/constants.ts b/packages/builders/src/constants.ts
index 0f88e13e50..f03d00fa4d 100644
--- a/packages/builders/src/constants.ts
+++ b/packages/builders/src/constants.ts
@@ -1,3 +1,5 @@
+import { Script } from 'node:vm';
+
 const QUEUE_NAMESPACE_PATTERN = /^[a-z][a-z0-9]*$/;
 
 function resolveQueueNamespace(namespace?: string): string | undefined {
@@ -67,6 +69,76 @@ export function createWorkflowEntrypointOptionsCode(options?: {
   return `, { namespace: ${JSON.stringify(namespace)} }`;
 }
 
+/**
+ * Variable name a generated route uses to hold the build-time V8 code cache.
+ */
+const WORKFLOW_CODE_CACHED_DATA_VAR = '__workflowCodeCachedData';
+
+/**
+ * Bundles below this size parse in ~1ms, so embedding a base64 code cache
+ * (which roughly doubles the route's source size) is not worth it. Above it,
+ * the cold-instance parse — the dominant first-replay cost — grows ~linearly
+ * (~30ms/MB), and the cache skips it. Compared against the UTF-8 byte length.
+ */
+const MIN_CODE_CACHE_BYTES = 256 * 1024;
+
+/**
+ * Builds the trailing pieces a generated flow route needs to hand a build-time
+ * V8 code cache to `workflowEntrypoint`:
+ *   - `cachedDataDecl`: a `const __workflowCodeCachedData = "<base64>";` line to
+ *     emit right after `const workflowCode = ...` (empty when no cache).
+ *   - `secondArg`: the full `, { namespace, cachedData }` argument string.
+ *
+ * The cache (`Script.createCachedData()`) lets a fresh serverless instance skip
+ * parsing the bundle on its first replay. Skipped for small bundles and when
+ * `WORKFLOW_DISABLE_BUNDLE_CODE_CACHE=1`. Generation failure degrades silently
+ * to no cache; at runtime V8 also validates the blob and falls back to a full
+ * parse on any mismatch, so this is never a correctness risk.
+ *
+ * Only for production (bundle-embedded) routes — dev/watch routes that read the
+ * bundle from disk should keep using `createWorkflowEntrypointOptionsCode`.
+ */
+export function createWorkflowEntrypointArgs(
+  workflowCode: string,
+  options?: { namespace?: string }
+): { cachedDataDecl: string; secondArg: string } {
+  const namespace = resolveQueueNamespace(options?.namespace);
+  if (namespace) {
+    // Reuse prefix construction for namespace validation.
+    getQueueTopicPrefix('workflow', namespace);
+  }
+
+  let cachedDataB64 = '';
+  if (
+    process.env.WORKFLOW_DISABLE_BUNDLE_CODE_CACHE !== '1' &&
+    Buffer.byteLength(workflowCode) >= MIN_CODE_CACHE_BYTES
+  ) {
+    try {
+      const script = new Script(workflowCode, {
+        filename: 'workflow-bundle.js',
+      });
+      cachedDataB64 = script.createCachedData().toString('base64');
+    } catch {
+      cachedDataB64 = '';
+    }
+  }
+
+  const entries: string[] = [];
+  if (namespace) {
+    entries.push(`namespace: ${JSON.stringify(namespace)}`);
+  }
+  if (cachedDataB64) {
+    entries.push(`cachedData: ${WORKFLOW_CODE_CACHED_DATA_VAR}`);
+  }
+
+  return {
+    cachedDataDecl: cachedDataB64
+      ? `const ${WORKFLOW_CODE_CACHED_DATA_VAR} = ${JSON.stringify(cachedDataB64)};\n`
+      : '',
+    secondArg: entries.length ? `, { ${entries.join(', ')} }` : '',
+  };
+}
+
 /**
  * Default queue trigger (no namespace). Backward compatible.
*/ diff --git a/packages/builders/src/index.ts b/packages/builders/src/index.ts index a1ec9443ea..d41cfea3b7 100644 --- a/packages/builders/src/index.ts +++ b/packages/builders/src/index.ts @@ -10,6 +10,7 @@ export { getDecoratorOptionsForDirectoryWithConfigPath, } from './config-helpers.js'; export { + createWorkflowEntrypointArgs, createWorkflowEntrypointOptionsCode, createWorkflowQueueTrigger, WORKFLOW_QUEUE_TRIGGER, diff --git a/packages/core/src/runtime.ts b/packages/core/src/runtime.ts index c378664b43..754dc9b749 100644 --- a/packages/core/src/runtime.ts +++ b/packages/core/src/runtime.ts @@ -282,11 +282,19 @@ function hasOpenHookOrWait(events: Event[]): boolean { */ export function workflowEntrypoint( workflowCode: string, - options?: { namespace?: string } + options?: { namespace?: string; cachedData?: string } ): (req: Request) => Promise { const NO_INLINE_REPLAY_AFTER_MS = Number(process.env.WORKFLOW_V2_TIMEOUT_MS) || 120_000; + // Decode the build-time V8 code cache (base64) once per process. Passed to + // every replay so the first bundle compile in a fresh process can skip + // parsing (see `runWorkflow` / `getCachedWorkflowScript`). Undefined when the + // build emitted no cache (e.g. dev, or a bundle below the size threshold). + const workflowCodeCachedData = options?.cachedData + ? 
Buffer.from(options.cachedData, 'base64') + : undefined; + const namespace = resolveQueueNamespace(options?.namespace); const workflowPrefix = getQueueTopicPrefix('workflow', namespace); @@ -1074,7 +1082,8 @@ export function workflowEntrypoint( workflowCode, workflowRun, events, - encryptionKey + encryptionKey, + workflowCodeCachedData ); runtimeLogger.debug('Workflow replay completed', { workflowRunId: runId, diff --git a/packages/core/src/vm/script-cache.test.ts b/packages/core/src/vm/script-cache.test.ts index 399f6b2c69..fb00fef140 100644 --- a/packages/core/src/vm/script-cache.test.ts +++ b/packages/core/src/vm/script-cache.test.ts @@ -199,4 +199,61 @@ describe('script-cache', () => { ) as (n: string) => Promise; expect(await fnY('z')).toContain('bundle-Y:3:z'); }); + + describe('build-time code cache (cachedData)', () => { + it('accepts a matching code cache and still produces the right workflow', async () => { + const bundle = buildBundle('cached-ok'); + // Build-time step: compile once and capture the V8 code cache. + const { Script } = await import('node:vm'); + const cachedData = new Script(bundle, { + filename: 'build.js', + }).createCachedData(); + + // Runtime: compiling with the cache must not reject it... + const script = getCachedWorkflowScript(bundle, 'app/file.js', cachedData); + expect(script.cachedDataRejected).toBe(false); + + // ...and the workflow it registers still runs correctly. + const { context } = createContext({ seed, fixedTimestamp }); + script.runInContext(context); + const fn = runInContext( + `globalThis.__private_workflows?.get('app/workflow-5')`, + context + ) as (n: string) => Promise; + expect(await fn('q')).toContain('cached-ok:5:q'); + }); + + it('falls back to a full parse when the code cache is corrupt', async () => { + const bundle = buildBundle('cached-corrupt'); + const corrupt = Buffer.from('not a real v8 code cache blob'); + + // A rejected cache must not throw and must still compile correctly. 
+ const script = getCachedWorkflowScript(bundle, 'app/file.js', corrupt); + expect(script.cachedDataRejected).toBe(true); + + const { context } = createContext({ seed, fixedTimestamp }); + script.runInContext(context); + const fn = runInContext( + `globalThis.__private_workflows?.get('app/workflow-2')`, + context + ) as (n: string) => Promise; + expect(await fn('q')).toContain('cached-corrupt:2:q'); + }); + + it('is independent of filename: a cache built under one filename is accepted under another', async () => { + const bundle = buildBundle('cached-fname'); + const { Script } = await import('node:vm'); + const cachedData = new Script(bundle, { + filename: 'workflow//./a//wfA', + }).createCachedData(); + + // Runtime compiles under a different per-workflow filename. + const script = getCachedWorkflowScript( + bundle, + 'workflow//./b//wfB', + cachedData + ); + expect(script.cachedDataRejected).toBe(false); + }); + }); }); diff --git a/packages/core/src/vm/script-cache.ts b/packages/core/src/vm/script-cache.ts index d23bbc1624..a85d1e3780 100644 --- a/packages/core/src/vm/script-cache.ts +++ b/packages/core/src/vm/script-cache.ts @@ -1,4 +1,5 @@ import { type Context, Script } from 'node:vm'; +import { runtimeLogger as scriptCacheLogger } from '../logger.js'; /** * Module-level cache of compiled workflow-bundle `vm.Script` objects. 
@@ -100,7 +101,8 @@ function touchBundle(code: string): Map | undefined { */ export function getCachedWorkflowScript( code: string, - filename: string + filename: string, + cachedData?: Buffer ): Script { let byFilename = touchBundle(code); if (byFilename === undefined) { @@ -118,7 +120,26 @@ export function getCachedWorkflowScript( } let script = byFilename.get(filename); if (script === undefined) { - script = new Script(code, { filename }); + // `cachedData` (a V8 code cache produced at build time via + // `Script.createCachedData()`) lets V8 skip parsing the bundle on the first + // compile in this process — the dominant cost on a cold serverless instance + // for large bundles. The in-process cache above only helps subsequent + // replays in the *same* process; the code cache helps the very first one. + // V8 validates the blob against its version/flags and the source; on any + // mismatch it sets `cachedDataRejected` and transparently falls back to a + // full parse, so a stale or wrong-Node-version blob is never a correctness + // risk — only a missed optimization. `cachedData` is independent of + // `filename`, so a single build-time blob is valid for every per-workflow + // filename the bundle is compiled under. + script = cachedData + ? new Script(code, { filename, cachedData }) + : new Script(code, { filename }); + if (cachedData && script.cachedDataRejected) { + scriptCacheLogger.debug( + 'Workflow bundle code cache rejected; falling back to full parse', + { filename } + ); + } byFilename.set(filename, script); } return script; @@ -126,14 +147,18 @@ export function getCachedWorkflowScript( /** * Runs the cached workflow-bundle `Script` against `context`. Compiles and - * caches the `Script` on first use for the given `(code, filename)`. + * caches the `Script` on first use for the given `(code, filename)`. When + * `cachedData` is provided, it is used to skip parsing on the first compile. 
*/ export function runCachedWorkflowScript( code: string, filename: string, - context: Context + context: Context, + cachedData?: Buffer ): unknown { - return getCachedWorkflowScript(code, filename).runInContext(context); + return getCachedWorkflowScript(code, filename, cachedData).runInContext( + context + ); } /** diff --git a/packages/core/src/workflow.ts b/packages/core/src/workflow.ts index 6d3af6a89f..23681c26aa 100644 --- a/packages/core/src/workflow.ts +++ b/packages/core/src/workflow.ts @@ -124,7 +124,11 @@ export async function runWorkflow( workflowCode: string, workflowRun: WorkflowRun, events: Event[], - encryptionKey: CryptoKey | undefined + encryptionKey: CryptoKey | undefined, + // Optional V8 code cache for `workflowCode`, produced at build time. Lets the + // first compile in a fresh process skip parsing the bundle (see + // `getCachedWorkflowScript`). Omitted in dev / when no cache was emitted. + workflowCodeCachedData?: Buffer ): Promise { return trace(`workflow.run ${workflowRun.workflowName}`, async (span) => { span?.setAttributes({ @@ -787,7 +791,12 @@ export async function runWorkflow( // Script rather than the line just past the end of the bundle. That path // is rare (it requires the lookup `?.get(...)` expression to throw) and // does not affect the workflow function or replay determinism. 
- runCachedWorkflowScript(workflowCode, filename, context); + runCachedWorkflowScript( + workflowCode, + filename, + context, + workflowCodeCachedData + ); const workflowFn = runCachedWorkflowScript( `globalThis.__private_workflows?.get(${JSON.stringify(workflowRun.workflowName)})`, filename, diff --git a/packages/next/src/builder-deferred.ts b/packages/next/src/builder-deferred.ts index 0b62c6cd8d..d2a92c792e 100644 --- a/packages/next/src/builder-deferred.ts +++ b/packages/next/src/builder-deferred.ts @@ -53,6 +53,7 @@ export async function getNextBuilderDeferred() { const { BaseBuilder: BaseBuilderClass, WORKFLOW_QUEUE_TRIGGER, + createWorkflowEntrypointArgs, createWorkflowEntrypointOptionsCode, detectWorkflowPatterns, applySwcTransform, @@ -686,6 +687,8 @@ export async function POST(req) { return (await getWorkflowHandler())(req); }`; } else { + const { cachedDataDecl, secondArg } = + createWorkflowEntrypointArgs(workflowVMCode); routeCode = `// biome-ignore-all lint: generated file /* eslint-disable */ import 'workflow/internal/builtins'; @@ -693,8 +696,8 @@ ${stepImports} import { workflowEntrypoint } from 'workflow/runtime'; const workflowCode = \`${escapedVMCode}\`; - -export const POST = workflowEntrypoint(workflowCode${workflowEntrypointOptionsCode});`; +${cachedDataDecl} +export const POST = workflowEntrypoint(workflowCode${secondArg});`; } await this.writeFileIfChanged(flowOutfile, routeCode);