-
Notifications
You must be signed in to change notification settings - Fork 286
perf(core): build-time V8 code cache for workflow bundles #2523
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| --- | ||
| '@workflow/core': patch | ||
| '@workflow/builders': patch | ||
| '@workflow/next': patch | ||
| --- | ||
|
|
||
| Ship a build-time V8 code cache alongside large workflow bundles so a cold serverless instance skips parsing the bundle on its first replay, cutting time-to-first-step. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,5 @@ | ||
| import { Script } from 'node:vm'; | ||
|
|
||
| const QUEUE_NAMESPACE_PATTERN = /^[a-z][a-z0-9]*$/; | ||
|
|
||
| function resolveQueueNamespace(namespace?: string): string | undefined { | ||
|
|
@@ -67,6 +69,76 @@ export function createWorkflowEntrypointOptionsCode(options?: { | |
| return `, { namespace: ${JSON.stringify(namespace)} }`; | ||
| } | ||
|
|
||
| /** | ||
| * Variable name a generated route uses to hold the build-time V8 code cache. | ||
| */ | ||
| const WORKFLOW_CODE_CACHED_DATA_VAR = '__workflowCodeCachedData'; | ||
|
|
||
| /** | ||
| * Bundles below this size parse in ~1ms, so embedding a base64 code cache | ||
| * (which roughly doubles the route's source size) is not worth it. Above it, | ||
| * the cold-instance parse — the dominant first-replay cost — grows ~linearly | ||
| * (~30ms/MB), and the cache skips it. | ||
| */ | ||
| const MIN_CODE_CACHE_BYTES = 256 * 1024; | ||
|
|
||
| /** | ||
| * Builds the trailing pieces a generated flow route needs to hand a build-time | ||
| * V8 code cache to `workflowEntrypoint`: | ||
| * - `cachedDataDecl`: a `const __workflowCodeCachedData = "<base64>";` line to | ||
| * emit right after `const workflowCode = ...` (empty when no cache). | ||
| * - `secondArg`: the full `, { namespace, cachedData }` argument string. | ||
| * | ||
| * The cache (`Script.createCachedData()`) lets a fresh serverless instance skip | ||
| * parsing the bundle on its first replay. Skipped for small bundles and when | ||
| * `WORKFLOW_DISABLE_BUNDLE_CODE_CACHE=1`. Generation failure degrades silently | ||
| * to no cache; at runtime V8 also validates the blob and falls back to a full | ||
| * parse on any mismatch, so this is never a correctness risk. | ||
| * | ||
| * Only for production (bundle-embedded) routes — dev/watch routes that read the | ||
| * bundle from disk should keep using `createWorkflowEntrypointOptionsCode`. | ||
| */ | ||
| export function createWorkflowEntrypointArgs( | ||
| workflowCode: string, | ||
| options?: { namespace?: string } | ||
| ): { cachedDataDecl: string; secondArg: string } { | ||
| const namespace = resolveQueueNamespace(options?.namespace); | ||
| if (namespace) { | ||
| // Reuse prefix construction for namespace validation. | ||
| getQueueTopicPrefix('workflow', namespace); | ||
| } | ||
|
|
||
| let cachedDataB64 = ''; | ||
| if ( | ||
| process.env.WORKFLOW_DISABLE_BUNDLE_CODE_CACHE !== '1' && | ||
| workflowCode.length >= MIN_CODE_CACHE_BYTES | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Minor: |
||
| ) { | ||
| try { | ||
| const script = new Script(workflowCode, { | ||
| filename: 'workflow-bundle.js', | ||
| }); | ||
| cachedDataB64 = script.createCachedData().toString('base64'); | ||
| } catch { | ||
| cachedDataB64 = ''; | ||
| } | ||
| } | ||
|
|
||
| const entries: string[] = []; | ||
| if (namespace) { | ||
| entries.push(`namespace: ${JSON.stringify(namespace)}`); | ||
| } | ||
| if (cachedDataB64) { | ||
| entries.push(`cachedData: ${WORKFLOW_CODE_CACHED_DATA_VAR}`); | ||
| } | ||
|
|
||
| return { | ||
| cachedDataDecl: cachedDataB64 | ||
| ? `const ${WORKFLOW_CODE_CACHED_DATA_VAR} = ${JSON.stringify(cachedDataB64)};\n` | ||
| : '', | ||
| secondArg: entries.length ? `, { ${entries.join(', ')} }` : '', | ||
| }; | ||
| } | ||
|
|
||
| /** | ||
| * Default queue trigger (no namespace). Backward compatible. | ||
| */ | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,5 @@ | ||
| import { type Context, Script } from 'node:vm'; | ||
| import { runtimeLogger as scriptCacheLogger } from '../logger.js'; | ||
|
|
||
| /** | ||
| * Module-level cache of compiled workflow-bundle `vm.Script` objects. | ||
|
|
@@ -100,7 +101,8 @@ function touchBundle(code: string): Map<string, Script> | undefined { | |
| */ | ||
| export function getCachedWorkflowScript( | ||
| code: string, | ||
| filename: string | ||
| filename: string, | ||
| cachedData?: Buffer | ||
| ): Script { | ||
| let byFilename = touchBundle(code); | ||
| if (byFilename === undefined) { | ||
|
|
@@ -118,22 +120,45 @@ export function getCachedWorkflowScript( | |
| } | ||
| let script = byFilename.get(filename); | ||
| if (script === undefined) { | ||
| script = new Script(code, { filename }); | ||
| // `cachedData` (a V8 code cache produced at build time via | ||
| // `Script.createCachedData()`) lets V8 skip parsing the bundle on the first | ||
| // compile in this process — the dominant cost on a cold serverless instance | ||
| // for large bundles. The in-process cache above only helps subsequent | ||
| // replays in the *same* process; the code cache helps the very first one. | ||
| // V8 validates the blob against its version/flags and the source; on any | ||
| // mismatch it sets `cachedDataRejected` and transparently falls back to a | ||
| // full parse, so a stale or wrong-Node-version blob is never a correctness | ||
| // risk — only a missed optimization. `cachedData` is independent of | ||
| // `filename`, so a single build-time blob is valid for every per-workflow | ||
| // filename the bundle is compiled under. | ||
| script = cachedData | ||
| ? new Script(code, { filename, cachedData }) | ||
| : new Script(code, { filename }); | ||
| if (cachedData && script.cachedDataRejected) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Observability gap: a rejected blob degrades silently, which is correct, but a silent |
||
| scriptCacheLogger.debug( | ||
| 'Workflow bundle code cache rejected; falling back to full parse', | ||
| { filename } | ||
| ); | ||
| } | ||
| byFilename.set(filename, script); | ||
| } | ||
| return script; | ||
| } | ||
|
|
||
| /** | ||
| * Runs the cached workflow-bundle `Script` against `context`. Compiles and | ||
| * caches the `Script` on first use for the given `(code, filename)`. | ||
| * caches the `Script` on first use for the given `(code, filename)`. When | ||
| * `cachedData` is provided, it is used to skip parsing on the first compile. | ||
| */ | ||
| export function runCachedWorkflowScript( | ||
| code: string, | ||
| filename: string, | ||
| context: Context | ||
| context: Context, | ||
| cachedData?: Buffer | ||
| ): unknown { | ||
| return getCachedWorkflowScript(code, filename).runInContext(context); | ||
| return getCachedWorkflowScript(code, filename, cachedData).runInContext( | ||
| context | ||
| ); | ||
| } | ||
|
|
||
| /** | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`createWorkflowEntrypointArgs(workflowBundleCode)` runs unconditionally here — before the `if (!bundleFinalOutput)` check below — but every current caller of `createWorkflowsBundle` passes `bundleFinalOutput: false` (`createCombinedBundle` at L1353 and the Next deferred builder at builder-deferred.ts:619). In that mode this `bundleFinal` writes `workflowFunctionCode` to a `.__wf_tmp.js` file that the caller deletes immediately (L1370 / builder-deferred.ts:636) and then regenerates its own route + cache from the same bundle (L1379 / builder-deferred.ts:691).

So for a >256KB bundle, V8 `new Script()` + `createCachedData()` runs twice per build and one result is thrown away (~300-400ms of avoidable build cost on an 8MB bundle). Consider gating the cache generation on `bundleFinalOutput` — the throwaway intermediate route never needs a code cache.