Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .changeset/workflow-vm-compile-cache.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
'@workflow/core': patch
'@workflow/builders': patch
'@workflow/next': patch
---

Ship a build-time V8 code cache alongside large workflow bundles so a cold serverless instance skips parsing the bundle on its first replay, cutting time-to-first-step.
21 changes: 12 additions & 9 deletions packages/builders/src/base-builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@ import {
applySwcTransform,
type WorkflowManifest,
} from './apply-swc-transform.js';
import { createWorkflowEntrypointOptionsCode } from './constants.js';
import {
createWorkflowEntrypointArgs,
createWorkflowEntrypointOptionsCode,
} from './constants.js';
import { createDiscoverEntriesPlugin } from './discover-entries-esbuild-plugin.js';
import { getEsbuildTsconfigOptions } from './esbuild-tsconfig.js';
import {
Expand Down Expand Up @@ -1182,19 +1185,18 @@ export abstract class BaseBuilder {
}
}

const workflowEntrypointOptionsCode =
createWorkflowEntrypointOptionsCode();

const bundleFinal = async (interimBundle: string) => {
const workflowBundleCode = interimBundle;
const { cachedDataDecl, secondArg } =
createWorkflowEntrypointArgs(workflowBundleCode);

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

createWorkflowEntrypointArgs(workflowBundleCode) runs unconditionally here — before the if (!bundleFinalOutput) check below — but every current caller of createWorkflowsBundle passes bundleFinalOutput: false (createCombinedBundle at L1353 and the Next deferred builder at builder-deferred.ts:619). In that mode this bundleFinal writes workflowFunctionCode to a .__wf_tmp.js file that the caller deletes immediately (L1370 / builder-deferred.ts:636) and then regenerates its own route + cache from the same bundle (L1379 / builder-deferred.ts:691).

So for a >256KB bundle, V8 new Script() + createCachedData() runs twice per build and one result is thrown away (~300-400ms of avoidable build cost on an 8MB bundle). Consider gating the cache generation on bundleFinalOutput — the throwaway intermediate route never needs a code cache.


const workflowFunctionCode = `// biome-ignore-all lint: generated file
/* eslint-disable */
import { workflowEntrypoint } from 'workflow/runtime';

const workflowCode = \`${workflowBundleCode.replace(/[\\`$]/g, '\\$&')}\`;

export const POST = workflowEntrypoint(workflowCode${workflowEntrypointOptionsCode});`;
${cachedDataDecl}
export const POST = workflowEntrypoint(workflowCode${secondArg});`;

// we skip the final bundling step for Next.js so it can bundle itself
if (!bundleFinalOutput) {
Expand Down Expand Up @@ -1373,7 +1375,8 @@ export const POST = workflowEntrypoint(workflowCode${workflowEntrypointOptionsCo
// 3. Generate combined route file
const stepsRelativePath = './' + basename(stepsOutfile).replace(/\\/g, '/');
const escapedVMCode = workflowVMCode.replace(/[\\`$]/g, '\\$&');
const workflowEntrypointOptionsCode = createWorkflowEntrypointOptionsCode();
const { cachedDataDecl, secondArg } =
createWorkflowEntrypointArgs(workflowVMCode);

const combinedFunctionCode = `// biome-ignore-all lint: generated file
/* eslint-disable */
Expand All @@ -1384,8 +1387,8 @@ import { workflowEntrypoint } from 'workflow/runtime';
void __steps_registered;

const workflowCode = \`${escapedVMCode}\`;

export const POST = workflowEntrypoint(workflowCode${workflowEntrypointOptionsCode});`;
${cachedDataDecl}
export const POST = workflowEntrypoint(workflowCode${secondArg});`;

if (!bundleFinalOutput) {
// Write directly (Next.js will bundle)
Expand Down
64 changes: 64 additions & 0 deletions packages/builders/src/constants.test.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,22 @@
import { Script } from 'node:vm';
import { afterEach, describe, expect, it } from 'vitest';
import {
createWorkflowEntrypointArgs,
createWorkflowEntrypointOptionsCode,
createWorkflowQueueTrigger,
} from './constants.js';

/**
 * Builds a synthetic workflow bundle: one registry initialiser followed by
 * 8000 generated workflow registrations, which keeps the source comfortably
 * over the 256KB code-cache threshold.
 */
function largeBundle(): string {
  const header = 'globalThis.__private_workflows = new Map();';
  const registrations = Array.from(
    { length: 8000 },
    (_, index) =>
      `globalThis.__private_workflows.set('app/wf-${index}', async function wf${index}(a){ return a*${index}+${index}; });`
  );
  return [header, ...registrations].join('\n');
}

describe('createWorkflowQueueTrigger', () => {
afterEach(() => {
delete process.env.WORKFLOW_QUEUE_NAMESPACE;
Expand Down Expand Up @@ -49,3 +62,54 @@ describe('createWorkflowEntrypointOptionsCode', () => {
);
});
});

describe('createWorkflowEntrypointArgs', () => {
  // Clear the env vars this suite touches so state does not leak between cases.
  afterEach(() => {
    delete process.env.WORKFLOW_QUEUE_NAMESPACE;
    delete process.env.WORKFLOW_DISABLE_BUNDLE_CODE_CACHE;
  });

  it('omits the code cache for small bundles', () => {
    const tinyBundle = 'const workflowCode = 1;';
    const args = createWorkflowEntrypointArgs(tinyBundle);
    expect(args.cachedDataDecl).toBe('');
    expect(args.secondArg).toBe('');
  });

  it('emits a usable code cache for large bundles', () => {
    const source = largeBundle();
    const args = createWorkflowEntrypointArgs(source);

    expect(args.cachedDataDecl).toMatch(
      /^const __workflowCodeCachedData = ".+";\n$/
    );
    expect(args.secondArg).toBe(', { cachedData: __workflowCodeCachedData }');

    // Pull the base64 payload back out of the emitted declaration and feed it
    // to a fresh compile of the same source: V8 must accept it, which shows
    // the producer/consumer contract round-trips.
    const quoted = args.cachedDataDecl.match(/"([^"]+)"/);
    expect(quoted).not.toBeNull();
    const blob = Buffer.from(quoted?.[1] ?? '', 'base64');
    const recompiled = new Script(source, {
      filename: 'rt.js',
      cachedData: blob,
    });
    expect(recompiled.cachedDataRejected).toBe(false);
  });

  it('combines namespace and code cache', () => {
    const args = createWorkflowEntrypointArgs(largeBundle(), {
      namespace: 'custom',
    });
    expect(args.secondArg).toBe(
      ', { namespace: "custom", cachedData: __workflowCodeCachedData }'
    );
  });

  it('respects WORKFLOW_DISABLE_BUNDLE_CODE_CACHE', () => {
    // Opt out before generating: even a large bundle must then get no cache.
    process.env.WORKFLOW_DISABLE_BUNDLE_CODE_CACHE = '1';
    const args = createWorkflowEntrypointArgs(largeBundle());
    expect(args.cachedDataDecl).toBe('');
    expect(args.secondArg).toBe('');
  });
});
72 changes: 72 additions & 0 deletions packages/builders/src/constants.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import { Script } from 'node:vm';

const QUEUE_NAMESPACE_PATTERN = /^[a-z][a-z0-9]*$/;

function resolveQueueNamespace(namespace?: string): string | undefined {
Expand Down Expand Up @@ -67,6 +69,76 @@ export function createWorkflowEntrypointOptionsCode(options?: {
return `, { namespace: ${JSON.stringify(namespace)} }`;
}

/**
* Variable name a generated route uses to hold the build-time V8 code cache.
*/
const WORKFLOW_CODE_CACHED_DATA_VAR = '__workflowCodeCachedData';

/**
* Bundles below this size parse in ~1ms, so embedding a base64 code cache
* (which roughly doubles the route's source size) is not worth it. Above it,
* the cold-instance parse — the dominant first-replay cost — grows ~linearly
* (~30ms/MB), and the cache skips it.
*/
const MIN_CODE_CACHE_BYTES = 256 * 1024;

/**
 * Builds the trailing pieces a generated flow route needs to hand a build-time
 * V8 code cache to `workflowEntrypoint`:
 * - `cachedDataDecl`: a `const __workflowCodeCachedData = "<base64>";` line to
 *   emit right after `const workflowCode = ...` (empty when no cache).
 * - `secondArg`: the full `, { namespace, cachedData }` argument string
 *   (empty when there is neither a namespace nor a cache).
 *
 * The cache (`Script.createCachedData()`) lets a fresh serverless instance skip
 * parsing the bundle on its first replay. Skipped for bundles smaller than
 * `MIN_CODE_CACHE_BYTES` (measured in UTF-8 bytes) and when
 * `WORKFLOW_DISABLE_BUNDLE_CODE_CACHE=1`. Generation failure degrades silently
 * to no cache; at runtime V8 also validates the blob and falls back to a full
 * parse on any mismatch, so this is never a correctness risk.
 *
 * Only for production (bundle-embedded) routes — dev/watch routes that read the
 * bundle from disk should keep using `createWorkflowEntrypointOptionsCode`.
 *
 * @param workflowCode - Source text of the workflow bundle that the generated
 *   route embeds; the code cache is produced by compiling exactly this text.
 * @param options - Optional settings; `namespace` is resolved through
 *   `resolveQueueNamespace` and, when present, emitted into `secondArg`.
 * @returns The declaration line and the second-argument string described above.
 */
export function createWorkflowEntrypointArgs(
  workflowCode: string,
  options?: { namespace?: string }
): { cachedDataDecl: string; secondArg: string } {
  const namespace = resolveQueueNamespace(options?.namespace);
  if (namespace) {
    // Reuse prefix construction for namespace validation.
    getQueueTopicPrefix('workflow', namespace);
  }

  let cachedDataB64 = '';
  if (
    process.env.WORKFLOW_DISABLE_BUNDLE_CODE_CACHE !== '1' &&
    // The threshold is a byte count, so measure encoded bytes: `String#length`
    // counts UTF-16 code units and under-reports bundles with non-ASCII text.
    Buffer.byteLength(workflowCode, 'utf8') >= MIN_CODE_CACHE_BYTES
  ) {
    try {
      const script = new Script(workflowCode, {
        filename: 'workflow-bundle.js',
      });
      cachedDataB64 = script.createCachedData().toString('base64');
    } catch {
      // Deliberate best-effort: the cache is only an optimization, so a bundle
      // that fails to compile here simply ships without one.
      cachedDataB64 = '';
    }
  }

  const entries: string[] = [];
  if (namespace) {
    entries.push(`namespace: ${JSON.stringify(namespace)}`);
  }
  if (cachedDataB64) {
    // Reference the variable rather than inlining the blob a second time.
    entries.push(`cachedData: ${WORKFLOW_CODE_CACHED_DATA_VAR}`);
  }

  return {
    cachedDataDecl: cachedDataB64
      ? `const ${WORKFLOW_CODE_CACHED_DATA_VAR} = ${JSON.stringify(cachedDataB64)};\n`
      : '',
    secondArg: entries.length ? `, { ${entries.join(', ')} }` : '',
  };
}

/**
* Default queue trigger (no namespace). Backward compatible.
*/
Expand Down
1 change: 1 addition & 0 deletions packages/builders/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ export {
getDecoratorOptionsForDirectoryWithConfigPath,
} from './config-helpers.js';
export {
createWorkflowEntrypointArgs,
createWorkflowEntrypointOptionsCode,
createWorkflowQueueTrigger,
WORKFLOW_QUEUE_TRIGGER,
Expand Down
13 changes: 11 additions & 2 deletions packages/core/src/runtime.ts
Original file line number Diff line number Diff line change
Expand Up @@ -282,11 +282,19 @@ function hasOpenHookOrWait(events: Event[]): boolean {
*/
export function workflowEntrypoint(
workflowCode: string,
options?: { namespace?: string }
options?: { namespace?: string; cachedData?: string }
): (req: Request) => Promise<Response> {
const NO_INLINE_REPLAY_AFTER_MS =
Number(process.env.WORKFLOW_V2_TIMEOUT_MS) || 120_000;

// Decode the build-time V8 code cache (base64) once per process. Passed to
// every replay so the first bundle compile in a fresh process can skip
// parsing (see `runWorkflow` / `getCachedWorkflowScript`). Undefined when the
// build emitted no cache (e.g. dev, or a bundle below the size threshold).
const workflowCodeCachedData = options?.cachedData
? Buffer.from(options.cachedData, 'base64')
: undefined;

const namespace = resolveQueueNamespace(options?.namespace);
const workflowPrefix = getQueueTopicPrefix('workflow', namespace);

Expand Down Expand Up @@ -1074,7 +1082,8 @@ export function workflowEntrypoint(
workflowCode,
workflowRun,
events,
encryptionKey
encryptionKey,
workflowCodeCachedData
);
runtimeLogger.debug('Workflow replay completed', {
workflowRunId: runId,
Expand Down
57 changes: 57 additions & 0 deletions packages/core/src/vm/script-cache.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -199,4 +199,61 @@ describe('script-cache', () => {
) as (n: string) => Promise<string>;
expect(await fnY('z')).toContain('bundle-Y:3:z');
});

describe('build-time code cache (cachedData)', () => {
  it('accepts a matching code cache and still produces the right workflow', async () => {
    const source = buildBundle('cached-ok');
    // Stand in for the build: compile once and keep the V8 code cache blob.
    const vm = await import('node:vm');
    const buildTimeScript = new vm.Script(source, { filename: 'build.js' });
    const blob = buildTimeScript.createCachedData();

    // At runtime the blob has to be accepted for the same source...
    const compiled = getCachedWorkflowScript(source, 'app/file.js', blob);
    expect(compiled.cachedDataRejected).toBe(false);

    // ...and running the script must still register a working workflow.
    const { context } = createContext({ seed, fixedTimestamp });
    compiled.runInContext(context);
    const workflow = runInContext(
      `globalThis.__private_workflows?.get('app/workflow-5')`,
      context
    ) as (n: string) => Promise<string>;
    expect(await workflow('q')).toContain('cached-ok:5:q');
  });

  it('falls back to a full parse when the code cache is corrupt', async () => {
    const source = buildBundle('cached-corrupt');
    const garbage = Buffer.from('not a real v8 code cache blob');

    // Rejection is reported on the script instead of thrown, and the bundle
    // still compiles.
    const compiled = getCachedWorkflowScript(source, 'app/file.js', garbage);
    expect(compiled.cachedDataRejected).toBe(true);

    const { context } = createContext({ seed, fixedTimestamp });
    compiled.runInContext(context);
    const workflow = runInContext(
      `globalThis.__private_workflows?.get('app/workflow-2')`,
      context
    ) as (n: string) => Promise<string>;
    expect(await workflow('q')).toContain('cached-corrupt:2:q');
  });

  it('is independent of filename: a cache built under one filename is accepted under another', async () => {
    const source = buildBundle('cached-fname');
    const vm = await import('node:vm');
    const builtUnderA = new vm.Script(source, {
      filename: 'workflow//./a//wfA',
    });
    const blob = builtUnderA.createCachedData();

    // Compile the same source under a different per-workflow filename.
    const compiled = getCachedWorkflowScript(
      source,
      'workflow//./b//wfB',
      blob
    );
    expect(compiled.cachedDataRejected).toBe(false);
  });
});
});
35 changes: 30 additions & 5 deletions packages/core/src/vm/script-cache.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { type Context, Script } from 'node:vm';
import { runtimeLogger as scriptCacheLogger } from '../logger.js';

/**
* Module-level cache of compiled workflow-bundle `vm.Script` objects.
Expand Down Expand Up @@ -100,7 +101,8 @@ function touchBundle(code: string): Map<string, Script> | undefined {
*/
export function getCachedWorkflowScript(
code: string,
filename: string
filename: string,
cachedData?: Buffer
): Script {
let byFilename = touchBundle(code);
if (byFilename === undefined) {
Expand All @@ -118,22 +120,45 @@ export function getCachedWorkflowScript(
}
let script = byFilename.get(filename);
if (script === undefined) {
script = new Script(code, { filename });
// `cachedData` (a V8 code cache produced at build time via
// `Script.createCachedData()`) lets V8 skip parsing the bundle on the first
// compile in this process — the dominant cost on a cold serverless instance
// for large bundles. The in-process cache above only helps subsequent
// replays in the *same* process; the code cache helps the very first one.
// V8 validates the blob against its version/flags and the source; on any
// mismatch it sets `cachedDataRejected` and transparently falls back to a
// full parse, so a stale or wrong-Node-version blob is never a correctness
// risk — only a missed optimization. `cachedData` is independent of
// `filename`, so a single build-time blob is valid for every per-workflow
// filename the bundle is compiled under.
script = cachedData
? new Script(code, { filename, cachedData })
: new Script(code, { filename });
if (cachedData && script.cachedDataRejected) {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Observability gap: a rejected blob degrades silently, which is correct, but a silent cachedDataRejected is also a silent perf loss in prod — e.g. a Node minor bump on the runtime invalidates every build-time cache and nobody notices the cold-start regression. runWorkflow already opens a span with attributes; consider surfacing cache hit/reject as a span attribute there (or a counter) so this is detectable in production, not only via debug logs.

scriptCacheLogger.debug(
'Workflow bundle code cache rejected; falling back to full parse',
{ filename }
);
}
byFilename.set(filename, script);
}
return script;
}

/**
* Runs the cached workflow-bundle `Script` against `context`. Compiles and
* caches the `Script` on first use for the given `(code, filename)`.
* caches the `Script` on first use for the given `(code, filename)`. When
* `cachedData` is provided, it is used to skip parsing on the first compile.
*/
export function runCachedWorkflowScript(
code: string,
filename: string,
context: Context
context: Context,
cachedData?: Buffer
): unknown {
return getCachedWorkflowScript(code, filename).runInContext(context);
return getCachedWorkflowScript(code, filename, cachedData).runInContext(
context
);
}

/**
Expand Down
Loading
Loading