import crypto from "node:crypto";

import { describe, expect, test } from "vitest";

import { factorManifestValue, factorObjectValues, getOrCreateVarName } from "./load-manifest.js";

// Unit tests for the manifest factoring helpers exported by `load-manifest.ts`.
// Fixture strings named "test-value-NNN-..." were picked because their SHA1 hashes
// share a short hex prefix, which forces the collision-resolution path.

describe("getOrCreateVarName", () => {
  test("returns a variable name starting with 'v' followed by a 3-char prefix", () => {
    const prefixMap = new Map<string, string>();
    const varName = getOrCreateVarName("some-value-long-enough-for-hashing", prefixMap);
    expect(varName).toMatch(/^v[0-9a-f]{3}$/);
  });

  test("returns the same variable name for the same value", () => {
    const prefixMap = new Map<string, string>();
    const value = "some-value-long-enough-for-hashing";
    const first = getOrCreateVarName(value, prefixMap);
    const second = getOrCreateVarName(value, prefixMap);
    expect(second).toBe(first);
    expect(prefixMap.size).toBe(1);
  });

  test("returns different variable names for different values", () => {
    const prefixMap = new Map<string, string>();
    const a = getOrCreateVarName("value-a-that-is-long-enough-to-be-factored", prefixMap);
    const b = getOrCreateVarName("value-b-that-is-long-enough-to-be-factored", prefixMap);
    expect(a).not.toBe(b);
    expect(prefixMap.size).toBe(2);
  });

  // SHA1("test-value-135-padding-to-make-it-long") = 8aa7da...
  // SHA1("test-value-152-padding-to-make-it-long") = 8aae79...
  // Both share the 3-char prefix "8aa".
  test("lengthens the new entry on 3-char collision without renaming the first", () => {
    const prefixMap = new Map<string, string>();
    const first = getOrCreateVarName("test-value-135-padding-to-make-it-long", prefixMap);
    const second = getOrCreateVarName("test-value-152-padding-to-make-it-long", prefixMap);

    // The first entry keeps its short 3-char prefix.
    expect(first).toBe("v8aa");
    // The second entry gets a longer prefix to avoid collision.
    expect(second).toBe("v8aae");
    expect(prefixMap.size).toBe(2);
  });

  // SHA1("test-value-241-...") = 47b8f8...
  // SHA1("test-value-404-...") = 47b6fc...
  // SHA1("test-value-748-...") = 47bac4...
  // All three share the 3-char prefix "47b".
  test("handles three-way collision at 3-char prefix", () => {
    const prefixMap = new Map<string, string>();
    const first = getOrCreateVarName("test-value-241-padding-to-make-it-long", prefixMap);
    const second = getOrCreateVarName("test-value-404-padding-to-make-it-long", prefixMap);
    const third = getOrCreateVarName("test-value-748-padding-to-make-it-long", prefixMap);

    // First takes "47b".
    expect(first).toBe("v47b");
    // Second collides at "47b", gets "47b6".
    expect(second).toBe("v47b6");
    // Third collides at "47b" (taken by first), gets "47ba".
    expect(third).toBe("v47ba");
    expect(prefixMap.size).toBe(3);
  });

  // SHA1("test-value-179-...") = 6ce8d80f...
  // SHA1("test-value-548-...") = 6ce8335e...
  // Both share the 4-char prefix "6ce8".
  test("only lengthens past prefixes that are actually registered", () => {
    const prefixMap = new Map<string, string>();
    const first = getOrCreateVarName("test-value-179-padding-to-make-it-long", prefixMap);
    const second = getOrCreateVarName("test-value-548-padding-to-make-it-long", prefixMap);

    // First takes "6ce".
    expect(first).toBe("v6ce");
    // Second collides at "6ce". The first entry registered only "6ce", so "6ce8" is
    // still free and one extra char is enough, even though both hashes start with "6ce8".
    expect(second).toBe("v6ce8");
    expect(prefixMap.size).toBe(2);
  });

  test("handles collision that requires more than 4 chars to resolve", () => {
    const value = "value-that-is-long-enough-to-be-factored";
    const sha1 = crypto.createHash("sha1").update(value).digest("hex");
    // Occupy both the 3-char and the 4-char prefix with unrelated hashes so the
    // new entry has to grow to 5 chars. Computing the hash here keeps the test
    // independent of any hard-coded digest.
    const prefixMap = new Map<string, string>([
      [sha1.slice(0, 3), "0".repeat(40)],
      [sha1.slice(0, 4), "1".repeat(40)],
    ]);

    expect(getOrCreateVarName(value, prefixMap)).toBe(`v${sha1.slice(0, 5)}`);
    expect(prefixMap.get(sha1.slice(0, 5))).toBe(sha1);
    expect(prefixMap.size).toBe(3);
  });

  test("updates prefixMap in place", () => {
    const prefixMap = new Map<string, string>();
    getOrCreateVarName("value-a-that-is-long-enough-to-be-factored", prefixMap);
    expect(prefixMap.size).toBe(1);
    const [prefix, fullHash] = [...prefixMap.entries()][0]!;
    expect(prefix).toHaveLength(3);
    expect(fullHash).toHaveLength(40);
  });
});

describe("factorManifestValue", () => {
  // Builds a minimal manifest assignment holding a single `key: value` pair.
  const makeManifest = (key: string, value: string) =>
    `globalThis.__RSC_MANIFEST["/page"] = { "${key}": ${value} };`;

  test("factors out large values into a variable", () => {
    const values = new Map<string, string>();
    const prefixMap = new Map<string, string>();
    const largeValue = JSON.stringify({ a: "x".repeat(50) });
    const manifest = makeManifest("clientModules", largeValue);

    const result = factorManifestValue(manifest, "clientModules", values, prefixMap);

    // The manifest should reference a variable instead of the inline value.
    expect(result).not.toContain(largeValue);
    expect(values.size).toBe(1);
    const [varName, storedValue] = [...values.entries()][0]!;
    expect(varName).toMatch(/^v[0-9a-f]{3,}$/);
    expect(storedValue).toBe(largeValue);
    expect(result).toContain(varName);
    expect(prefixMap.size).toBe(1);
  });

  test("leaves small values untouched", () => {
    const values = new Map<string, string>();
    const prefixMap = new Map<string, string>();
    const smallValue = '"small"';
    const manifest = makeManifest("clientModules", smallValue);

    const result = factorManifestValue(manifest, "clientModules", values, prefixMap);

    expect(result).toBe(manifest);
    expect(values.size).toBe(0);
    expect(prefixMap.size).toBe(0);
  });

  test("returns original manifest when key is not found", () => {
    const values = new Map<string, string>();
    const prefixMap = new Map<string, string>();
    const manifest = makeManifest("clientModules", '"some-value"');

    const result = factorManifestValue(manifest, "nonExistentKey", values, prefixMap);

    expect(result).toBe(manifest);
    expect(values.size).toBe(0);
  });

  test("reuses variable name for identical values across manifests", () => {
    const values = new Map<string, string>();
    const prefixMap = new Map<string, string>();
    const largeValue = JSON.stringify({ a: "x".repeat(50) });
    const manifest1 = makeManifest("clientModules", largeValue);
    const manifest2 = makeManifest("clientModules", largeValue);

    const result1 = factorManifestValue(manifest1, "clientModules", values, prefixMap);
    const result2 = factorManifestValue(manifest2, "clientModules", values, prefixMap);

    // Both should reference the same variable.
    const varName = [...values.keys()][0]!;
    expect(result1).toContain(varName);
    expect(result2).toContain(varName);
    // Only one entry in the values map (same content, same variable).
    expect(values.size).toBe(1);
    expect(prefixMap.size).toBe(1);
  });

  test("factors multiple keys from the same manifest with shared prefixMap", () => {
    const values = new Map<string, string>();
    const prefixMap = new Map<string, string>();
    const largeA = JSON.stringify({ a: "a".repeat(50) });
    const largeB = JSON.stringify({ b: "b".repeat(50) });
    const manifest = `globalThis.__RSC_MANIFEST["/page"] = { "clientModules": ${largeA}, "ssrModuleMapping": ${largeB} };`;

    let result = factorManifestValue(manifest, "clientModules", values, prefixMap);
    result = factorManifestValue(result, "ssrModuleMapping", values, prefixMap);

    expect(values.size).toBe(2);
    expect(prefixMap.size).toBe(2);
    // Both variable names should appear in the result.
    for (const varName of values.keys()) {
      expect(result).toContain(varName);
    }
    // Neither large value should appear inline.
    expect(result).not.toContain(largeA);
    expect(result).not.toContain(largeB);
  });
});

describe("factorObjectValues", () => {
  test("deduplicates repeated large chunks arrays", () => {
    const sharedVars = new Map<string, string>();
    const prefixMap = new Map<string, string>();
    const chunksArray = JSON.stringify(["chunk-a-long-name.js", "chunk-b-long-name.js"]);
    // Two entries with the same chunks array.
    const input = `{
      "mod1": { "id": "1", "chunks": ${chunksArray} },
      "mod2": { "id": "2", "chunks": ${chunksArray} }
    }`;

    const result = factorObjectValues(input, sharedVars, prefixMap);

    // The chunks array should be replaced by a variable reference.
    expect(sharedVars.size).toBe(1);
    const [varName, storedValue] = [...sharedVars.entries()][0]!;
    expect(varName).toMatch(/^v[0-9a-f]{3,}$/);
    expect(storedValue).toBe(chunksArray);
    // Both occurrences should use the same variable.
    const varOccurrences = result.split(varName).length - 1;
    expect(varOccurrences).toBe(2);
    expect(prefixMap.size).toBe(1);
  });

  test("skips small chunks arrays", () => {
    const sharedVars = new Map<string, string>();
    const prefixMap = new Map<string, string>();
    const input = `{
      "mod1": { "id": "1", "chunks": ["a"] }
    }`;

    const result = factorObjectValues(input, sharedVars, prefixMap);

    expect(result).toBe(input);
    expect(sharedVars.size).toBe(0);
    expect(prefixMap.size).toBe(0);
  });

  test("handles distinct chunks arrays with different variable names", () => {
    const sharedVars = new Map<string, string>();
    const prefixMap = new Map<string, string>();
    const chunksA = JSON.stringify(["chunk-alpha-long-name.js", "chunk-beta-long-name.js"]);
    const chunksB = JSON.stringify(["chunk-gamma-long-name.js", "chunk-delta-long-name.js"]);
    const input = `{
      "mod1": { "id": "1", "chunks": ${chunksA} },
      "mod2": { "id": "2", "chunks": ${chunksB} }
    }`;

    const result = factorObjectValues(input, sharedVars, prefixMap);

    expect(sharedVars.size).toBe(2);
    expect(prefixMap.size).toBe(2);
    // Both variable names should appear in the result.
    for (const varName of sharedVars.keys()) {
      expect(result).toContain(varName);
    }
  });

  test("shares the prefixMap with factorManifestValue", () => {
    const values = new Map<string, string>();
    const sharedVars = new Map<string, string>();
    const prefixMap = new Map<string, string>();

    // First, factor a manifest value.
    const largeValue = JSON.stringify({ a: "x".repeat(50) });
    const manifest = `globalThis.__RSC_MANIFEST["/page"] = { "clientModules": ${largeValue} };`;
    factorManifestValue(manifest, "clientModules", values, prefixMap);
    expect(prefixMap.size).toBe(1);

    // Then, factor chunks using the same prefixMap.
    const chunksArray = JSON.stringify(["chunk-a-long-name.js", "chunk-b-long-name.js"]);
    const input = `{ "mod1": { "id": "1", "chunks": ${chunksArray} } }`;
    factorObjectValues(input, sharedVars, prefixMap);

    // The prefixMap should now have 2 entries.
    expect(prefixMap.size).toBe(2);
    // The variable names should be different.
    const allVarNames = [...values.keys(), ...sharedVars.keys()];
    expect(new Set(allVarNames).size).toBe(2);
  });

  test("returns input unchanged when no chunks pairs are found", () => {
    const sharedVars = new Map<string, string>();
    const prefixMap = new Map<string, string>();
    const input = `{ "mod1": { "id": "1", "name": "test" } }`;

    const result = factorObjectValues(input, sharedVars, prefixMap);

    expect(result).toBe(input);
    expect(sharedVars.size).toBe(0);
  });
});
*/ +import crypto from "node:crypto"; import { readFile } from "node:fs/promises"; import { join, posix, relative, sep } from "node:path"; +import { Lang, parse, type SgNode } from "@ast-grep/napi"; import { type BuildOptions, getPackagePath } from "@opennextjs/aws/build/helper.js"; -import { patchCode, type RuleConfig } from "@opennextjs/aws/build/patch/astCodePatcher.js"; +import { applyRule, patchCode, type RuleConfig } from "@opennextjs/aws/build/patch/astCodePatcher.js"; import type { ContentUpdater, Plugin } from "@opennextjs/aws/plugins/content-updater.js"; import { getCrossPlatformPathRegex } from "@opennextjs/aws/utils/regex.js"; import { glob } from "glob"; @@ -96,23 +98,70 @@ async function getEvalManifestRule(buildOpts: BuildOptions) { const baseDir = join(outputDir, "server-functions/default", getPackagePath(buildOpts), ".next"); const appDir = join(baseDir, "server/app"); - const manifests = await glob(join(baseDir, "**/*_client-reference-manifest.js"), { + const manifestPaths = await glob(join(baseDir, "**/*_client-reference-manifest.js"), { windowsPathsNoEscape: true, }); - // Sort by path length descending so longer (more specific) paths match first, - // preventing suffix collisions in the `.endsWith()` chain (see #1156). - const sortedManifests = [...manifests].sort((a, b) => b.length - a.length); - const returnManifests = sortedManifests - .map((manifest) => { - const endsWith = normalizePath(relative(baseDir, manifest)); - const key = normalizePath("/" + relative(appDir, manifest)).replace( - "_client-reference-manifest.js", - "" - ); + // Map of factored large objects (variable name -> {...}) + const factoredObjects = new Map(); + // Map of manifest path -> factored manifest content + const factoredManifest = new Map(); + // Shared map of short hash prefix -> full SHA1 hash, used for collision resolution. 
+ const prefixMap = new Map(); + + for (const path of manifestPaths) { + if (path.endsWith("page_client-reference-manifest.js")) { + // `page_client-reference-manifest.js` files could contain large repeated values. + // Factor out large values into separate variables to reduce the overall size of the generated code. + let manifest = await readFile(path, "utf-8"); + for (const key of [ + "clientModules", + "ssrModuleMapping", + "edgeSSRModuleMapping", + "rscModuleMapping", + "entryCSSFiles", + "entryJSFiles", + ]) { + manifest = factorManifestValue(manifest, key, factoredObjects, prefixMap); + } + factoredManifest.set(path, manifest); + } + } + + // Map of factored values in an object + const factoredValues = new Map(); + + for (const [varName, value] of factoredObjects) { + factoredObjects.set(varName, factorObjectValues(value, factoredValues, prefixMap)); + } + + // Prepend chunks variable declarations before the factored values + const factoredValueCode = [...factoredValues.entries()] + .map(([name, val]) => `const ${name} = ${val};`) + .join("\n"); + + const factoredObjectCode = [...factoredObjects.entries()] + .map(([varName, value]) => `const ${varName} = ${value};`) + .join("\n"); + + const returnManifests = manifestPaths + // Sort by path length descending so longer (more specific) paths match first, + // preventing suffix collisions in the `.endsWith()` chain (see #1156). 
/**
 * Values whose source text is at most this many characters are left inline;
 * only longer values are hoisted into shared variables.
 */
const MAX_INLINE_LENGTH = 30;

/**
 * Factor out large manifest values into separate variables.
 *
 * The manifest is parsed as JavaScript and the first pair whose key is `"<key>"`
 * is looked up. When the source text of its value is longer than
 * {@link MAX_INLINE_LENGTH} characters, the text is recorded in `values` under a
 * hash-derived variable name and the pair is rewritten to `"<key>": <varName>`.
 *
 * @param manifest The manifest code.
 * @param key The key to factor out.
 * @param values A map to store the factored values (indexed by variable name).
 * @param prefixMap Map of short hash prefix → full hash, updated in place for
 *   collision resolution across calls.
 * @returns The manifest code with the large value factored out, or `manifest`
 *   itself when the key is not found or its value is small.
 */
export function factorManifestValue(
  manifest: string,
  key: string,
  values: Map<string, string>,
  prefixMap: Map<string, string>
): string {
  const valueName = "VALUE";
  // ASTGrep rule to extract the value of a specific key from the manifest object in the evalManifest function.
  //
  // globalThis.__RSC_MANIFEST["/path/to/page"] = {
  //   // ...
  //   key: $VALUE
  //   // ...
  // }
  //
  // NOTE(review): `inside` appears to sit at the top level of the YAML document
  // rather than under `rule` - confirm that ast-grep applies it from there.
  const extractValueRule = `
rule:
  kind: pair
  all:
    - has:
        field: key
        pattern: '"${key}"'
    - has:
        field: value
        pattern: $${valueName}
inside:
  pattern: globalThis.__RSC_MANIFEST[$$$_] = { $$$ };
  stopBy: end
`;

  const rootNode = parse(Lang.JavaScript, manifest).root();
  const { matches } = applyRule(extractValueRule, rootNode, { once: true });

  // Bind the pair and its value node once so no non-null assertion is needed below.
  const match = matches.length === 1 ? matches[0] : undefined;
  const valueNode = match?.getMatch(valueName);
  if (!match || !valueNode) {
    // The key is not present in this manifest.
    return manifest;
  }

  const value = valueNode.text();
  if (value.length <= MAX_INLINE_LENGTH) {
    // Small enough to not warrant factoring out.
    return manifest;
  }

  // Factor out the large value into a separate variable...
  const varName = getOrCreateVarName(value, prefixMap);
  values.set(varName, value);
  // ...and replace the value in the manifest with the variable reference.
  return rootNode.commitEdits([match.replace(`"${key}": ${varName}`)]);
}

/**
 * Factor out large object values into separate variables.
 *
 * Every `"chunks": [...]` pair whose array text is longer than
 * {@link MAX_INLINE_LENGTH} characters is rewritten to `"chunks": <varName>`,
 * and the array text is recorded in `sharedVars` under that name. Identical
 * arrays hash to the same name and therefore share one entry.
 *
 * @param valueText The JS source text of the module mapping object.
 * @param sharedVars Map to accumulate shared variable declarations.
 * @param prefixMap Map of short hash prefix → full hash, updated in place for
 *   collision resolution across calls.
 * @returns The rewritten value text with chunks arrays replaced by variable refs,
 *   or `valueText` itself when nothing was replaced.
 */
export function factorObjectValues(
  valueText: string,
  sharedVars: Map<string, string>,
  prefixMap: Map<string, string>
): string {
  const rootNode = parse(Lang.JavaScript, valueText).root();

  // Find all "chunks": [...] pairs
  const chunksRule = `
rule:
  kind: pair
  all:
    - has:
        field: key
        pattern: '"chunks"'
    - has:
        field: value
        kind: array
        pattern: $CHUNKS
`;

  const { matches } = applyRule(chunksRule, rootNode, { once: false });

  const edits: ReturnType<SgNode["replace"]>[] = [];

  for (const match of matches) {
    const chunksText = match.getMatch("CHUNKS")?.text();
    // Skip pairs without a captured array, and small arrays.
    if (chunksText === undefined || chunksText.length <= MAX_INLINE_LENGTH) continue;

    const varName = getOrCreateVarName(chunksText, prefixMap);
    sharedVars.set(varName, chunksText);
    edits.push(match.replace(`"chunks": ${varName}`));
  }

  return edits.length === 0 ? valueText : rootNode.commitEdits(edits);
}

/** Minimum number of hex characters used for short hash prefixes. */
const MIN_PREFIX_LENGTH = 3;

/**
 * Get or create a short variable name for a value, resolving collisions.
 *
 * Computes a SHA1 hash of the value, then walks its prefixes from
 * {@link MIN_PREFIX_LENGTH} hex chars upwards and takes the first one that is
 * either unregistered in `prefixMap` or already registered for this same hash.
 * When a prefix is held by a different hash, the new entry is given a longer
 * prefix — existing entries are never renamed.
 *
 * This allows saving space in the generated code (A full SHA1 is 40 hex chars) because
 * identifiers are not minimized by the Open Next build process.
 *
 * @param value The value to hash.
 * @param prefixMap Map of short prefix → full hash, updated in place.
 * @returns The variable name (`v<prefix>`).
 * @throws Error if every prefix up to the full hash is held by another hash
 *   (not expected to happen).
 */
export function getOrCreateVarName(value: string, prefixMap: Map<string, string>): string {
  const sha1 = crypto.createHash("sha1").update(value).digest("hex");

  // Find the shortest prefix (>= MIN_PREFIX_LENGTH) that doesn't collide
  // with any existing prefix. Only the new entry is lengthened.
  for (let len = MIN_PREFIX_LENGTH; len <= sha1.length; len++) {
    const candidate = sha1.slice(0, len);
    const existing = prefixMap.get(candidate);
    if (existing === undefined) {
      // Free slot: claim it for this hash.
      prefixMap.set(candidate, sha1);
      return `v${candidate}`;
    }
    if (existing === sha1) {
      // Same content seen again — reuse the existing variable.
      return `v${candidate}`;
    }
    // A different hash occupies this exact prefix — lengthen and retry.
  }

  // Unreachable: at full length the candidate is the hash itself, so the slot is
  // either free or already owned by this very hash.
  throw new Error("Failed to find a unique prefix");
}