From 0fe152f1d4574c0de4aa834bf9f9c4185e4eb7ce Mon Sep 17 00:00:00 2001 From: "d.svitak" Date: Wed, 20 May 2026 13:38:50 +0200 Subject: [PATCH 01/36] feature/MIG-507-layout-analysis - initial script and html file --- migration-examples/.gitignore | 3 + migration-examples/layout/index.html | 414 ++++++++++++++++++ .../common/layout/AreaTemplateAnalysis.groovy | 264 +++++++++++ 3 files changed, 681 insertions(+) create mode 100644 migration-examples/layout/index.html create mode 100644 migration-examples/src/main/groovy/com/quadient/migration/example/common/layout/AreaTemplateAnalysis.groovy diff --git a/migration-examples/.gitignore b/migration-examples/.gitignore index de9b8546..85caa993 100644 --- a/migration-examples/.gitignore +++ b/migration-examples/.gitignore @@ -6,6 +6,9 @@ logs/ !src/main/groovy/com/quadient/migration/example/common/mapping/*.groovy !src/main/groovy/com/quadient/migration/example/common/report/ !src/main/groovy/com/quadient/migration/example/common/report/*.groovy +**/layout/*.json +!src/main/groovy/com/quadient/migration/example/common/layout/ +!src/main/groovy/com/quadient/migration/example/common/layout/*.groovy deploy/ src/main/resources/active-project-config diff --git a/migration-examples/layout/index.html b/migration-examples/layout/index.html new file mode 100644 index 00000000..8f909f10 --- /dev/null +++ b/migration-examples/layout/index.html @@ -0,0 +1,414 @@ + + + + + + Area Template Analysis + + + +
+

Area Template Analysis

+ + +
No file loaded — use the button above to open a *-area-analysis.json file.
+
+ +
+
Load a JSON file to begin.
+ + +
+ + + + diff --git a/migration-examples/src/main/groovy/com/quadient/migration/example/common/layout/AreaTemplateAnalysis.groovy b/migration-examples/src/main/groovy/com/quadient/migration/example/common/layout/AreaTemplateAnalysis.groovy new file mode 100644 index 00000000..f3b1b856 --- /dev/null +++ b/migration-examples/src/main/groovy/com/quadient/migration/example/common/layout/AreaTemplateAnalysis.groovy @@ -0,0 +1,264 @@ +//! --- +//! displayName: Area Template Analysis +//! category: Layout +//! description: Analyse page areas, detect containment and proximity groups, score cross-page similarity and suggest base-template candidates. Output JSON for layout/index.html. +//! --- +package com.quadient.migration.example.common.layout + +import com.fasterxml.jackson.databind.ObjectMapper +import com.quadient.migration.api.Migration +import com.quadient.migration.api.dto.migrationmodel.Area +import com.quadient.migration.api.dto.migrationmodel.DocumentObjectRef +import com.quadient.migration.api.dto.migrationmodel.ImageRef +import com.quadient.migration.api.dto.migrationmodel.builder.DocumentObjectFilterBuilder +import com.quadient.migration.api.repository.DocumentObjectRepository +import com.quadient.migration.example.common.util.PathUtil +import com.quadient.migration.shared.DocumentObjectType +import groovy.transform.Field + +import static com.quadient.migration.example.common.util.InitMigration.initMigration + +@Field Migration migration = initMigration(this.binding) + +def dstFile = PathUtil.dataDirPath(binding, "layout", "${migration.projectConfig.name}-area-analysis.json").toFile() +dstFile.parentFile.mkdirs() + +// ── Load pages & templates from the migration model ────────────────────────── +def templatesAndPages = (migration.documentObjectRepository as DocumentObjectRepository) + .list(new DocumentObjectFilterBuilder() + .types([DocumentObjectType.Page, DocumentObjectType.Template]) + .build()) + +def templates = templatesAndPages.findAll { it.type == DocumentObjectType.Template } +def pages = templatesAndPages.findAll { it.type == DocumentObjectType.Page } +def pageIds = pages.collect { it.id } as Set + +// Build page → template reverse index (a page can appear in at most one template) +def pageToTemplate = [:] +templates.each { tmpl -> + tmpl.content.findAll { it instanceof DocumentObjectRef && pageIds.contains(it.id) }.each { ref -> + pageToTemplate[(ref as DocumentObjectRef).id] = tmpl + } +} + +// ── Build per-page data ─────────────────────────────────────────────────────── +def pageDataList = pages.collect { page -> + def template = pageToTemplate[page.id] + + // Collect valid areas (skip nulls, off-page and zero-dimension artifacts) + def areaDataList = (page.content.findAll { it instanceof Area } as List) + .findAll { area -> + area.position != null && + area.position.width.toMillimeters() > 0 && + area.position.height.toMillimeters() > 0 && + area.position.x.toMillimeters() < 315.0 // 1.5 × A4 width guard + } + .collect { area -> + double x = area.position.x.toMillimeters() + double y = area.position.y.toMillimeters() + double w = area.position.width.toMillimeters() + double h = area.position.height.toMillimeters() + [x: x, y: y, w: w, h: h, x2: x + w, y2: y + h, sz: w * h, + flowToNextPage : area.flowToNextPage, + interactiveFlowName: area.interactiveFlowName ?: "", + contentPreview : buildContentPreview(migration, area)] + } + + // Containment: find the smallest enclosing parent for each area + def containment = findContainment(areaDataList) + + // Proximity grouping: vertical sweep with 5.3 mm gap (~15 pt) + def groups = groupByProximity(areaDataList) + groups.eachWithIndex { grpIndices, gi -> + grpIndices.each { idx -> areaDataList[idx].proximityGroup = gi } + } + + def proximityGroups = groups.collect { grpIndices -> + def subset = grpIndices.collect { areaDataList[it] } + double bx = subset.min { it.x as double }.x as double + double by = subset.min { it.y as double }.y as double + double bx2 = subset.max { it.x2 as double }.x2 as double + double by2 = subset.max { it.y2 as double }.y2 as double + [areaIndices: grpIndices, bbox: [x: r2(bx), y: r2(by), w: r2(bx2 - bx), h: r2(by2 - by)]] + } + + def areas = areaDataList.withIndex().collect { a, i -> + [x: r2(a.x), y: r2(a.y), w: r2(a.w), h: r2(a.h), + flowToNextPage : a.flowToNextPage, + interactiveFlowName: a.interactiveFlowName, + contentPreview : a.contentPreview, + containedIn : containment[i], // list index of parent, or null + proximityGroup : a.proximityGroup ?: 0] + } + + [pageId : page.id, + pageName : page.name, + templateId : template?.id, + templateName : template?.name, + areas : areas, + proximityGroups: proximityGroups] +} + +// ── Similarity matrix & template-family clustering ─────────────────────────── +int n = pageDataList.size() + +def matrix = (0.. + (0.. + if (i == j) return 1.0d + if (j < i) return 0.0d // filled symmetrically below + rs(pageSimilarity(pageDataList[i].areas as List, pageDataList[j].areas as List)) + } +} +(0.. (0.. matrix[i][j] = matrix[j][i] } } + +def families = clusterPages((0.. + [pageIds : idxList.collect { pageDataList[it].pageId }, + pageNames: idxList.collect { pageDataList[it].pageName }] +} + +// ── Write JSON ──────────────────────────────────────────────────────────────── +new ObjectMapper().writerWithDefaultPrettyPrinter().writeValue(dstFile, [ + projectName: migration.projectConfig.name, + pages : pageDataList, + similarity : [matrix: matrix, families: familyData] +]) +println "✓ Written to: ${dstFile.absolutePath}" + +// ── Static helper functions ─────────────────────────────────────────────────── + +/** Summarise area content as a human-readable string. */ +static String buildContentPreview(Migration migration, Area area) { + area.content.collect { c -> + switch (c) { + case DocumentObjectRef: + def obj = migration.documentObjectRepository.find(c.id) + return obj?.name ? "DocObjRef(${obj.name})" : "DocObjRef(${c.id})" + case ImageRef: + def img = migration.imageRepository.find(c.id) + return img?.name ? "ImageRef(${img.name})" : "ImageRef(${c.id})" + default: + return c.class.simpleName + } + }.join("; ") +} + +/** + * For each area find its smallest enclosing parent within the same page. + * Returns a map of list-index → list-index (parent), or empty if top-level. + * Tolerance: 0.7 mm (~2 pt) to absorb rounding differences. + */ +static Map findContainment(List areas) { + def containment = [:] + areas.eachWithIndex { inner, i -> + areas.eachWithIndex { outer, j -> + if (i == j || (outer.sz as double) <= (inner.sz as double)) return + double tol = 0.7 + if ((outer.x as double) - tol <= (inner.x as double) && + (outer.y as double) - tol <= (inner.y as double) && + (inner.x2 as double) <= (outer.x2 as double) + tol && + (inner.y2 as double) <= (outer.y2 as double) + tol) { + Integer cur = containment[i] as Integer + if (cur == null || (areas[cur].sz as double) > (outer.sz as double)) { + containment[i] = j + } + } + } + } + return containment +} + +/** + * Vertical sweep grouping: areas whose Y ranges overlap or are within + * yGapMm of each other are placed in the same group. + * Returns a list of groups, each group being a list of list-indices. + */ +static List> groupByProximity(List areas, double yGapMm = 5.3) { + if (!areas) return [] + def indexed = areas.withIndex().collect { area, i -> [area: area, idx: i] } + indexed.sort { a, b -> + (a.area.y as double) <=> (b.area.y as double) ?: (a.area.x as double) <=> (b.area.x as double) + } + def groups = [] + def current = [indexed[0].idx as int] + double curY2 = indexed[0].area.y2 as double + indexed.drop(1).each { entry -> + if ((entry.area.y as double) <= curY2 + yGapMm) { + current << (entry.idx as int) + curY2 = Math.max(curY2, entry.area.y2 as double) + } else { + groups << current + current = [entry.idx as int] + curY2 = entry.area.y2 as double + } + } + groups << current + return groups +} + +/** + * Compare two pages by greedy bipartite matching on area geometry. + * Each area is described by (x, width) normalised to pageRefMm and + * height with a log-ratio so a 3× difference scores ~1.0 (i.e. large + * height differences are tolerated but still weakly penalised). + * Unmatched areas (different counts) reduce the score proportionally. + */ +static double pageSimilarity(List areasA, List areasB, + double pageRefMm = 210.0, double matchThreshold = 0.5) { + def va = areasA.findAll { (it.w as double) > 0 && (it.h as double) > 0 && (it.x as double) < pageRefMm * 1.5 } + def vb = areasB.findAll { (it.w as double) > 0 && (it.h as double) > 0 && (it.x as double) < pageRefMm * 1.5 } + if (!va || !vb) return 0.0 + + def dists = [] + va.eachWithIndex { a, i -> + vb.eachWithIndex { b, j -> + double xd = Math.abs((a.x as double) - (b.x as double)) / pageRefMm + double wd = Math.abs((a.w as double) - (b.w as double)) / pageRefMm + double hA = Math.max(a.h as double, 0.1) + double hB = Math.max(b.h as double, 0.1) + double hr = Math.abs(Math.log(hA / hB)) / Math.log(3.0) + dists << [d: Math.sqrt(xd * xd + wd * wd + hr * hr), i: i, j: j] + } + } + dists.sort { it.d } + + def matchedA = [] as Set + def matchedB = [] as Set + double total = 0.0 + dists.each { e -> + if (!(e.i in matchedA) && !(e.j in matchedB)) { + matchedA << e.i; matchedB << e.j + total += Math.max(0.0, 1.0 - (e.d as double) / matchThreshold) + } + } + int slots = Math.max(va.size(), vb.size()) + return slots > 0 ? total / slots : 0.0 +} + +/** + * Greedy single-linkage clustering: merge two families when any pair of + * their pages has similarity >= threshold. + */ +static List> clusterPages(List indices, + List> matrix, + double threshold = 0.60) { + def families = indices.collect { [it] } + boolean changed = true + while (changed) { + changed = false + for (int i = 0; i < families.size() && !changed; i++) { + for (int j = i + 1; j < families.size() && !changed; j++) { + boolean merge = families[i].any { a -> families[j].any { b -> (matrix[a][b] as double) >= threshold } } + if (merge) { + families[i].addAll(families[j]) + families.remove(j) + changed = true + } + } + } + } + return families +} + +static double r2(double v) { Math.round(v * 100) / 100.0 } +static double rs(double v) { Math.round(v * 1000) / 1000.0 } From 2d1ee74bef52a98690218077040bfbdfe81d7648 Mon Sep 17 00:00:00 2001 From: "d.svitak" Date: Thu, 21 May 2026 10:27:37 +0200 Subject: [PATCH 02/36] feature/MIG-507-layout-analysis - rename and extend with template page grouping attempt #1 --- migration-examples/layout/index.html | 180 +++++++++++++----- ...ateAnalysis.groovy => LayoutExport.groovy} | 96 ++++++++-- 2 files changed, 211 insertions(+), 65 deletions(-) rename migration-examples/src/main/groovy/com/quadient/migration/example/common/layout/{AreaTemplateAnalysis.groovy => LayoutExport.groovy} (74%) diff --git a/migration-examples/layout/index.html b/migration-examples/layout/index.html index 8f909f10..ff08de1c 100644 --- a/migration-examples/layout/index.html +++ b/migration-examples/layout/index.html @@ -3,7 +3,7 @@ - Area Template Analysis + Layout Export @@ -175,6 +224,18 @@

Layout Export

+ +
+ +
+ + +
+ + · + +
+
@@ -188,27 +249,18 @@

Layout Export

-
- Compare by: -
- - -
-
-

-
+
Template similarity
+

+
+ +
Page similarity
+

+
-
- Compare by: -
- - -
-
-

+

    @@ -223,8 +275,8 @@

    Layout Export

    ]; // ── State ───────────────────────────────────────────────────────────────────── -let gData = null; -let viewLevel = 'page'; // 'page' | 'template' +let gData = null; +let selectedTmplIds = new Set(); // template IDs currently checked in the filter // ── File loading ────────────────────────────────────────────────────────────── document.getElementById("file").addEventListener("change", async (e) => { @@ -250,41 +302,127 @@

    Layout Export

    }); }); -// ── Segmented level toggle (synced across both tab panels) ──────────────────── -document.querySelectorAll(".seg-btn").forEach(btn => { - btn.addEventListener("click", () => { - viewLevel = btn.dataset.level; - document.querySelectorAll(".seg-btn").forEach(b => - b.classList.toggle("active", b.dataset.level === viewLevel)); - if (gData) { - renderMatrix(gData); - renderFamilies(gData); - } +// ── Template filter combobox ────────────────────────────────────────────────── +const filterBtn = document.getElementById("filter-btn"); +const filterPanel = document.getElementById("filter-panel"); + +filterBtn.addEventListener("click", e => { + filterPanel.hidden = !filterPanel.hidden; + e.stopPropagation(); +}); +document.addEventListener("click", () => { filterPanel.hidden = true; }); +filterPanel.addEventListener("click", e => e.stopPropagation()); + +document.getElementById("filter-all").addEventListener("click", () => { + filterPanel.querySelectorAll("input[type=checkbox]").forEach(cb => { + cb.checked = true; + selectedTmplIds.add(cb.dataset.id); }); + updateFilterBtn(); + if (gData) rerender(); +}); +document.getElementById("filter-none").addEventListener("click", () => { + filterPanel.querySelectorAll("input[type=checkbox]").forEach(cb => { cb.checked = false; }); + selectedTmplIds.clear(); + updateFilterBtn(); + if (gData) rerender(); }); +function buildFilterPanel(templates) { + filterPanel.innerHTML = ""; + templates.forEach(t => { + const label = document.createElement("label"); + const cb = document.createElement("input"); + cb.type = "checkbox"; cb.checked = true; cb.dataset.id = t.templateId; + cb.addEventListener("change", () => { + if (cb.checked) selectedTmplIds.add(t.templateId); + else selectedTmplIds.delete(t.templateId); + updateFilterBtn(); + if (gData) rerender(); + }); + label.appendChild(cb); + label.appendChild(document.createTextNode(" " + t.templateName)); + filterPanel.appendChild(label); + }); +} + +function updateFilterBtn() { + const total = gData?.similarity?.templateLevel?.templates?.length ?? 0; + filterBtn.textContent = selectedTmplIds.size === total + ? `All templates (${total}) ▾` + : `${selectedTmplIds.size} of ${total} templates ▾`; +} + // ── Main render ─────────────────────────────────────────────────────────────── function render(data) { gData = data; document.getElementById("empty").hidden = true; document.getElementById("app").hidden = false; - renderPages(data.pages); - renderMatrix(data); - renderFamilies(data); + + const tl = data.similarity?.templateLevel; + if (tl) { + selectedTmplIds = new Set(tl.templates.map(t => t.templateId)); + buildFilterPanel(tl.templates); + updateFilterBtn(); + } else { + document.getElementById("filter-bar").style.display = "none"; + } + + rerender(); +} + +function rerender() { + renderPagesGrouped(gData); + renderSimilarity(gData); + renderFamilies(gData); } -// ── Pages ───────────────────────────────────────────────────────────────────── +// ── Pages & Areas (grouped by template, filtered) ──────────────────────────── const CANVAS_W = 260; const PAD = 14; -function renderPages(pages) { +function renderPagesGrouped(data) { const grid = document.getElementById("pages-grid"); grid.innerHTML = ""; - pages.forEach(page => grid.appendChild(buildPageCard(page))); + const tl = data.similarity?.templateLevel; + + if (!tl) { + // Old JSON: flat list, no filter + const wrap = document.createElement("div"); + wrap.className = "tmpl-group-pages"; + data.pages.forEach(page => wrap.appendChild(buildPageCard(page))); + grid.appendChild(wrap); + return; + } + + const selected = tl.templates.filter(t => selectedTmplIds.has(t.templateId)); + if (selected.length === 0) { + grid.innerHTML = "

    No templates selected.

    "; + return; + } + + selected.forEach(tmpl => { + const group = document.createElement("div"); + group.className = "tmpl-group"; + + const header = document.createElement("div"); + header.className = "tmpl-group-header"; + header.innerHTML = + `${esc(tmpl.templateName)}` + + `${tmpl.pageIndices.length} page(s)`; + group.appendChild(header); + + const pagesWrap = document.createElement("div"); + pagesWrap.className = "tmpl-group-pages"; + tmpl.pageIndices.forEach(pi => pagesWrap.appendChild(buildPageCard(data.pages[pi]))); + group.appendChild(pagesWrap); + + grid.appendChild(group); + }); } function buildPageCard(page) { - const areas = page.areas; + const areas = page.areas; if (!areas.length) { const card = document.createElement("div"); card.className = "page-card"; @@ -298,41 +436,32 @@

    Layout Export

    const refW = Math.max(maxX2, 210); const scale = (CANVAS_W - 2 * PAD) / refW; const svgH = Math.ceil(maxY2 * scale + 2 * PAD); - - const svg = svgEl("svg", { width: CANVAS_W, height: svgH, - style: "background:#fff;cursor:default" }); + const svg = svgEl("svg", { width: CANVAS_W, height: svgH, style: "background:#fff;cursor:default" }); (page.proximityGroups || []).forEach((grp, gi) => { const b = grp.bbox; - const r = svgEl("rect", { - x: sc(b.x, scale), y: sc(b.y, scale), - width: sc(b.w, scale), height: sc(b.h, scale), - fill: "none", stroke: "#e00", "stroke-width": 1.5, - "stroke-dasharray": "5,3" - }); - svg.appendChild(r); - const lbl = svgEl("text", { - x: sc(b.x, scale) + 2, y: sc(b.y, scale) - 2, - "font-size": 7, fill: "#e00", "font-weight": "bold" - }); + svg.appendChild(svgEl("rect", { + x: sc(b.x, scale), y: sc(b.y, scale), width: sc(b.w, scale), height: sc(b.h, scale), + fill: "none", stroke: "#e00", "stroke-width": 1.5, "stroke-dasharray": "5,3" + })); + const lbl = svgEl("text", { x: sc(b.x, scale) + 2, y: sc(b.y, scale) - 2, + "font-size": 7, fill: "#e00", "font-weight": "bold" }); lbl.textContent = `G${gi + 1}`; svg.appendChild(lbl); }); areas.forEach((a, i) => { - const color = PALETTE[i % PALETTE.length]; - const isTop = a.containedIn == null; - const rx = sc(a.x, scale), ry = sc(a.y, scale); - const rw = sc(a.w, scale), rh = sc(a.h, scale); - const label = shortLabel(a.contentPreview) || `area_${i}`; - const tip = `${label}\nx=${a.x}mm y=${a.y}mm w=${a.w}mm h=${a.h}mm` - + (a.flowToNextPage ? "\n[flowToNextPage]" : "") - + (a.interactiveFlowName ? `\nflow: ${a.interactiveFlowName}` : ""); + const color = PALETTE[i % PALETTE.length]; + const isTop = a.containedIn == null; + const rx = sc(a.x, scale), ry = sc(a.y, scale), rw = sc(a.w, scale), rh = sc(a.h, scale); + const label = shortLabel(a.contentPreview) || `area_${i}`; + const tip = `${label}\nx=${a.x}mm y=${a.y}mm w=${a.w}mm h=${a.h}mm` + + (a.flowToNextPage ? "\n[flowToNextPage]" : "") + + (a.interactiveFlowName ? `\nflow: ${a.interactiveFlowName}` : ""); const rect = svgEl("rect", { x: rx, y: ry, width: rw, height: rh, - fill: color, "fill-opacity": "0.32", - stroke: color, + fill: color, "fill-opacity": "0.32", stroke: color, "stroke-width": isTop ? "1.5" : "1", "stroke-dasharray": isTop ? "none" : "3,2" }); @@ -342,10 +471,8 @@

    Layout Export

    svg.appendChild(rect); if (rw > 12 && rh > 8) { - const txt = svgEl("text", { - x: rx + 2, y: ry + 9, - "font-size": 6.5, fill: "#222", "pointer-events": "none", - }); + const txt = svgEl("text", { x: rx + 2, y: ry + 9, + "font-size": 6.5, fill: "#222", "pointer-events": "none" }); txt.textContent = label.length > 28 ? label.slice(-28) : label; svg.appendChild(txt); } @@ -364,42 +491,69 @@

    Layout Export

    return card; } -// ── Similarity matrix ───────────────────────────────────────────────────────── -function renderMatrix(data) { - const wrap = document.getElementById("matrix-wrap"); - const desc = document.getElementById("similarity-desc"); - // Support both new (pageLevel/templateLevel) and old (flat) JSON - const pl = data.similarity.pageLevel || data.similarity; - const tl = data.similarity.templateLevel; - - if (viewLevel === "template" && tl) { - desc.innerHTML = - `Compares templates by positional page matching (page 1↔1, page 2↔2, …). - Templates with different page counts are penalised for unmatched pages. - Score 1.0 = identical page sequence, 0.0 = nothing in common. - Threshold for same-family grouping: 0.60.`; - buildMatrixTable(wrap, tl.templates.map(t => t.templateName), tl.matrix); +// ── Similarity (both matrices, filtered by selected templates) ──────────────── +function renderSimilarity(data) { + const tl = data.similarity?.templateLevel; + const pl = data.similarity?.pageLevel || data.similarity; + + // ── Template matrix ── + const tmplWrap = document.getElementById("tmpl-matrix-wrap"); + const tmplDesc = document.getElementById("tmpl-matrix-desc"); + if (tl) { + const selIdx = tl.templates.map((t, i) => selectedTmplIds.has(t.templateId) ? i : -1).filter(i => i >= 0); + if (selIdx.length > 0) { + tmplDesc.textContent = + "Compares templates by positional page matching (page 1↔1, 2↔2, …). " + + "Templates with different page counts are penalised for unmatched pages. Score 1.0 = identical, 0.0 = nothing in common."; + buildMatrixTable(tmplWrap, + selIdx.map(i => tl.templates[i].templateName), + selIdx.map(i => selIdx.map(j => tl.matrix[i][j]))); + } else { + tmplDesc.textContent = ""; + tmplWrap.innerHTML = "

    No templates selected.

    "; + } + } else { + tmplDesc.textContent = ""; + tmplWrap.innerHTML = "

    Template-level data not available (old JSON format).

    "; + } + + // ── Page matrix — only pages from selected templates ── + const pageWrap = document.getElementById("page-matrix-wrap"); + const pageDesc = document.getElementById("page-matrix-desc"); + let selPageIdx = []; + + if (tl && selectedTmplIds.size > 0) { + const seen = new Set(); + tl.templates.filter(t => selectedTmplIds.has(t.templateId)) + .forEach(t => t.pageIndices.forEach(pi => { if (!seen.has(pi)) { seen.add(pi); selPageIdx.push(pi); } })); + selPageIdx.sort((a, b) => a - b); + } else if (!tl) { + selPageIdx = data.pages.map((_, i) => i); + } + + if (selPageIdx.length > 0) { + pageDesc.textContent = + "Compares pages by area structure (x + width normalised to page width, height with 3× tolerance). " + + "Score 1.0 = identical structure, 0.0 = nothing in common."; + buildMatrixTable(pageWrap, + selPageIdx.map(i => data.pages[i].pageName), + selPageIdx.map(i => selPageIdx.map(j => pl.matrix[i][j]))); } else { - desc.innerHTML = - `Compares pages by area structure (x + width normalised to page width, height with 3× tolerance). - Score 1.0 = identical structure, 0.0 = nothing in common. - Threshold for same-family grouping: 0.60.`; - buildMatrixTable(wrap, data.pages.map(p => p.pageName), pl.matrix); + pageDesc.textContent = ""; + pageWrap.innerHTML = "

    No pages to display.

    "; } } function buildMatrixTable(wrap, names, matrix) { const n = names.length; - if (n === 0) { wrap.innerHTML = "

    No data.

    "; return; } + if (n === 0) { wrap.innerHTML = ""; return; } const table = document.createElement("table"); - const thead = table.createTHead(); - const hrow = thead.insertRow(); + const hrow = table.createTHead().insertRow(); hrow.insertCell().innerHTML = ""; names.forEach((name, j) => { const th = document.createElement("th"); - th.title = name; - th.textContent = `${j + 1}`; + th.title = name; th.textContent = `${j + 1}`; hrow.appendChild(th); }); @@ -412,12 +566,10 @@

    Layout Export

    matrix[i].forEach((score, j) => { const td = row.insertCell(); if (i === j) { - td.textContent = "—"; - td.style.background = "#e8e8e8"; + td.textContent = "—"; td.style.background = "#e8e8e8"; } else { td.textContent = score.toFixed(2); - const g = Math.round(200 * score); - const r = Math.round(255 * (1 - score)); + const g = Math.round(200 * score), r = Math.round(255 * (1 - score)); td.style.background = `rgb(${r},${g},80)`; if (score >= 0.60) td.style.fontWeight = "700"; } @@ -428,56 +580,119 @@

    Layout Export

    wrap.appendChild(table); const note = document.createElement("p"); note.style.cssText = "font-size:11px;color:var(--muted);margin-top:6px"; - note.textContent = "Green = high similarity · Bold = same family (≥ 0.60). Row/column numbers match."; + note.textContent = "Green = high similarity · Bold = same family (≥ 0.60). Numbers match row/column headers."; wrap.appendChild(note); } -// ── Template families ───────────────────────────────────────────────────────── +// ── Template families (hierarchical) ───────────────────────────────────────── function renderFamilies(data) { + const tl = data.similarity?.templateLevel; + const pl = data.similarity?.pageLevel || data.similarity; const list = document.getElementById("families-list"); const desc = document.getElementById("families-desc"); - const pl = data.similarity.pageLevel || data.similarity; - const tl = data.similarity.templateLevel; list.innerHTML = ""; - if (viewLevel === "template" && tl) { - desc.textContent = - "Templates grouped into base-template candidates. " + - "Templates in the same family share enough structural similarity to be served by a single base template."; - tl.families.forEach((fam, fi) => { - const li = document.createElement("li"); - const isUnique = fam.templateIds.length === 1; - const label = isUnique - ? `Base Template ${fi + 1} (unique — no shared base template needed)` - : `Base Template ${fi + 1}`; - const tmplBadges = fam.templateNames.map(n => `${esc(n)}`).join(" "); - const pageBadges = fam.pageNames.map(n => `${esc(n)}`).join(" "); - li.innerHTML = - `${label}
    ` + - `Templates: ${tmplBadges}
    ` + - `Pages: ${pageBadges}`; - list.appendChild(li); - }); - } else { - desc.textContent = - "Pages grouped into base-template candidates based on structural similarity. " + - "Pages in the same family share enough area positions/sizes to be served by one base template."; + if (!tl) { + // Fallback: old JSON — flat page families + desc.textContent = "Pages grouped by structural similarity into base-template candidates."; pl.families.forEach((fam, fi) => { - const li = document.createElement("li"); + const li = document.createElement("li"); const isUnique = fam.pageIds.length === 1; - const label = isUnique - ? `Base Template ${fi + 1} (unique page — no shared template needed)` - : `Base Template ${fi + 1}`; - li.innerHTML = `${label}
    ` + - fam.pageNames.map(n => `${esc(n)}`).join(" "); + li.innerHTML = + `
    Base Template ${fi + 1}` + + (isUnique ? ` (unique)` : "") + `
    ` + + `
    ` + + `${fam.pageNames.map(n => `${esc(n)}`).join("")}` + + `
    `; list.appendChild(li); }); + return; } + + desc.textContent = + "Each card is a base template candidate. " + + "Numbered rows are proposed base template pages — original pages below each number " + + "share enough structural similarity (≥ 0.60) to be served by that one template page."; + + const tmplById = Object.fromEntries(tl.templates.map(t => [t.templateId, t])); + + tl.families.forEach((fam, fi) => { + // OR logic: show family if at least one of its templates is currently selected. + // Unselected templates stay visible but dimmed to preserve family context. + if (!fam.templateIds.some(id => selectedTmplIds.has(id))) return; + + const isUnique = fam.templateIds.length === 1; + const famTemplates = fam.templateIds.map(id => tmplById[id]).filter(Boolean); + const multiTmpl = famTemplates.length > 1; + + // All page indices in this family, in original list order + const allPageIdx = famTemplates.flatMap(t => t.pageIndices); + allPageIdx.sort((a, b) => a - b); + + // Cluster pages by structural similarity → each cluster = proposed base template page + const clusters = clusterByMatrix(allPageIdx, pl.matrix); + clusters.sort((a, b) => Math.min(...a) - Math.min(...b)); + + // Display label for a page index + const labelOf = pi => { + const pageName = data.pages[pi].pageName; + if (!multiTmpl) return pageName; + for (const t of famTemplates) { + if (t.pageIndices.includes(pi)) return `${t.templateName} / ${pageName}`; + } + return pageName; + }; + + // Template badges: active = normal, inactive = dimmed (but still shown for context) + const tmplBadges = fam.templateIds.map((id, idx) => + `${esc(fam.templateNames[idx])}` + ).join(" "); + + let slotsHtml = '
    '; + clusters.forEach((cluster, ci) => { + const badges = cluster.map(pi => + `${esc(labelOf(pi))}`).join(""); + slotsHtml += + `
    ` + + `${ci + 1}` + + `${badges}` + + `
    `; + }); + slotsHtml += "
    "; + + const li = document.createElement("li"); + li.innerHTML = + `
    ` + + `Base Template ${fi + 1}` + + (isUnique ? ` (unique)` : "") + + `${tmplBadges}` + + `
    ` + slotsHtml; + list.appendChild(li); + }); } // ── Utilities ───────────────────────────────────────────────────────────────── function sc(mm, scale) { return Math.round((mm * scale + PAD) * 10) / 10; } +/** Single-linkage clustering of page list indices using the global page-level similarity matrix. */ +function clusterByMatrix(indices, matrix, threshold = 0.60) { + let clusters = indices.map(i => [i]); + let changed = true; + while (changed) { + changed = false; + for (let i = 0; i < clusters.length && !changed; i++) { + for (let j = i + 1; j < clusters.length && !changed; j++) { + if (clusters[i].some(a => clusters[j].some(b => (matrix[a]?.[b] ?? 0) >= threshold))) { + clusters[i].push(...clusters[j]); + clusters.splice(j, 1); + changed = true; + } + } + } + } + return clusters; +} + function svgEl(tag, attrs) { const el = document.createElementNS("http://www.w3.org/2000/svg", tag); Object.entries(attrs).forEach(([k, v]) => el.setAttribute(k, v)); @@ -494,5 +709,6 @@

    Layout Export

    return String(s).replace(/&/g,"&").replace(//g,">"); } + From 619fe3602c33ae58637a06987cdb086e2ecebb42 Mon Sep 17 00:00:00 2001 From: "d.svitak" Date: Fri, 22 May 2026 09:02:40 +0200 Subject: [PATCH 04/36] feature/MIG-507-layout-analysis - Slight restructure of the layout analysis --- migration-examples/layout/index.html | 294 +++++++++++++++------------ 1 file changed, 164 insertions(+), 130 deletions(-) diff --git a/migration-examples/layout/index.html b/migration-examples/layout/index.html index e623cf0e..4c253ed1 100644 --- a/migration-examples/layout/index.html +++ b/migration-examples/layout/index.html @@ -3,7 +3,7 @@ - Layout Export + Layout Analysis @@ -263,18 +216,20 @@

    Layout Analysis

    diff --git a/migration-examples/src/main/groovy/com/quadient/migration/example/common/layout/LayoutExport.groovy b/migration-examples/src/main/groovy/com/quadient/migration/example/common/layout/LayoutExport.groovy index 9546145b..c69f25b4 100644 --- a/migration-examples/src/main/groovy/com/quadient/migration/example/common/layout/LayoutExport.groovy +++ b/migration-examples/src/main/groovy/com/quadient/migration/example/common/layout/LayoutExport.groovy @@ -29,27 +29,25 @@ def templatesAndPages = (migration.documentObjectRepository as DocumentObjectRep .build()) def templates = templatesAndPages.findAll { it.type == DocumentObjectType.Template } -def pages = templatesAndPages.findAll { it.type == DocumentObjectType.Page } -def pageIds = pages.collect { it.id } as Set +def pages = templatesAndPages.findAll { it.type == DocumentObjectType.Page } +def pageIds = pages.collect { it.id } as Set -// Build page → template reverse index; also track each page's position within its template -def pageToTemplate = [:] -def pageIndexInTemplate = [:] +// Page → template lookup; also track each page's position within its template +def pageToTemplate = [:] +def pagePositionInTemplate = [:] templates.each { tmpl -> tmpl.content .findAll { it instanceof DocumentObjectRef && pageIds.contains(it.id) } .eachWithIndex { ref, idx -> - pageToTemplate[(ref as DocumentObjectRef).id] = tmpl - pageIndexInTemplate[(ref as DocumentObjectRef).id] = idx + pageToTemplate[(ref as DocumentObjectRef).id] = tmpl + pagePositionInTemplate[(ref as DocumentObjectRef).id] = idx } } -// ── Build per-page data ─────────────────────────────────────────────────────── def pageDataList = pages.collect { page -> def template = pageToTemplate[page.id] - // Collect valid areas (skip nulls, off-page and zero-dimension artifacts) - def areaDataList = (page.content.findAll { it instanceof Area } as List) + def areaList = (page.content.findAll { it instanceof Area } as List) .findAll { area -> area.position != null && area.position.width.toMillimeters() > 0 && @@ -62,67 +60,58 @@ def pageDataList = pages.collect { page -> double w = area.position.width.toMillimeters() double h = area.position.height.toMillimeters() [x: x, y: y, w: w, h: h, x2: x + w, y2: y + h, sz: w * h, - flowToNextPage : area.flowToNextPage, + flowToNextPage : area.flowToNextPage, interactiveFlowName: area.interactiveFlowName ?: "", - contentPreview : buildContentPreview(migration, area)] + contentPreview : buildContentPreview(migration, area)] } - // Containment: find the smallest enclosing parent for each area - def containment = findContainment(areaDataList) + def containment = findContainment(areaList) // Proximity grouping: vertical sweep with 5.3 mm gap (~15 pt) - def groups = groupByProximity(areaDataList) - groups.eachWithIndex { grpIndices, gi -> - grpIndices.each { idx -> areaDataList[idx].proximityGroup = gi } + def groups = groupByProximity(areaList) + groups.eachWithIndex { groupIndices, gi -> + groupIndices.each { idx -> areaList[idx].proximityGroup = gi } } - def proximityGroups = groups.collect { grpIndices -> - def subset = grpIndices.collect { areaDataList[it] } + def proximityGroups = groups.collect { groupIndices -> + def subset = groupIndices.collect { areaList[it] } double bx = subset.min { it.x as double }.x as double double by = subset.min { it.y as double }.y as double double bx2 = subset.max { it.x2 as double }.x2 as double double by2 = subset.max { it.y2 as double }.y2 as double - [areaIndices: grpIndices, bbox: [x: r2(bx), y: r2(by), w: r2(bx2 - bx), h: r2(by2 - by)]] + [areaIndices: groupIndices, bbox: [x: round2(bx), y: round2(by), w: round2(bx2 - bx), h: round2(by2 - by)]] } - def areas = areaDataList.withIndex().collect { a, i -> - [x: r2(a.x), y: r2(a.y), w: r2(a.w), h: r2(a.h), - flowToNextPage : a.flowToNextPage, + def areas = areaList.withIndex().collect { a, i -> + [x: round2(a.x), y: round2(a.y), w: round2(a.w), h: round2(a.h), + flowToNextPage : a.flowToNextPage, interactiveFlowName: a.interactiveFlowName, - contentPreview : a.contentPreview, - containedIn : containment[i], // list index of parent, or null - proximityGroup : a.proximityGroup ?: 0] + contentPreview : a.contentPreview, + containedIn : containment[i], // list index of parent area, or null if top-level + proximityGroup : a.proximityGroup ?: 0] } - [pageId : page.id, - pageName : page.name, - templateId : template?.id, - templateName : template?.name, - templatePageIndex : pageIndexInTemplate[page.id], // position of this page within its template - areas : areas, - proximityGroups : proximityGroups] + [pageId : page.id, + pageName : page.name, + templateId : template?.id, + templateName : template?.name, + templatePageIndex : pagePositionInTemplate[page.id], + areas : areas, + proximityGroups : proximityGroups] } -// ── Similarity matrix & template-family clustering ─────────────────────────── -int n = pageDataList.size() +int pageCount = pageDataList.size() -def matrix = (0.. - (0.. +def pageMatrix = (0.. + (0.. if (i == j) return 1.0d - if (j < i) return 0.0d // filled symmetrically below - rs(pageSimilarity(pageDataList[i].areas as List, pageDataList[j].areas as List)) + if (j < i) return 0.0d + round3(pageSimilarity(pageDataList[i].areas as List, pageDataList[j].areas as List)) } } -(0.. (0.. matrix[i][j] = matrix[j][i] } } - -def families = clusterPages((0.. - [pageIds : idxList.collect { pageDataList[it].pageId }, - pageNames: idxList.collect { pageDataList[it].pageName }] -} +(0.. (0.. pageMatrix[i][j] = pageMatrix[j][i] } } -// ── Template-level similarity & clustering ──────────────────────────────────── -// Group page-list indices by their parent template, preserving intra-template page order. +// Group page-list indices by their parent template, preserving intra-template page order def templateGroupMap = [:] pageDataList.eachWithIndex { pg, i -> def key = pg.templateId ?: "solo::${pg.pageId}" @@ -131,49 +120,36 @@ pageDataList.eachWithIndex { pg, i -> } def templateList = templateGroupMap.collect { key, entries -> - def sorted = entries.sort { it.order } - def pageIndices = sorted.collect { it.listIdx as int } - def firstPg = pageDataList[pageIndices[0]] - [templateId : firstPg.templateId ?: key, - templateName: firstPg.templateName ?: firstPg.pageName, + def pageIndices = entries.sort { it.order }.collect { it.listIdx as int } + def firstPage = pageDataList[pageIndices[0]] + [templateId : firstPage.templateId ?: key, + templateName: firstPage.templateName ?: firstPage.pageName, pageIndices : pageIndices] } -int nt = templateList.size() -def tmplMatrix = (0.. - (0.. +int templateCount = templateList.size() +def templateMatrix = (0.. + (0.. if (i == j) return 1.0d if (j < i) return 0.0d - rs(templateSimilarity( + round3(templateSimilarity( templateList[i].pageIndices as List, templateList[j].pageIndices as List, - matrix)) + pageMatrix)) } } -(0.. (0.. tmplMatrix[i][j] = tmplMatrix[j][i] } } - -def tmplFamilies = clusterPages((0.. - [templateIds : idxList.collect { templateList[it].templateId }, - templateNames: idxList.collect { templateList[it].templateName }, - pageNames : idxList.collectMany { ti -> - (templateList[ti].pageIndices as List).collect { pi -> pageDataList[pi].pageName as String } - }] -} +(0.. (0.. templateMatrix[i][j] = templateMatrix[j][i] } } -// ── Write JSON ──────────────────────────────────────────────────────────────── new ObjectMapper().writerWithDefaultPrettyPrinter().writeValue(dstFile, [ projectName: migration.projectConfig.name, pages : pageDataList, similarity : [ - pageLevel : [matrix: matrix, families: familyData], - templateLevel: [templates: templateList, matrix: tmplMatrix, families: tmplFamilyData] + pageLevel : [matrix: pageMatrix], + templateLevel: [templates: templateList, matrix: templateMatrix] ] ]) println "✓ Written to: ${dstFile.absolutePath}" -// ── Static helper functions ─────────────────────────────────────────────────── - /** Summarise area content as a human-readable string. */ static String buildContentPreview(Migration migration, Area area) { area.content.collect { c -> @@ -192,8 +168,8 @@ static String buildContentPreview(Migration migration, Area area) { /** * For each area find its smallest enclosing parent within the same page. - * Returns a map of list-index → list-index (parent), or empty if top-level. - * Tolerance: 0.7 mm (~2 pt) to absorb rounding differences. + * Returns a map of list-index → list-index (parent). + * Tolerance of 0.7 mm (~2 pt) absorbs rounding differences. */ static Map findContainment(List areas) { def containment = [:] @@ -246,69 +222,42 @@ static List> groupByProximity(List areas, double yGapMm = 5.3 /** * Compare two pages by greedy bipartite matching on area geometry. * Each area is described by (x, width) normalised to pageRefMm and - * height with a log-ratio so a 3× difference scores ~1.0 (i.e. large + * height with a log-ratio so a 3× difference scores ~1.0 (large * height differences are tolerated but still weakly penalised). * Unmatched areas (different counts) reduce the score proportionally. */ static double pageSimilarity(List areasA, List areasB, double pageRefMm = 210.0, double matchThreshold = 0.5) { - def va = areasA.findAll { (it.w as double) > 0 && (it.h as double) > 0 && (it.x as double) < pageRefMm * 1.5 } - def vb = areasB.findAll { (it.w as double) > 0 && (it.h as double) > 0 && (it.x as double) < pageRefMm * 1.5 } - if (!va || !vb) return 0.0 - - def dists = [] - va.eachWithIndex { a, i -> - vb.eachWithIndex { b, j -> - double xd = Math.abs((a.x as double) - (b.x as double)) / pageRefMm - double wd = Math.abs((a.w as double) - (b.w as double)) / pageRefMm - double hA = Math.max(a.h as double, 0.1) - double hB = Math.max(b.h as double, 0.1) - double hr = Math.abs(Math.log(hA / hB)) / Math.log(3.0) - dists << [d: Math.sqrt(xd * xd + wd * wd + hr * hr), i: i, j: j] + def validA = areasA.findAll { (it.w as double) > 0 && (it.h as double) > 0 && (it.x as double) < pageRefMm * 1.5 } + def validB = areasB.findAll { (it.w as double) > 0 && (it.h as double) > 0 && (it.x as double) < pageRefMm * 1.5 } + if (!validA || !validB) return 0.0 + + def distances = [] + validA.eachWithIndex { a, i -> + validB.eachWithIndex { b, j -> + double xDist = Math.abs((a.x as double) - (b.x as double)) / pageRefMm + double wDist = Math.abs((a.w as double) - (b.w as double)) / pageRefMm + double hRatio = Math.abs(Math.log(Math.max(a.h as double, 0.1) / Math.max(b.h as double, 0.1))) / Math.log(3.0) + distances << [d: Math.sqrt(xDist * xDist + wDist * wDist + hRatio * hRatio), i: i, j: j] } } - dists.sort { it.d } + distances.sort { it.d } def matchedA = [] as Set def matchedB = [] as Set double total = 0.0 - dists.each { e -> - if (!(e.i in matchedA) && !(e.j in matchedB)) { - matchedA << e.i; matchedB << e.j - total += Math.max(0.0, 1.0 - (e.d as double) / matchThreshold) + distances.each { pair -> + if (!(pair.i in matchedA) && !(pair.j in matchedB)) { + matchedA << pair.i; matchedB << pair.j + total += Math.max(0.0, 1.0 - (pair.d as double) / matchThreshold) } } - int slots = Math.max(va.size(), vb.size()) + int slots = Math.max(validA.size(), validB.size()) return slots > 0 ? total / slots : 0.0 } -/** - * Greedy single-linkage clustering: merge two families when any pair of - * their pages has similarity >= threshold. - */ -static List> clusterPages(List indices, - List> matrix, - double threshold = 0.60) { - def families = indices.collect { [it] } - boolean changed = true - while (changed) { - changed = false - for (int i = 0; i < families.size() && !changed; i++) { - for (int j = i + 1; j < families.size() && !changed; j++) { - boolean merge = families[i].any { a -> families[j].any { b -> (matrix[a][b] as double) >= threshold } } - if (merge) { - families[i].addAll(families[j]) - families.remove(j) - changed = true - } - } - } - } - return families -} - -static double r2(double v) { Math.round(v * 100) / 100.0 } -static double rs(double v) { Math.round(v * 1000) / 1000.0 } +static double round2(double v) { Math.round(v * 100) / 100.0 } +static double round3(double v) { Math.round(v * 1000) / 1000.0 } /** * Compare two templates by positional page matching (page 1↔1, page 2↔2, …). @@ -320,7 +269,6 @@ static double templateSimilarity(List pagesA, List pagesB, int maxLen = Math.max(pagesA.size(), pagesB.size()) if (maxLen == 0) return 0.0 double total = 0.0 - int minLen = Math.min(pagesA.size(), pagesB.size()) - (0.. total += pageMatrix[pagesA[k]][pagesB[k]] as double } + (0.. total += pageMatrix[pagesA[k]][pagesB[k]] as double } return total / maxLen } From 446ef138ab7ae0cc74835f691a90da01d26b30a3 Mon Sep 17 00:00:00 2001 From: "d.svitak" Date: Fri, 22 May 2026 13:57:06 +0200 Subject: [PATCH 06/36] feature/MIG-507-layout-analysis - Formatting --- migration-examples/layout/index.html | 1313 ++++++++++------- .../example/common/layout/LayoutExport.groovy | 102 +- 2 files changed, 843 insertions(+), 572 deletions(-) diff --git a/migration-examples/layout/index.html b/migration-examples/layout/index.html index dde52ca7..576224e5 100644 --- a/migration-examples/layout/index.html +++ b/migration-examples/layout/index.html @@ -1,8 +1,8 @@ - - + + Layout Analysis

    Layout Analysis

    - -
    No file loaded — use the button above to open a *-layout.json file.
    + +
    No file loaded — use the button above to open a *-layout.json file.
    @@ -216,490 +469,524 @@

    Layout Analysis

    + // svgPos offsets by PAD (use for x/y); svgDim does not (use for width/height) + function svgPos(mm, scale) { + return Math.round((mm * scale + PAD) * 10) / 10; + } + + function svgDim(mm, scale) { + return Math.round(mm * scale * 10) / 10; + } + + function areaLabel(preview) { + if (!preview) return ""; + const m = preview.match(/\(([^)]+)\)$/); + return m ? m[1] : preview.slice(0, 40); + } + + function htmlEsc(s) { + return String(s).replace(/&/g, "&").replace(//g, ">"); + } + diff --git a/migration-examples/src/main/groovy/com/quadient/migration/example/common/layout/LayoutExport.groovy b/migration-examples/src/main/groovy/com/quadient/migration/example/common/layout/LayoutExport.groovy index c69f25b4..29b05e51 100644 --- a/migration-examples/src/main/groovy/com/quadient/migration/example/common/layout/LayoutExport.groovy +++ b/migration-examples/src/main/groovy/com/quadient/migration/example/common/layout/LayoutExport.groovy @@ -29,37 +29,33 @@ def templatesAndPages = (migration.documentObjectRepository as DocumentObjectRep .build()) def templates = templatesAndPages.findAll { it.type == DocumentObjectType.Template } -def pages = templatesAndPages.findAll { it.type == DocumentObjectType.Page } -def pageIds = pages.collect { it.id } as Set +def pages = templatesAndPages.findAll { it.type == DocumentObjectType.Page } +def pageIds = pages.collect { it.id } as Set // Page → template lookup; also track each page's position within its template -def pageToTemplate = [:] +def pageToTemplate = [:] def pagePositionInTemplate = [:] templates.each { tmpl -> tmpl.content - .findAll { it instanceof DocumentObjectRef && pageIds.contains(it.id) } - .eachWithIndex { ref, idx -> - pageToTemplate[(ref as DocumentObjectRef).id] = tmpl - pagePositionInTemplate[(ref as DocumentObjectRef).id] = idx - } + .findAll { it instanceof DocumentObjectRef && pageIds.contains(it.id) } + .eachWithIndex { ref, idx -> + pageToTemplate[(ref as DocumentObjectRef).id] = tmpl + pagePositionInTemplate[(ref as DocumentObjectRef).id] = idx + } } def pageDataList = pages.collect { page -> def template = pageToTemplate[page.id] def areaList = (page.content.findAll { it instanceof Area } as List) - .findAll { area -> - area.position != null && - area.position.width.toMillimeters() > 0 && - area.position.height.toMillimeters() > 0 && - area.position.x.toMillimeters() < 315.0 // 1.5 × A4 width guard + .findAll { area -> area.position != null && area.position.width.toMillimeters() > 0 && area.position.height.toMillimeters() > 0 && area.position.x.toMillimeters() < 315.0 // 1.5 × A4 width guard } .collect { area -> double x = area.position.x.toMillimeters() double y = area.position.y.toMillimeters() double w = area.position.width.toMillimeters() double h = area.position.height.toMillimeters() - [x: x, y: y, w: w, h: h, x2: x + w, y2: y + h, sz: w * h, + [x : x, y: y, w: w, h: h, x2: x + w, y2: y + h, sz: w * h, flowToNextPage : area.flowToNextPage, interactiveFlowName: area.interactiveFlowName ?: "", contentPreview : buildContentPreview(migration, area)] @@ -69,21 +65,20 @@ def pageDataList = pages.collect { page -> // Proximity grouping: vertical sweep with 5.3 mm gap (~15 pt) def groups = groupByProximity(areaList) - groups.eachWithIndex { groupIndices, gi -> - groupIndices.each { idx -> areaList[idx].proximityGroup = gi } + groups.eachWithIndex { groupIndices, gi -> groupIndices.each { idx -> areaList[idx].proximityGroup = gi } } def proximityGroups = groups.collect { groupIndices -> def subset = groupIndices.collect { areaList[it] } - double bx = subset.min { it.x as double }.x as double - double by = subset.min { it.y as double }.y as double + double bx = subset.min { it.x as double }.x as double + double by = subset.min { it.y as double }.y as double double bx2 = subset.max { it.x2 as double }.x2 as double double by2 = subset.max { it.y2 as double }.y2 as double [areaIndices: groupIndices, bbox: [x: round2(bx), y: round2(by), w: round2(bx2 - bx), h: round2(by2 - by)]] } def areas = areaList.withIndex().collect { a, i -> - [x: round2(a.x), y: round2(a.y), w: round2(a.w), h: round2(a.h), + [x : round2(a.x), y: round2(a.y), w: round2(a.w), h: round2(a.h), flowToNextPage : a.flowToNextPage, interactiveFlowName: a.interactiveFlowName, contentPreview : a.contentPreview, @@ -91,13 +86,13 @@ def pageDataList = pages.collect { page -> proximityGroup : a.proximityGroup ?: 0] } - [pageId : page.id, - pageName : page.name, - templateId : template?.id, - templateName : template?.name, - templatePageIndex : pagePositionInTemplate[page.id], - areas : areas, - proximityGroups : proximityGroups] + [pageId : page.id, + pageName : page.name, + templateId : template?.id, + templateName : template?.name, + templatePageIndex: pagePositionInTemplate[page.id], + areas : areas, + proximityGroups : proximityGroups] } int pageCount = pageDataList.size() @@ -105,7 +100,7 @@ int pageCount = pageDataList.size() def pageMatrix = (0.. (0.. if (i == j) return 1.0d - if (j < i) return 0.0d + if (j < i) return 0.0d round3(pageSimilarity(pageDataList[i].areas as List, pageDataList[j].areas as List)) } } @@ -121,7 +116,7 @@ pageDataList.eachWithIndex { pg, i -> def templateList = templateGroupMap.collect { key, entries -> def pageIndices = entries.sort { it.order }.collect { it.listIdx as int } - def firstPage = pageDataList[pageIndices[0]] + def firstPage = pageDataList[pageIndices[0]] [templateId : firstPage.templateId ?: key, templateName: firstPage.templateName ?: firstPage.pageName, pageIndices : pageIndices] @@ -131,24 +126,20 @@ int templateCount = templateList.size() def templateMatrix = (0.. (0.. if (i == j) return 1.0d - if (j < i) return 0.0d - round3(templateSimilarity( - templateList[i].pageIndices as List, - templateList[j].pageIndices as List, - pageMatrix)) + if (j < i) return 0.0d + round3(templateSimilarity(templateList[i].pageIndices as List, + templateList[j].pageIndices as List, + pageMatrix)) } } (0.. (0.. templateMatrix[i][j] = templateMatrix[j][i] } } -new ObjectMapper().writerWithDefaultPrettyPrinter().writeValue(dstFile, [ - projectName: migration.projectConfig.name, - pages : pageDataList, - similarity : [ - pageLevel : [matrix: pageMatrix], - templateLevel: [templates: templateList, matrix: templateMatrix] - ] -]) -println "✓ Written to: ${dstFile.absolutePath}" +new ObjectMapper().writerWithDefaultPrettyPrinter().writeValue(dstFile, + [projectName: migration.projectConfig.name, + pages : pageDataList, + similarity : [pageLevel : [matrix: pageMatrix], + templateLevel: [templates: templateList, matrix: templateMatrix]]]) +println "Written to: ${dstFile.absolutePath}" /** Summarise area content as a human-readable string. */ static String buildContentPreview(Migration migration, Area area) { @@ -169,18 +160,14 @@ static String buildContentPreview(Migration migration, Area area) { /** * For each area find its smallest enclosing parent within the same page. * Returns a map of list-index → list-index (parent). - * Tolerance of 0.7 mm (~2 pt) absorbs rounding differences. - */ + * Tolerance of 0.7 mm (~2 pt) absorbs rounding differences.*/ static Map findContainment(List areas) { def containment = [:] areas.eachWithIndex { inner, i -> areas.eachWithIndex { outer, j -> if (i == j || (outer.sz as double) <= (inner.sz as double)) return double tol = 0.7 - if ((outer.x as double) - tol <= (inner.x as double) && - (outer.y as double) - tol <= (inner.y as double) && - (inner.x2 as double) <= (outer.x2 as double) + tol && - (inner.y2 as double) <= (outer.y2 as double) + tol) { + if ((outer.x as double) - tol <= (inner.x as double) && (outer.y as double) - tol <= (inner.y as double) && (inner.x2 as double) <= (outer.x2 as double) + tol && (inner.y2 as double) <= (outer.y2 as double) + tol) { Integer cur = containment[i] as Integer if (cur == null || (areas[cur].sz as double) > (outer.sz as double)) { containment[i] = j @@ -194,15 +181,13 @@ static Map findContainment(List areas) { /** * Vertical sweep grouping: areas whose Y ranges overlap or are within * yGapMm of each other are placed in the same group. - * Returns a list of groups, each group being a list of list-indices. - */ + * Returns a list of groups, each group being a list of list-indices.*/ static List> groupByProximity(List areas, double yGapMm = 5.3) { if (!areas) return [] def indexed = areas.withIndex().collect { area, i -> [area: area, idx: i] } - indexed.sort { a, b -> - (a.area.y as double) <=> (b.area.y as double) ?: (a.area.x as double) <=> (b.area.x as double) + indexed.sort { a, b -> (a.area.y as double) <=> (b.area.y as double) ?: (a.area.x as double) <=> (b.area.x as double) } - def groups = [] + def groups = [] def current = [indexed[0].idx as int] double curY2 = indexed[0].area.y2 as double indexed.drop(1).each { entry -> @@ -224,8 +209,7 @@ static List> groupByProximity(List areas, double yGapMm = 5.3 * Each area is described by (x, width) normalised to pageRefMm and * height with a log-ratio so a 3× difference scores ~1.0 (large * height differences are tolerated but still weakly penalised). - * Unmatched areas (different counts) reduce the score proportionally. - */ + * Unmatched areas (different counts) reduce the score proportionally.*/ static double pageSimilarity(List areasA, List areasB, double pageRefMm = 210.0, double matchThreshold = 0.5) { def validA = areasA.findAll { (it.w as double) > 0 && (it.h as double) > 0 && (it.x as double) < pageRefMm * 1.5 } @@ -235,8 +219,8 @@ static double pageSimilarity(List areasA, List areasB, def distances = [] validA.eachWithIndex { a, i -> validB.eachWithIndex { b, j -> - double xDist = Math.abs((a.x as double) - (b.x as double)) / pageRefMm - double wDist = Math.abs((a.w as double) - (b.w as double)) / pageRefMm + double xDist = Math.abs((a.x as double) - (b.x as double)) / pageRefMm + double wDist = Math.abs((a.w as double) - (b.w as double)) / pageRefMm double hRatio = Math.abs(Math.log(Math.max(a.h as double, 0.1) / Math.max(b.h as double, 0.1))) / Math.log(3.0) distances << [d: Math.sqrt(xDist * xDist + wDist * wDist + hRatio * hRatio), i: i, j: j] } @@ -257,13 +241,13 @@ static double pageSimilarity(List areasA, List areasB, } static double round2(double v) { Math.round(v * 100) / 100.0 } + static double round3(double v) { Math.round(v * 1000) / 1000.0 } /** * Compare two templates by positional page matching (page 1↔1, page 2↔2, …). * Unmatched pages (different template lengths) contribute 0, penalising the score - * proportionally so longer templates aren't artificially favoured. - */ + * proportionally so longer templates aren't artificially favoured.*/ static double templateSimilarity(List pagesA, List pagesB, List> pageMatrix) { int maxLen = Math.max(pagesA.size(), pagesB.size()) From b5d396cbd1aced322db179d97c021d3a818782cb Mon Sep 17 00:00:00 2001 From: "d.svitak" Date: Fri, 22 May 2026 14:34:48 +0200 Subject: [PATCH 07/36] feature/MIG-507-layout-analysis - bug fixes and improvements --- migration-examples/layout/index.html | 34 +++-- .../example/common/layout/LayoutExport.groovy | 118 +++++++++++------- 2 files changed, 91 insertions(+), 61 deletions(-) diff --git a/migration-examples/layout/index.html b/migration-examples/layout/index.html index 576224e5..f0833974 100644 --- a/migration-examples/layout/index.html +++ b/migration-examples/layout/index.html @@ -549,7 +549,7 @@

    Layout Analysis

    if (currentData) rerender(); }); label.appendChild(cb); - label.appendChild(document.createTextNode(" " + t.templateName)); + label.appendChild(document.createTextNode(" " + templateName(t))); filterPanel.appendChild(label); }); } @@ -589,7 +589,7 @@

    Layout Analysis

    const card = document.createElement("div"); card.className = "page-card"; card.innerHTML = - `
    ${htmlEsc(page.pageName)}
    ` + + `
    ${htmlEsc(pageName(page))}
    ` + `
    No areas
    `; return card; } @@ -657,7 +657,7 @@

    Layout Analysis

    const card = document.createElement("div"); card.className = "page-card"; card.innerHTML = - `
    ${htmlEsc(page.pageName)}
    ` + + `
    ${htmlEsc(pageName(page))}
    ` + `
    ${areas.length} area(s)${tmplInfo}
    `; card.appendChild(svg); return card; @@ -678,7 +678,7 @@

    Layout Analysis

    "Compares templates by positional page matching (page 1↔1, 2↔2, …). " + "Templates with different page counts are penalised for unmatched pages. Score 1.0 = identical, 0.0 = nothing in common."; buildMatrixTable(tmplWrap, - selectedIndices.map(i => tl.templates[i].templateName), + selectedIndices.map(i => templateName(tl.templates[i])), selectedIndices.map(i => selectedIndices.map(j => tl.matrix[i][j]))); } else { tmplDesc.textContent = ""; @@ -714,7 +714,7 @@

    Layout Analysis

    "Compares pages by area structure (x + width normalised to page width, height with 3× tolerance). " + "Score 1.0 = identical structure, 0.0 = nothing in common. Pages sorted by proposed family."; buildMatrixTable(pageWrap, - ordered.map(i => data.pages[i].pageName), + ordered.map(i => pageName(data.pages[i])), ordered.map(i => ordered.map(j => pl.matrix[i][j]))); } else { pageDesc.textContent = ""; @@ -733,7 +733,7 @@

    Layout Analysis

    hrow.insertCell().innerHTML = ""; names.forEach((name, j) => { const th = document.createElement("th"); - th.title = name; + th.title = name ?? ""; th.textContent = `${j + 1}`; hrow.appendChild(th); }); @@ -742,7 +742,7 @@

    Layout Analysis

    names.forEach((name, i) => { const row = tbody.insertRow(); const th = document.createElement("th"); - th.textContent = `${i + 1}. ${name.slice(0, 36)}`; + th.textContent = `${i + 1}. ${(name ?? "").slice(0, 36)}`; row.appendChild(th); matrix[i].forEach((score, j) => { const td = row.insertCell(); @@ -784,7 +784,7 @@

    Layout Analysis

    (cluster.length === 1 ? `(unique)` : "") + `
    ` + `
    ` + - `${cluster.map(i => `${htmlEsc(data.pages[i].pageName)}`).join("")}` + + `${cluster.map(i => `${htmlEsc(pageName(data.pages[i]))}`).join("")}` + `
    `; list.appendChild(li); }); @@ -808,13 +808,13 @@

    Layout Analysis

    const clusters = clusterPages(pageIndices, pl.matrix).sort((a, b) => Math.min(...a) - Math.min(...b)); const pageLabel = pi => { - const name = data.pages[pi].pageName; + const name = pageName(data.pages[pi]); const owner = hasMultipleTemplates && templates.find(t => t.pageIndices.includes(pi)); - return owner ? `${owner.templateName} / ${name}` : name; + return owner ? `${templateName(owner)} / ${name}` : name; }; const templateBadges = fam.templateIds.map((id, idx) => - `${htmlEsc(fam.templateNames[idx])}` + `${htmlEsc(fam.templateNames[idx] ?? id)}` ).join(" "); let slotsHtml = '
    '; @@ -871,7 +871,7 @@

    Layout Analysis

    wrap.className = "preview-card"; const title = document.createElement("div"); title.className = "preview-card-title"; - title.textContent = currentData.pages[pi].pageName; + title.textContent = pageName(currentData.pages[pi]); wrap.appendChild(title); const svg = buildPageCard(currentData.pages[pi]).querySelector("svg"); if (svg) wrap.appendChild(svg); @@ -917,7 +917,7 @@

    Layout Analysis

    return clusterPages(tl.templates.map((_, i) => i), tl.matrix) .map(cluster => ({ templateIds: cluster.map(i => tl.templates[i].templateId), - templateNames: cluster.map(i => tl.templates[i].templateName), + templateNames: cluster.map(i => tl.templates[i].templateName ?? tl.templates[i].templateId), })); } @@ -987,6 +987,14 @@

    Layout Analysis

    function htmlEsc(s) { return String(s).replace(/&/g, "&").replace(//g, ">"); } + + function pageName(page) { + return page.pageName ?? page.pageId; + } + + function templateName(tmpl) { + return tmpl.templateName ?? tmpl.templateId; + } diff --git a/migration-examples/src/main/groovy/com/quadient/migration/example/common/layout/LayoutExport.groovy b/migration-examples/src/main/groovy/com/quadient/migration/example/common/layout/LayoutExport.groovy index 29b05e51..f6a31257 100644 --- a/migration-examples/src/main/groovy/com/quadient/migration/example/common/layout/LayoutExport.groovy +++ b/migration-examples/src/main/groovy/com/quadient/migration/example/common/layout/LayoutExport.groovy @@ -44,55 +44,31 @@ templates.each { tmpl -> } } -def pageDataList = pages.collect { page -> - def template = pageToTemplate[page.id] - - def areaList = (page.content.findAll { it instanceof Area } as List) - .findAll { area -> area.position != null && area.position.width.toMillimeters() > 0 && area.position.height.toMillimeters() > 0 && area.position.x.toMillimeters() < 315.0 // 1.5 × A4 width guard - } - .collect { area -> - double x = area.position.x.toMillimeters() - double y = area.position.y.toMillimeters() - double w = area.position.width.toMillimeters() - double h = area.position.height.toMillimeters() - [x : x, y: y, w: w, h: h, x2: x + w, y2: y + h, sz: w * h, - flowToNextPage : area.flowToNextPage, - interactiveFlowName: area.interactiveFlowName ?: "", - contentPreview : buildContentPreview(migration, area)] - } - - def containment = findContainment(areaList) - - // Proximity grouping: vertical sweep with 5.3 mm gap (~15 pt) - def groups = groupByProximity(areaList) - groups.eachWithIndex { groupIndices, gi -> groupIndices.each { idx -> areaList[idx].proximityGroup = gi } - } +def pageDataList = [] - def proximityGroups = groups.collect { groupIndices -> - def subset = groupIndices.collect { areaList[it] } - double bx = subset.min { it.x as double }.x as double - double by = subset.min { it.y as double }.y as double - double bx2 = subset.max { it.x2 as double }.x2 as double - double by2 = subset.max { it.y2 as double }.y2 as double - [areaIndices: groupIndices, bbox: [x: round2(bx), y: round2(by), w: round2(bx2 - bx), h: round2(by2 - by)]] - } - - def areas = areaList.withIndex().collect { a, i -> - [x : round2(a.x), y: round2(a.y), w: round2(a.w), h: round2(a.h), - flowToNextPage : a.flowToNextPage, - interactiveFlowName: a.interactiveFlowName, - contentPreview : a.contentPreview, - containedIn : containment[i], // list index of parent area, or null if top-level - proximityGroup : a.proximityGroup ?: 0] - } +pages.each { page -> + def template = pageToTemplate[page.id] + def processed = processAreas(migration, page.content.findAll { it instanceof Area } as List) + pageDataList << [pageId : page.id, + pageName : page.name, + templateId : template?.id, + templateName : template?.name, + templatePageIndex: pagePositionInTemplate[page.id], + areas : processed.areas, + proximityGroups : processed.proximityGroups] +} - [pageId : page.id, - pageName : page.name, - templateId : template?.id, - templateName : template?.name, - templatePageIndex: pagePositionInTemplate[page.id], - areas : areas, - proximityGroups : proximityGroups] +templates.each { tmpl -> + def directAreas = tmpl.content.findAll { it instanceof Area } as List + if (!directAreas) return + def processed = processAreas(migration, directAreas) + pageDataList << [pageId : tmpl.id, + pageName : tmpl.name, + templateId : tmpl.id, + templateName : tmpl.name, + templatePageIndex: null, + areas : processed.areas, + proximityGroups : processed.proximityGroups] } int pageCount = pageDataList.size() @@ -118,7 +94,7 @@ def templateList = templateGroupMap.collect { key, entries -> def pageIndices = entries.sort { it.order }.collect { it.listIdx as int } def firstPage = pageDataList[pageIndices[0]] [templateId : firstPage.templateId ?: key, - templateName: firstPage.templateName ?: firstPage.pageName, + templateName: firstPage.templateName ?: firstPage.pageName ?: firstPage.templateId ?: key, pageIndices : pageIndices] } @@ -141,6 +117,52 @@ new ObjectMapper().writerWithDefaultPrettyPrinter().writeValue(dstFile, templateLevel: [templates: templateList, matrix: templateMatrix]]]) println "Written to: ${dstFile.absolutePath}" +/** Processes raw areas into the structured area + proximity-group format used by the JSON output. */ +static Map processAreas(Migration migration, List rawAreas) { + def areaList = rawAreas + .findAll { area -> + area.position != null && + area.position.width.toMillimeters() > 0 && + area.position.height.toMillimeters() > 0 && + area.position.x.toMillimeters() < 315.0 // 1.5 × A4 width guard + } + .collect { area -> + double x = area.position.x.toMillimeters() + double y = area.position.y.toMillimeters() + double w = area.position.width.toMillimeters() + double h = area.position.height.toMillimeters() + [x: x, y: y, w: w, h: h, x2: x + w, y2: y + h, sz: w * h, + flowToNextPage : area.flowToNextPage, + interactiveFlowName: area.interactiveFlowName ?: "", + contentPreview : buildContentPreview(migration, area)] + } + + def containment = findContainment(areaList) + + def groups = groupByProximity(areaList) + groups.eachWithIndex { groupIndices, gi -> groupIndices.each { idx -> areaList[idx].proximityGroup = gi } } + + def proximityGroups = groups.collect { groupIndices -> + def subset = groupIndices.collect { areaList[it] } + double bx = subset.min { it.x as double }.x as double + double by = subset.min { it.y as double }.y as double + double bx2 = subset.max { it.x2 as double }.x2 as double + double by2 = subset.max { it.y2 as double }.y2 as double + [areaIndices: groupIndices, bbox: [x: round2(bx), y: round2(by), w: round2(bx2 - bx), h: round2(by2 - by)]] + } + + def areas = areaList.withIndex().collect { a, i -> + [x : round2(a.x), y: round2(a.y), w: round2(a.w), h: round2(a.h), + flowToNextPage : a.flowToNextPage, + interactiveFlowName: a.interactiveFlowName, + contentPreview : a.contentPreview, + containedIn : containment[i], // list index of parent area, or null if top-level + proximityGroup : a.proximityGroup ?: 0] + } + + [areas: areas, proximityGroups: proximityGroups] +} + /** Summarise area content as a human-readable string. */ static String buildContentPreview(Migration migration, Area area) { area.content.collect { c -> From 9b97c080f32254549f4a08be5d22eca4b1b43ec3 Mon Sep 17 00:00:00 2001 From: "d.svitak" Date: Mon, 25 May 2026 14:50:16 +0200 Subject: [PATCH 08/36] feature/MIG-507-layout-analysis - manual reviewing and refactoring --- .../example/common/layout/LayoutExport.groovy | 347 +++++++++++------- 1 file changed, 212 insertions(+), 135 deletions(-) diff --git a/migration-examples/src/main/groovy/com/quadient/migration/example/common/layout/LayoutExport.groovy b/migration-examples/src/main/groovy/com/quadient/migration/example/common/layout/LayoutExport.groovy index f6a31257..be5d0665 100644 --- a/migration-examples/src/main/groovy/com/quadient/migration/example/common/layout/LayoutExport.groovy +++ b/migration-examples/src/main/groovy/com/quadient/migration/example/common/layout/LayoutExport.groovy @@ -8,162 +8,169 @@ package com.quadient.migration.example.common.layout import com.fasterxml.jackson.databind.ObjectMapper import com.quadient.migration.api.Migration import com.quadient.migration.api.dto.migrationmodel.Area +import com.quadient.migration.api.dto.migrationmodel.DocumentObject import com.quadient.migration.api.dto.migrationmodel.DocumentObjectRef import com.quadient.migration.api.dto.migrationmodel.ImageRef import com.quadient.migration.api.dto.migrationmodel.builder.DocumentObjectFilterBuilder -import com.quadient.migration.api.repository.DocumentObjectRepository import com.quadient.migration.example.common.util.PathUtil import com.quadient.migration.shared.DocumentObjectType import groovy.transform.Field import static com.quadient.migration.example.common.util.InitMigration.initMigration -@Field Migration migration = initMigration(this.binding) +@Field Migration migration = initMigration(LayoutExport.binding) def dstFile = PathUtil.dataDirPath(binding, "layout", "${migration.projectConfig.name}-layout.json").toFile() dstFile.parentFile.mkdirs() -def templatesAndPages = (migration.documentObjectRepository as DocumentObjectRepository) - .list(new DocumentObjectFilterBuilder() - .types([DocumentObjectType.Page, DocumentObjectType.Template]) - .build()) +List templatesAndPages = migration.documentObjectRepository + .list(new DocumentObjectFilterBuilder().types([DocumentObjectType.Page, DocumentObjectType.Template]).build()) -def templates = templatesAndPages.findAll { it.type == DocumentObjectType.Template } -def pages = templatesAndPages.findAll { it.type == DocumentObjectType.Page } -def pageIds = pages.collect { it.id } as Set +List templates = templatesAndPages.findAll { it.type == DocumentObjectType.Template } as List +List pages = templatesAndPages.findAll { it.type == DocumentObjectType.Page } as List +Map pageById = pages.collectEntries { [(it.id): it] } as Map +List pageEntries = [] +Set assignedPageIds = [] -// Page → template lookup; also track each page's position within its template -def pageToTemplate = [:] -def pagePositionInTemplate = [:] -templates.each { tmpl -> +templates.each { DocumentObject tmpl -> tmpl.content - .findAll { it instanceof DocumentObjectRef && pageIds.contains(it.id) } - .eachWithIndex { ref, idx -> - pageToTemplate[(ref as DocumentObjectRef).id] = tmpl - pagePositionInTemplate[(ref as DocumentObjectRef).id] = idx + .findAll { it instanceof DocumentObjectRef && pageById.containsKey(it.id) } + .eachWithIndex { ref, int idx -> + DocumentObject page = pageById[(ref as DocumentObjectRef).id] + assignedPageIds << page.id + ProcessedAreasAndGroups areasAndGroups = processAreasAndGroups(migration, page.content.findAll { it instanceof Area } as List) + pageEntries << new PageEntry(pageId: page.id, + pageName: page.name, + templateId: tmpl.id, + templateName: tmpl.name, + templatePageIndex: idx, + areas: areasAndGroups.areas, + proximityGroups: areasAndGroups.proximityGroups) } -} -def pageDataList = [] - -pages.each { page -> - def template = pageToTemplate[page.id] - def processed = processAreas(migration, page.content.findAll { it instanceof Area } as List) - pageDataList << [pageId : page.id, - pageName : page.name, - templateId : template?.id, - templateName : template?.name, - templatePageIndex: pagePositionInTemplate[page.id], - areas : processed.areas, - proximityGroups : processed.proximityGroups] + List directAreas = tmpl.content.findAll { it instanceof Area } as List + if (directAreas) { + ProcessedAreasAndGroups processed = processAreasAndGroups(migration, directAreas) + pageEntries << new PageEntry(pageId: tmpl.id, + pageName: tmpl.name, + templateId: tmpl.id, + templateName: tmpl.name, + templatePageIndex: null, + areas: processed.areas, + proximityGroups: processed.proximityGroups) + } } -templates.each { tmpl -> - def directAreas = tmpl.content.findAll { it instanceof Area } as List - if (!directAreas) return - def processed = processAreas(migration, directAreas) - pageDataList << [pageId : tmpl.id, - pageName : tmpl.name, - templateId : tmpl.id, - templateName : tmpl.name, - templatePageIndex: null, - areas : processed.areas, - proximityGroups : processed.proximityGroups] +pages.findAll { !assignedPageIds.contains(it.id) }.each { DocumentObject page -> + ProcessedAreasAndGroups processed = processAreasAndGroups(migration, page.content.findAll { it instanceof Area } as List) + pageEntries << new PageEntry(pageId: page.id, + pageName: page.name, + templateId: null, + templateName: null, + templatePageIndex: null, + areas: processed.areas, + proximityGroups: processed.proximityGroups) } -int pageCount = pageDataList.size() +int pageCount = pageEntries.size() -def pageMatrix = (0.. - (0.. +List> pageMatrix = (0.. + (0.. if (i == j) return 1.0d if (j < i) return 0.0d - round3(pageSimilarity(pageDataList[i].areas as List, pageDataList[j].areas as List)) + round3dp(pageSimilarity(pageEntries[i].areas, pageEntries[j].areas)) } -} +} as List> (0.. (0.. pageMatrix[i][j] = pageMatrix[j][i] } } // Group page-list indices by their parent template, preserving intra-template page order -def templateGroupMap = [:] -pageDataList.eachWithIndex { pg, i -> - def key = pg.templateId ?: "solo::${pg.pageId}" +Map> templateGroupMap = [:] +pageEntries.eachWithIndex { PageEntry pg, int i -> + String key = pg.templateId ?: "orphan:${pg.pageId}" if (!templateGroupMap.containsKey(key)) templateGroupMap[key] = [] - templateGroupMap[key] << [listIdx: i as int, order: (pg.templatePageIndex ?: 0) as int] + templateGroupMap[key] << new TemplateGroupEntry(listIdx: i, order: pg.templatePageIndex ?: 0) } -def templateList = templateGroupMap.collect { key, entries -> - def pageIndices = entries.sort { it.order }.collect { it.listIdx as int } - def firstPage = pageDataList[pageIndices[0]] - [templateId : firstPage.templateId ?: key, - templateName: firstPage.templateName ?: firstPage.pageName ?: firstPage.templateId ?: key, - pageIndices : pageIndices] -} +List templateList = templateGroupMap.collect { String key, List entries -> + List pageIndices = entries.sort { it.order }.collect { it.listIdx } as List + PageEntry firstPage = pageEntries[pageIndices[0]] + new TemplateEntry(templateId: firstPage.templateId ?: key, + templateName: firstPage.templateName ?: firstPage.templateId ?: key, + pageIndices: pageIndices) +} as List int templateCount = templateList.size() -def templateMatrix = (0.. - (0.. +List> templateMatrix = (0.. + (0.. if (i == j) return 1.0d if (j < i) return 0.0d - round3(templateSimilarity(templateList[i].pageIndices as List, - templateList[j].pageIndices as List, + round3dp(templateSimilarity(templateList[i].pageIndices, + templateList[j].pageIndices, pageMatrix)) } -} +} as List> (0.. (0.. templateMatrix[i][j] = templateMatrix[j][i] } } new ObjectMapper().writerWithDefaultPrettyPrinter().writeValue(dstFile, [projectName: migration.projectConfig.name, - pages : pageDataList, + pages : pageEntries, similarity : [pageLevel : [matrix: pageMatrix], templateLevel: [templates: templateList, matrix: templateMatrix]]]) println "Written to: ${dstFile.absolutePath}" /** Processes raw areas into the structured area + proximity-group format used by the JSON output. */ -static Map processAreas(Migration migration, List rawAreas) { - def areaList = rawAreas +static ProcessedAreasAndGroups processAreasAndGroups(Migration migration, List modelAreas) { + List workingAreas = modelAreas .findAll { area -> area.position != null && area.position.width.toMillimeters() > 0 && area.position.height.toMillimeters() > 0 && area.position.x.toMillimeters() < 315.0 // 1.5 × A4 width guard } - .collect { area -> + .collect { Area area -> double x = area.position.x.toMillimeters() double y = area.position.y.toMillimeters() double w = area.position.width.toMillimeters() double h = area.position.height.toMillimeters() - [x: x, y: y, w: w, h: h, x2: x + w, y2: y + h, sz: w * h, - flowToNextPage : area.flowToNextPage, - interactiveFlowName: area.interactiveFlowName ?: "", - contentPreview : buildContentPreview(migration, area)] - } + new WorkingArea(x: x, + y: y, + w: w, + h: h, + x2: x + w, + y2: y + h, + areaSize: w * h, + flowToNextPage: area.flowToNextPage, + interactiveFlowName: area.interactiveFlowName ?: "", + contentPreview: buildContentPreview(migration, area)) + } as List - def containment = findContainment(areaList) + Map containment = findContainment(workingAreas) - def groups = groupByProximity(areaList) - groups.eachWithIndex { groupIndices, gi -> groupIndices.each { idx -> areaList[idx].proximityGroup = gi } } + List> groups = groupByProximity(workingAreas) + groups.eachWithIndex { List groupIndices, int gi -> groupIndices.each { int idx -> workingAreas[idx].proximityGroup = gi } } - def proximityGroups = groups.collect { groupIndices -> - def subset = groupIndices.collect { areaList[it] } - double bx = subset.min { it.x as double }.x as double - double by = subset.min { it.y as double }.y as double - double bx2 = subset.max { it.x2 as double }.x2 as double - double by2 = subset.max { it.y2 as double }.y2 as double - [areaIndices: groupIndices, bbox: [x: round2(bx), y: round2(by), w: round2(bx2 - bx), h: round2(by2 - by)]] - } + List proximityGroups = groups.collect { List groupIndices -> + List subset = groupIndices.collect { workingAreas[it] } as List + double bx = subset.min { it.x }.x + double by = subset.min { it.y }.y + double bx2 = subset.max { it.x2 }.x2 + double by2 = subset.max { it.y2 }.y2 + new ProximityGroupEntry(areaIndices: groupIndices, + position: new Position(x: round2dp(bx), y: round2dp(by), w: round2dp(bx2 - bx), h: round2dp(by2 - by))) + } as List - def areas = areaList.withIndex().collect { a, i -> - [x : round2(a.x), y: round2(a.y), w: round2(a.w), h: round2(a.h), - flowToNextPage : a.flowToNextPage, - interactiveFlowName: a.interactiveFlowName, - contentPreview : a.contentPreview, - containedIn : containment[i], // list index of parent area, or null if top-level - proximityGroup : a.proximityGroup ?: 0] - } + List areas = workingAreas.withIndex().collect { WorkingArea a, int i -> + new AreaEntry(x: round2dp(a.x), y: round2dp(a.y), w: round2dp(a.w), h: round2dp(a.h), + flowToNextPage: a.flowToNextPage, + interactiveFlowName: a.interactiveFlowName, + contentPreview: a.contentPreview, + containedIn: containment[i], // list index of parent area, or null if top-level + proximityGroup: a.proximityGroup ?: 0) + } as List - [areas: areas, proximityGroups: proximityGroups] + new ProcessedAreasAndGroups(areas: areas, proximityGroups: proximityGroups) } -/** Summarise area content as a human-readable string. */ static String buildContentPreview(Migration migration, Area area) { area.content.collect { c -> switch (c) { @@ -183,15 +190,15 @@ static String buildContentPreview(Migration migration, Area area) { * For each area find its smallest enclosing parent within the same page. * Returns a map of list-index → list-index (parent). * Tolerance of 0.7 mm (~2 pt) absorbs rounding differences.*/ -static Map findContainment(List areas) { - def containment = [:] - areas.eachWithIndex { inner, i -> - areas.eachWithIndex { outer, j -> - if (i == j || (outer.sz as double) <= (inner.sz as double)) return +static Map findContainment(List areas) { + Map containment = [:] + areas.eachWithIndex { WorkingArea inner, int i -> + areas.eachWithIndex { WorkingArea outer, int j -> + if (i == j || outer.areaSize <= inner.areaSize) return double tol = 0.7 - if ((outer.x as double) - tol <= (inner.x as double) && (outer.y as double) - tol <= (inner.y as double) && (inner.x2 as double) <= (outer.x2 as double) + tol && (inner.y2 as double) <= (outer.y2 as double) + tol) { - Integer cur = containment[i] as Integer - if (cur == null || (areas[cur].sz as double) > (outer.sz as double)) { + if (outer.x - tol <= inner.x && outer.y - tol <= inner.y && inner.x2 <= outer.x2 + tol && inner.y2 <= outer.y2 + tol) { + Integer cur = containment[i] + if (cur == null || areas[cur].areaSize > outer.areaSize) { containment[i] = j } } @@ -202,24 +209,25 @@ static Map findContainment(List areas) { /** * Vertical sweep grouping: areas whose Y ranges overlap or are within - * yGapMm of each other are placed in the same group. + * groupingGap of each other are placed in the same group. * Returns a list of groups, each group being a list of list-indices.*/ -static List> groupByProximity(List areas, double yGapMm = 5.3) { - if (!areas) return [] - def indexed = areas.withIndex().collect { area, i -> [area: area, idx: i] } - indexed.sort { a, b -> (a.area.y as double) <=> (b.area.y as double) ?: (a.area.x as double) <=> (b.area.x as double) +static List> groupByProximity(List workingAreas, double groupingGap = 5.3) { + if (!workingAreas) return [] + List sortedIndices = (0.. + workingAreas[i].y <=> workingAreas[j].y ?: workingAreas[i].x <=> workingAreas[j].x } - def groups = [] - def current = [indexed[0].idx as int] - double curY2 = indexed[0].area.y2 as double - indexed.drop(1).each { entry -> - if ((entry.area.y as double) <= curY2 + yGapMm) { - current << (entry.idx as int) - curY2 = Math.max(curY2, entry.area.y2 as double) + List> groups = [] + List current = [sortedIndices[0]] + double curY2 = workingAreas[sortedIndices[0]].y2 + sortedIndices.drop(1).each { int idx -> + WorkingArea area = workingAreas[idx] + if (area.y <= curY2 + groupingGap) { + current << idx + curY2 = Math.max(curY2, area.y2) } else { groups << current - current = [entry.idx as int] - curY2 = entry.area.y2 as double + current = [idx] + curY2 = area.y2 } } groups << current @@ -232,39 +240,38 @@ static List> groupByProximity(List areas, double yGapMm = 5.3 * height with a log-ratio so a 3× difference scores ~1.0 (large * height differences are tolerated but still weakly penalised). * Unmatched areas (different counts) reduce the score proportionally.*/ -static double pageSimilarity(List areasA, List areasB, +static double pageSimilarity(List areasA, List areasB, double pageRefMm = 210.0, double matchThreshold = 0.5) { - def validA = areasA.findAll { (it.w as double) > 0 && (it.h as double) > 0 && (it.x as double) < pageRefMm * 1.5 } - def validB = areasB.findAll { (it.w as double) > 0 && (it.h as double) > 0 && (it.x as double) < pageRefMm * 1.5 } + List validA = areasA.findAll { it.w > 0 && it.h > 0 && it.x < pageRefMm * 1.5 } as List + List validB = areasB.findAll { it.w > 0 && it.h > 0 && it.x < pageRefMm * 1.5 } as List if (!validA || !validB) return 0.0 - def distances = [] - validA.eachWithIndex { a, i -> - validB.eachWithIndex { b, j -> - double xDist = Math.abs((a.x as double) - (b.x as double)) / pageRefMm - double wDist = Math.abs((a.w as double) - (b.w as double)) / pageRefMm - double hRatio = Math.abs(Math.log(Math.max(a.h as double, 0.1) / Math.max(b.h as double, 0.1))) / Math.log(3.0) - distances << [d: Math.sqrt(xDist * xDist + wDist * wDist + hRatio * hRatio), i: i, j: j] + List areaMatches = [] + validA.eachWithIndex { AreaEntry a, int i -> + validB.eachWithIndex { AreaEntry b, int j -> + double xDist = Math.abs(a.x - b.x) / pageRefMm + double wDist = Math.abs(a.w - b.w) / pageRefMm + double hRatio = Math.abs(Math.log(Math.max(a.h, 0.1) / Math.max(b.h, 0.1))) / Math.log(3.0) + areaMatches << new AreaMatch(distance: Math.sqrt(xDist * xDist + wDist * wDist + hRatio * hRatio), indexA: i, indexB: j) } } - distances.sort { it.d } + areaMatches.sort { it.distance } - def matchedA = [] as Set - def matchedB = [] as Set + Set matchedA = [] + Set matchedB = [] double total = 0.0 - distances.each { pair -> - if (!(pair.i in matchedA) && !(pair.j in matchedB)) { - matchedA << pair.i; matchedB << pair.j - total += Math.max(0.0, 1.0 - (pair.d as double) / matchThreshold) + areaMatches.each { AreaMatch match -> + if (!(match.indexA in matchedA) && !(match.indexB in matchedB)) { + matchedA << match.indexA; matchedB << match.indexB + total += Math.max(0.0, 1.0 - match.distance / matchThreshold) } } int slots = Math.max(validA.size(), validB.size()) return slots > 0 ? total / slots : 0.0 } -static double round2(double v) { Math.round(v * 100) / 100.0 } - -static double round3(double v) { Math.round(v * 1000) / 1000.0 } +static double round2dp(double v) { Math.round(v * 100) / 100.0 } +static double round3dp(double v) { Math.round(v * 1000) / 1000.0 } /** * Compare two templates by positional page matching (page 1↔1, page 2↔2, …). @@ -275,6 +282,76 @@ static double templateSimilarity(List pagesA, List pagesB, int maxLen = Math.max(pagesA.size(), pagesB.size()) if (maxLen == 0) return 0.0 double total = 0.0 - (0.. total += pageMatrix[pagesA[k]][pagesB[k]] as double } + (0.. total += pageMatrix[pagesA[k]][pagesB[k]] } return total / maxLen } + +class WorkingArea { + double x + double y + double w + double h + double x2 + double y2 + double areaSize + boolean flowToNextPage + String interactiveFlowName + String contentPreview + int proximityGroup = 0 +} + +class AreaEntry { + double x + double y + double w + double h + boolean flowToNextPage + String interactiveFlowName + String contentPreview + Integer containedIn + int proximityGroup +} + +class Position { + double x + double y + double w + double h +} + +class ProximityGroupEntry { + List areaIndices + Position position +} + +class ProcessedAreasAndGroups { + List areas + List proximityGroups +} + +class PageEntry { + String pageId + String pageName + String templateId + String templateName + Integer templatePageIndex + List areas + List proximityGroups +} + +class TemplateGroupEntry { + int listIdx + int order +} + +class TemplateEntry { + String templateId + String templateName + List pageIndices +} + +class AreaMatch { + double distance + int indexA + int indexB +} From ddfa54688e14feb630647caa657e14d25be15aae Mon Sep 17 00:00:00 2001 From: "d.svitak" Date: Mon, 25 May 2026 15:25:19 +0200 Subject: [PATCH 09/36] feature/MIG-507-layout-analysis - manual reviewing and refactoring #2 --- migration-examples/layout/index.html | 2 +- .../example/common/layout/LayoutExport.groovy | 148 ++++++++++-------- 2 files changed, 83 insertions(+), 67 deletions(-) diff --git a/migration-examples/layout/index.html b/migration-examples/layout/index.html index f0833974..7e08ae31 100644 --- a/migration-examples/layout/index.html +++ b/migration-examples/layout/index.html @@ -602,7 +602,7 @@

    Layout Analysis

    const svg = svgEl("svg", {width: CANVAS_W, height: svgH, style: "background:#fff;cursor:default"}); (page.proximityGroups || []).forEach((grp, gi) => { - const b = grp.bbox; + const b = grp.position; svg.appendChild(svgEl("rect", { x: svgPos(b.x, scale), y: svgPos(b.y, scale), width: svgDim(b.w, scale), height: svgDim(b.h, scale), diff --git a/migration-examples/src/main/groovy/com/quadient/migration/example/common/layout/LayoutExport.groovy b/migration-examples/src/main/groovy/com/quadient/migration/example/common/layout/LayoutExport.groovy index be5d0665..fc55d6ed 100644 --- a/migration-examples/src/main/groovy/com/quadient/migration/example/common/layout/LayoutExport.groovy +++ b/migration-examples/src/main/groovy/com/quadient/migration/example/common/layout/LayoutExport.groovy @@ -10,6 +10,7 @@ import com.quadient.migration.api.Migration import com.quadient.migration.api.dto.migrationmodel.Area import com.quadient.migration.api.dto.migrationmodel.DocumentObject import com.quadient.migration.api.dto.migrationmodel.DocumentObjectRef +import com.quadient.migration.api.dto.migrationmodel.Image import com.quadient.migration.api.dto.migrationmodel.ImageRef import com.quadient.migration.api.dto.migrationmodel.builder.DocumentObjectFilterBuilder import com.quadient.migration.example.common.util.PathUtil @@ -18,6 +19,11 @@ import groovy.transform.Field import static com.quadient.migration.example.common.util.InitMigration.initMigration +@Field static final double MAX_X_MM = 315.0 // 1.5 × A4 width — off-page area guard +@Field static final double PAGE_REF_MM = 210.0 // A4 reference width for normalisation +@Field static final double PROXIMITY_GAP_MM = 5.3 // vertical gap threshold for proximity grouping +@Field static final double CONTAINMENT_TOL_MM = 0.7 // tolerance (~2 pt) for containment checks + @Field Migration migration = initMigration(LayoutExport.binding) def dstFile = PathUtil.dataDirPath(binding, "layout", "${migration.projectConfig.name}-layout.json").toFile() @@ -32,13 +38,34 @@ Map pageById = pages.collectEntries { [(it.id): it] } as List pageEntries = [] Set assignedPageIds = [] +// Pre-cache content references for buildContentPreview +Set allDocObjRefIds = [] as Set +Set allImageRefIds = [] as Set +(templates + pages).each { DocumentObject obj -> + obj.content.findAll { it instanceof Area }.each { area -> + (area as Area).content.each { c -> + if (c instanceof DocumentObjectRef) allDocObjRefIds << c.id + else if (c instanceof ImageRef) allImageRefIds << c.id + } + } +} + +Map docObjCache = allDocObjRefIds + ? migration.documentObjectRepository.list(new DocumentObjectFilterBuilder().ids(allDocObjRefIds.toList()).build()) + .collectEntries { [(it.id): it] } as Map + : [:] as Map +Map imageCache = allImageRefIds + ? migration.imageRepository.listAll().findAll { allImageRefIds.contains(it.id) } + .collectEntries { [(it.id): it] } as Map + : [:] as Map + templates.each { DocumentObject tmpl -> tmpl.content .findAll { it instanceof DocumentObjectRef && pageById.containsKey(it.id) } .eachWithIndex { ref, int idx -> DocumentObject page = pageById[(ref as DocumentObjectRef).id] assignedPageIds << page.id - ProcessedAreasAndGroups areasAndGroups = processAreasAndGroups(migration, page.content.findAll { it instanceof Area } as List) + ProcessedAreasAndGroups areasAndGroups = processAreasAndGroups(docObjCache, imageCache, page.content.findAll { it instanceof Area } as List) pageEntries << new PageEntry(pageId: page.id, pageName: page.name, templateId: tmpl.id, @@ -50,7 +77,7 @@ templates.each { DocumentObject tmpl -> List directAreas = tmpl.content.findAll { it instanceof Area } as List if (directAreas) { - ProcessedAreasAndGroups processed = processAreasAndGroups(migration, directAreas) + ProcessedAreasAndGroups processed = processAreasAndGroups(docObjCache, imageCache, directAreas) pageEntries << new PageEntry(pageId: tmpl.id, pageName: tmpl.name, templateId: tmpl.id, @@ -62,7 +89,7 @@ templates.each { DocumentObject tmpl -> } pages.findAll { !assignedPageIds.contains(it.id) }.each { DocumentObject page -> - ProcessedAreasAndGroups processed = processAreasAndGroups(migration, page.content.findAll { it instanceof Area } as List) + ProcessedAreasAndGroups processed = processAreasAndGroups(docObjCache, imageCache, page.content.findAll { it instanceof Area } as List) pageEntries << new PageEntry(pageId: page.id, pageName: page.name, templateId: null, @@ -72,44 +99,28 @@ pages.findAll { !assignedPageIds.contains(it.id) }.each { DocumentObject page -> proximityGroups: processed.proximityGroups) } -int pageCount = pageEntries.size() - -List> pageMatrix = (0.. - (0.. - if (i == j) return 1.0d - if (j < i) return 0.0d - round3dp(pageSimilarity(pageEntries[i].areas, pageEntries[j].areas)) - } -} as List> -(0.. (0.. pageMatrix[i][j] = pageMatrix[j][i] } } +List> pageMatrix = buildSymmetricMatrix(pageEntries.size()) { int i, int j -> + pageSimilarity(pageEntries[i].areas, pageEntries[j].areas) +} -// Group page-list indices by their parent template, preserving intra-template page order -Map> templateGroupMap = [:] +// Group page-list indices by their parent template +Map> templateGroupMap = [:] pageEntries.eachWithIndex { PageEntry pg, int i -> String key = pg.templateId ?: "orphan:${pg.pageId}" - if (!templateGroupMap.containsKey(key)) templateGroupMap[key] = [] - templateGroupMap[key] << new TemplateGroupEntry(listIdx: i, order: pg.templatePageIndex ?: 0) + templateGroupMap.computeIfAbsent(key) { [] } << i } -List templateList = templateGroupMap.collect { String key, List entries -> - List pageIndices = entries.sort { it.order }.collect { it.listIdx } as List - PageEntry firstPage = pageEntries[pageIndices[0]] - new TemplateEntry(templateId: firstPage.templateId ?: key, - templateName: firstPage.templateName ?: firstPage.templateId ?: key, +List templateList = templateGroupMap.collect { String key, List indices -> + List pageIndices = indices.sort { pageEntries[it].templatePageIndex ?: 0 } + PageEntry first = pageEntries[pageIndices[0]] + new TemplateEntry(templateId: first.templateId ?: key, + templateName: first.templateName ?: first.templateId ?: key, pageIndices: pageIndices) } as List -int templateCount = templateList.size() -List> templateMatrix = (0.. - (0.. - if (i == j) return 1.0d - if (j < i) return 0.0d - round3dp(templateSimilarity(templateList[i].pageIndices, - templateList[j].pageIndices, - pageMatrix)) - } -} as List> -(0.. (0.. templateMatrix[i][j] = templateMatrix[j][i] } } +List> templateMatrix = buildSymmetricMatrix(templateList.size()) { int i, int j -> + templateSimilarity(templateList[i].pageIndices, templateList[j].pageIndices, pageMatrix) +} new ObjectMapper().writerWithDefaultPrettyPrinter().writeValue(dstFile, [projectName: migration.projectConfig.name, @@ -118,14 +129,29 @@ new ObjectMapper().writerWithDefaultPrettyPrinter().writeValue(dstFile, templateLevel: [templates: templateList, matrix: templateMatrix]]]) println "Written to: ${dstFile.absolutePath}" +// --- Helper methods --- + +/** Builds a symmetric similarity matrix, computing only the upper triangle. */ +static List> buildSymmetricMatrix(int n, Closure scoreFn) { + List> m = (0.. + (0.. + if (i == j) return 1.0d + if (j < i) return 0.0d + round3dp(scoreFn(i, j)) + } + } as List> + (0.. (0.. m[i][j] = m[j][i] } } + m +} + /** Processes raw areas into the structured area + proximity-group format used by the JSON output. */ -static ProcessedAreasAndGroups processAreasAndGroups(Migration migration, List modelAreas) { +static ProcessedAreasAndGroups processAreasAndGroups(Map docObjCache, Map imageCache, List modelAreas) { List workingAreas = modelAreas .findAll { area -> area.position != null && area.position.width.toMillimeters() > 0 && area.position.height.toMillimeters() > 0 && - area.position.x.toMillimeters() < 315.0 // 1.5 × A4 width guard + area.position.x.toMillimeters() < MAX_X_MM } .collect { Area area -> double x = area.position.x.toMillimeters() @@ -141,7 +167,7 @@ static ProcessedAreasAndGroups processAreasAndGroups(Migration migration, List Map containment = findContainment(workingAreas) @@ -164,21 +190,21 @@ static ProcessedAreasAndGroups processAreasAndGroups(Migration migration, List new ProcessedAreasAndGroups(areas: areas, proximityGroups: proximityGroups) } -static String buildContentPreview(Migration migration, Area area) { +static String buildContentPreview(Map docObjCache, Map imageCache, Area area) { area.content.collect { c -> switch (c) { case DocumentObjectRef: - def obj = migration.documentObjectRepository.find(c.id) + def obj = docObjCache[c.id] return obj?.name ? "DocObjRef(${obj.name})" : "DocObjRef(${c.id})" case ImageRef: - def img = migration.imageRepository.find(c.id) + def img = imageCache[c.id] return img?.name ? "ImageRef(${img.name})" : "ImageRef(${c.id})" default: return c.class.simpleName @@ -188,15 +214,14 @@ static String buildContentPreview(Migration migration, Area area) { /** * For each area find its smallest enclosing parent within the same page. - * Returns a map of list-index → list-index (parent). - * Tolerance of 0.7 mm (~2 pt) absorbs rounding differences.*/ + * Returns a map of list-index → list-index (parent). */ static Map findContainment(List areas) { Map containment = [:] areas.eachWithIndex { WorkingArea inner, int i -> areas.eachWithIndex { WorkingArea outer, int j -> if (i == j || outer.areaSize <= inner.areaSize) return - double tol = 0.7 - if (outer.x - tol <= inner.x && outer.y - tol <= inner.y && inner.x2 <= outer.x2 + tol && inner.y2 <= outer.y2 + tol) { + if (outer.x - CONTAINMENT_TOL_MM <= inner.x && outer.y - CONTAINMENT_TOL_MM <= inner.y && + inner.x2 <= outer.x2 + CONTAINMENT_TOL_MM && inner.y2 <= outer.y2 + CONTAINMENT_TOL_MM) { Integer cur = containment[i] if (cur == null || areas[cur].areaSize > outer.areaSize) { containment[i] = j @@ -209,9 +234,9 @@ static Map findContainment(List areas) { /** * Vertical sweep grouping: areas whose Y ranges overlap or are within - * groupingGap of each other are placed in the same group. - * Returns a list of groups, each group being a list of list-indices.*/ -static List> groupByProximity(List workingAreas, double groupingGap = 5.3) { + * the proximity gap of each other are placed in the same group. + * Returns a list of groups, each group being a list of list-indices. */ +static List> groupByProximity(List workingAreas, double groupingGap = PROXIMITY_GAP_MM) { if (!workingAreas) return [] List sortedIndices = (0.. workingAreas[i].y <=> workingAreas[j].y ?: workingAreas[i].x <=> workingAreas[j].x @@ -236,21 +261,17 @@ static List> groupByProximity(List workingAreas, doub /** * Compare two pages by greedy bipartite matching on area geometry. - * Each area is described by (x, width) normalised to pageRefMm and - * height with a log-ratio so a 3× difference scores ~1.0 (large - * height differences are tolerated but still weakly penalised). - * Unmatched areas (different counts) reduce the score proportionally.*/ + * Each area is described by (x, width) normalised to page reference width and + * height with a log-ratio so a 3× difference scores ~1.0. */ static double pageSimilarity(List areasA, List areasB, - double pageRefMm = 210.0, double matchThreshold = 0.5) { - List validA = areasA.findAll { it.w > 0 && it.h > 0 && it.x < pageRefMm * 1.5 } as List - List validB = areasB.findAll { it.w > 0 && it.h > 0 && it.x < pageRefMm * 1.5 } as List - if (!validA || !validB) return 0.0 + double matchThreshold = 0.5) { + if (!areasA || !areasB) return 0.0 List areaMatches = [] - validA.eachWithIndex { AreaEntry a, int i -> - validB.eachWithIndex { AreaEntry b, int j -> - double xDist = Math.abs(a.x - b.x) / pageRefMm - double wDist = Math.abs(a.w - b.w) / pageRefMm + areasA.eachWithIndex { AreaEntry a, int i -> + areasB.eachWithIndex { AreaEntry b, int j -> + double xDist = Math.abs(a.x - b.x) / PAGE_REF_MM + double wDist = Math.abs(a.w - b.w) / PAGE_REF_MM double hRatio = Math.abs(Math.log(Math.max(a.h, 0.1) / Math.max(b.h, 0.1))) / Math.log(3.0) areaMatches << new AreaMatch(distance: Math.sqrt(xDist * xDist + wDist * wDist + hRatio * hRatio), indexA: i, indexB: j) } @@ -266,7 +287,7 @@ static double pageSimilarity(List areasA, List areasB, total += Math.max(0.0, 1.0 - match.distance / matchThreshold) } } - int slots = Math.max(validA.size(), validB.size()) + int slots = Math.max(areasA.size(), areasB.size()) return slots > 0 ? total / slots : 0.0 } @@ -276,7 +297,7 @@ static double round3dp(double v) { Math.round(v * 1000) / 1000.0 } /** * Compare two templates by positional page matching (page 1↔1, page 2↔2, …). * Unmatched pages (different template lengths) contribute 0, penalising the score - * proportionally so longer templates aren't artificially favoured.*/ + * proportionally so longer templates aren't artificially favoured. */ static double templateSimilarity(List pagesA, List pagesB, List> pageMatrix) { int maxLen = Math.max(pagesA.size(), pagesB.size()) @@ -339,11 +360,6 @@ class PageEntry { List proximityGroups } -class TemplateGroupEntry { - int listIdx - int order -} - class TemplateEntry { String templateId String templateName From f5159b7fd880b1a7e88f1f8420248ea18d400fba Mon Sep 17 00:00:00 2001 From: "d.svitak" Date: Mon, 25 May 2026 15:35:21 +0200 Subject: [PATCH 10/36] feature/MIG-507-layout-analysis - fixes --- .../migration/example/common/layout/LayoutExport.groovy | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/migration-examples/src/main/groovy/com/quadient/migration/example/common/layout/LayoutExport.groovy b/migration-examples/src/main/groovy/com/quadient/migration/example/common/layout/LayoutExport.groovy index fc55d6ed..8b46620e 100644 --- a/migration-examples/src/main/groovy/com/quadient/migration/example/common/layout/LayoutExport.groovy +++ b/migration-examples/src/main/groovy/com/quadient/migration/example/common/layout/LayoutExport.groovy @@ -24,7 +24,7 @@ import static com.quadient.migration.example.common.util.InitMigration.initMigra @Field static final double PROXIMITY_GAP_MM = 5.3 // vertical gap threshold for proximity grouping @Field static final double CONTAINMENT_TOL_MM = 0.7 // tolerance (~2 pt) for containment checks -@Field Migration migration = initMigration(LayoutExport.binding) +@Field Migration migration = initMigration(this.binding) def dstFile = PathUtil.dataDirPath(binding, "layout", "${migration.projectConfig.name}-layout.json").toFile() dstFile.parentFile.mkdirs() @@ -238,7 +238,7 @@ static Map findContainment(List areas) { * Returns a list of groups, each group being a list of list-indices. */ static List> groupByProximity(List workingAreas, double groupingGap = PROXIMITY_GAP_MM) { if (!workingAreas) return [] - List sortedIndices = (0.. + List sortedIndices = (0.. workingAreas[i].y <=> workingAreas[j].y ?: workingAreas[i].x <=> workingAreas[j].x } List> groups = [] From 04e748d39c7d944cf9747bff29383b8500658bb6 Mon Sep 17 00:00:00 2001 From: "d.svitak" Date: Tue, 26 May 2026 09:45:45 +0200 Subject: [PATCH 11/36] feature/MIG-507-layout-analysis - types for index.html --- migration-examples/layout/index.html | 1155 +++++++++++++++----------- 1 file changed, 670 insertions(+), 485 deletions(-) diff --git a/migration-examples/layout/index.html b/migration-examples/layout/index.html index 7e08ae31..37545523 100644 --- a/migration-examples/layout/index.html +++ b/migration-examples/layout/index.html @@ -468,533 +468,718 @@

    Layout Analysis

    - From c6a8706530dfde918b4935ef0d50b17b73e0ad50 Mon Sep 17 00:00:00 2001 From: "d.svitak" Date: Tue, 26 May 2026 10:21:59 +0200 Subject: [PATCH 12/36] feature/MIG-507-layout-analysis - refactor index.html --- migration-examples/layout/index.html | 190 +++++++++++---------------- 1 file changed, 75 insertions(+), 115 deletions(-) diff --git a/migration-examples/layout/index.html b/migration-examples/layout/index.html index 37545523..714bbe15 100644 --- a/migration-examples/layout/index.html +++ b/migration-examples/layout/index.html @@ -97,32 +97,6 @@ display: block; } - .page-card { - border: 1px solid var(--line); - border-radius: 10px; - overflow: hidden; - background: var(--bg); - } - - .page-card-title { - padding: 8px 12px; - font-size: 13px; - font-weight: 600; - border-bottom: 1px solid var(--line); - background: #fff; - } - - .page-card-sub { - padding: 2px 12px 6px; - font-size: 11px; - color: var(--muted); - background: #fff; - } - - .page-card svg { - display: block; - } - .matrix-wrap { overflow-x: auto; } @@ -469,6 +443,8 @@

    Layout Analysis