Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,22 @@ self-ignored `.reglance/` directory — nothing to add to `.gitignore`.
| `viewports` | no | `[{ name, width, height, deviceScaleFactor? }]`. Defaults to `desktop` (1920×1080), `mobile` (390×844). |
| `output` | no | Output directory. Defaults to `.reglance`. |
| `pixelmatchOptions` | no | [pixelmatch](https://github.com/mapbox/pixelmatch) options, e.g. `{ "threshold": 0.1 }`. |
| `timeouts` | no | `{ goto, settle }` in ms. Navigation and post-scroll network-idle waits. Defaults `{ goto: 15000, settle: 8000 }`. Raise `settle` for slow, lazy-loading pages. |
| `timeouts` | no | `{ goto, settle }` in ms. `goto` bounds navigation; `settle` bounds each post-scroll wait (network idle, then image load/decode). Defaults `{ goto: 15000, settle: 8000 }`. Raise `settle` for slow, lazy-loading pages. |
| `blockHosts` | no | Hostnames to block requests to during capture, e.g. `["challenges.cloudflare.com"]`. Each entry also blocks its subdomains. |

`domain` is only needed by `capture`; `control` and `compare` work on the files
already captured. See [`reglance.example.json`](reglance.example.json) for a full
example.

`blockHosts` aborts every request to the listed hosts (and their subdomains)
before it leaves the browser. Use it for third-party embeds that keep the
network busy and stall capture — CAPTCHA widgets like Cloudflare Turnstile,
ad tech, analytics — or that render differently on every load and pollute
diffs. Captures wait for the network to go idle, so a widget that polls or
retries indefinitely will otherwise time out every viewport on pages that
embed it. Entries are bare hostnames; `"example.org"` blocks `example.org` and
`sub.example.org` alike.

A viewport's optional `deviceScaleFactor` (device pixel ratio) renders the page
as it would appear on a higher-density display — use `2` for a retina capture,
`3` for some phones. It defaults to `1`. Captures sharing a DPR run in one
Expand Down Expand Up @@ -114,6 +124,11 @@ guards against silently baselining bad data:
- If a page never loads cleanly (after retries), `capture` reports it as
degraded instead of treating it as a success. Add `--fail-on-degraded` to
make the run exit non-zero in CI.
- `capture` scrolls each page one viewport at a time so every lazy-loaded
image is triggered, then waits (bounded by `timeouts.settle`) for all
images to load and decode before screenshotting — and warns per capture
when any image was still loading, instead of silently shipping a partial
screenshot.
- `control` records each promotion in `.reglance/controls/manifest.json` and
warns when it promoted fewer captures than expected (so the untouched
controls are now stale). `compare` warns when the baseline mixes controls
Expand Down
3 changes: 2 additions & 1 deletion reglance.example.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,6 @@
"timeouts": {
"goto": 15000,
"settle": 8000
}
},
"blockHosts": ["challenges.cloudflare.com"]
}
165 changes: 136 additions & 29 deletions src/capture.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -59,38 +59,109 @@
});
}

/**
 * Whether a URL's host is covered by the configured block list.
 *
 * An entry matches the host itself and all of its subdomains, so
 * "example.org" blocks both "example.org" and "sub.example.org". The match is
 * label-aligned: "notexample.org" is NOT covered by "example.org".
 *
 * A fully-qualified name with a trailing root dot ("example.org.") addresses
 * the same host as its dot-less form, so the root dot is ignored on both the
 * request host and the entry — otherwise "https://example.org./" would slip
 * past a block on "example.org".
 *
 * URLs that cannot be parsed, and URLs without a hostname (e.g. blob:, data:),
 * never match.
 *
 * @param {string} url - The request URL.
 * @param {string[]} blockHosts - Normalized (lowercase) hostnames.
 * @returns {boolean} True when the request should be blocked.
 */
export function isBlockedHost(url, blockHosts) {
  if (!blockHosts?.length) {
    return false;
  }

  // Drop a single trailing root dot so "example.org." and "example.org"
  // compare equal.
  const stripRootDot = (name) => String(name).replace(/\.$/, '');

  let host;
  try {
    host = stripRootDot(new URL(url).hostname.toLowerCase());
  } catch {
    // Not a parseable absolute URL — nothing to match against.
    return false;
  }

  if (!host) {
    return false;
  }

  return blockHosts.some((entry) => {
    const blocked = stripRootDot(entry);
    // An empty entry must never act as a catch-all.
    if (!blocked) {
      return false;
    }
    return host === blocked || host.endsWith(`.${blocked}`);
  });
}

/**
* Scroll the full height of the page and back to the top.
*
* Triggers lazy-loaded images and other on-scroll behavior so the screenshot
* captures the page as a visitor would see it. Scrolls to the bottom and waits
* for the page to grow, repeating until the height stabilizes — so a short page
* settles almost instantly while a tall one keeps going as content loads,
* rather than paying a fixed per-step delay across the whole height.
* captures the page as a visitor would see it. Steps one viewport at a time
* because lazy loaders (IntersectionObserver, native loading="lazy") only
* trigger for content near the viewport — jumping straight to the bottom
* skips everything in between, and which of those images load becomes a
* timing race. The height is re-read every step so content that loads and
* grows the page extends the walk.
*
* @param {import('playwright').Page} page - The page to scroll.
*/
async function autoScroll(page) {
await page.evaluate(async () => {
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
let lastHeight = -1;
const root = document.scrollingElement || document.documentElement;
let position = 0;

// Cap iterations so a page can't loop forever. (e.g. infinite scroll)
for (let i = 0; i < 500; i++) {
position += window.innerHeight;
// 'instant' overrides a site's `scroll-behavior: smooth`, which
// would otherwise animate each step and outpace this loop.
window.scrollTo({ top: position, behavior: 'instant' });
await sleep(50);

// Cap iterations so a page that grows on every scroll (infinite feed)
// can't loop forever.
for (let i = 0; i < 100; i++) {
const height = document.body.scrollHeight;
if (height === lastHeight) {
if (position >= root.scrollHeight - window.innerHeight) {
break;
}
lastHeight = height;
window.scrollTo(0, height);
await sleep(50);
}

window.scrollTo(0, 0);
window.scrollTo({ top: 0, behavior: 'instant' });
});
}

/**
 * Block until the page's visible images have loaded and decoded, or until the
 * timeout elapses — whichever comes first.
 *
 * Network idle is an insufficient signal on its own: it only reflects
 * requests already in flight, carries no information about decode state, and
 * a timed-out idle wait would otherwise let the capture continue silently
 * with a partial set of images. `decode()` settles once an image is loaded
 * and decoded; a rejection (broken image) is treated as settled too, because
 * a missing image is the page's real state rather than something to wait out.
 *
 * @param {import('playwright').Page} page - The page to wait on.
 * @param {number} timeout - Max wait in ms.
 * @returns {Promise<number>} How many images were still loading at timeout.
 */
async function waitForImages(page, timeout) {
  // The callback runs inside the page, so it must stay self-contained.
  const stillLoading = await page.evaluate(async (maxWait) => {
    // Only images that can paint are tracked. A hidden image contributes
    // nothing to the screenshot, and a hidden native-lazy image (say, a
    // desktop-only image at a mobile width) never loads by design — waiting
    // on it would consume the whole timeout and produce a bogus warning.
    const visible = [];
    for (const img of Array.from(document.images)) {
      if (!img.checkVisibility || img.checkVisibility()) {
        visible.push(img);
      }
    }

    const decodes = visible.map((img) => img.decode().catch(() => {}));
    const allDecoded = Promise.all(decodes);
    const deadline = new Promise((resolve) => setTimeout(resolve, maxWait));

    await Promise.race([allDecoded, deadline]);

    let pending = 0;
    for (const img of visible) {
      if (!img.complete) {
        pending += 1;
      }
    }
    return pending;
  }, timeout);

  return stillLoading;
}

/**
* Group viewports by their device scale factor, preserving order.
*
Expand Down Expand Up @@ -132,11 +203,11 @@
* groupViewportsByScaleFactor); within a context the first viewport navigates
* fresh and the rest reuse the page (reloading unless --skip-reload).
*
* @param {import('playwright').Browser} browser - The shared browser.
* @param {object} target - The target ({ key, url }).
* @param {Array} viewports - Viewport definitions.
* @param {object} dirs - Output directory paths.
* @param {object} options - Capture options.
* @param {import('playwright').Browser} browser The shared browser.
* @param {object} target The target ({ key, url }).
* @param {Array} viewports Viewport definitions.
* @param {object} dirs Output directory paths.
* @param {object} options Capture options.
* @returns {Promise<Array<{ slug: string, url: string, reason: string }>>} Failed slugs.
*/
async function captureTarget(browser, target, viewports, dirs, options) {
Expand All @@ -145,6 +216,7 @@
retryCount = 2,
timeouts = DEFAULT_TIMEOUTS,
ignoreHTTPSErrors = false,
blockHosts = [],
} = options;
const failures = [];
let currentSlug = target.key;
Expand All @@ -154,14 +226,28 @@
ignoreHTTPSErrors,
deviceScaleFactor: group.deviceScaleFactor,
});

if (blockHosts.length) {
await context.route('**/*', (route) => {
if (isBlockedHost(route.request().url(), blockHosts)) {
return route.abort('blockedbyclient');
}
return route.continue();
});
}

const page = await context.newPage();

const failedResources = new Set();
page.on('requestfailed', (request) => {
// A deliberately blocked host is not a load failure.
if (isBlockedHost(request.url(), blockHosts)) {
return;
}
const type = request.resourceType();
if (type === 'stylesheet' || type === 'script') {
failedResources.add(request.url());
console.warn(`⚠️ Failed to load ${type}: ${request.url()}`);

Check warning on line 250 in src/capture.mjs

View workflow job for this annotation

GitHub Actions / test (24)

Unexpected console statement. Only these console methods are allowed: error

Check warning on line 250 in src/capture.mjs

View workflow job for this annotation

GitHub Actions / test (22)

Unexpected console statement. Only these console methods are allowed: error
}
});

Expand All @@ -171,7 +257,7 @@
const slug = `${target.key}-${viewport.name}`;
currentSlug = slug;

console.log(`Capturing ${slug}...`);

Check warning on line 260 in src/capture.mjs

View workflow job for this annotation

GitHub Actions / test (24)

Unexpected console statement. Only these console methods are allowed: error

Check warning on line 260 in src/capture.mjs

View workflow job for this annotation

GitHub Actions / test (22)

Unexpected console statement. Only these console methods are allowed: error

await page.setViewportSize({
width: viewport.width,
Expand All @@ -187,7 +273,7 @@
try {
if (i === 0 || !skipReload) {
if (attempts > 0) {
console.log(` Retry attempt ${attempts}...`);

Check warning on line 276 in src/capture.mjs

View workflow job for this annotation

GitHub Actions / test (24)

Unexpected console statement. Only these console methods are allowed: error

Check warning on line 276 in src/capture.mjs

View workflow job for this annotation

GitHub Actions / test (22)

Unexpected console statement. Only these console methods are allowed: error
await page.waitForTimeout(1000);
}

Expand Down Expand Up @@ -240,13 +326,24 @@
// Slow/never-idle page; screenshot what we have.
});

const pendingImages = await waitForImages(
page,
timeouts.settle
);
if (pendingImages > 0) {
console.warn(

Check warning on line 334 in src/capture.mjs

View workflow job for this annotation

GitHub Actions / test (24)

Unexpected console statement. Only these console methods are allowed: error

Check warning on line 334 in src/capture.mjs

View workflow job for this annotation

GitHub Actions / test (22)

Unexpected console statement. Only these console methods are allowed: error
` ⚠️ ${slug}: ${pendingImages} image(s) were still loading at capture ` +
'— the screenshot may be missing them. Raise "timeouts.settle" if this persists.'
);
}

const imagePath = path.join(dirs.captures, `${slug}.png`);
await page.screenshot({ path: imagePath, fullPage: true });

const htmlPath = path.join(dirs.capturesHtml, `${slug}.html`);
fs.writeFileSync(htmlPath, await page.content());

console.log(`✓ Captured ${slug}`);

Check warning on line 346 in src/capture.mjs

View workflow job for this annotation

GitHub Actions / test (24)

Unexpected console statement. Only these console methods are allowed: error

Check warning on line 346 in src/capture.mjs

View workflow job for this annotation

GitHub Actions / test (22)

Unexpected console statement. Only these console methods are allowed: error
}
} catch (error) {
// A failure outside the retry loop (e.g. screenshot or HTML write).
Expand All @@ -270,8 +367,8 @@
* opt-in (so existing best-effort/partial workflows keep working) — see the
* D-004 decision.
*
* @param {Array} failures - The degraded-slug records.
* @param {boolean} [failOnDegraded] - Whether degraded captures fail the run.
* @param {Array} failures The degraded-slug records.
* @param {boolean} [failOnDegraded] Whether degraded captures fail the run.
* @returns {boolean} True when the run should signal failure.
*/
export function shouldFailRun(failures, failOnDegraded = false) {
Expand All @@ -281,13 +378,13 @@
/**
* Capture screenshots for every configured target.
*
* @param {object} config - The normalized config.
* @param {object} [options] - Capture options.
* @param {number} [options.concurrency] - Parallel browser contexts.
* @param {number} [options.staggerDelay] - Delay (ms) between context starts.
* @param {boolean}[options.skipReload] - Reuse the page between viewports.
* @param {boolean}[options.failOnDegraded] - Exit non-zero if any capture is degraded.
* @param {Array} [options.only] - Limit to these target keys.
* @param {object} config The normalized config.
* @param {object} [options] Capture options.
* @param {number} [options.concurrency] Parallel browser contexts.
* @param {number} [options.staggerDelay] Delay (ms) between context starts.
* @param {boolean}[options.skipReload] Reuse the page between viewports.
* @param {boolean}[options.failOnDegraded] Exit non-zero if any capture is degraded.
* @param {Array} [options.only] Limit to these target keys.
* @returns {Promise<{ failures: Array }>} The degraded-slug records.
*/
export async function capture(config, options = {}) {
Expand All @@ -305,18 +402,23 @@

const offDomain = offDomainTargets(targets, config.domain);
if (offDomain.length) {
console.warn(

Check warning on line 405 in src/capture.mjs

View workflow job for this annotation

GitHub Actions / test (24)

Unexpected console statement. Only these console methods are allowed: error

Check warning on line 405 in src/capture.mjs

View workflow job for this annotation

GitHub Actions / test (22)

Unexpected console statement. Only these console methods are allowed: error
`⚠️ ${offDomain.length} path(s) point off the configured domain and ` +
`will be captured as-is: ${offDomain.map((target) => target.key).join(', ')}.`
);
}

const totalShots = targets.length * config.viewports.length;
console.log(`Domain: ${config.domain}`);

Check warning on line 412 in src/capture.mjs

View workflow job for this annotation

GitHub Actions / test (24)

Unexpected console statement. Only these console methods are allowed: error

Check warning on line 412 in src/capture.mjs

View workflow job for this annotation

GitHub Actions / test (22)

Unexpected console statement. Only these console methods are allowed: error
console.log(`Targets: ${targets.length}`);

Check warning on line 413 in src/capture.mjs

View workflow job for this annotation

GitHub Actions / test (24)

Unexpected console statement. Only these console methods are allowed: error

Check warning on line 413 in src/capture.mjs

View workflow job for this annotation

GitHub Actions / test (22)

Unexpected console statement. Only these console methods are allowed: error
console.log(`Viewports per target: ${config.viewports.length}`);
console.log(`Total screenshots: ${totalShots}`);
console.log(`Concurrency: ${concurrency} parallel contexts`);
if (config.blockHosts?.length) {
console.log(
`Blocking hosts (and subdomains): ${config.blockHosts.join(', ')}`
);
}

// Ignore TLS certificate errors only for local development hosts (where
// self-signed certs are normal). For a non-local host, validation stays on
Expand Down Expand Up @@ -370,7 +472,12 @@
target,
config.viewports,
config.dirs,
{ skipReload, timeouts: config.timeouts, ignoreHTTPSErrors }
{
skipReload,
timeouts: config.timeouts,
ignoreHTTPSErrors,
blockHosts: config.blockHosts ?? [],
}
);
failures.push(...targetFailures);
}
Expand Down
45 changes: 45 additions & 0 deletions src/config.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,50 @@ export function validateViewports(viewports) {
});
}

/**
 * Normalize and validate the `blockHosts` config value.
 *
 * Entries are bare hostnames; each blocks the host itself and all of its
 * subdomains during capture (a leading `*.` is accepted and equivalent).
 * Anything with a scheme, path, or port is rejected up front — silently
 * matching nothing would read as "the block isn't working". For the same
 * reason an entry that is empty once the `*.` prefix is removed (e.g. `"*."`)
 * is rejected rather than normalized to an empty hostname.
 *
 * @param {*} blockHosts - The raw config value.
 * @returns {string[]} Lowercased hostnames with any `*.` prefix removed.
 * @throws {Error} When the value is not an array, or an entry is not a
 *   non-empty bare hostname.
 */
export function normalizeBlockHosts(blockHosts) {
  // An absent key means "block nothing".
  if (blockHosts === undefined) {
    return [];
  }

  if (!Array.isArray(blockHosts)) {
    throw new Error(
      '❌ Invalid "blockHosts": expected an array of hostnames.\n' +
        '💡 Use entries like ["challenges.cloudflare.com"].'
    );
  }

  return blockHosts.map((entry) => {
    if (typeof entry !== 'string' || !entry.trim()) {
      throw new Error(
        `❌ Invalid "blockHosts" entry ${JSON.stringify(entry)}: expected a non-empty string.\n` +
          '💡 Use a bare hostname like "challenges.cloudflare.com".'
      );
    }

    const host = entry.trim().toLowerCase().replace(/^\*\./, '');

    // `/` or `:` means a scheme, port, or path slipped in; an empty result
    // means the entry was only a wildcard prefix. Neither can match a host.
    if (!host || /[/:\s]/.test(host)) {
      throw new Error(
        `❌ Invalid "blockHosts" entry ${JSON.stringify(entry)}: expected a bare hostname (no scheme, port, or path).\n` +
          '💡 Use "challenges.cloudflare.com", not "https://challenges.cloudflare.com/".'
      );
    }

    return host;
  });
}

/**
* Join a domain origin and a path into a full URL.
*
Expand Down Expand Up @@ -295,6 +339,7 @@ export function loadConfig({ configPath = 'reglance.json', domain } = {}) {
viewports,
targets,
pixelmatchOptions,
blockHosts: normalizeBlockHosts(raw.blockHosts),
timeouts: {
...DEFAULT_TIMEOUTS,
...raw.timeouts,
Expand Down
Loading
Loading