From fd36729e0679e08f167965ceec7c2a336f5ae211 Mon Sep 17 00:00:00 2001
From: Trevor Burnham
Date: Fri, 6 Mar 2026 14:12:04 -0500
Subject: [PATCH] fix: sync sourceCodeLocation.attrs keys with adjusted SVG/MathML/XML attribute names

When parsing foreign content (SVG, MathML), attribute names are adjusted
from lowercase to their spec-correct casing (e.g. viewbox -> viewBox).
The location info keys in token.location.attrs were not updated to match,
causing a mismatch where attrs[i].name and Object.keys(location.attrs)
would differ.

A location entry is only re-keyed when the adjusted name differs from the
original name and the adjusted key is not already in use, so no entry is
dropped or overwritten by the adjustment.

Fixes #318
---
Review notes (text between "---" and the diffstat is not part of the commit
message):

- adjustTokenAttrLocation previously assigned attrs[newName] = attrs[oldName]
  and then deleted attrs[oldName]. When both names are equal this removes the
  entry altogether. The HTML "adjust foreign attributes" table contains such a
  mapping (xmlns keeps the local name "xmlns"); presumably
  XML_ATTRS_ADJUSTMENT_MAP mirrors it - verify against the map.
- The helper also no longer creates an `undefined` entry when the old key is
  missing, and no longer overwrites an entry that already exists under the
  adjusted key (e.g. a plain `href` next to `xlink:href`).
- NOTE(review): this copy of the patch had lost its line breaks, indentation
  and everything written in angle brackets (author address, HTML literals).
  The two `html` literals in the added tests are reconstructed from what the
  assertions require - confirm against the original commit. The `html` literal
  in the trailing GH-265 context line is still stripped and must be restored
  from the target file before this patch can apply. The post-image blob id on
  the first "index" line predates the helper rewrite.

 packages/parse5/lib/common/foreign-content.ts | 23 +++++++++
 .../lib/parser/parser-location-info.test.ts   | 47 +++++++++++++++++++
 2 files changed, 70 insertions(+)

diff --git a/packages/parse5/lib/common/foreign-content.ts b/packages/parse5/lib/common/foreign-content.ts
index 464e65c7e..b721e2d65 100644
--- a/packages/parse5/lib/common/foreign-content.ts
+++ b/packages/parse5/lib/common/foreign-content.ts
@@ -188,9 +188,30 @@ export function causesExit(startTagToken: TagToken): boolean {
 }
 
 //Token adjustments
+/**
+ * Moves an attribute's source location entry from its original name to its
+ * adjusted name, keeping `token.location.attrs` keys in sync with `token.attrs`.
+ */
+function adjustTokenAttrLocation(token: TagToken, oldName: string, newName: string): void {
+    const attrLocations = token.location?.attrs;
+    const attrLocation = attrLocations?.[oldName];
+
+    // Nothing to move without location info for the old name. An unchanged name
+    // must not be re-keyed either: assigning and then deleting the same key
+    // would drop the entry. An entry already stored under the adjusted name
+    // belongs to another attribute and is left untouched.
+    if (!attrLocations || !attrLocation || oldName === newName || newName in attrLocations) {
+        return;
+    }
+
+    attrLocations[newName] = attrLocation;
+    delete attrLocations[oldName];
+}
+
 export function adjustTokenMathMLAttrs(token: TagToken): void {
     for (let i = 0; i < token.attrs.length; i++) {
         if (token.attrs[i].name === DEFINITION_URL_ATTR) {
+            adjustTokenAttrLocation(token, DEFINITION_URL_ATTR, ADJUSTED_DEFINITION_URL_ATTR);
             token.attrs[i].name = ADJUSTED_DEFINITION_URL_ATTR;
             break;
         }
@@ -202,6 +223,7 @@ export function adjustTokenSVGAttrs(token: TagToken): void {
         const adjustedAttrName = SVG_ATTRS_ADJUSTMENT_MAP.get(token.attrs[i].name);
 
         if (adjustedAttrName != null) {
+            adjustTokenAttrLocation(token, token.attrs[i].name, adjustedAttrName);
             token.attrs[i].name = adjustedAttrName;
         }
     }
@@ -212,6 +234,7 @@ export function adjustTokenXMLAttrs(token: TagToken): void {
         const adjustedAttrEntry = XML_ATTRS_ADJUSTMENT_MAP.get(token.attrs[i].name);
 
         if (adjustedAttrEntry) {
+            adjustTokenAttrLocation(token, token.attrs[i].name, adjustedAttrEntry.name);
             token.attrs[i].prefix = adjustedAttrEntry.prefix;
             token.attrs[i].name = adjustedAttrEntry.name;
             token.attrs[i].namespace = adjustedAttrEntry.namespace;
diff --git a/packages/parse5/lib/parser/parser-location-info.test.ts b/packages/parse5/lib/parser/parser-location-info.test.ts
index 0fdb15c49..f013da787 100644
--- a/packages/parse5/lib/parser/parser-location-info.test.ts
+++ b/packages/parse5/lib/parser/parser-location-info.test.ts
@@ -156,6 +156,53 @@ generateTestsForEachTreeAdapter('location-info-parser', (treeAdapter) => {
         );
     });
 
+    it('Regression - SVG attribute location keys should match adjusted camelCase names (GH-318)', () => {
+        const html = '<svg viewbox="0 0 100 100"></svg>';
+
+        const opts = {
+            treeAdapter,
+            sourceCodeLocationInfo: true,
+        };
+
+        const fragment = parseFragment(html, opts);
+        const svg = treeAdapter.getChildNodes(fragment)[0];
+        const location = treeAdapter.getNodeSourceCodeLocation(svg);
+        const attrs = treeAdapter.getAttrList(svg);
+
+        assert.ok(location?.attrs);
+
+        // The attrs array should have the camelCase name
+        const viewBoxAttr = attrs.find((a) => a.name === 'viewBox');
+        assert.ok(viewBoxAttr, 'attrs should contain viewBox with camelCase name');
+
+        // The location attrs should use the same camelCase key
+        assert.ok(location.attrs['viewBox'], 'location.attrs should have camelCase key "viewBox"');
+        assert.ok(!location.attrs['viewbox'], 'location.attrs should not have lowercase key "viewbox"');
+    });
+
+    it('Regression - MathML definitionURL attribute location key should match adjusted name (GH-318)', () => {
+        const html = '<math><mprescripts definitionurl="foo"></mprescripts></math>';
+
+        const opts = {
+            treeAdapter,
+            sourceCodeLocationInfo: true,
+        };
+
+        const fragment = parseFragment(html, opts);
+        const math = treeAdapter.getChildNodes(fragment)[0];
+        const mprescripts = treeAdapter.getChildNodes(math)[0];
+        const location = treeAdapter.getNodeSourceCodeLocation(mprescripts);
+        const attrs = treeAdapter.getAttrList(mprescripts);
+
+        assert.ok(location?.attrs);
+
+        const attr = attrs.find((a) => a.name === 'definitionURL');
+        assert.ok(attr, 'attrs should contain definitionURL');
+
+        assert.ok(location.attrs['definitionURL'], 'location.attrs should have "definitionURL"');
+        assert.ok(!location.attrs['definitionurl'], 'location.attrs should not have "definitionurl"');
+    });
+
     it('Regression - Escaped script content has incorrect location info (GH-265)', () => {
         const html = '';
 