From f56a381a83f42271082316fa2beea96f66cffa6c Mon Sep 17 00:00:00 2001 From: SkyZeroZx <73321943+SkyZeroZx@users.noreply.github.com> Date: Tue, 23 Jun 2026 12:19:56 -0500 Subject: [PATCH] fix: escape fallback raw-content text nodes Escape text nodes in iframe, noembed, noscript, and noframes during HTML serialization instead of emitting attacker-controlled bytes as raw markup. These fallback raw-content elements are inert when parsed directly by a browser, but SSR post-processing can reparse serialized HTML without preserving raw-content parser state. Escaping their text nodes prevents ancestor-closing payloads from being emitted as executable markup while preserving raw serialization for script, style, xmp, and plaintext. --- lib/NodeUtils.js | 83 +++++++++++++- test/html5lib-tests.json | 36 +++--- test/tools/update-html5lib-tests.js | 96 +++++++++++++++- test/xss.js | 170 +++++++++++++++++++++++++++- 4 files changed, 362 insertions(+), 23 deletions(-) diff --git a/lib/NodeUtils.js b/lib/NodeUtils.js index d7332e2..6a69110 100644 --- a/lib/NodeUtils.js +++ b/lib/NodeUtils.js @@ -34,6 +34,15 @@ var hasRawContent = { PLAINTEXT: true }; +var hasRawContentFallback = { + // Text in these fallback raw-content elements is inert for browser parsing, + // but downstream SSR post-processing may reparse it without raw-text state. + IFRAME: true, + NOEMBED: true, + NOSCRIPT: true, + NOFRAMES: true +}; + var emptyElements = { area: true, base: true, @@ -144,11 +153,77 @@ function escapeMatchingClosingTag(rawText, parentTag) { // would otherwise shift the replacement and leave a real `` // break-out in the output. return rawText.replace( - new RegExp(parentClosingTag, 'ig'), + new RegExp(escapeRegExp(parentClosingTag), 'ig'), (m) => '<' + m.slice(1) ); } +function escapeRegExp(text) { + return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} + +function escapeFallbackRawText(rawText, parentTag) { + var result = ''; + var index = 0; + + while (index < rawText.length) { + var commentStart = rawText.indexOf('", + "html": "" } }, @@ -43563,8 +43563,8 @@ ] } ], - "html": "" + "html": "X", + "noQuirksBodyHtml": "" } }, { @@ -43618,8 +43618,8 @@ ] } ], - "html": "", - "noQuirksBodyHtml": "" + "html": "", + "noQuirksBodyHtml": "" } }, { @@ -43717,8 +43717,8 @@ ] } ], - "html": "<body><script><!--...</script></body>", - "noQuirksBodyHtml": "<body><script><!--...</script></body>" + "html": "&lt;body&gt;&lt;script&gt;<!--...</script></body>", + "noQuirksBodyHtml": "&lt;body&gt;&lt;script&gt;<!--...</script></body>" } }, { @@ -47887,7 +47887,7 @@ ] } ], - "html": "", + "html": "" } }, @@ -47979,8 +47979,8 @@ ] } ], - "html": "" + "html": "X", + "noQuirksBodyHtml": "" } }, { @@ -48031,8 +48031,8 @@ ] } ], - "html": "", - "noQuirksBodyHtml": "" + "html": "", + "noQuirksBodyHtml": "" } }, { @@ -48125,8 +48125,8 @@ ] } ], - "html": "<body><script><!--...</script></body>", - "noQuirksBodyHtml": "<body><script><!--...</script></body>" + "html": "&lt;body&gt;&lt;script&gt;<!--...</script></body>", + "noQuirksBodyHtml": "&lt;body&gt;&lt;script&gt;<!--...</script></body>" } }, { diff --git a/test/tools/update-html5lib-tests.js b/test/tools/update-html5lib-tests.js index 2bc5a3b..4a8daf9 100755 --- a/test/tools/update-html5lib-tests.js +++ b/test/tools/update-html5lib-tests.js @@ -51,6 +51,12 @@ var NO_ESCAPE = { noframes:true, plaintext:true, noscript: true // <- assumes that scripting is enabled. }; +var NO_ESCAPE_FALLBACK = { + iframe: true, + noembed: true, + noframes: true, + noscript: true +}; var localname = function(namestring) { return namestring.replace(/^(svg|math|xlink|xml|xmlns) /, ''); @@ -117,6 +123,7 @@ var serialize_doc = function(filename, fragment, doc) { var clear_add_attr = function() { if (can_add_attr) { result += '>'; + stack_top().contentStart = result.length; can_add_attr = false; } }; @@ -130,10 +137,18 @@ var serialize_doc = function(filename, fragment, doc) { filename, doc); } } else { + if (old.ns === namespace('html') && + NO_ESCAPE[old.tag] && + !NO_ESCAPE_FALLBACK[old.tag] && + old.contentStart !== undefined) { + result = result.slice(0, old.contentStart) + + escapeMatchingClosingTag(result.slice(old.contentStart), old.tag); + } result += ''; } } // save some space in the JSON output by omitting empty lists + old.contentStart = undefined; if (old.children.length===0) { old.children = undefined; } if (old.attrs && old.attrs.length===0) { old.attrs = undefined; } return old; @@ -161,6 +176,76 @@ var serialize_doc = function(filename, fragment, doc) { } }); }; + var escapeRegExp = function(s) { + return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + }; + var escapeMatchingClosingTag = function(s, parentTag) { + var parentClosingTag = '') { + return index + 1; + } + if (s.charAt(index) === '-' && s.charAt(index + 1) === '>') { + return index + 2; + } + + var match = /--!?>/.exec(s.slice(index)); + return match ? index + match.index + match[0].length : -1; + }; + var escapeFallbackRawTextComment = function(s, parentTag) { + var parentClosingTag = ' 0) { var m = /^\| ((?: )*)(?:<([^!?>][^>]*)>|([^="\n][^=\n]*)="([^"]*)"|"((?:[^"]|"(?!\n))*)"|))*) -->|]*)>|<\?([^>]+)>|(content))\n/.exec(doc); @@ -246,7 +331,9 @@ var serialize_doc = function(filename, fragment, doc) { if (text !== escape(text) && !obj.no_escape) { obj.escaped = props.escaped = true; } - result += obj.no_escape ? text : escape(text); + result += obj.no_escape + ? (NO_ESCAPE_FALLBACK[stack_top().tag] ? escapeFallbackRawText(text, stack_top().tag) : text) + : escape(text); stack_top().children.push(obj); continue; } @@ -331,6 +418,13 @@ var twiddle_test = function(filename, tc) { //.test(expected)) { expected = expected.replace(/(6869687=[^> ]+) (problem=[^> ]+)/g, '$2 $1'); } + if (filename==='webkit02' && + tc.script==='on' && + /

'; + section.appendChild(iframe); + document.body.appendChild(section); + + document.body + .serialize() + .should.equal( + '
', + ); + + const reparsed = domino.createDocument('' + iframe.serialize() + '').body.innerHTML; + reparsed.should.not.containEql('
'; + div.appendChild(noscript); + document.body.appendChild(div); + + document.body + .serialize() + .should.equal( + '
', + ); + + const reparsed = domino.createDocument('' + noscript.serialize() + '').body.innerHTML; + reparsed.should.not.containEql(''; + document.body.appendChild(noscript); + + document.body + .serialize() + .should.equal( + '', + ); + + const reparsedDocument = domino.createDocument('' + noscript.serialize() + ''); + reparsedDocument.getElementsByTagName('script').length.should.equal(0); + + const html = reparsedDocument.serialize(); + return alertFired(html).should.eventually.be.false('alert fired after normal HTML reparse for: ' + html); +}; + +exports.fallbackRawTextEscapesMarkupAfterAbruptCommentClose = async function () { + const cases = [ + { + tagName: 'iframe', + payload: '', + expected: + '', + }, + { + tagName: 'noscript', + payload: '', + expected: + '', + }, + { + tagName: 'iframe', + payload: '
', + expected: + '', + }, + { + tagName: 'noscript', + payload: '
', + expected: + '', + }, + ]; + + for (const testCase of cases) { + const document = domino.createDocument(''); + const element = document.createElement(testCase.tagName); + element.textContent = testCase.payload; + document.body.appendChild(element); + + const html = document.body.serialize(); + html.should.equal(testCase.expected); + html.should.not.containEql('