Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 80 additions & 3 deletions lib/NodeUtils.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,15 @@ var hasRawContent = {
PLAINTEXT: true
};

// Raw-content elements (keyed by upper-case tag name) whose text is inert
// when a browser parses it, but which downstream SSR post-processing may
// reparse without raw-text state. Text nodes under these parents are
// serialized through escapeFallbackRawText() instead of being emitted as-is.
var hasRawContentFallback = {
  IFRAME: true,
  NOEMBED: true,
  NOSCRIPT: true,
  NOFRAMES: true
};

var emptyElements = {
area: true,
base: true,
Expand Down Expand Up @@ -144,11 +153,77 @@ function escapeMatchingClosingTag(rawText, parentTag) {
// would otherwise shift the replacement and leave a real `</tag>`
// break-out in the output.
return rawText.replace(
new RegExp(parentClosingTag, 'ig'),
new RegExp(escapeRegExp(parentClosingTag), 'ig'),
(m) => '&lt;' + m.slice(1)
);
}

/**
 * Backslash-escapes every RegExp metacharacter in `text` so the result can
 * be handed to `new RegExp()` and match `text` literally.
 *
 * @param {string} text - Literal text to embed in a pattern.
 * @returns {string} `text` with `. * + ? ^ $ { } ( ) | [ ] \` escaped.
 */
function escapeRegExp(text) {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return text.replace(metacharacters, '\\$&');
}

/**
 * Serializes the text of a fallback raw-content element.
 *
 * The text is walked left to right and split at each `<!--` opener:
 *  - text outside comments is passed through `escape()` (defined elsewhere
 *    in this file);
 *  - each comment, from `<!--` up to the end reported by findCommentEnd(),
 *    is kept as a comment and only run through
 *    escapeFallbackRawTextComment();
 *  - a comment that never terminates extends to the end of the text.
 *
 * @param {string} rawText - Text node data to serialize.
 * @param {string} parentTag - Tag name of the element that owns the text.
 * @returns {string} The serialized text.
 */
function escapeFallbackRawText(rawText, parentTag) {
  var output = '';
  var cursor = 0;

  while (cursor < rawText.length) {
    var openerAt = rawText.indexOf('<!--', cursor);

    // Plain text runs up to the next comment opener, or to the end of the
    // input when there are no more comments.
    var textEnd = openerAt === -1 ? rawText.length : openerAt;
    output += escape(rawText.slice(cursor, textEnd));
    if (openerAt === -1) {
      break;
    }

    // A complete comment stays inert if the output is reparsed as ordinary
    // HTML, so it is preserved; an unterminated one swallows the remainder.
    var closerEnd = findCommentEnd(rawText, openerAt + 4);
    var commentEnd = closerEnd === -1 ? rawText.length : closerEnd;
    output += escapeFallbackRawTextComment(rawText.slice(openerAt, commentEnd), parentTag);
    cursor = commentEnd;
  }

  return output;
}

/**
 * Neutralizes every `</parentTag` sequence inside `rawText` by replacing its
 * leading `<` with `&lt;`, leaving all other characters untouched.
 *
 * The tag name is matched ASCII case-insensitively, and the original casing
 * of the matched text is preserved in the output.
 *
 * @param {string} rawText - Comment text (or any raw text) to neutralize.
 * @param {string} parentTag - Tag name whose closing tag must not appear.
 * @returns {string} `rawText` with each `</parentTag` turned into
 *   `&lt;/parentTag`.
 */
function escapeFallbackRawTextComment(rawText, parentTag) {
  const parentClosingTag = '</' + parentTag;

  // Match against the original string with a case-insensitive RegExp rather
  // than searching a lowercased copy. toLowerCase() can change the length of
  // a string (U+0130 lowercases to two code units), so offsets found in the
  // copy can point at the wrong place in `rawText`, shifting the replacement
  // and leaving a real `</tag` break-out in the output.
  // RegExp metacharacters in the tag are escaped so it is matched literally.
  const closingTagPattern = new RegExp(
    parentClosingTag.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'),
    'ig'
  );

  return rawText.replace(closingTagPattern, (m) => '&lt;' + m.slice(1));
}

function findCommentEnd(rawText, index) {
if (rawText.charAt(index) === '>')
return index + 1;
if (rawText.charAt(index) === '-' && rawText.charAt(index + 1) === '>')
return index + 2;

var match = /--!?>/.exec(rawText.slice(index));
return match ? index + match.index + match[0].length : -1;
}

// Matches an HTML comment terminator: `-->` or `--!>`.
const CLOSING_COMMENT_REGEXP = /--!?>/;

/**
Expand Down Expand Up @@ -196,7 +271,7 @@ function serializeOne(kid, parent) {
// If an element can have raw content, this content may
// potentially require escaping to avoid XSS.
var upperTag = tagname.toUpperCase();
if (hasRawContent[upperTag]) {
if (hasRawContent[upperTag] && !hasRawContentFallback[upperTag]) {
ss = escapeMatchingClosingTag(ss, tagname);
}
if (html && extraNewLine[tagname] && ss.charAt(0)==='\n') s += '\n';
Expand All @@ -215,7 +290,9 @@ function serializeOne(kid, parent) {
parenttag = '';

if (hasRawContent[parenttag]) {
s += kid.data;
// Preserve actual child element markup in fallback elements such as
// <noscript>, but do not emit text-node payloads as raw HTML.
s += hasRawContentFallback[parenttag] ? escapeFallbackRawText(kid.data, parent.localName) : kid.data;
} else {
s += escape(kid.data);
}
Expand Down
36 changes: 18 additions & 18 deletions test/html5lib-tests.json
Original file line number Diff line number Diff line change
Expand Up @@ -14216,8 +14216,8 @@
]
}
],
"html": "<html><head><noscript><noframes>XXX</noscript></noframes></noscript></head><body></body></html>",
"noQuirksBodyHtml": "<noscript><noframes>XXX</noscript></noframes></noscript>"
"html": "<html><head><noscript><noframes>XXX&lt;/noscript&gt;</noframes></noscript></head><body></body></html>",
"noQuirksBodyHtml": "<noscript><noframes>XXX&lt;/noscript&gt;</noframes></noscript>"
}
},
{
Expand Down Expand Up @@ -39310,8 +39310,8 @@
]
}
],
"html": "<html><head></head><frameset><frame><frameset><frame></frameset><noframes></frameset><noframes></noframes></frameset></html>",
"noQuirksBodyHtml": "<noframes></frameset><noframes></noframes>"
"html": "<html><head></head><frameset><frame><frameset><frame></frameset><noframes>&lt;/frameset&gt;&lt;noframes&gt;</noframes></frameset></html>",
"noQuirksBodyHtml": "<noframes>&lt;/frameset&gt;&lt;noframes&gt;</noframes>"
}
},
{
Expand Down Expand Up @@ -43467,7 +43467,7 @@
]
}
],
"html": "<!DOCTYPE html><html><head><noscript><!--</noscript></head><body>X<noscript>--></noscript></body></html>",
"html": "<!DOCTYPE html><html><head><noscript><!--</noscript></head><body>X<noscript>--&gt;</noscript></body></html>",
"noQuirksBodyHtml": "<noscript><!--</noscript>X<noscript>--></noscript>"
}
},
Expand Down Expand Up @@ -43563,8 +43563,8 @@
]
}
],
"html": "<!DOCTYPE html><html><head><noscript><iframe></noscript></head><body>X</body></html>",
"noQuirksBodyHtml": "<noscript><iframe></noscript>X</iframe></noscript>"
"html": "<!DOCTYPE html><html><head><noscript>&lt;iframe&gt;</noscript></head><body>X</body></html>",
"noQuirksBodyHtml": "<noscript><iframe>&lt;/noscript&gt;X</iframe></noscript>"
}
},
{
Expand Down Expand Up @@ -43618,8 +43618,8 @@
]
}
],
"html": "<!DOCTYPE html><html><head><noscript></noscript></head><body><iframe></noscript>X</iframe></body></html>",
"noQuirksBodyHtml": "<noscript><iframe></noscript>X</iframe></noscript>"
"html": "<!DOCTYPE html><html><head><noscript></noscript></head><body><iframe>&lt;/noscript&gt;X</iframe></body></html>",
"noQuirksBodyHtml": "<noscript><iframe>&lt;/noscript&gt;X</iframe></noscript>"
}
},
{
Expand Down Expand Up @@ -43717,8 +43717,8 @@
]
}
],
"html": "<!DOCTYPE html><html><head><noframes><body><script><!--...</script></body></noframes></head><body></body></html>",
"noQuirksBodyHtml": "<noframes><body><script><!--...</script></body></noframes>"
"html": "<!DOCTYPE html><html><head><noframes>&lt;body&gt;&lt;script&gt;<!--...</script></body></noframes></head><body></body></html>",
"noQuirksBodyHtml": "<noframes>&lt;body&gt;&lt;script&gt;<!--...</script></body></noframes>"
}
},
{
Expand Down Expand Up @@ -47887,7 +47887,7 @@
]
}
],
"html": "<html><head><noscript><!--</noscript></head><body>X<noscript>--></noscript></body></html>",
"html": "<html><head><noscript><!--</noscript></head><body>X<noscript>--&gt;</noscript></body></html>",
"noQuirksBodyHtml": "<noscript><!--</noscript>X<noscript>--></noscript>"
}
},
Expand Down Expand Up @@ -47979,8 +47979,8 @@
]
}
],
"html": "<html><head><noscript><iframe></noscript></head><body>X</body></html>",
"noQuirksBodyHtml": "<noscript><iframe></noscript>X</iframe></noscript>"
"html": "<html><head><noscript>&lt;iframe&gt;</noscript></head><body>X</body></html>",
"noQuirksBodyHtml": "<noscript><iframe>&lt;/noscript&gt;X</iframe></noscript>"
}
},
{
Expand Down Expand Up @@ -48031,8 +48031,8 @@
]
}
],
"html": "<html><head><noscript></noscript></head><body><iframe></noscript>X</iframe></body></html>",
"noQuirksBodyHtml": "<noscript><iframe></noscript>X</iframe></noscript>"
"html": "<html><head><noscript></noscript></head><body><iframe>&lt;/noscript&gt;X</iframe></body></html>",
"noQuirksBodyHtml": "<noscript><iframe>&lt;/noscript&gt;X</iframe></noscript>"
}
},
{
Expand Down Expand Up @@ -48125,8 +48125,8 @@
]
}
],
"html": "<html><head><noframes><body><script><!--...</script></body></noframes></head><body></body></html>",
"noQuirksBodyHtml": "<noframes><body><script><!--...</script></body></noframes>"
"html": "<html><head><noframes>&lt;body&gt;&lt;script&gt;<!--...</script></body></noframes></head><body></body></html>",
"noQuirksBodyHtml": "<noframes>&lt;body&gt;&lt;script&gt;<!--...</script></body></noframes>"
}
},
{
Expand Down
96 changes: 95 additions & 1 deletion test/tools/update-html5lib-tests.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@ var NO_ESCAPE = {
noframes:true, plaintext:true,
noscript: true // <- assumes that scripting is enabled.
};
// Tags (lower-case) whose no-escape text is written through
// escapeFallbackRawText() rather than verbatim, and which are exempt from
// the closing-tag escaping applied to other NO_ESCAPE elements when popped.
var NO_ESCAPE_FALLBACK = {
  iframe: true, noembed: true,
  noframes: true, noscript: true
};

var localname = function(namestring) {
return namestring.replace(/^(svg|math|xlink|xml|xmlns) /, '');
Expand Down Expand Up @@ -117,6 +123,7 @@ var serialize_doc = function(filename, fragment, doc) {
// Closes the start tag currently being written, if `can_add_attr` says one
// is still accepting attributes; otherwise does nothing.
var clear_add_attr = function() {
  if (!can_add_attr) {
    return;
  }
  result += '>';
  // Remember where this element's content begins in `result`.
  stack_top().contentStart = result.length;
  can_add_attr = false;
};
Expand All @@ -130,10 +137,18 @@ var serialize_doc = function(filename, fragment, doc) {
filename, doc);
}
} else {
if (old.ns === namespace('html') &&
NO_ESCAPE[old.tag] &&
!NO_ESCAPE_FALLBACK[old.tag] &&
old.contentStart !== undefined) {
result = result.slice(0, old.contentStart) +
escapeMatchingClosingTag(result.slice(old.contentStart), old.tag);
}
result += '</' + old.tag + '>';
}
}
// save some space in the JSON output by omitting empty lists
old.contentStart = undefined;
if (old.children.length===0) { old.children = undefined; }
if (old.attrs && old.attrs.length===0) { old.attrs = undefined; }
return old;
Expand Down Expand Up @@ -161,6 +176,76 @@ var serialize_doc = function(filename, fragment, doc) {
}
});
};
// Backslash-escapes every RegExp metacharacter in `s` so the result matches
// `s` literally when compiled with `new RegExp()`.
var escapeRegExp = function(s) {
  var metachars = /[.*+?^${}()|[\]\\]/g;
  return s.replace(metachars, '\\$&');
};
// Replaces the `<` of every `</parentTag` in `s` (matched case-insensitively)
// with `&lt;`. Returns `s` unchanged when its lowercased form does not
// contain the closing tag at all.
var escapeMatchingClosingTag = function(s, parentTag) {
  var closer = '</' + parentTag;
  var mentionsCloser = s.toLowerCase().includes(closer);

  if (mentionsCloser) {
    var closerPattern = new RegExp(escapeRegExp(closer), 'ig');
    return s.replace(closerPattern, function(hit) {
      return '&lt;' + hit.slice(1);
    });
  }
  return s;
};
// Returns the offset just past the end of a comment whose body begins at
// `index` (the position right after `<!--`), or -1 when it is never closed.
// A `>` or `->` sitting directly at `index` ends the comment immediately;
// otherwise the first `-->` or `--!>` at or after `index` does.
var findCommentEnd = function(s, index) {
  var head = s.charAt(index);
  if (head === '>') {
    return index + 1;
  }
  if (head === '-' && s.charAt(index + 1) === '>') {
    return index + 2;
  }

  var terminator = /--!?>/.exec(s.slice(index));
  if (!terminator) {
    return -1;
  }
  return index + terminator.index + terminator[0].length;
};
// Replaces the `<` of every `</parentTag` in `s` with `&lt;`, matching the
// tag name ASCII case-insensitively and preserving the original casing.
//
// The match is done on `s` itself with a case-insensitive RegExp instead of
// on a lowercased copy: toLowerCase() can change a string's length (U+0130
// lowercases to two code units), so offsets taken from the copy can land in
// the wrong place in `s` and let a real `</tag` through unescaped.
var escapeFallbackRawTextComment = function(s, parentTag) {
  var parentClosingTag = '</' + parentTag;
  // Escape RegExp metacharacters so the tag is matched literally.
  var pattern = new RegExp(
    parentClosingTag.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'),
    'ig');
  return s.replace(pattern, function(m) { return '&lt;' + m.slice(1); });
};
// Serializes text belonging to a NO_ESCAPE_FALLBACK element. Text outside
// comments goes through escape(); each `<!-- ... ` comment (up to the end
// reported by findCommentEnd(), or to the end of the text when it is never
// closed) is kept and only passed through escapeMatchingClosingTag().
var escapeFallbackRawText = function(s, parentTag) {
  var output = '';
  var cursor = 0;

  while (cursor < s.length) {
    var openerAt = s.indexOf('<!--', cursor);

    // Plain text extends to the next comment opener, or to the end.
    var textEnd = openerAt === -1 ? s.length : openerAt;
    output += escape(s.slice(cursor, textEnd));
    if (openerAt === -1) {
      break;
    }

    // An unterminated comment runs to the end of the text.
    var closerEnd = findCommentEnd(s, openerAt + 4);
    var commentEnd = closerEnd === -1 ? s.length : closerEnd;
    output += escapeMatchingClosingTag(s.slice(openerAt, commentEnd), parentTag);
    cursor = commentEnd;
  }

  return output;
};

while (doc.length > 0) {
var m = /^\| ((?: )*)(?:<([^!?>][^>]*)>|([^="\n][^=\n]*)="([^"]*)"|"((?:[^"]|"(?!\n))*)"|<!-- ((?:[^](?!-->))*) -->|<!DOCTYPE ([^>]*)>|<\?([^>]+)>|(content))\n/.exec(doc);
Expand Down Expand Up @@ -246,7 +331,9 @@ var serialize_doc = function(filename, fragment, doc) {
if (text !== escape(text) && !obj.no_escape) {
obj.escaped = props.escaped = true;
}
result += obj.no_escape ? text : escape(text);
result += obj.no_escape
? (NO_ESCAPE_FALLBACK[stack_top().tag] ? escapeFallbackRawText(text, stack_top().tag) : text)
: escape(text);
stack_top().children.push(obj);
continue;
}
Expand Down Expand Up @@ -331,6 +418,13 @@ var twiddle_test = function(filename, tc) {
/<rdar: 6869687="" problem="">/.test(expected)) {
expected = expected.replace(/(6869687=[^> ]+) (problem=[^> ]+)/g, '$2 $1');
}
if (filename==='webkit02' &&
tc.script==='on' &&
/<p id="status"><noscript><strong>A<\/strong><\/noscript><span>B<\/span><\/p>/.test(tc.data)) {
expected = expected.replace(
'<noscript>&lt;strong&gt;A&lt;/strong&gt;</noscript>',
'<noscript><strong>A</strong></noscript>');
}
tc.document.html = expected;
// Will this pass if parsed as a <body> fragment in no-quirks mode?
// This property is used by some third-party consumers of the parsed
Expand Down
Loading