@@ -266,69 +266,76 @@ private function __construct(
266266 /*
267267 * > The public identifier starts with…
268268 *
269- * @todo Optimize this matching. It shouldn't be a large overall performance issue,
270- * however, as only a single DOCTYPE declaration token should ever be parsed,
271- * and normative documents will have exited before reaching this condition.
269+ * @todo Maintain the list of legacy public-identifier prefixes in a single
270+ * location and consider a more efficient matching strategy (for example
271+ * a trie or compiled regex) only if profiling shows this to be a real
272+ * hotspot. In normal usage this code is rarely reached: only a single
273+ * DOCTYPE token is parsed, and documents with the common HTML5 DOCTYPE
274+ * will have exited in earlier checks before reaching this condition.
272275 */
273- if (
274- str_starts_with ( $ public_identifier , '+//silmaril//dtd html pro v0r11 19970101// ' ) ||
275- str_starts_with ( $ public_identifier , '-//as//dtd html 3.0 aswedit + extensions// ' ) ||
276- str_starts_with ( $ public_identifier , '-//advasoft ltd//dtd html 3.0 aswedit + extensions// ' ) ||
277- str_starts_with ( $ public_identifier , '-//ietf//dtd html 2.0 level 1// ' ) ||
278- str_starts_with ( $ public_identifier , '-//ietf//dtd html 2.0 level 2// ' ) ||
279- str_starts_with ( $ public_identifier , '-//ietf//dtd html 2.0 strict level 1// ' ) ||
280- str_starts_with ( $ public_identifier , '-//ietf//dtd html 2.0 strict level 2// ' ) ||
281- str_starts_with ( $ public_identifier , '-//ietf//dtd html 2.0 strict// ' ) ||
282- str_starts_with ( $ public_identifier , '-//ietf//dtd html 2.0// ' ) ||
283- str_starts_with ( $ public_identifier , '-//ietf//dtd html 2.1e// ' ) ||
284- str_starts_with ( $ public_identifier , '-//ietf//dtd html 3.0// ' ) ||
285- str_starts_with ( $ public_identifier , '-//ietf//dtd html 3.2 final// ' ) ||
286- str_starts_with ( $ public_identifier , '-//ietf//dtd html 3.2// ' ) ||
287- str_starts_with ( $ public_identifier , '-//ietf//dtd html 3// ' ) ||
288- str_starts_with ( $ public_identifier , '-//ietf//dtd html level 0// ' ) ||
289- str_starts_with ( $ public_identifier , '-//ietf//dtd html level 1// ' ) ||
290- str_starts_with ( $ public_identifier , '-//ietf//dtd html level 2// ' ) ||
291- str_starts_with ( $ public_identifier , '-//ietf//dtd html level 3// ' ) ||
292- str_starts_with ( $ public_identifier , '-//ietf//dtd html strict level 0// ' ) ||
293- str_starts_with ( $ public_identifier , '-//ietf//dtd html strict level 1// ' ) ||
294- str_starts_with ( $ public_identifier , '-//ietf//dtd html strict level 2// ' ) ||
295- str_starts_with ( $ public_identifier , '-//ietf//dtd html strict level 3// ' ) ||
296- str_starts_with ( $ public_identifier , '-//ietf//dtd html strict// ' ) ||
297- str_starts_with ( $ public_identifier , '-//ietf//dtd html// ' ) ||
298- str_starts_with ( $ public_identifier , '-//metrius//dtd metrius presentational// ' ) ||
299- str_starts_with ( $ public_identifier , '-//microsoft//dtd internet explorer 2.0 html strict// ' ) ||
300- str_starts_with ( $ public_identifier , '-//microsoft//dtd internet explorer 2.0 html// ' ) ||
301- str_starts_with ( $ public_identifier , '-//microsoft//dtd internet explorer 2.0 tables// ' ) ||
302- str_starts_with ( $ public_identifier , '-//microsoft//dtd internet explorer 3.0 html strict// ' ) ||
303- str_starts_with ( $ public_identifier , '-//microsoft//dtd internet explorer 3.0 html// ' ) ||
304- str_starts_with ( $ public_identifier , '-//microsoft//dtd internet explorer 3.0 tables// ' ) ||
305- str_starts_with ( $ public_identifier , '-//netscape comm. corp.//dtd html// ' ) ||
306- str_starts_with ( $ public_identifier , '-//netscape comm. corp.//dtd strict html// ' ) ||
307- str_starts_with ( $ public_identifier , "-//o'reilly and associates//dtd html 2.0// " ) ||
308- str_starts_with ( $ public_identifier , "-//o'reilly and associates//dtd html extended 1.0// " ) ||
309- str_starts_with ( $ public_identifier , "-//o'reilly and associates//dtd html extended relaxed 1.0// " ) ||
310- str_starts_with ( $ public_identifier , '-//sq//dtd html 2.0 hotmetal + extensions// ' ) ||
311- str_starts_with ( $ public_identifier , '-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0// ' ) ||
312- str_starts_with ( $ public_identifier , '-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0// ' ) ||
313- str_starts_with ( $ public_identifier , '-//spyglass//dtd html 2.0 extended// ' ) ||
314- str_starts_with ( $ public_identifier , '-//sun microsystems corp.//dtd hotjava html// ' ) ||
315- str_starts_with ( $ public_identifier , '-//sun microsystems corp.//dtd hotjava strict html// ' ) ||
316- str_starts_with ( $ public_identifier , '-//w3c//dtd html 3 1995-03-24// ' ) ||
317- str_starts_with ( $ public_identifier , '-//w3c//dtd html 3.2 draft// ' ) ||
318- str_starts_with ( $ public_identifier , '-//w3c//dtd html 3.2 final// ' ) ||
319- str_starts_with ( $ public_identifier , '-//w3c//dtd html 3.2// ' ) ||
320- str_starts_with ( $ public_identifier , '-//w3c//dtd html 3.2s draft// ' ) ||
321- str_starts_with ( $ public_identifier , '-//w3c//dtd html 4.0 frameset// ' ) ||
322- str_starts_with ( $ public_identifier , '-//w3c//dtd html 4.0 transitional// ' ) ||
323- str_starts_with ( $ public_identifier , '-//w3c//dtd html experimental 19960712// ' ) ||
324- str_starts_with ( $ public_identifier , '-//w3c//dtd html experimental 970421// ' ) ||
325- str_starts_with ( $ public_identifier , '-//w3c//dtd w3 html// ' ) ||
326- str_starts_with ( $ public_identifier , '-//w3o//dtd w3 html 3.0// ' ) ||
327- str_starts_with ( $ public_identifier , '-//webtechs//dtd mozilla html 2.0// ' ) ||
328- str_starts_with ( $ public_identifier , '-//webtechs//dtd mozilla html// ' )
329- ) {
330- $ this ->indicated_compatibility_mode = 'quirks ' ;
331- return ;
276+ $ quirks_prefixes = array (
277+ '+//silmaril//dtd html pro v0r11 19970101// ' ,
278+ '-//as//dtd html 3.0 aswedit + extensions// ' ,
279+ '-//advasoft ltd//dtd html 3.0 aswedit + extensions// ' ,
280+ '-//ietf//dtd html 2.0 level 1// ' ,
281+ '-//ietf//dtd html 2.0 level 2// ' ,
282+ '-//ietf//dtd html 2.0 strict level 1// ' ,
283+ '-//ietf//dtd html 2.0 strict level 2// ' ,
284+ '-//ietf//dtd html 2.0 strict// ' ,
285+ '-//ietf//dtd html 2.0// ' ,
286+ '-//ietf//dtd html 2.1e// ' ,
287+ '-//ietf//dtd html 3.0// ' ,
288+ '-//ietf//dtd html 3.2 final// ' ,
289+ '-//ietf//dtd html 3.2// ' ,
290+ '-//ietf//dtd html 3// ' ,
291+ '-//ietf//dtd html level 0// ' ,
292+ '-//ietf//dtd html level 1// ' ,
293+ '-//ietf//dtd html level 2// ' ,
294+ '-//ietf//dtd html level 3// ' ,
295+ '-//ietf//dtd html strict level 0// ' ,
296+ '-//ietf//dtd html strict level 1// ' ,
297+ '-//ietf//dtd html strict level 2// ' ,
298+ '-//ietf//dtd html strict level 3// ' ,
299+ '-//ietf//dtd html strict// ' ,
300+ '-//ietf//dtd html// ' ,
301+ '-//metrius//dtd metrius presentational// ' ,
302+ '-//microsoft//dtd internet explorer 2.0 html strict// ' ,
303+ '-//microsoft//dtd internet explorer 2.0 html// ' ,
304+ '-//microsoft//dtd internet explorer 2.0 tables// ' ,
305+ '-//microsoft//dtd internet explorer 3.0 html strict// ' ,
306+ '-//microsoft//dtd internet explorer 3.0 html// ' ,
307+ '-//microsoft//dtd internet explorer 3.0 tables// ' ,
308+ '-//netscape comm. corp.//dtd html// ' ,
309+ '-//netscape comm. corp.//dtd strict html// ' ,
310+ "-//o'reilly and associates//dtd html 2.0// " ,
311+ "-//o'reilly and associates//dtd html extended 1.0// " ,
312+ "-//o'reilly and associates//dtd html extended relaxed 1.0// " ,
313+ '-//sq//dtd html 2.0 hotmetal + extensions// ' ,
314+ '-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0// ' ,
315+ '-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0// ' ,
316+ '-//spyglass//dtd html 2.0 extended// ' ,
317+ '-//sun microsystems corp.//dtd hotjava html// ' ,
318+ '-//sun microsystems corp.//dtd hotjava strict html// ' ,
319+ '-//w3c//dtd html 3 1995-03-24// ' ,
320+ '-//w3c//dtd html 3.2 draft// ' ,
321+ '-//w3c//dtd html 3.2 final// ' ,
322+ '-//w3c//dtd html 3.2// ' ,
323+ '-//w3c//dtd html 3.2s draft// ' ,
324+ '-//w3c//dtd html 4.0 frameset// ' ,
325+ '-//w3c//dtd html 4.0 transitional// ' ,
326+ '-//w3c//dtd html experimental 19960712// ' ,
327+ '-//w3c//dtd html experimental 970421// ' ,
328+ '-//w3c//dtd w3 html// ' ,
329+ '-//w3o//dtd w3 html 3.0// ' ,
330+ '-//webtechs//dtd mozilla html 2.0// ' ,
331+ '-//webtechs//dtd mozilla html// ' ,
332+ );
333+
334+ foreach ( $ quirks_prefixes as $ prefix ) {
335+ if ( str_starts_with ( $ public_identifier , $ prefix ) ) {
336+ $ this ->indicated_compatibility_mode = 'quirks ' ;
337+ return ;
338+ }
332339 }
333340
334341 /*
0 commit comments