101101 *
102102 * - Containers: ADDRESS, BLOCKQUOTE, DETAILS, DIALOG, DIV, FOOTER, HEADER, MAIN, MENU, SPAN, SUMMARY.
103103 * - Custom elements: All custom elements are supported. :)
104- * - Form elements: BUTTON, DATALIST, FIELDSET, LABEL, LEGEND, METER, PROGRESS, SEARCH.
105- * - Formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U.
104+ * - Form elements: BUTTON, DATALIST, FIELDSET, INPUT, LABEL, LEGEND, METER, PROGRESS, SEARCH.
105+ * - Formatting elements: B, BIG, CODE, EM, FONT, I, PRE, SMALL, STRIKE, STRONG, TT, U, WBR.
106106 * - Heading elements: H1, H2, H3, H4, H5, H6, HGROUP.
107107 * - Links: A.
108- * - Lists: DD, DL, DT, LI, OL, LI .
109- * - Media elements: AUDIO, CANVAS, FIGCAPTION, FIGURE, IMG, MAP, PICTURE, VIDEO.
110- * - Paragraph: P.
111- * - Phrasing elements: ABBR, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR.
112- * - Sectioning elements: ARTICLE, ASIDE, NAV, SECTION.
108+ * - Lists: DD, DL, DT, LI, OL, UL.
109+ * - Media elements: AUDIO, CANVAS, EMBED, FIGCAPTION, FIGURE, IMG, MAP, PICTURE, SOURCE, TRACK, VIDEO.
110+ * - Paragraph: BR, P.
111+ * - Phrasing elements: ABBR, AREA, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR.
112+ * - Sectioning elements: ARTICLE, ASIDE, HR, NAV, SECTION.
113113 * - Templating elements: SLOT.
114114 * - Text decoration: RUBY.
115- * - Deprecated elements: ACRONYM, BLINK, CENTER, DIR, ISINDEX, MULTICOL, NEXTID, SPACER.
115+ * - Deprecated elements: ACRONYM, BLINK, CENTER, DIR, ISINDEX, KEYGEN, LISTING, MULTICOL, NEXTID, PARAM, SPACER.
116116 *
117117 * ### Supported markup
118118 *
@@ -149,17 +149,6 @@ class Gutenberg_HTML_Processor_6_5 extends Gutenberg_HTML_Tag_Processor_6_5 {
149149 */
150150 const MAX_BOOKMARKS = 100 ;
151151
152- /**
153- * Static query for instructing the Tag Processor to visit every token.
154- *
155- * @access private
156- *
157- * @since 6.4.0
158- *
159- * @var array
160- */
161- const VISIT_EVERYTHING = array ( 'tag_closers ' => 'visit ' );
162-
163152 /**
164153 * Holds the working state of the parser, including the stack of
165154 * open elements and the stack of active formatting elements.
@@ -424,6 +413,30 @@ public function next_tag( $query = null ) {
424413 return false ;
425414 }
426415
416+ /**
417+ * Ensures internal accounting is maintained for HTML semantic rules while
418+ * the underlying Tag Processor class is seeking to a bookmark.
419+ *
420+ * This doesn't currently have a way to represent non-tags and doesn't process
421+ * semantic rules for text nodes. For access to the raw tokens consider using
422+ * WP_HTML_Tag_Processor instead.
423+ *
424+ * @since 6.5.0 Added for internal support; do not use.
425+ *
426+ * @access private
427+ *
428+ * @return bool The value returned by the parent class's next_token() call.
429+ */
430+ public function next_token () {
431+ $ found_a_token = parent ::next_token (); // Delegate token scanning to the parent class.
432+
433+ if ( '#tag ' === $ this ->get_token_type () ) { // Only tag tokens are run through step(); other token types are skipped.
434+ $ this ->step ( self ::PROCESS_CURRENT_NODE ); // Process the token already matched, without advancing again; step()'s return value is not used.
435+ }
436+
437+ return $ found_a_token ;
438+ }
439+
427440 /**
428441 * Indicates if the currently-matched tag matches the given breadcrumbs.
429442 *
@@ -500,7 +513,7 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
500513 return false ;
501514 }
502515
503- if ( self ::PROCESS_NEXT_NODE = == $ node_to_process ) {
516+ if ( self ::REPROCESS_CURRENT_NODE ! == $ node_to_process ) {
504517 /*
505518 * Void elements still hop onto the stack of open elements even though
506519 * there's no corresponding closing tag. This is important for managing
@@ -519,8 +532,12 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
519532 if ( $ top_node && self ::is_void ( $ top_node ->node_name ) ) {
520533 $ this ->state ->stack_of_open_elements ->pop ();
521534 }
535+ }
522536
523- parent ::next_tag ( self ::VISIT_EVERYTHING );
537+ if ( self ::PROCESS_NEXT_NODE === $ node_to_process ) {
538+ while ( parent ::next_token () && '#tag ' !== $ this ->get_token_type () ) {
539+ continue ;
540+ }
524541 }
525542
526543 // Finish stepping when there are no more tokens in the document.
@@ -531,7 +548,7 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
531548 $ this ->state ->current_token = new WP_HTML_Token (
532549 $ this ->bookmark_tag (),
533550 $ this ->get_tag (),
534- $ this ->is_tag_closer (),
551+ $ this ->has_self_closing_flag (),
535552 $ this ->release_internal_bookmark_on_destruct
536553 );
537554
@@ -684,10 +701,12 @@ private function step_in_body() {
684701 case '-FOOTER ' :
685702 case '-HEADER ' :
686703 case '-HGROUP ' :
704+ case '-LISTING ' :
687705 case '-MAIN ' :
688706 case '-MENU ' :
689707 case '-NAV ' :
690708 case '-OL ' :
709+ case '-PRE ' :
691710 case '-SEARCH ' :
692711 case '-SECTION ' :
693712 case '-SUMMARY ' :
@@ -732,6 +751,18 @@ private function step_in_body() {
732751 $ this ->insert_html_element ( $ this ->state ->current_token );
733752 return true ;
734753
754+ /*
755+ * > A start tag whose tag name is one of: "pre", "listing"
756+ */
757+ case '+PRE ' :
758+ case '+LISTING ' :
759+ if ( $ this ->state ->stack_of_open_elements ->has_p_in_button_scope () ) {
760+ $ this ->close_a_p_element ();
761+ }
762+ $ this ->insert_html_element ( $ this ->state ->current_token );
763+ $ this ->state ->frameset_ok = false ;
764+ return true ;
765+
735766 /*
736767 * > An end tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6"
737768 */
@@ -934,11 +965,64 @@ private function step_in_body() {
934965 $ this ->run_adoption_agency_algorithm ();
935966 return true ;
936967
968+ /*
969+ * > An end tag whose tag name is "br"
970+ * > Parse error. Drop the attributes from the token, and act as described in the next
971+ * > entry; i.e. act as if this was a "br" start tag token with no attributes, rather
972+ * > than the end tag token that it actually is.
973+ */
974+ case '-BR ' :
975+ $ this ->last_error = self ::ERROR_UNSUPPORTED ;
976+ throw new WP_HTML_Unsupported_Exception ( 'Closing BR tags require unimplemented special handling. ' );
977+
937978 /*
938979 * > A start tag whose tag name is one of: "area", "br", "embed", "img", "keygen", "wbr"
939980 */
981+ case '+AREA ' :
982+ case '+BR ' :
983+ case '+EMBED ' :
940984 case '+IMG ' :
985+ case '+KEYGEN ' :
986+ case '+WBR ' :
941987 $ this ->reconstruct_active_formatting_elements ();
988+ $ this ->insert_html_element ( $ this ->state ->current_token );
989+ $ this ->state ->frameset_ok = false ;
990+ return true ;
991+
992+ /*
993+ * > A start tag whose tag name is "input"
994+ */
995+ case '+INPUT ' :
996+ $ this ->reconstruct_active_formatting_elements ();
997+ $ this ->insert_html_element ( $ this ->state ->current_token );
998+ $ type_attribute = $ this ->get_attribute ( 'type ' );
999+ /*
1000+ * > If the token does not have an attribute with the name "type", or if it does,
1001+ * > but that attribute's value is not an ASCII case-insensitive match for the
1002+ * > string "hidden", then: set the frameset-ok flag to "not ok".
1003+ */
1004+ if ( ! is_string ( $ type_attribute ) || 'hidden ' !== strtolower ( $ type_attribute ) ) {
1005+ $ this ->state ->frameset_ok = false ;
1006+ }
1007+ return true ;
1008+
1009+ /*
1010+ * > A start tag whose tag name is "hr"
1011+ */
1012+ case '+HR ' :
1013+ if ( $ this ->state ->stack_of_open_elements ->has_p_in_button_scope () ) {
1014+ $ this ->close_a_p_element ();
1015+ }
1016+ $ this ->insert_html_element ( $ this ->state ->current_token );
1017+ $ this ->state ->frameset_ok = false ;
1018+ return true ;
1019+
1020+ /*
1021+ * > A start tag whose tag name is one of: "param", "source", "track"
1022+ */
1023+ case '+PARAM ' :
1024+ case '+SOURCE ' :
1025+ case '+TRACK ' :
9421026 $ this ->insert_html_element ( $ this ->state ->current_token );
9431027 return true ;
9441028 }
@@ -961,30 +1045,20 @@ private function step_in_body() {
9611045 */
9621046 switch ( $ tag_name ) {
9631047 case 'APPLET ' :
964- case 'AREA ' :
9651048 case 'BASE ' :
9661049 case 'BASEFONT ' :
9671050 case 'BGSOUND ' :
9681051 case 'BODY ' :
969- case 'BR ' :
9701052 case 'CAPTION ' :
9711053 case 'COL ' :
9721054 case 'COLGROUP ' :
973- case 'DD ' :
974- case 'DT ' :
975- case 'EMBED ' :
9761055 case 'FORM ' :
9771056 case 'FRAME ' :
9781057 case 'FRAMESET ' :
9791058 case 'HEAD ' :
980- case 'HR ' :
9811059 case 'HTML ' :
9821060 case 'IFRAME ' :
983- case 'INPUT ' :
984- case 'KEYGEN ' :
985- case 'LI ' :
9861061 case 'LINK ' :
987- case 'LISTING ' :
9881062 case 'MARQUEE ' :
9891063 case 'MATH ' :
9901064 case 'META ' :
@@ -993,20 +1067,16 @@ private function step_in_body() {
9931067 case 'NOFRAMES ' :
9941068 case 'NOSCRIPT ' :
9951069 case 'OBJECT ' :
996- case 'OL ' :
9971070 case 'OPTGROUP ' :
9981071 case 'OPTION ' :
999- case 'PARAM ' :
10001072 case 'PLAINTEXT ' :
1001- case 'PRE ' :
10021073 case 'RB ' :
10031074 case 'RP ' :
10041075 case 'RT ' :
10051076 case 'RTC ' :
10061077 case 'SARCASM ' :
10071078 case 'SCRIPT ' :
10081079 case 'SELECT ' :
1009- case 'SOURCE ' :
10101080 case 'STYLE ' :
10111081 case 'SVG ' :
10121082 case 'TABLE ' :
@@ -1019,9 +1089,6 @@ private function step_in_body() {
10191089 case 'THEAD ' :
10201090 case 'TITLE ' :
10211091 case 'TR ' :
1022- case 'TRACK ' :
1023- case 'UL ' :
1024- case 'WBR ' :
10251092 case 'XMP ' :
10261093 $ this ->last_error = self ::ERROR_UNSUPPORTED ;
10271094 throw new WP_HTML_Unsupported_Exception ( "Cannot process {$ tag_name } element. " );
@@ -1675,14 +1742,19 @@ public static function is_void( $tag_name ) {
16751742 return (
16761743 'AREA ' === $ tag_name ||
16771744 'BASE ' === $ tag_name ||
1745+ 'BASEFONT ' === $ tag_name || // Obsolete but still treated as void.
1746+ 'BGSOUND ' === $ tag_name || // Obsolete but still treated as void.
16781747 'BR ' === $ tag_name ||
16791748 'COL ' === $ tag_name ||
16801749 'EMBED ' === $ tag_name ||
1750+ 'FRAME ' === $ tag_name ||
16811751 'HR ' === $ tag_name ||
16821752 'IMG ' === $ tag_name ||
16831753 'INPUT ' === $ tag_name ||
1754+ 'KEYGEN ' === $ tag_name || // Obsolete but still treated as void.
16841755 'LINK ' === $ tag_name ||
16851756 'META ' === $ tag_name ||
1757+ 'PARAM ' === $ tag_name || // Obsolete but still treated as void.
16861758 'SOURCE ' === $ tag_name ||
16871759 'TRACK ' === $ tag_name ||
16881760 'WBR ' === $ tag_name
@@ -1711,6 +1783,15 @@ public static function is_void( $tag_name ) {
17111783 */
17121784 const REPROCESS_CURRENT_NODE = 'reprocess-current-node ' ;
17131785
1786+ /**
1787+ * Indicates that the current HTML token should be processed without advancing the parser.
1788+ *
1789+ * @since 6.5.0
1790+ *
1791+ * @var string Mode value passed to step(); next_token() supplies it for '#tag' tokens.
1792+ */
1793+ const PROCESS_CURRENT_NODE = 'process-current-node ' ;
1794+
17141795 /**
17151796 * Indicates that the parser encountered unsupported markup and has bailed.
17161797 *
0 commit comments