102102 * - Containers: ADDRESS, BLOCKQUOTE, DETAILS, DIALOG, DIV, FOOTER, HEADER, MAIN, MENU, SPAN, SUMMARY.
103103 * - Custom elements: All custom elements are supported. :)
104104 * - Form elements: BUTTON, DATALIST, FIELDSET, LABEL, LEGEND, METER, PROGRESS, SEARCH.
105- * - Formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U.
105+ * - Formatting elements: B, BIG, CODE, EM, FONT, I, PRE, SMALL, STRIKE, STRONG, TT, U, WBR.
106106 * - Heading elements: H1, H2, H3, H4, H5, H6, HGROUP.
107107 * - Links: A.
108108 * - Lists: DD, DL, DT, LI, OL, UL.
109- * - Media elements: AUDIO, CANVAS, FIGCAPTION, FIGURE, IMG, MAP, PICTURE, VIDEO.
110- * - Paragraph: P.
111- * - Phrasing elements: ABBR, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR.
112- * - Sectioning elements: ARTICLE, ASIDE, NAV, SECTION.
109+ * - Media elements: AUDIO, CANVAS, EMBED, FIGCAPTION, FIGURE, IMG, MAP, PARAM, PICTURE, SOURCE, TRACK, VIDEO.
110+ * - Paragraph: BR, P.
111+ * - Phrasing elements: ABBR, AREA, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR.
112+ * - Sectioning elements: ARTICLE, ASIDE, HR, NAV, SECTION.
113113 * - Templating elements: SLOT.
114114 * - Text decoration: RUBY.
115- * - Deprecated elements: ACRONYM, BLINK, CENTER, DIR, ISINDEX, MULTICOL, NEXTID, SPACER.
115+ * - Deprecated elements: ACRONYM, BLINK, CENTER, DIR, ISINDEX, KEYGEN, LISTING, MULTICOL, NEXTID, SPACER.
116116 *
117117 * ### Supported markup
118118 *
@@ -149,17 +149,6 @@ class Gutenberg_HTML_Processor_6_5 extends Gutenberg_HTML_Tag_Processor_6_5 {
149149 */
150150 const MAX_BOOKMARKS = 100 ;
151151
152- /**
153- * Static query for instructing the Tag Processor to visit every token.
154- *
155- * @access private
156- *
157- * @since 6.4.0
158- *
159- * @var array
160- */
161- const VISIT_EVERYTHING = array ( 'tag_closers ' => 'visit ' );
162-
163152 /**
164153 * Holds the working state of the parser, including the stack of
165154 * open elements and the stack of active formatting elements.
@@ -424,6 +413,30 @@ public function next_tag( $query = null ) {
424413 return false ;
425414 }
426415
416+ /**
417+ * Advances to the next token via the parent class and, when that token is
418+ * a tag, has step() reprocess it so HTML semantic rules are applied to it.
419+ *
420+ * This doesn't currently have a way to represent non-tags and doesn't process
421+ * semantic rules for text nodes. For access to the raw tokens consider using
422+ * WP_HTML_Tag_Processor instead.
423+ *
424+ * @since 6.5.0 Added for internal support; do not use.
425+ *
426+ * @access private
427+ *
428+ * @return bool Result of parent::next_token(); presumably true when a token was found.
429+ */
430+ public function next_token () {
431+ $ found_a_token = parent ::next_token ();
432+
433+ if ( '#tag ' === $ this ->get_token_type () ) {
434+ $ this ->step ( self ::REPROCESS_CURRENT_NODE );
435+ }
436+
437+ return $ found_a_token ;
438+ }
439+
427440 /**
428441 * Indicates if the currently-matched tag matches the given breadcrumbs.
429442 *
@@ -520,7 +533,9 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
520533 $ this ->state ->stack_of_open_elements ->pop ();
521534 }
522535
523- parent ::next_tag ( self ::VISIT_EVERYTHING );
536+ while ( parent ::next_token () && '#tag ' !== $ this ->get_token_type () ) {
537+ continue ;
538+ }
524539 }
525540
526541 // Finish stepping when there are no more tokens in the document.
@@ -684,10 +699,12 @@ private function step_in_body() {
684699 case '-FOOTER ' :
685700 case '-HEADER ' :
686701 case '-HGROUP ' :
702+ case '-LISTING ' :
687703 case '-MAIN ' :
688704 case '-MENU ' :
689705 case '-NAV ' :
690706 case '-OL ' :
707+ case '-PRE ' :
691708 case '-SEARCH ' :
692709 case '-SECTION ' :
693710 case '-SUMMARY ' :
@@ -732,6 +749,18 @@ private function step_in_body() {
732749 $ this ->insert_html_element ( $ this ->state ->current_token );
733750 return true ;
734751
752+ /*
753+ * > A start tag whose tag name is one of: "pre", "listing"
754+ */
755+ case '+PRE ' :
756+ case '+LISTING ' :
757+ if ( $ this ->state ->stack_of_open_elements ->has_p_in_button_scope () ) {
758+ $ this ->close_a_p_element ();
759+ }
760+ $ this ->insert_html_element ( $ this ->state ->current_token );
761+ $ this ->state ->frameset_ok = false ;
762+ return true ;
763+
735764 /*
736765 * > An end tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6"
737766 */
@@ -934,11 +963,47 @@ private function step_in_body() {
934963 $ this ->run_adoption_agency_algorithm ();
935964 return true ;
936965
966+ /*
967+ * > An end tag whose tag name is "br"
968+ * > Parse error. Drop the attributes from the token, and act as described in the next
969+ * > entry; i.e. act as if this was a "br" start tag token with no attributes, rather
970+ * > than the end tag token that it actually is.
971+ */
972+ case '-BR ' :
973+ $ this ->last_error = self ::ERROR_UNSUPPORTED ;
974+ throw new WP_HTML_Unsupported_Exception ( 'Closing BR tags require unimplemented special handling. ' );
975+
937976 /*
938977 * > A start tag whose tag name is one of: "area", "br", "embed", "img", "keygen", "wbr"
939978 */
979+ case '+AREA ' :
980+ case '+BR ' :
981+ case '+EMBED ' :
940982 case '+IMG ' :
983+ case '+KEYGEN ' :
984+ case '+WBR ' :
941985 $ this ->reconstruct_active_formatting_elements ();
986+ $ this ->insert_html_element ( $ this ->state ->current_token );
987+ $ this ->state ->frameset_ok = false ;
988+ return true ;
989+
990+ /*
991+ * > A start tag whose tag name is "hr"
992+ */
993+ case '+HR ' :
994+ if ( $ this ->state ->stack_of_open_elements ->has_p_in_button_scope () ) {
995+ $ this ->close_a_p_element ();
996+ }
997+ $ this ->insert_html_element ( $ this ->state ->current_token );
998+ $ this ->state ->frameset_ok = false ;
999+ return true ;
1000+
1001+ /*
1002+ * > A start tag whose tag name is one of: "param", "source", "track"
1003+ */
1004+ case '+PARAM ' :
1005+ case '+SOURCE ' :
1006+ case '+TRACK ' :
9421007 $ this ->insert_html_element ( $ this ->state ->current_token );
9431008 return true ;
9441009 }
@@ -961,30 +1026,21 @@ private function step_in_body() {
9611026 */
9621027 switch ( $ tag_name ) {
9631028 case 'APPLET ' :
964- case 'AREA ' :
9651029 case 'BASE ' :
9661030 case 'BASEFONT ' :
9671031 case 'BGSOUND ' :
9681032 case 'BODY ' :
969- case 'BR ' :
9701033 case 'CAPTION ' :
9711034 case 'COL ' :
9721035 case 'COLGROUP ' :
973- case 'DD ' :
974- case 'DT ' :
975- case 'EMBED ' :
9761036 case 'FORM ' :
9771037 case 'FRAME ' :
9781038 case 'FRAMESET ' :
9791039 case 'HEAD ' :
980- case 'HR ' :
9811040 case 'HTML ' :
9821041 case 'IFRAME ' :
9831042 case 'INPUT ' :
984- case 'KEYGEN ' :
985- case 'LI ' :
9861043 case 'LINK ' :
987- case 'LISTING ' :
9881044 case 'MARQUEE ' :
9891045 case 'MATH ' :
9901046 case 'META ' :
@@ -993,20 +1049,16 @@ private function step_in_body() {
9931049 case 'NOFRAMES ' :
9941050 case 'NOSCRIPT ' :
9951051 case 'OBJECT ' :
996- case 'OL ' :
9971052 case 'OPTGROUP ' :
9981053 case 'OPTION ' :
999- case 'PARAM ' :
10001054 case 'PLAINTEXT ' :
1001- case 'PRE ' :
10021055 case 'RB ' :
10031056 case 'RP ' :
10041057 case 'RT ' :
10051058 case 'RTC ' :
10061059 case 'SARCASM ' :
10071060 case 'SCRIPT ' :
10081061 case 'SELECT ' :
1009- case 'SOURCE ' :
10101062 case 'STYLE ' :
10111063 case 'SVG ' :
10121064 case 'TABLE ' :
@@ -1019,9 +1071,6 @@ private function step_in_body() {
10191071 case 'THEAD ' :
10201072 case 'TITLE ' :
10211073 case 'TR ' :
1022- case 'TRACK ' :
1023- case 'UL ' :
1024- case 'WBR ' :
10251074 case 'XMP ' :
10261075 $ this ->last_error = self ::ERROR_UNSUPPORTED ;
10271076 throw new WP_HTML_Unsupported_Exception ( "Cannot process {$ tag_name } element. " );
@@ -1675,14 +1724,19 @@ public static function is_void( $tag_name ) {
16751724 return (
16761725 'AREA ' === $ tag_name ||
16771726 'BASE ' === $ tag_name ||
1727+ 'BASEFONT ' === $ tag_name || // Obsolete but still treated as void.
1728+ 'BGSOUND ' === $ tag_name || // Obsolete but still treated as void.
16781729 'BR ' === $ tag_name ||
16791730 'COL ' === $ tag_name ||
16801731 'EMBED ' === $ tag_name ||
1732+ 'FRAME ' === $ tag_name ||
16811733 'HR ' === $ tag_name ||
16821734 'IMG ' === $ tag_name ||
16831735 'INPUT ' === $ tag_name ||
1736+ 'KEYGEN ' === $ tag_name || // Obsolete but still treated as void.
16841737 'LINK ' === $ tag_name ||
16851738 'META ' === $ tag_name ||
1739+ 'PARAM ' === $ tag_name || // Obsolete but still treated as void.
16861740 'SOURCE ' === $ tag_name ||
16871741 'TRACK ' === $ tag_name ||
16881742 'WBR ' === $ tag_name
0 commit comments