Skip to content

Commit 8995b66

Browse files
dmsnell, SergeyBiryukov, sirreal
committed
HTML API: Backport updates from Core
Updates from WordPress/wordpress-develop:
- From: WordPress/wordpress-develop@54a09a7
- To: WordPress/wordpress-develop@7a71339
- Coding style changes.
- WordPress/wordpress-develop#5762 Adds support for the "any other tag" sections in the HTML Processor.
- WordPress/wordpress-develop#5539 Adds support for list elements in the HTML Processor.
- WordPress/wordpress-develop#5897 Adds support for HR elements in the HTML Processor.
- WordPress/wordpress-develop#5895 Adds support for the AREA, BR, EMBED, KEYGEN, and WBR elements in the HTML Processor.
- WordPress/wordpress-develop#5903 Adds support for the PRE and LISTING elements in the HTML Processor.
- WordPress/wordpress-develop#5913 Updates "all other tags" support in HTML Processor and updates list of void elements.
- WordPress/wordpress-develop#5906 Adds support for the PARAM, SOURCE, and TRACK elements in the HTML Processor.
- WordPress/wordpress-develop#5907 Adds support for the INPUT element in the HTML Processor.
- WordPress/wordpress-develop#5683 Provides mechanism to scan all tokens in an HTML document in the Tag Processor.
- WordPress/wordpress-develop#5976 Avoids splitting text nodes on "<" character.
- WordPress/wordpress-develop#5992 Only recognize true CDATA-lookalike nodes.
- WordPress/wordpress-develop#5975 Prevent void tag nesting when calling `next_token()`.
- WordPress/wordpress-develop#6021 Reset parser state after seeking.
- https://core.trac.wordpress.org/changeset/57528 Fix typo in setting token flag.
- WordPress/wordpress-develop#6041 Ensure consecutive text is all joined into one text node.

The PHP files in the compatibility layer are merged and maintained in the Core repo, and all changes or updates need to happen first in Core and then be brought over to Gutenberg as built files.

Co-authored-by: sergeybiryukov <[email protected]>
Co-authored-by: sirreal <[email protected]>
Co-authored-by: dmsnell <[email protected]>
1 parent 1c26a17 commit 8995b66

2 files changed

Lines changed: 946 additions & 163 deletions

File tree

lib/compat/wordpress-6.5/html-api/class-gutenberg-html-processor-6-5.php

Lines changed: 120 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -101,18 +101,18 @@
101101
*
102102
* - Containers: ADDRESS, BLOCKQUOTE, DETAILS, DIALOG, DIV, FOOTER, HEADER, MAIN, MENU, SPAN, SUMMARY.
103103
* - Custom elements: All custom elements are supported. :)
104-
* - Form elements: BUTTON, DATALIST, FIELDSET, LABEL, LEGEND, METER, PROGRESS, SEARCH.
105-
* - Formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U.
104+
* - Form elements: BUTTON, DATALIST, FIELDSET, INPUT, LABEL, LEGEND, METER, PROGRESS, SEARCH.
105+
* - Formatting elements: B, BIG, CODE, EM, FONT, I, PRE, SMALL, STRIKE, STRONG, TT, U, WBR.
106106
* - Heading elements: H1, H2, H3, H4, H5, H6, HGROUP.
107107
* - Links: A.
108-
* - Lists: DD, DL, DT, LI, OL, LI.
109-
* - Media elements: AUDIO, CANVAS, FIGCAPTION, FIGURE, IMG, MAP, PICTURE, VIDEO.
110-
* - Paragraph: P.
111-
* - Phrasing elements: ABBR, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR.
112-
* - Sectioning elements: ARTICLE, ASIDE, NAV, SECTION.
108+
* - Lists: DD, DL, DT, LI, OL, UL.
109+
* - Media elements: AUDIO, CANVAS, EMBED, FIGCAPTION, FIGURE, IMG, MAP, PICTURE, SOURCE, TRACK, VIDEO.
110+
* - Paragraph: BR, P.
111+
* - Phrasing elements: ABBR, AREA, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR.
112+
* - Sectioning elements: ARTICLE, ASIDE, HR, NAV, SECTION.
113113
* - Templating elements: SLOT.
114114
* - Text decoration: RUBY.
115-
* - Deprecated elements: ACRONYM, BLINK, CENTER, DIR, ISINDEX, MULTICOL, NEXTID, SPACER.
115+
* - Deprecated elements: ACRONYM, BLINK, CENTER, DIR, ISINDEX, KEYGEN, LISTING, MULTICOL, NEXTID, PARAM, SPACER.
116116
*
117117
* ### Supported markup
118118
*
@@ -149,17 +149,6 @@ class Gutenberg_HTML_Processor_6_5 extends Gutenberg_HTML_Tag_Processor_6_5 {
149149
*/
150150
const MAX_BOOKMARKS = 100;
151151

152-
/**
153-
* Static query for instructing the Tag Processor to visit every token.
154-
*
155-
* @access private
156-
*
157-
* @since 6.4.0
158-
*
159-
* @var array
160-
*/
161-
const VISIT_EVERYTHING = array( 'tag_closers' => 'visit' );
162-
163152
/**
164153
* Holds the working state of the parser, including the stack of
165154
* open elements and the stack of active formatting elements.
@@ -424,6 +413,30 @@ public function next_tag( $query = null ) {
424413
return false;
425414
}
426415

416+
/**
417+
* Ensures internal accounting is maintained for HTML semantic rules while
418+
* the underlying Tag Processor class is seeking to a bookmark.
419+
*
420+
* This doesn't currently have a way to represent non-tags and doesn't process
421+
* semantic rules for text nodes. For access to the raw tokens consider using
422+
* WP_HTML_Tag_Processor instead.
423+
*
424+
* @since 6.5.0 Added for internal support; do not use.
425+
*
426+
* @access private
427+
*
428+
* @return bool
429+
*/
430+
public function next_token() {
431+
$found_a_token = parent::next_token();
432+
433+
if ( '#tag' === $this->get_token_type() ) {
434+
$this->step( self::PROCESS_CURRENT_NODE );
435+
}
436+
437+
return $found_a_token;
438+
}
439+
427440
/**
428441
* Indicates if the currently-matched tag matches the given breadcrumbs.
429442
*
@@ -500,7 +513,7 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
500513
return false;
501514
}
502515

503-
if ( self::PROCESS_NEXT_NODE === $node_to_process ) {
516+
if ( self::REPROCESS_CURRENT_NODE !== $node_to_process ) {
504517
/*
505518
* Void elements still hop onto the stack of open elements even though
506519
* there's no corresponding closing tag. This is important for managing
@@ -519,8 +532,12 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
519532
if ( $top_node && self::is_void( $top_node->node_name ) ) {
520533
$this->state->stack_of_open_elements->pop();
521534
}
535+
}
522536

523-
parent::next_tag( self::VISIT_EVERYTHING );
537+
if ( self::PROCESS_NEXT_NODE === $node_to_process ) {
538+
while ( parent::next_token() && '#tag' !== $this->get_token_type() ) {
539+
continue;
540+
}
524541
}
525542

526543
// Finish stepping when there are no more tokens in the document.
@@ -531,7 +548,7 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
531548
$this->state->current_token = new WP_HTML_Token(
532549
$this->bookmark_tag(),
533550
$this->get_tag(),
534-
$this->is_tag_closer(),
551+
$this->has_self_closing_flag(),
535552
$this->release_internal_bookmark_on_destruct
536553
);
537554

@@ -684,10 +701,12 @@ private function step_in_body() {
684701
case '-FOOTER':
685702
case '-HEADER':
686703
case '-HGROUP':
704+
case '-LISTING':
687705
case '-MAIN':
688706
case '-MENU':
689707
case '-NAV':
690708
case '-OL':
709+
case '-PRE':
691710
case '-SEARCH':
692711
case '-SECTION':
693712
case '-SUMMARY':
@@ -732,6 +751,18 @@ private function step_in_body() {
732751
$this->insert_html_element( $this->state->current_token );
733752
return true;
734753

754+
/*
755+
* > A start tag whose tag name is one of: "pre", "listing"
756+
*/
757+
case '+PRE':
758+
case '+LISTING':
759+
if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
760+
$this->close_a_p_element();
761+
}
762+
$this->insert_html_element( $this->state->current_token );
763+
$this->state->frameset_ok = false;
764+
return true;
765+
735766
/*
736767
* > An end tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6"
737768
*/
@@ -934,11 +965,64 @@ private function step_in_body() {
934965
$this->run_adoption_agency_algorithm();
935966
return true;
936967

968+
/*
969+
* > An end tag whose tag name is "br"
970+
* > Parse error. Drop the attributes from the token, and act as described in the next
971+
* > entry; i.e. act as if this was a "br" start tag token with no attributes, rather
972+
* > than the end tag token that it actually is.
973+
*/
974+
case '-BR':
975+
$this->last_error = self::ERROR_UNSUPPORTED;
976+
throw new WP_HTML_Unsupported_Exception( 'Closing BR tags require unimplemented special handling.' );
977+
937978
/*
938979
* > A start tag whose tag name is one of: "area", "br", "embed", "img", "keygen", "wbr"
939980
*/
981+
case '+AREA':
982+
case '+BR':
983+
case '+EMBED':
940984
case '+IMG':
985+
case '+KEYGEN':
986+
case '+WBR':
941987
$this->reconstruct_active_formatting_elements();
988+
$this->insert_html_element( $this->state->current_token );
989+
$this->state->frameset_ok = false;
990+
return true;
991+
992+
/*
993+
* > A start tag whose tag name is "input"
994+
*/
995+
case '+INPUT':
996+
$this->reconstruct_active_formatting_elements();
997+
$this->insert_html_element( $this->state->current_token );
998+
$type_attribute = $this->get_attribute( 'type' );
999+
/*
1000+
* > If the token does not have an attribute with the name "type", or if it does,
1001+
* > but that attribute's value is not an ASCII case-insensitive match for the
1002+
* > string "hidden", then: set the frameset-ok flag to "not ok".
1003+
*/
1004+
if ( ! is_string( $type_attribute ) || 'hidden' !== strtolower( $type_attribute ) ) {
1005+
$this->state->frameset_ok = false;
1006+
}
1007+
return true;
1008+
1009+
/*
1010+
* > A start tag whose tag name is "hr"
1011+
*/
1012+
case '+HR':
1013+
if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
1014+
$this->close_a_p_element();
1015+
}
1016+
$this->insert_html_element( $this->state->current_token );
1017+
$this->state->frameset_ok = false;
1018+
return true;
1019+
1020+
/*
1021+
* > A start tag whose tag name is one of: "param", "source", "track"
1022+
*/
1023+
case '+PARAM':
1024+
case '+SOURCE':
1025+
case '+TRACK':
9421026
$this->insert_html_element( $this->state->current_token );
9431027
return true;
9441028
}
@@ -961,30 +1045,20 @@ private function step_in_body() {
9611045
*/
9621046
switch ( $tag_name ) {
9631047
case 'APPLET':
964-
case 'AREA':
9651048
case 'BASE':
9661049
case 'BASEFONT':
9671050
case 'BGSOUND':
9681051
case 'BODY':
969-
case 'BR':
9701052
case 'CAPTION':
9711053
case 'COL':
9721054
case 'COLGROUP':
973-
case 'DD':
974-
case 'DT':
975-
case 'EMBED':
9761055
case 'FORM':
9771056
case 'FRAME':
9781057
case 'FRAMESET':
9791058
case 'HEAD':
980-
case 'HR':
9811059
case 'HTML':
9821060
case 'IFRAME':
983-
case 'INPUT':
984-
case 'KEYGEN':
985-
case 'LI':
9861061
case 'LINK':
987-
case 'LISTING':
9881062
case 'MARQUEE':
9891063
case 'MATH':
9901064
case 'META':
@@ -993,20 +1067,16 @@ private function step_in_body() {
9931067
case 'NOFRAMES':
9941068
case 'NOSCRIPT':
9951069
case 'OBJECT':
996-
case 'OL':
9971070
case 'OPTGROUP':
9981071
case 'OPTION':
999-
case 'PARAM':
10001072
case 'PLAINTEXT':
1001-
case 'PRE':
10021073
case 'RB':
10031074
case 'RP':
10041075
case 'RT':
10051076
case 'RTC':
10061077
case 'SARCASM':
10071078
case 'SCRIPT':
10081079
case 'SELECT':
1009-
case 'SOURCE':
10101080
case 'STYLE':
10111081
case 'SVG':
10121082
case 'TABLE':
@@ -1019,9 +1089,6 @@ private function step_in_body() {
10191089
case 'THEAD':
10201090
case 'TITLE':
10211091
case 'TR':
1022-
case 'TRACK':
1023-
case 'UL':
1024-
case 'WBR':
10251092
case 'XMP':
10261093
$this->last_error = self::ERROR_UNSUPPORTED;
10271094
throw new WP_HTML_Unsupported_Exception( "Cannot process {$tag_name} element." );
@@ -1675,14 +1742,19 @@ public static function is_void( $tag_name ) {
16751742
return (
16761743
'AREA' === $tag_name ||
16771744
'BASE' === $tag_name ||
1745+
'BASEFONT' === $tag_name || // Obsolete but still treated as void.
1746+
'BGSOUND' === $tag_name || // Obsolete but still treated as void.
16781747
'BR' === $tag_name ||
16791748
'COL' === $tag_name ||
16801749
'EMBED' === $tag_name ||
1750+
'FRAME' === $tag_name ||
16811751
'HR' === $tag_name ||
16821752
'IMG' === $tag_name ||
16831753
'INPUT' === $tag_name ||
1754+
'KEYGEN' === $tag_name || // Obsolete but still treated as void.
16841755
'LINK' === $tag_name ||
16851756
'META' === $tag_name ||
1757+
'PARAM' === $tag_name || // Obsolete but still treated as void.
16861758
'SOURCE' === $tag_name ||
16871759
'TRACK' === $tag_name ||
16881760
'WBR' === $tag_name
@@ -1711,6 +1783,15 @@ public static function is_void( $tag_name ) {
17111783
*/
17121784
const REPROCESS_CURRENT_NODE = 'reprocess-current-node';
17131785

1786+
/**
1787+
* Indicates that the current HTML token should be processed without advancing the parser.
1788+
*
1789+
* @since 6.5.0
1790+
*
1791+
* @var string
1792+
*/
1793+
const PROCESS_CURRENT_NODE = 'process-current-node';
1794+
17141795
/**
17151796
* Indicates that the parser encountered unsupported markup and has bailed.
17161797
*

0 commit comments

Comments
 (0)