Skip to content

Commit cbb2ad4

Browse files
committed
Rename token functions and update comments.
1 parent b712690 commit cbb2ad4

2 files changed

Lines changed: 164 additions & 15 deletions

File tree

src/wp-includes/html-api/class-wp-html-processor.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -505,7 +505,7 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
505505
$this->state->stack_of_open_elements->pop();
506506
}
507507

508-
while ( parent::next_token() && '#tag' !== $this->get_node_type() ) {
508+
while ( parent::next_token() && '#tag' !== $this->get_token_type() ) {
509509
continue;
510510
}
511511
}

src/wp-includes/html-api/class-wp-html-tag-processor.php

Lines changed: 163 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2501,7 +2501,29 @@ public function is_tag_closer() {
25012501
);
25022502
}
25032503

2504-
public function get_node_type() {
2504+
/**
2505+
* Indicates the kind of matched token, if any.
2506+
*
2507+
* This differs from `get_token_name()` in that it always
2508+
* returns a static string indicating the type, whereas
2509+
* `get_token_name()` may return values derived from the
2510+
* token itself, such as a tag name or processing
2511+
* instruction tag.
2512+
*
2513+
* Possible values:
2514+
* - `#tag` when matched on a tag.
2515+
* - `#text` when matched on a text node.
2516+
* - `#cdata-section` when matched on a CDATA node.
2517+
* - `#processing-instruction` when matched on a processing instruction.
2518+
* - `#comment` when matched on a comment.
2519+
* - `#presumptuous-tag` when matched on an empty tag closer.
2520+
* - `#funky-comment` when matched on a funky comment.
2521+
*
2522+
* @since 6.5.0
2523+
*
2524+
* @return string|null What kind of token is matched, or null.
2525+
*/
2526+
public function get_token_type() {
25052527
switch ( $this->parser_state ) {
25062528
case self::STATE_MATCHED_TAG:
25072529
return '#tag';
@@ -2513,11 +2535,32 @@ public function get_node_type() {
25132535
return '#processing-instruction';
25142536

25152537
default:
2516-
return $this->get_node_name();
2538+
return $this->get_token_name();
25172539
}
25182540
}
25192541

2520-
public function get_node_name() {
2542+
/**
2543+
* Returns the node name represented by the token.
2544+
*
2545+
* This matches the DOM API value `nodeName`. Some values
2546+
* are static, such as `#text` for a text node, while others
2547+
* are dynamically generated from the token itself.
2548+
*
2549+
* Dynamic names:
2550+
* - Uppercase tag name for tag matches.
2551+
* - Tag name for processing instructions.
2552+
* - `html` for DOCTYPE declarations.
2553+
*
2554+
* Note that if the Tag Processor is not matched on a token
2555+
* then this function will return `null`, either because it
2556+
* hasn't yet found a token or because it reached the end
2557+
* of the document without matching a token.
2558+
*
2559+
* @since 6.5.0
2560+
*
2561+
* @return string|null Name of the matched token.
2562+
*/
2563+
public function get_token_name() {
25212564
switch ( $this->parser_state ) {
25222565
case self::STATE_MATCHED_TAG:
25232566
return $this->get_tag();
@@ -2546,7 +2589,26 @@ public function get_node_name() {
25462589
}
25472590
}
25482591

2549-
public function get_node_text() {
2592+
/**
2593+
* Returns the modifiable text for a matched token, or an empty string.
2594+
*
2595+
* Modifiable text is text content that may be read and changed without
2596+
* changing the HTML structure of the document around it. This includes
2597+
* the contents of `#text` nodes in the HTML as well as the inner
2598+
* contents of HTML comments, CDATA sections, Processing Instructions,
2599+
* and others, even though these nodes aren't part of a parsed DOM tree.
2600+
* It also includes the contents of SCRIPT and STYLE tags, of TEXTAREA
2601+
* tags, and of any other section in an HTML document which cannot
2602+
* contain HTML markup (DATA).
2603+
*
2604+
* If a token has no modifiable text then an empty string is returned to
2605+
* avoid needless crashing or type errors. An empty string does not mean
2606+
* that a token has modifiable text, and a token with modifiable text may
2607+
* have an empty string (e.g. a comment with no contents).
2608+
*
2609+
* @return string Modifiable text of the matched token, or an empty string.
2610+
*/
2611+
public function get_modifiable_text() {
25502612
$at = $this->text_starts_at;
25512613
$length = $this->text_length;
25522614

@@ -3030,7 +3092,7 @@ private function matches() {
30303092
}
30313093

30323094
/**
3033-
* Parser Ready State
3095+
* Parser Ready State.
30343096
*
30353097
* Indicates that the parser is ready to run and waiting for a state transition.
30363098
* It may not have started yet, or it may have just finished parsing a token and
@@ -3043,7 +3105,7 @@ private function matches() {
30433105
const STATE_READY = 'STATE_READY';
30443106

30453107
/**
3046-
* Parser Complete State
3108+
* Parser Complete State.
30473109
*
30483110
* Indicates that the parser has reached the end of the document and there is
30493111
* nothing left to scan. It finished parsing the last token completely.
@@ -3055,7 +3117,7 @@ private function matches() {
30553117
const STATE_COMPLETE = 'STATE_COMPLETE';
30563118

30573119
/**
3058-
* Parser Incomplete State
3120+
* Parser Incomplete State.
30593121
*
30603122
* Indicates that the parser has reached the end of the document before finishing
30613123
* a token. It started parsing a token but there is a possibility that the input
@@ -3071,7 +3133,7 @@ private function matches() {
30713133
const STATE_INCOMPLETE = 'STATE_INCOMPLETE';
30723134

30733135
/**
3074-
* Parser Matched Tag State
3136+
* Parser Matched Tag State.
30753137
*
30763138
* Indicates that the parser has found an HTML tag and it's possible to get
30773139
* the tag name and read or modify its attributes (if it's not a closing tag).
@@ -3082,11 +3144,98 @@ private function matches() {
30823144
*/
30833145
const STATE_MATCHED_TAG = 'STATE_MATCHED_TAG';
30843146

3085-
const STATE_TEXT_NODE = 'STATE_TEXT_NODE';
3086-
const STATE_CDATA_NODE = 'STATE_CDATA_NODE';
3087-
const STATE_PI_NODE = 'STATE_PI_NODE';
3088-
const STATE_COMMENT = 'STATE_COMMENT';
3089-
const STATE_DOCTYPE = 'STATE_DOCTYPE';
3147+
/**
3148+
* Parser Text Node State.
3149+
*
3150+
* Indicates that the parser has found a text node and it's possible
3151+
* to read and modify that text.
3152+
*
3153+
* @since 6.5.0
3154+
*
3155+
* @access private
3156+
*/
3157+
const STATE_TEXT_NODE = 'STATE_TEXT_NODE';
3158+
3159+
/**
3160+
* Parser CDATA Node State.
3161+
*
3162+
* Indicates that the parser has found a CDATA node and it's possible
3163+
* to read and modify its modifiable text. Note that in HTML there are
3164+
* no CDATA nodes outside foreign elements (SVG and MathML). Outside
3165+
* of foreign elements, they are treated as HTML comments. Nonetheless,
3166+
* the Tag Processor still recognizes them as they appear in the HTML
3167+
* stream and exposes them for inspection and modification.
3168+
*
3169+
* @since 6.5.0
3170+
*
3171+
* @access private
3172+
*/
3173+
const STATE_CDATA_NODE = 'STATE_CDATA_NODE';
3174+
3175+
/**
3176+
* Parser Processing Instruction State.
3177+
*
3178+
* Indicates that the parser has found a Processing Instruction and
3179+
* it's possible to read and modify its modifiable text. Note that in
3180+
* HTML there are no Processing Instruction nodes and they are treated
3181+
* as HTML comments. Nonetheless, the Tag Processor still recognizes
3182+
* them as they appear in the HTML stream and exposes them for
3183+
* inspection and modification.
3184+
*
3185+
* @since 6.5.0
3186+
*
3187+
* @access private
3188+
*/
3189+
const STATE_PI_NODE = 'STATE_PI_NODE';
3190+
3191+
/**
3192+
* Indicates that the parser has found an HTML comment and it's
3193+
* possible to read and modify its modifiable text.
3194+
*
3195+
* @since 6.5.0
3196+
*
3197+
* @access private
3198+
*/
3199+
const STATE_COMMENT = 'STATE_COMMENT';
3200+
3201+
/**
3202+
* Indicates that the parser has found a DOCTYPE node and it's
3203+
* possible to read and modify its modifiable text.
3204+
*
3205+
* @since 6.5.0
3206+
*
3207+
* @access private
3208+
*/
3209+
const STATE_DOCTYPE = 'STATE_DOCTYPE';
3210+
3211+
/**
3212+
* Indicates that the parser has found an empty tag closer.
3213+
* Note that in HTML there are no empty tag closers, and they
3214+
* are ignored. Nonetheless, the Tag Processor still
3215+
* recognizes them as they appear in the HTML stream.
3216+
*
3217+
* These were historically discussed as a "presumptuous tag
3218+
* closer," which would close the nearest open tag, but were
3219+
* dismissed in favor of explicitly-closing tags.
3220+
*
3221+
* @since 6.5.0
3222+
*
3223+
* @access private
3224+
*/
30903225
const STATE_PRESUMPTUOUS_TAG = 'STATE_PRESUMPTUOUS_TAG';
3091-
const STATE_FUNKY_COMMENT = 'STATE_WP_FUNKY';
3226+
3227+
/**
3228+
* Indicates that the parser has found a "funky comment"
3229+
* and it's possible to read and modify its modifiable text.
3230+
*
3231+
* Funky comments are tag closers with invalid tag names. Note
3232+
* that in HTML these are treated as HTML comments. Nonetheless,
3233+
* the Tag Processor recognizes them in a stream of HTML and
3234+
* exposes them for inspection and modification.
3235+
*
3236+
* @since 6.5.0
3237+
*
3238+
* @access private
3239+
*/
3240+
const STATE_FUNKY_COMMENT = 'STATE_WP_FUNKY';
30923241
}

0 commit comments

Comments
 (0)