@@ -2501,7 +2501,29 @@ public function is_tag_closer() {
25012501 );
25022502 }
25032503
2504- public function get_node_type () {
2504+ /**
2505+ * Indicates the kind of matched token, if any.
2506+ *
2507+ * This differs from `get_token_name()` in that it always
2508+ * returns a static string indicating the type, whereas
2509+ * `get_token_name()` may return values derived from the
2510+ * token itself, such as a tag name or processing
2511+ * instruction tag.
2512+ *
2513+ * Possible values:
2514+ * - `#tag` when matched on a tag.
2515+ * - `#text` when matched on a text node.
2516+ * - `#cdata-section` when matched on a CDATA node.
2517+ * - `#processing-instruction` when matched on a processing instruction.
2518+ * - `#comment` when matched on a comment.
2519+ * - `#presumptuous-tag` when matched on an empty tag closer.
2520+ * - `#funky-comment` when matched on a funky comment.
2521+ *
2522+ * @since 6.5.0
2523+ *
2524+ * @return string|null What kind of token is matched, or null.
2525+ */
2526+ public function get_token_type () {
25052527 switch ( $ this ->parser_state ) {
25062528 case self ::STATE_MATCHED_TAG :
25072529 return '#tag ' ;
@@ -2513,11 +2535,32 @@ public function get_node_type() {
25132535 return '#processing-instruction ' ;
25142536
25152537 default :
2516- return $ this ->get_node_name ();
2538+ return $ this ->get_token_name ();
25172539 }
25182540 }
25192541
2520- public function get_node_name () {
2542+ /**
2543+ * Returns the node name represented by the token.
2544+ *
2545+ * This matches the DOM API value `nodeName`. Some values
2546+ * are static, such as `#text` for a text node, while others
2547+ * are dynamically generated from the token itself.
2548+ *
2549+ * Dynamic names:
2550+ * - Uppercase tag name for tag matches.
2551+ * - Tag name for processing instructions.
2552+ * - `html` for DOCTYPE declarations.
2553+ *
2554+ * Note that if the Tag Processor is not matched on a token
2555+ * then this function will return `null`, either because it
2556+ * hasn't yet found a token or because it reached the end
2557+ * of the document without matching a token.
2558+ *
2559+ * @since 6.5.0
2560+ *
2561+ * @return string|null Name of the matched token.
2562+ */
2563+ public function get_token_name () {
25212564 switch ( $ this ->parser_state ) {
25222565 case self ::STATE_MATCHED_TAG :
25232566 return $ this ->get_tag ();
@@ -2546,7 +2589,26 @@ public function get_node_name() {
25462589 }
25472590 }
25482591
2549- public function get_node_text () {
2592+ /**
2593+ * Returns the modifiable text for a matched token, or an empty string.
2594+ *
2595+ * Modifiable text is text content that may be read and changed without
2596+ * changing the HTML structure of the document around it. This includes
2597+ * the contents of `#text` nodes in the HTML as well as the inner
2598+ * contents of HTML comments, CDATA sections, Processing Instructions,
2599+ * and others, even though these nodes aren't part of a parsed DOM tree.
2600+ * Modifiable text also includes the contents of SCRIPT and STYLE tags, of TEXTAREA
2601+ * tags, and of any other section in an HTML document which cannot
2602+ * contain HTML markup (DATA).
2603+ *
2604+ * If a token has no modifiable text then an empty string is returned to
2605+ * avoid needless crashing or type errors. An empty string therefore does
2606+ * not reveal whether a token has modifiable text: a token with modifiable
2607+ * text may also have an empty string (e.g. a comment with no contents).
2608+ *
2609+ * @return string
2610+ */
2611+ public function get_modifiable_text () {
25502612 $ at = $ this ->text_starts_at ;
25512613 $ length = $ this ->text_length ;
25522614
@@ -3030,7 +3092,7 @@ private function matches() {
30303092 }
30313093
30323094 /**
3033- * Parser Ready State
3095+ * Parser Ready State.
30343096 *
30353097 * Indicates that the parser is ready to run and waiting for a state transition.
30363098 * It may not have started yet, or it may have just finished parsing a token and
@@ -3043,7 +3105,7 @@ private function matches() {
30433105 const STATE_READY = 'STATE_READY ' ;
30443106
30453107 /**
3046- * Parser Complete State
3108+ * Parser Complete State.
30473109 *
30483110 * Indicates that the parser has reached the end of the document and there is
30493111 * nothing left to scan. It finished parsing the last token completely.
@@ -3055,7 +3117,7 @@ private function matches() {
30553117 const STATE_COMPLETE = 'STATE_COMPLETE ' ;
30563118
30573119 /**
3058- * Parser Incomplete State
3120+ * Parser Incomplete State.
30593121 *
30603122 * Indicates that the parser has reached the end of the document before finishing
30613123 * a token. It started parsing a token but there is a possibility that the input
@@ -3071,7 +3133,7 @@ private function matches() {
30713133 const STATE_INCOMPLETE = 'STATE_INCOMPLETE ' ;
30723134
30733135 /**
3074- * Parser Matched Tag State
3136+ * Parser Matched Tag State.
30753137 *
30763138 * Indicates that the parser has found an HTML tag and it's possible to get
30773139 * the tag name and read or modify its attributes (if it's not a closing tag).
@@ -3082,11 +3144,98 @@ private function matches() {
30823144 */
30833145 const STATE_MATCHED_TAG = 'STATE_MATCHED_TAG ' ;
30843146
3085- const STATE_TEXT_NODE = 'STATE_TEXT_NODE ' ;
3086- const STATE_CDATA_NODE = 'STATE_CDATA_NODE ' ;
3087- const STATE_PI_NODE = 'STATE_PI_NODE ' ;
3088- const STATE_COMMENT = 'STATE_COMMENT ' ;
3089- const STATE_DOCTYPE = 'STATE_DOCTYPE ' ;
3147+ /**
3148+ * Parser Text Node State.
3149+ *
3150+ * Indicates that the parser has found a text node and it's possible
3151+ * to read and modify that text.
3152+ *
3153+ * @since 6.5.0
3154+ *
3155+ * @access private
3156+ */
3157+ const STATE_TEXT_NODE = 'STATE_TEXT_NODE ' ;
3158+
3159+ /**
3160+ * Parser CDATA Node State.
3161+ *
3162+ * Indicates that the parser has found a CDATA node and it's possible
3163+ * to read and modify its modifiable text. Note that in HTML there are
3164+ * no CDATA nodes outside foreign elements (SVG and MathML). Outside
3165+ * of foreign elements, they are treated as HTML comments. Nonetheless,
3166+ * the Tag Processor still recognizes them as they appear in the HTML
3167+ * stream and exposes them for inspection and modification.
3168+ *
3169+ * @since 6.5.0
3170+ *
3171+ * @access private
3172+ */
3173+ const STATE_CDATA_NODE = 'STATE_CDATA_NODE ' ;
3174+
3175+ /**
3176+ * Parser Processing Instruction State.
3177+ *
3178+ * Indicates that the parser has found a Processing Instruction and
3179+ * it's possible to read and modify its modifiable text. Note that in
3180+ * HTML there are no Processing Instruction nodes and they are treated
3181+ * as HTML comments. Nonetheless, the Tag Processor still recognizes
3182+ * them as they appear in the HTML stream and exposes them for
3183+ * inspection and modification.
3184+ *
3185+ * @since 6.5.0
3186+ *
3187+ * @access private
3188+ */
3189+ const STATE_PI_NODE = 'STATE_PI_NODE ' ;
3190+
3191+ /**
3192+ * Indicates that the parser has found an HTML comment and it's
3193+ * possible to read and modify its modifiable text.
3194+ *
3195+ * @since 6.5.0
3196+ *
3197+ * @access private
3198+ */
3199+ const STATE_COMMENT = 'STATE_COMMENT ' ;
3200+
3201+ /**
3202+ * Indicates that the parser has found a DOCTYPE node and it's
3203+ * possible to read and modify its modifiable text.
3204+ *
3205+ * @since 6.5.0
3206+ *
3207+ * @access private
3208+ */
3209+ const STATE_DOCTYPE = 'STATE_DOCTYPE ' ;
3210+
3211+ /**
3212+ * Indicates that the parser has found an empty tag closer.
3213+ * Note that in HTML there are no empty tag closers, and they
3214+ * are ignored. Nonetheless, the Tag Processor still
3215+ * recognizes them as they appear in the HTML stream.
3216+ *
3217+ * These were historically discussed as a "presumptuous tag
3218+ * closer," which would close the nearest open tag, but were
3219+ * dismissed in favor of explicitly-closing tags.
3220+ *
3221+ * @since 6.5.0
3222+ *
3223+ * @access private
3224+ */
30903225 const STATE_PRESUMPTUOUS_TAG = 'STATE_PRESUMPTUOUS_TAG ' ;
3091- const STATE_FUNKY_COMMENT = 'STATE_WP_FUNKY ' ;
3226+
3227+ /**
3228+ * Indicates that the parser has found a "funky comment"
3229+ * and it's possible to read and modify its modifiable text.
3230+ *
3231+ * Funky comments are tag closers with invalid tag names. Note
3232+ * that in HTML these are treated as HTML comments. Nonetheless,
3233+ * the Tag Processor recognizes them in a stream of HTML and
3234+ * exposes them for inspection and modification.
3235+ *
3236+ * @since 6.5.0
3237+ *
3238+ * @access private
3239+ */
3240+ const STATE_FUNKY_COMMENT = 'STATE_WP_FUNKY ' ;
30923241}
0 commit comments