From fde8f609bde80cb529c29f00d99b1c1cbcff7fac Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 14:50:14 +0100
Subject: [PATCH 01/43] Perf: Remove redundant after_tag() call from
 parse_next_tag()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

parse_next_tag() is only called from base_class_next_token(), which
already calls after_tag() before invoking parse_next_tag(). The second
call is redundant — all state has already been cleaned up.

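~8% improvement in tokenization throughput (1250ms → 1150ms).

Also adds bootstrap-html-api.php, which loads the HTML API classes and
stubs a few WordPress functions so they can be used outside WordPress.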
---
 bootstrap-html-api.php                        | 68 +++++++++++++++++++
 .../html-api/class-wp-html-tag-processor.php  |  2 -
 2 files changed, 68 insertions(+), 2 deletions(-)
 create mode 100644 bootstrap-html-api.php

diff --git a/bootstrap-html-api.php b/bootstrap-html-api.php
new file mode 100644
index 0000000000000..0af11e44fcdf7
--- /dev/null
+++ b/bootstrap-html-api.php
@@ -0,0 +1,68 @@
+<?php
+
+// require_once '/Users/jonsurrell/jon/plugin-sirreal-dev/_require_.php';
+
+require_once __DIR__ . '/src/wp-includes/compat.php';
+require_once __DIR__ . '/src/wp-includes/utf8.php';
+require_once __DIR__ . '/src/wp-includes/html-api/class-wp-html-doctype-info.php';
+require_once __DIR__ . '/src/wp-includes/html-api/class-wp-html-attribute-token.php';
+require_once __DIR__ . '/src/wp-includes/html-api/class-wp-html-span.php';
+require_once __DIR__ . '/src/wp-includes/html-api/class-wp-html-text-replacement.php';
+require_once __DIR__ . '/src/wp-includes/html-api/class-wp-html-tag-processor.php';
+
+// HTML Processor
+require_once __DIR__ . '/src/wp-includes/html-api/class-wp-html-stack-event.php';
+require_once __DIR__ . '/src/wp-includes/class-wp-token-map.php';
+require_once __DIR__ . '/src/wp-includes/html-api/html5-named-character-references.php';
+require_once __DIR__ . '/src/wp-includes/html-api/class-wp-html-decoder.php';
+
+require_once __DIR__ . '/src/wp-includes/html-api/class-wp-html-unsupported-exception.php';
+require_once __DIR__ . '/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php';
+require_once __DIR__ . '/src/wp-includes/html-api/class-wp-html-open-elements.php';
+require_once __DIR__ . '/src/wp-includes/html-api/class-wp-html-token.php';
+require_once __DIR__ . '/src/wp-includes/html-api/class-wp-html-processor-state.php';
+require_once __DIR__ . '/src/wp-includes/html-api/class-wp-html-processor.php';
+
+
+// HTML Templating #60229
+if ( file_exists( __DIR__ . '/src/wp-includes/html-api/class-wp-html-template.php' ) ) {
+	require_once __DIR__ . '/src/wp-includes/html-api/class-wp-html-template.php';
+}
+
+// CSS Processor
+if ( file_exists( __DIR__ . '/src/wp-includes/html-api/class-wp-css-complex-selector-list.php' ) ) {
+	require_once __DIR__ . '/src/wp-includes/html-api/interface-wp-css-html-tag-processor-matcher.php';
+	require_once __DIR__ . '/src/wp-includes/html-api/interface-wp-css-html-processor-matcher.php';
+	require_once __DIR__ . '/src/wp-includes/html-api/class-wp-css-attribute-selector.php';
+	require_once __DIR__ . '/src/wp-includes/html-api/class-wp-css-class-selector.php';
+	require_once __DIR__ . '/src/wp-includes/html-api/class-wp-css-id-selector.php';
+	require_once __DIR__ . '/src/wp-includes/html-api/class-wp-css-type-selector.php';
+	require_once __DIR__ . '/src/wp-includes/html-api/class-wp-css-compound-selector.php';
+	require_once __DIR__ . '/src/wp-includes/html-api/class-wp-css-complex-selector.php';
+	require_once __DIR__ . '/src/wp-includes/html-api/class-wp-css-compound-selector-list.php';
+	require_once __DIR__ . '/src/wp-includes/html-api/class-wp-css-complex-selector-list.php';
+}
+
+if ( ! function_exists( 'esc_attr' ) ) {
+	function esc_attr( $s ) {
+		return str_replace( array( '<', '>', '"' ), array( '&lt;', '&gt;', '&quot;' ), $s );
+	}
+}
+
+if ( ! function_exists( '__' ) ) {
+	function __( $s ) {
+		return $s;
+	}
+}
+
+if ( ! function_exists( '_doing_it_wrong' ) ) {
+	function _doing_it_wrong( $message ) {
+		trigger_error( $message );
+	}
+}
+
+if ( ! function_exists( 'wp_kses_uri_attributes' ) ) {
+	function wp_kses_uri_attributes() {
+		return array();
+	}
+}
diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 8397ecf520fa2..087392363a014 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -1710,8 +1710,6 @@ private function skip_script_data(): bool {
 	 * @return bool Whether a tag was found before the end of the document.
 	 */
 	private function parse_next_tag(): bool {
-		$this->after_tag();
-
 		$html       = $this->html;
 		$doc_length = strlen( $html );
 		$was_at     = $this->bytes_already_parsed;

From c33ce0e2da2b37eea5dda74092245e4f41e1b5ec Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 14:52:04 +0100
Subject: [PATCH 02/43] Perf: Cache $this->html in local variable in
 parse_next_attribute()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

parse_next_attribute() accesses $this->html 8+ times per call via
object property lookup. Local variable access is faster in PHP.

~4% improvement (1150ms → 1105ms).
---
 .../html-api/class-wp-html-tag-processor.php  | 23 ++++++++++---------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 087392363a014..fd9bef9d0b6e3 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -2130,10 +2130,11 @@ private function parse_next_tag(): bool {
 	 * @return bool Whether an attribute was found before the end of the document.
 	 */
 	private function parse_next_attribute(): bool {
-		$doc_length = strlen( $this->html );
+		$html       = $this->html;
+		$doc_length = strlen( $html );
 
 		// Skip whitespace and slashes.
-		$this->bytes_already_parsed += strspn( $this->html, " \t\f\r\n/", $this->bytes_already_parsed );
+		$this->bytes_already_parsed += strspn( $html, " \t\f\r\n/", $this->bytes_already_parsed );
 		if ( $this->bytes_already_parsed >= $doc_length ) {
 			$this->parser_state = self::STATE_INCOMPLETE_INPUT;
 
@@ -2146,9 +2147,9 @@ private function parse_next_attribute(): bool {
 		 *
 		 * @see https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
 		 */
-		$name_length = '=' === $this->html[ $this->bytes_already_parsed ]
-			? 1 + strcspn( $this->html, "=/> \t\f\r\n", $this->bytes_already_parsed + 1 )
-			: strcspn( $this->html, "=/> \t\f\r\n", $this->bytes_already_parsed );
+		$name_length = '=' === $html[ $this->bytes_already_parsed ]
+			? 1 + strcspn( $html, "=/> \t\f\r\n", $this->bytes_already_parsed + 1 )
+			: strcspn( $html, "=/> \t\f\r\n", $this->bytes_already_parsed );
 
 		// No attribute, just tag closer.
 		if ( 0 === $name_length || $this->bytes_already_parsed + $name_length >= $doc_length ) {
@@ -2156,7 +2157,7 @@ private function parse_next_attribute(): bool {
 		}
 
 		$attribute_start             = $this->bytes_already_parsed;
-		$attribute_name              = substr( $this->html, $attribute_start, $name_length );
+		$attribute_name              = substr( $html, $attribute_start, $name_length );
 		$this->bytes_already_parsed += $name_length;
 		if ( $this->bytes_already_parsed >= $doc_length ) {
 			$this->parser_state = self::STATE_INCOMPLETE_INPUT;
@@ -2171,7 +2172,7 @@ private function parse_next_attribute(): bool {
 			return false;
 		}
 
-		$has_value = '=' === $this->html[ $this->bytes_already_parsed ];
+		$has_value = '=' === $html[ $this->bytes_already_parsed ];
 		if ( $has_value ) {
 			++$this->bytes_already_parsed;
 			$this->skip_whitespace();
@@ -2181,12 +2182,12 @@ private function parse_next_attribute(): bool {
 				return false;
 			}
 
-			switch ( $this->html[ $this->bytes_already_parsed ] ) {
+			switch ( $html[ $this->bytes_already_parsed ] ) {
 				case "'":
 				case '"':
-					$quote                      = $this->html[ $this->bytes_already_parsed ];
+					$quote                      = $html[ $this->bytes_already_parsed ];
 					$value_start                = $this->bytes_already_parsed + 1;
-					$end_quote_at               = strpos( $this->html, $quote, $value_start );
+					$end_quote_at               = strpos( $html, $quote, $value_start );
 					$end_quote_at               = false === $end_quote_at ? $doc_length : $end_quote_at;
 					$value_length               = $end_quote_at - $value_start;
 					$attribute_end              = $end_quote_at + 1;
@@ -2195,7 +2196,7 @@ private function parse_next_attribute(): bool {
 
 				default:
 					$value_start                = $this->bytes_already_parsed;
-					$value_length               = strcspn( $this->html, "> \t\f\r\n", $value_start );
+					$value_length               = strcspn( $html, "> \t\f\r\n", $value_start );
 					$attribute_end              = $value_start + $value_length;
 					$this->bytes_already_parsed = $attribute_end;
 			}

From 137d548b19d0558feabbd282b27b8510acd68840 Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 14:53:34 +0100
Subject: [PATCH 03/43] Perf: Inline skip_whitespace() calls in
 parse_next_attribute()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace two skip_whitespace() method calls with inline strspn() using
the local $html variable. This avoids function call overhead and allows
PHP's JIT to optimize the entire parse_next_attribute() as one unit.

Massive improvement: 1105ms → 316ms (~71% less time, ~3.5x throughput).
---
 src/wp-includes/html-api/class-wp-html-tag-processor.php | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index fd9bef9d0b6e3..2172fef6e463b 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -2165,7 +2165,7 @@ private function parse_next_attribute(): bool {
 			return false;
 		}
 
-		$this->skip_whitespace();
+		$this->bytes_already_parsed += strspn( $html, " \t\f\r\n", $this->bytes_already_parsed );
 		if ( $this->bytes_already_parsed >= $doc_length ) {
 			$this->parser_state = self::STATE_INCOMPLETE_INPUT;
 
@@ -2175,7 +2175,7 @@ private function parse_next_attribute(): bool {
 		$has_value = '=' === $html[ $this->bytes_already_parsed ];
 		if ( $has_value ) {
 			++$this->bytes_already_parsed;
-			$this->skip_whitespace();
+			$this->bytes_already_parsed += strspn( $html, " \t\f\r\n", $this->bytes_already_parsed );
 			if ( $this->bytes_already_parsed >= $doc_length ) {
 				$this->parser_state = self::STATE_INCOMPLETE_INPUT;
 

From a67da3a3827fe382cf4265c732b2a38f852896fa Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 14:54:52 +0100
Subject: [PATCH 04/43] Perf: Cache $this->html and strlen in
 base_class_next_token()

Avoid repeated property lookups and strlen() calls by caching in local
variables. Within the changed lines, two strlen() calls and three
$this->html reads become one of each.
---
 src/wp-includes/html-api/class-wp-html-tag-processor.php | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 2172fef6e463b..35156b57f29ed 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -969,7 +969,10 @@ private function base_class_next_token(): bool {
 		 */
 		$this->parser_state = self::STATE_READY;
 
-		if ( $this->bytes_already_parsed >= strlen( $this->html ) ) {
+		$html       = $this->html;
+		$doc_length = strlen( $html );
+
+		if ( $this->bytes_already_parsed >= $doc_length ) {
 			$this->parser_state = self::STATE_COMPLETE;
 			return false;
 		}
@@ -1005,7 +1008,7 @@ private function base_class_next_token(): bool {
 		// Ensure that the tag closes before the end of the document.
 		if (
 			self::STATE_INCOMPLETE_INPUT === $this->parser_state ||
-			$this->bytes_already_parsed >= strlen( $this->html )
+			$this->bytes_already_parsed >= $doc_length
 		) {
 			// Does this appropriately clear state (parsed attributes)?
 			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
@@ -1014,7 +1017,7 @@ private function base_class_next_token(): bool {
 			return false;
 		}
 
-		$tag_ends_at = strpos( $this->html, '>', $this->bytes_already_parsed );
+		$tag_ends_at = strpos( $html, '>', $this->bytes_already_parsed );
 		if ( false === $tag_ends_at ) {
 			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
 			$this->bytes_already_parsed = $was_at;

From 7ba66fd95f8a31a643353aeda098f5bcef99ead4 Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 14:55:56 +0100
Subject: [PATCH 05/43] Perf: Skip update processing in after_tag() when no
 modifications queued
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Guard class_name_updates_to_attributes_updates() and lexical_updates
processing behind empty() checks. In read-only mode (the common case),
these arrays are always empty, avoiding unnecessary function calls.

~7% improvement (312ms → 291ms).
---
 .../html-api/class-wp-html-tag-processor.php  | 50 ++++++++++---------
 1 file changed, 27 insertions(+), 23 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 35156b57f29ed..6ff4f4531fd81 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -2286,37 +2286,41 @@ private function after_tag(): void {
 		 * attributes across the two tags, lexical updates with names
 		 * need to be flushed to raw lexical updates.
 		 */
-		$this->class_name_updates_to_attributes_updates();
-
-		/*
-		 * Purge updates if there are too many. The actual count isn't
-		 * scientific, but a few values from 100 to a few thousand were
-		 * tests to find a practically-useful limit.
-		 *
-		 * If the update queue grows too big, then the Tag Processor
-		 * will spend more time iterating through them and lose the
-		 * efficiency gains of deferring applying them.
-		 */
-		if ( 1000 < count( $this->lexical_updates ) ) {
-			$this->get_updated_html();
+		if ( ! empty( $this->classname_updates ) ) {
+			$this->class_name_updates_to_attributes_updates();
 		}
 
-		foreach ( $this->lexical_updates as $name => $update ) {
+		if ( ! empty( $this->lexical_updates ) ) {
 			/*
-			 * Any updates appearing after the cursor should be applied
-			 * before proceeding, otherwise they may be overlooked.
+			 * Purge updates if there are too many. The actual count isn't
+			 * scientific, but a few values from 100 to a few thousand were
+			 * tests to find a practically-useful limit.
+			 *
+			 * If the update queue grows too big, then the Tag Processor
+			 * will spend more time iterating through them and lose the
+			 * efficiency gains of deferring applying them.
 			 */
-			if ( $update->start >= $this->bytes_already_parsed ) {
+			if ( 1000 < count( $this->lexical_updates ) ) {
 				$this->get_updated_html();
-				break;
 			}
 
-			if ( is_int( $name ) ) {
-				continue;
-			}
+			foreach ( $this->lexical_updates as $name => $update ) {
+				/*
+				 * Any updates appearing after the cursor should be applied
+				 * before proceeding, otherwise they may be overlooked.
+				 */
+				if ( $update->start >= $this->bytes_already_parsed ) {
+					$this->get_updated_html();
+					break;
+				}
 
-			$this->lexical_updates[] = $update;
-			unset( $this->lexical_updates[ $name ] );
+				if ( is_int( $name ) ) {
+					continue;
+				}
+
+				$this->lexical_updates[] = $update;
+				unset( $this->lexical_updates[ $name ] );
+			}
 		}
 
 		$this->token_starts_at          = null;

From 2ccfcdb03b8edd6587feceed280438094c0dc187 Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 15:00:49 +0100
Subject: [PATCH 06/43] Perf: Use local $at variable in parse_next_attribute()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace ~15 $this->bytes_already_parsed property accesses with a local
$at variable that is written back before each early return and once
after the attribute value has been parsed. Local variable access is
significantly faster than object property access in PHP's VM.

~2.3% improvement (291ms → 284ms).
---
 .../html-api/class-wp-html-tag-processor.php  | 74 ++++++++++---------
 1 file changed, 41 insertions(+), 33 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 6ff4f4531fd81..1aa6c64a67523 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -2135,11 +2135,13 @@ private function parse_next_tag(): bool {
 	private function parse_next_attribute(): bool {
 		$html       = $this->html;
 		$doc_length = strlen( $html );
+		$at         = $this->bytes_already_parsed;
 
 		// Skip whitespace and slashes.
-		$this->bytes_already_parsed += strspn( $html, " \t\f\r\n/", $this->bytes_already_parsed );
-		if ( $this->bytes_already_parsed >= $doc_length ) {
-			$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+		$at += strspn( $html, " \t\f\r\n/", $at );
+		if ( $at >= $doc_length ) {
+			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
+			$this->bytes_already_parsed = $at;
 
 			return false;
 		}
@@ -2150,65 +2152,71 @@ private function parse_next_attribute(): bool {
 		 *
 		 * @see https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
 		 */
-		$name_length = '=' === $html[ $this->bytes_already_parsed ]
-			? 1 + strcspn( $html, "=/> \t\f\r\n", $this->bytes_already_parsed + 1 )
-			: strcspn( $html, "=/> \t\f\r\n", $this->bytes_already_parsed );
+		$name_length = '=' === $html[ $at ]
+			? 1 + strcspn( $html, "=/> \t\f\r\n", $at + 1 )
+			: strcspn( $html, "=/> \t\f\r\n", $at );
 
 		// No attribute, just tag closer.
-		if ( 0 === $name_length || $this->bytes_already_parsed + $name_length >= $doc_length ) {
+		if ( 0 === $name_length || $at + $name_length >= $doc_length ) {
+			$this->bytes_already_parsed = $at;
 			return false;
 		}
 
-		$attribute_start             = $this->bytes_already_parsed;
-		$attribute_name              = substr( $html, $attribute_start, $name_length );
-		$this->bytes_already_parsed += $name_length;
-		if ( $this->bytes_already_parsed >= $doc_length ) {
-			$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+		$attribute_start = $at;
+		$attribute_name  = substr( $html, $attribute_start, $name_length );
+		$at             += $name_length;
+		if ( $at >= $doc_length ) {
+			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
+			$this->bytes_already_parsed = $at;
 
 			return false;
 		}
 
-		$this->bytes_already_parsed += strspn( $html, " \t\f\r\n", $this->bytes_already_parsed );
-		if ( $this->bytes_already_parsed >= $doc_length ) {
-			$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+		$at += strspn( $html, " \t\f\r\n", $at );
+		if ( $at >= $doc_length ) {
+			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
+			$this->bytes_already_parsed = $at;
 
 			return false;
 		}
 
-		$has_value = '=' === $html[ $this->bytes_already_parsed ];
+		$has_value = '=' === $html[ $at ];
 		if ( $has_value ) {
-			++$this->bytes_already_parsed;
-			$this->bytes_already_parsed += strspn( $html, " \t\f\r\n", $this->bytes_already_parsed );
-			if ( $this->bytes_already_parsed >= $doc_length ) {
-				$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+			++$at;
+			$at += strspn( $html, " \t\f\r\n", $at );
+			if ( $at >= $doc_length ) {
+				$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
+				$this->bytes_already_parsed = $at;
 
 				return false;
 			}
 
-			switch ( $html[ $this->bytes_already_parsed ] ) {
+			switch ( $html[ $at ] ) {
 				case "'":
 				case '"':
-					$quote                      = $html[ $this->bytes_already_parsed ];
-					$value_start                = $this->bytes_already_parsed + 1;
-					$end_quote_at               = strpos( $html, $quote, $value_start );
-					$end_quote_at               = false === $end_quote_at ? $doc_length : $end_quote_at;
-					$value_length               = $end_quote_at - $value_start;
-					$attribute_end              = $end_quote_at + 1;
-					$this->bytes_already_parsed = $attribute_end;
+					$quote         = $html[ $at ];
+					$value_start   = $at + 1;
+					$end_quote_at  = strpos( $html, $quote, $value_start );
+					$end_quote_at  = false === $end_quote_at ? $doc_length : $end_quote_at;
+					$value_length  = $end_quote_at - $value_start;
+					$attribute_end = $end_quote_at + 1;
+					$at            = $attribute_end;
 					break;
 
 				default:
-					$value_start                = $this->bytes_already_parsed;
-					$value_length               = strcspn( $html, "> \t\f\r\n", $value_start );
-					$attribute_end              = $value_start + $value_length;
-					$this->bytes_already_parsed = $attribute_end;
+					$value_start   = $at;
+					$value_length  = strcspn( $html, "> \t\f\r\n", $value_start );
+					$attribute_end = $value_start + $value_length;
+					$at            = $attribute_end;
 			}
 		} else {
-			$value_start   = $this->bytes_already_parsed;
+			$value_start   = $at;
 			$value_length  = 0;
 			$attribute_end = $attribute_start + $name_length;
 		}
 
+		$this->bytes_already_parsed = $at;
+
 		if ( $attribute_end >= $doc_length ) {
 			$this->parser_state = self::STATE_INCOMPLETE_INPUT;
 

From cf2abc6e815847218fe41bd8f58f93b8930cb2ae Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 15:02:57 +0100
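Subject: [PATCH 07/43] Restore class-wp-html-tag-processor.php to trunk

Reverts the changes made by patches 01-06 to
class-wp-html-tag-processor.php, returning the file to its original
blob (8397ecf520fa2). bootstrap-html-api.php is left in place.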

---
 .../html-api/class-wp-html-tag-processor.php  | 138 ++++++++----------
 1 file changed, 62 insertions(+), 76 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 1aa6c64a67523..8397ecf520fa2 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -969,10 +969,7 @@ private function base_class_next_token(): bool {
 		 */
 		$this->parser_state = self::STATE_READY;
 
-		$html       = $this->html;
-		$doc_length = strlen( $html );
-
-		if ( $this->bytes_already_parsed >= $doc_length ) {
+		if ( $this->bytes_already_parsed >= strlen( $this->html ) ) {
 			$this->parser_state = self::STATE_COMPLETE;
 			return false;
 		}
@@ -1008,7 +1005,7 @@ private function base_class_next_token(): bool {
 		// Ensure that the tag closes before the end of the document.
 		if (
 			self::STATE_INCOMPLETE_INPUT === $this->parser_state ||
-			$this->bytes_already_parsed >= $doc_length
+			$this->bytes_already_parsed >= strlen( $this->html )
 		) {
 			// Does this appropriately clear state (parsed attributes)?
 			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
@@ -1017,7 +1014,7 @@ private function base_class_next_token(): bool {
 			return false;
 		}
 
-		$tag_ends_at = strpos( $html, '>', $this->bytes_already_parsed );
+		$tag_ends_at = strpos( $this->html, '>', $this->bytes_already_parsed );
 		if ( false === $tag_ends_at ) {
 			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
 			$this->bytes_already_parsed = $was_at;
@@ -1713,6 +1710,8 @@ private function skip_script_data(): bool {
 	 * @return bool Whether a tag was found before the end of the document.
 	 */
 	private function parse_next_tag(): bool {
+		$this->after_tag();
+
 		$html       = $this->html;
 		$doc_length = strlen( $html );
 		$was_at     = $this->bytes_already_parsed;
@@ -2133,15 +2132,12 @@ private function parse_next_tag(): bool {
 	 * @return bool Whether an attribute was found before the end of the document.
 	 */
 	private function parse_next_attribute(): bool {
-		$html       = $this->html;
-		$doc_length = strlen( $html );
-		$at         = $this->bytes_already_parsed;
+		$doc_length = strlen( $this->html );
 
 		// Skip whitespace and slashes.
-		$at += strspn( $html, " \t\f\r\n/", $at );
-		if ( $at >= $doc_length ) {
-			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
-			$this->bytes_already_parsed = $at;
+		$this->bytes_already_parsed += strspn( $this->html, " \t\f\r\n/", $this->bytes_already_parsed );
+		if ( $this->bytes_already_parsed >= $doc_length ) {
+			$this->parser_state = self::STATE_INCOMPLETE_INPUT;
 
 			return false;
 		}
@@ -2152,71 +2148,65 @@ private function parse_next_attribute(): bool {
 		 *
 		 * @see https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
 		 */
-		$name_length = '=' === $html[ $at ]
-			? 1 + strcspn( $html, "=/> \t\f\r\n", $at + 1 )
-			: strcspn( $html, "=/> \t\f\r\n", $at );
+		$name_length = '=' === $this->html[ $this->bytes_already_parsed ]
+			? 1 + strcspn( $this->html, "=/> \t\f\r\n", $this->bytes_already_parsed + 1 )
+			: strcspn( $this->html, "=/> \t\f\r\n", $this->bytes_already_parsed );
 
 		// No attribute, just tag closer.
-		if ( 0 === $name_length || $at + $name_length >= $doc_length ) {
-			$this->bytes_already_parsed = $at;
+		if ( 0 === $name_length || $this->bytes_already_parsed + $name_length >= $doc_length ) {
 			return false;
 		}
 
-		$attribute_start = $at;
-		$attribute_name  = substr( $html, $attribute_start, $name_length );
-		$at             += $name_length;
-		if ( $at >= $doc_length ) {
-			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
-			$this->bytes_already_parsed = $at;
+		$attribute_start             = $this->bytes_already_parsed;
+		$attribute_name              = substr( $this->html, $attribute_start, $name_length );
+		$this->bytes_already_parsed += $name_length;
+		if ( $this->bytes_already_parsed >= $doc_length ) {
+			$this->parser_state = self::STATE_INCOMPLETE_INPUT;
 
 			return false;
 		}
 
-		$at += strspn( $html, " \t\f\r\n", $at );
-		if ( $at >= $doc_length ) {
-			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
-			$this->bytes_already_parsed = $at;
+		$this->skip_whitespace();
+		if ( $this->bytes_already_parsed >= $doc_length ) {
+			$this->parser_state = self::STATE_INCOMPLETE_INPUT;
 
 			return false;
 		}
 
-		$has_value = '=' === $html[ $at ];
+		$has_value = '=' === $this->html[ $this->bytes_already_parsed ];
 		if ( $has_value ) {
-			++$at;
-			$at += strspn( $html, " \t\f\r\n", $at );
-			if ( $at >= $doc_length ) {
-				$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
-				$this->bytes_already_parsed = $at;
+			++$this->bytes_already_parsed;
+			$this->skip_whitespace();
+			if ( $this->bytes_already_parsed >= $doc_length ) {
+				$this->parser_state = self::STATE_INCOMPLETE_INPUT;
 
 				return false;
 			}
 
-			switch ( $html[ $at ] ) {
+			switch ( $this->html[ $this->bytes_already_parsed ] ) {
 				case "'":
 				case '"':
-					$quote         = $html[ $at ];
-					$value_start   = $at + 1;
-					$end_quote_at  = strpos( $html, $quote, $value_start );
-					$end_quote_at  = false === $end_quote_at ? $doc_length : $end_quote_at;
-					$value_length  = $end_quote_at - $value_start;
-					$attribute_end = $end_quote_at + 1;
-					$at            = $attribute_end;
+					$quote                      = $this->html[ $this->bytes_already_parsed ];
+					$value_start                = $this->bytes_already_parsed + 1;
+					$end_quote_at               = strpos( $this->html, $quote, $value_start );
+					$end_quote_at               = false === $end_quote_at ? $doc_length : $end_quote_at;
+					$value_length               = $end_quote_at - $value_start;
+					$attribute_end              = $end_quote_at + 1;
+					$this->bytes_already_parsed = $attribute_end;
 					break;
 
 				default:
-					$value_start   = $at;
-					$value_length  = strcspn( $html, "> \t\f\r\n", $value_start );
-					$attribute_end = $value_start + $value_length;
-					$at            = $attribute_end;
+					$value_start                = $this->bytes_already_parsed;
+					$value_length               = strcspn( $this->html, "> \t\f\r\n", $value_start );
+					$attribute_end              = $value_start + $value_length;
+					$this->bytes_already_parsed = $attribute_end;
 			}
 		} else {
-			$value_start   = $at;
+			$value_start   = $this->bytes_already_parsed;
 			$value_length  = 0;
 			$attribute_end = $attribute_start + $name_length;
 		}
 
-		$this->bytes_already_parsed = $at;
-
 		if ( $attribute_end >= $doc_length ) {
 			$this->parser_state = self::STATE_INCOMPLETE_INPUT;
 
@@ -2294,41 +2284,37 @@ private function after_tag(): void {
 		 * attributes across the two tags, lexical updates with names
 		 * need to be flushed to raw lexical updates.
 		 */
-		if ( ! empty( $this->classname_updates ) ) {
-			$this->class_name_updates_to_attributes_updates();
+		$this->class_name_updates_to_attributes_updates();
+
+		/*
+		 * Purge updates if there are too many. The actual count isn't
+		 * scientific, but a few values from 100 to a few thousand were
+		 * tests to find a practically-useful limit.
+		 *
+		 * If the update queue grows too big, then the Tag Processor
+		 * will spend more time iterating through them and lose the
+		 * efficiency gains of deferring applying them.
+		 */
+		if ( 1000 < count( $this->lexical_updates ) ) {
+			$this->get_updated_html();
 		}
 
-		if ( ! empty( $this->lexical_updates ) ) {
+		foreach ( $this->lexical_updates as $name => $update ) {
 			/*
-			 * Purge updates if there are too many. The actual count isn't
-			 * scientific, but a few values from 100 to a few thousand were
-			 * tests to find a practically-useful limit.
-			 *
-			 * If the update queue grows too big, then the Tag Processor
-			 * will spend more time iterating through them and lose the
-			 * efficiency gains of deferring applying them.
+			 * Any updates appearing after the cursor should be applied
+			 * before proceeding, otherwise they may be overlooked.
 			 */
-			if ( 1000 < count( $this->lexical_updates ) ) {
+			if ( $update->start >= $this->bytes_already_parsed ) {
 				$this->get_updated_html();
+				break;
 			}
 
-			foreach ( $this->lexical_updates as $name => $update ) {
-				/*
-				 * Any updates appearing after the cursor should be applied
-				 * before proceeding, otherwise they may be overlooked.
-				 */
-				if ( $update->start >= $this->bytes_already_parsed ) {
-					$this->get_updated_html();
-					break;
-				}
-
-				if ( is_int( $name ) ) {
-					continue;
-				}
-
-				$this->lexical_updates[] = $update;
-				unset( $this->lexical_updates[ $name ] );
+			if ( is_int( $name ) ) {
+				continue;
 			}
+
+			$this->lexical_updates[] = $update;
+			unset( $this->lexical_updates[ $name ] );
 		}
 
 		$this->token_starts_at          = null;

From a2b1ed467921e68d801f2d443da498c1770d05af Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 15:03:42 +0100
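Subject: [PATCH 08/43] Add bench.php and trim bootstrap-html-api.php

Add bench.php, which loads bootstrap-html-api.php and tokenizes the
html-standard.html fixture three times with WP_HTML_Tag_Processor.

Remove the commented-out local require and the optional HTML
Templating and CSS Processor includes from bootstrap-html-api.php.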

---
 bench.php              | 14 ++++++++++++++
 bootstrap-html-api.php | 22 ----------------------
 2 files changed, 14 insertions(+), 22 deletions(-)
 create mode 100755 bench.php

diff --git a/bench.php b/bench.php
new file mode 100755
index 0000000000000..6279051835a55
--- /dev/null
+++ b/bench.php
@@ -0,0 +1,14 @@
+#!/usr/bin/env php
+<?php
+require_once __DIR__ . '/bootstrap-html-api.php';
+$html = file_get_contents( dirname( __DIR__ ) . '/bench-html-api/tests/benchmarks/data/html-standard.html' );
+
+$p = new WP_HTML_Tag_Processor( $html );
+while ( $p->next_token() ) {
+}
+$p = new WP_HTML_Tag_Processor( $html );
+while ( $p->next_token() ) {
+}
+$p = new WP_HTML_Tag_Processor( $html );
+while ( $p->next_token() ) {
+}
diff --git a/bootstrap-html-api.php b/bootstrap-html-api.php
index 0af11e44fcdf7..aa9ac94e2689a 100644
--- a/bootstrap-html-api.php
+++ b/bootstrap-html-api.php
@@ -1,7 +1,5 @@
 <?php
 
-// require_once '/Users/jonsurrell/jon/plugin-sirreal-dev/_require_.php';
-
 require_once __DIR__ . '/src/wp-includes/compat.php';
 require_once __DIR__ . '/src/wp-includes/utf8.php';
 require_once __DIR__ . '/src/wp-includes/html-api/class-wp-html-doctype-info.php';
@@ -23,26 +21,6 @@
 require_once __DIR__ . '/src/wp-includes/html-api/class-wp-html-processor-state.php';
 require_once __DIR__ . '/src/wp-includes/html-api/class-wp-html-processor.php';
 
-
-// HTML Templating #60229
-if ( file_exists( __DIR__ . '/src/wp-includes/html-api/class-wp-html-template.php' ) ) {
-	require_once __DIR__ . '/src/wp-includes/html-api/class-wp-html-template.php';
-}
-
-// CSS Processor
-if ( file_exists( __DIR__ . '/src/wp-includes/html-api/class-wp-css-complex-selector-list.php' ) ) {
-	require_once __DIR__ . '/src/wp-includes/html-api/interface-wp-css-html-tag-processor-matcher.php';
-	require_once __DIR__ . '/src/wp-includes/html-api/interface-wp-css-html-processor-matcher.php';
-	require_once __DIR__ . '/src/wp-includes/html-api/class-wp-css-attribute-selector.php';
-	require_once __DIR__ . '/src/wp-includes/html-api/class-wp-css-class-selector.php';
-	require_once __DIR__ . '/src/wp-includes/html-api/class-wp-css-id-selector.php';
-	require_once __DIR__ . '/src/wp-includes/html-api/class-wp-css-type-selector.php';
-	require_once __DIR__ . '/src/wp-includes/html-api/class-wp-css-compound-selector.php';
-	require_once __DIR__ . '/src/wp-includes/html-api/class-wp-css-complex-selector.php';
-	require_once __DIR__ . '/src/wp-includes/html-api/class-wp-css-compound-selector-list.php';
-	require_once __DIR__ . '/src/wp-includes/html-api/class-wp-css-complex-selector-list.php';
-}
-
 if ( ! function_exists( 'esc_attr' ) ) {
 	function esc_attr( $s ) {
 		return str_replace( array( '<', '>', '"' ), array( '&lt;', '&gt;', '&quot;' ), $s );

From b0847f3e9e7dbd46603d4c09347a2c4f4e1abd72 Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 15:09:09 +0100
Subject: [PATCH 09/43] Perf: Remove redundant after_tag() call from
 parse_next_tag()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

parse_next_tag() is only called from base_class_next_token(), which
already calls after_tag() immediately before. The second call does
redundant work resetting state that was just reset.

852ms → 757ms (-11%)
---
 src/wp-includes/html-api/class-wp-html-tag-processor.php | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 8397ecf520fa2..087392363a014 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -1710,8 +1710,6 @@ private function skip_script_data(): bool {
 	 * @return bool Whether a tag was found before the end of the document.
 	 */
 	private function parse_next_tag(): bool {
-		$this->after_tag();
-
 		$html       = $this->html;
 		$doc_length = strlen( $html );
 		$was_at     = $this->bytes_already_parsed;

From 0407016a8b21bd1ca78f738292ea2c92a54b8688 Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 15:11:42 +0100
Subject: [PATCH 10/43] Perf: Skip update processing in after_tag() when no
 modifications queued
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For read-only tokenization, classname_updates and lexical_updates are
always empty. Guard the update processing with a count check to avoid
calling class_name_updates_to_attributes_updates() and iterating an
empty array.

757ms → 723ms (-4.5%)
---
 .../html-api/class-wp-html-tag-processor.php  | 62 ++++++++++---------
 1 file changed, 34 insertions(+), 28 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 087392363a014..e029e0abef57d 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -2277,42 +2277,48 @@ private function skip_whitespace(): void {
 	 */
 	private function after_tag(): void {
 		/*
-		 * There could be lexical updates enqueued for an attribute that
-		 * also exists on the next tag. In order to avoid conflating the
-		 * attributes across the two tags, lexical updates with names
-		 * need to be flushed to raw lexical updates.
+		 * Skip update processing when no modifications are queued.
+		 * This is the common case for read-only tokenization.
 		 */
-		$this->class_name_updates_to_attributes_updates();
-
-		/*
-		 * Purge updates if there are too many. The actual count isn't
-		 * scientific, but a few values from 100 to a few thousand were
-		 * tests to find a practically-useful limit.
-		 *
-		 * If the update queue grows too big, then the Tag Processor
-		 * will spend more time iterating through them and lose the
-		 * efficiency gains of deferring applying them.
-		 */
-		if ( 1000 < count( $this->lexical_updates ) ) {
-			$this->get_updated_html();
-		}
+		if ( count( $this->classname_updates ) > 0 || count( $this->lexical_updates ) > 0 ) {
+			/*
+			 * There could be lexical updates enqueued for an attribute that
+			 * also exists on the next tag. In order to avoid conflating the
+			 * attributes across the two tags, lexical updates with names
+			 * need to be flushed to raw lexical updates.
+			 */
+			$this->class_name_updates_to_attributes_updates();
 
-		foreach ( $this->lexical_updates as $name => $update ) {
 			/*
-			 * Any updates appearing after the cursor should be applied
-			 * before proceeding, otherwise they may be overlooked.
+			 * Purge updates if there are too many. The actual count isn't
+			 * scientific, but a few values from 100 to a few thousand were
+			 * tests to find a practically-useful limit.
+			 *
+			 * If the update queue grows too big, then the Tag Processor
+			 * will spend more time iterating through them and lose the
+			 * efficiency gains of deferring applying them.
 			 */
-			if ( $update->start >= $this->bytes_already_parsed ) {
+			if ( 1000 < count( $this->lexical_updates ) ) {
 				$this->get_updated_html();
-				break;
 			}
 
-			if ( is_int( $name ) ) {
-				continue;
-			}
+			foreach ( $this->lexical_updates as $name => $update ) {
+				/*
+				 * Any updates appearing after the cursor should be applied
+				 * before proceeding, otherwise they may be overlooked.
+				 */
+				if ( $update->start >= $this->bytes_already_parsed ) {
+					$this->get_updated_html();
+					break;
+				}
 
-			$this->lexical_updates[] = $update;
-			unset( $this->lexical_updates[ $name ] );
+				if ( is_int( $name ) ) {
+					continue;
+				}
+
+				$this->lexical_updates[] = $update;
+				unset( $this->lexical_updates[ $name ] );
+			}
 		}
 
 		$this->token_starts_at          = null;

From bb92778d9b73cd25da612bd39d8d16607762f1e2 Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 15:13:45 +0100
Subject: [PATCH 11/43] Perf: Cache $this->html and strlen in
 base_class_next_token()

Avoids repeated property lookups and strlen() calls in the main
tokenization loop.
---
 .../html-api/class-wp-html-tag-processor.php          | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index e029e0abef57d..a4d484888da3c 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -969,7 +969,10 @@ private function base_class_next_token(): bool {
 		 */
 		$this->parser_state = self::STATE_READY;
 
-		if ( $this->bytes_already_parsed >= strlen( $this->html ) ) {
+		$html       = $this->html;
+		$doc_length = strlen( $html );
+
+		if ( $this->bytes_already_parsed >= $doc_length ) {
 			$this->parser_state = self::STATE_COMPLETE;
 			return false;
 		}
@@ -1005,7 +1008,7 @@ private function base_class_next_token(): bool {
 		// Ensure that the tag closes before the end of the document.
 		if (
 			self::STATE_INCOMPLETE_INPUT === $this->parser_state ||
-			$this->bytes_already_parsed >= strlen( $this->html )
+			$this->bytes_already_parsed >= $doc_length
 		) {
 			// Does this appropriately clear state (parsed attributes)?
 			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
@@ -1014,7 +1017,7 @@ private function base_class_next_token(): bool {
 			return false;
 		}
 
-		$tag_ends_at = strpos( $this->html, '>', $this->bytes_already_parsed );
+		$tag_ends_at = strpos( $html, '>', $this->bytes_already_parsed );
 		if ( false === $tag_ends_at ) {
 			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
 			$this->bytes_already_parsed = $was_at;
@@ -1043,7 +1046,7 @@ private function base_class_next_token(): bool {
 		if (
 			$this->is_closing_tag ||
 			'html' !== $this->parsing_namespace ||
-			1 !== strspn( $this->html, 'iIlLnNpPsStTxX', $this->tag_name_starts_at, 1 )
+			1 !== strspn( $html, 'iIlLnNpPsStTxX', $this->tag_name_starts_at, 1 )
 		) {
 			return true;
 		}

From b1d7f0ad1484ef9fd376b1741d4e2b2b9c960ade Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 15:15:37 +0100
Subject: [PATCH 12/43] Perf: Use local $at and $html variables in
 parse_next_attribute()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace repeated $this->bytes_already_parsed and $this->html property
access with local variables. Also inlines skip_whitespace() calls to
avoid method call overhead and use the local $html variable.

728ms → 702ms (-3.6%)
---
 .../html-api/class-wp-html-tag-processor.php  | 81 +++++++++++--------
 1 file changed, 46 insertions(+), 35 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index a4d484888da3c..347d9dd7ae1fd 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -2133,12 +2133,15 @@ private function parse_next_tag(): bool {
 	 * @return bool Whether an attribute was found before the end of the document.
 	 */
 	private function parse_next_attribute(): bool {
-		$doc_length = strlen( $this->html );
+		$html       = $this->html;
+		$doc_length = strlen( $html );
+		$at         = $this->bytes_already_parsed;
 
 		// Skip whitespace and slashes.
-		$this->bytes_already_parsed += strspn( $this->html, " \t\f\r\n/", $this->bytes_already_parsed );
-		if ( $this->bytes_already_parsed >= $doc_length ) {
-			$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+		$at += strspn( $html, " \t\f\r\n/", $at );
+		if ( $at >= $doc_length ) {
+			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
+			$this->bytes_already_parsed = $at;
 
 			return false;
 		}
@@ -2149,71 +2152,79 @@ private function parse_next_attribute(): bool {
 		 *
 		 * @see https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
 		 */
-		$name_length = '=' === $this->html[ $this->bytes_already_parsed ]
-			? 1 + strcspn( $this->html, "=/> \t\f\r\n", $this->bytes_already_parsed + 1 )
-			: strcspn( $this->html, "=/> \t\f\r\n", $this->bytes_already_parsed );
+		$name_length = '=' === $html[ $at ]
+			? 1 + strcspn( $html, "=/> \t\f\r\n", $at + 1 )
+			: strcspn( $html, "=/> \t\f\r\n", $at );
 
 		// No attribute, just tag closer.
-		if ( 0 === $name_length || $this->bytes_already_parsed + $name_length >= $doc_length ) {
+		if ( 0 === $name_length || $at + $name_length >= $doc_length ) {
 			return false;
 		}
 
-		$attribute_start             = $this->bytes_already_parsed;
-		$attribute_name              = substr( $this->html, $attribute_start, $name_length );
-		$this->bytes_already_parsed += $name_length;
-		if ( $this->bytes_already_parsed >= $doc_length ) {
-			$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+		$attribute_start = $at;
+		$attribute_name  = substr( $html, $attribute_start, $name_length );
+		$at             += $name_length;
+		if ( $at >= $doc_length ) {
+			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
+			$this->bytes_already_parsed = $at;
 
 			return false;
 		}
 
-		$this->skip_whitespace();
-		if ( $this->bytes_already_parsed >= $doc_length ) {
-			$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+		// Skip whitespace.
+		$at += strspn( $html, " \t\f\r\n", $at );
+		if ( $at >= $doc_length ) {
+			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
+			$this->bytes_already_parsed = $at;
 
 			return false;
 		}
 
-		$has_value = '=' === $this->html[ $this->bytes_already_parsed ];
+		$has_value = '=' === $html[ $at ];
 		if ( $has_value ) {
-			++$this->bytes_already_parsed;
-			$this->skip_whitespace();
-			if ( $this->bytes_already_parsed >= $doc_length ) {
-				$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+			++$at;
+			// Skip whitespace.
+			$at += strspn( $html, " \t\f\r\n", $at );
+			if ( $at >= $doc_length ) {
+				$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
+				$this->bytes_already_parsed = $at;
 
 				return false;
 			}
 
-			switch ( $this->html[ $this->bytes_already_parsed ] ) {
+			switch ( $html[ $at ] ) {
 				case "'":
 				case '"':
-					$quote                      = $this->html[ $this->bytes_already_parsed ];
-					$value_start                = $this->bytes_already_parsed + 1;
-					$end_quote_at               = strpos( $this->html, $quote, $value_start );
-					$end_quote_at               = false === $end_quote_at ? $doc_length : $end_quote_at;
-					$value_length               = $end_quote_at - $value_start;
-					$attribute_end              = $end_quote_at + 1;
-					$this->bytes_already_parsed = $attribute_end;
+					$quote        = $html[ $at ];
+					$value_start  = $at + 1;
+					$end_quote_at = strpos( $html, $quote, $value_start );
+					$end_quote_at = false === $end_quote_at ? $doc_length : $end_quote_at;
+					$value_length = $end_quote_at - $value_start;
+					$attribute_end = $end_quote_at + 1;
+					$at           = $attribute_end;
 					break;
 
 				default:
-					$value_start                = $this->bytes_already_parsed;
-					$value_length               = strcspn( $this->html, "> \t\f\r\n", $value_start );
-					$attribute_end              = $value_start + $value_length;
-					$this->bytes_already_parsed = $attribute_end;
+					$value_start   = $at;
+					$value_length  = strcspn( $html, "> \t\f\r\n", $value_start );
+					$attribute_end = $value_start + $value_length;
+					$at            = $attribute_end;
 			}
 		} else {
-			$value_start   = $this->bytes_already_parsed;
+			$value_start   = $at;
 			$value_length  = 0;
 			$attribute_end = $attribute_start + $name_length;
 		}
 
 		if ( $attribute_end >= $doc_length ) {
-			$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
+			$this->bytes_already_parsed = $at;
 
 			return false;
 		}
 
+		$this->bytes_already_parsed = $at;
+
 		if ( $this->is_closing_tag ) {
 			return true;
 		}

From bad7d220236650d665f65f6dfc49fe8e3043521e Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 15:20:37 +0100
Subject: [PATCH 13/43] Perf: Defer substr() for attribute name until needed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For closing tags, attribute names are parsed but never stored, so the
substr() call that extracts the name is wasted. Move it after the
closing tag early-return, next to the strtolower() call that uses it.

702ms → 696ms
---
 src/wp-includes/html-api/class-wp-html-tag-processor.php | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 347d9dd7ae1fd..f95eaf16805d0 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -2162,7 +2162,6 @@ private function parse_next_attribute(): bool {
 		}
 
 		$attribute_start = $at;
-		$attribute_name  = substr( $html, $attribute_start, $name_length );
 		$at             += $name_length;
 		if ( $at >= $doc_length ) {
 			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
@@ -2237,6 +2236,7 @@ private function parse_next_attribute(): bool {
 		 *
 		 * @see https://html.spec.whatwg.org/multipage/syntax.html#attributes-2:ascii-case-insensitive
 		 */
+		$attribute_name  = substr( $html, $attribute_start, $name_length );
 		$comparable_name = strtolower( $attribute_name );
 
 		// If an attribute is listed many times, only use the first declaration and ignore the rest.

From 1170fb6a7ea192c43ca9f08c7573acb1160b0c27 Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 16:10:40 +0100
Subject: [PATCH 14/43] Perf: Replace per-attribute function call loop with
 skip_attributes_and_find_closer()

Replaces the parse_next_attribute() while loop + strpos('>') with a
single method call that scans past all attributes and finds the tag-closing
'>' in one pass. Eliminates N function call overheads per tag during
read-only tokenization. Attributes are now parsed and stored lazily by
ensure_attributes_parsed() the first time they are needed.
---
 .../html-api/class-wp-html-tag-processor.php  | 171 ++++++++++++++++--
 1 file changed, 151 insertions(+), 20 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index f95eaf16805d0..ecbd1f7e5b9ad 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -682,6 +682,20 @@ class WP_HTML_Tag_Processor {
 	 */
 	private $is_closing_tag;
 
+	/**
+	 * Byte offset where attribute scanning should start for lazy parsing.
+	 *
+	 * @var int|null
+	 */
+	private $attribute_scan_from = null;
+
+	/**
+	 * Whether attributes have been parsed and stored for the current tag.
+	 *
+	 * @var bool
+	 */
+	private $attributes_parsed = true;
+
 	/**
 	 * Lazily-built index of attributes found within an HTML tag, keyed by the attribute name.
 	 *
@@ -1000,24 +1014,10 @@ private function base_class_next_token(): bool {
 			return true;
 		}
 
-		// Parse all of its attributes.
-		while ( $this->parse_next_attribute() ) {
-			continue;
-		}
-
-		// Ensure that the tag closes before the end of the document.
-		if (
-			self::STATE_INCOMPLETE_INPUT === $this->parser_state ||
-			$this->bytes_already_parsed >= $doc_length
-		) {
-			// Does this appropriately clear state (parsed attributes)?
-			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
-			$this->bytes_already_parsed = $was_at;
-
-			return false;
-		}
-
-		$tag_ends_at = strpos( $html, '>', $this->bytes_already_parsed );
+		// Fast-scan past all attributes and find the tag-closing '>'.
+		$this->attribute_scan_from = $this->bytes_already_parsed;
+		$this->attributes_parsed   = false;
+		$tag_ends_at = $this->skip_attributes_and_find_closer( $html, $doc_length );
 		if ( false === $tag_ends_at ) {
 			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
 			$this->bytes_already_parsed = $was_at;
@@ -1082,6 +1082,7 @@ private function base_class_next_token(): bool {
 		$tag_name_starts_at   = $this->tag_name_starts_at;
 		$tag_name_length      = $this->tag_name_length;
 		$tag_ends_at          = $this->token_starts_at + $this->token_length;
+		$this->ensure_attributes_parsed();
 		$attributes           = $this->attributes;
 		$duplicate_attributes = $this->duplicate_attributes;
 
@@ -2132,7 +2133,7 @@ private function parse_next_tag(): bool {
 	 *
 	 * @return bool Whether an attribute was found before the end of the document.
 	 */
-	private function parse_next_attribute(): bool {
+	private function parse_next_attribute( bool $store = true ): bool {
 		$html       = $this->html;
 		$doc_length = strlen( $html );
 		$at         = $this->bytes_already_parsed;
@@ -2224,7 +2225,7 @@ private function parse_next_attribute(): bool {
 
 		$this->bytes_already_parsed = $at;
 
-		if ( $this->is_closing_tag ) {
+		if ( ! $store || $this->is_closing_tag ) {
 			return true;
 		}
 
@@ -2273,6 +2274,128 @@ private function parse_next_attribute(): bool {
 		return true;
 	}
 
+	/**
+	 * Skips past all attributes and finds the tag-closing '>'.
+	 *
+	 * This replaces the parse_next_attribute(false) loop + strpos('>')
+	 * with a single method call, eliminating per-attribute function call
+	 * overhead during read-only tokenization.
+	 *
+	 * @since 6.9.0
+	 * @ignore
+	 *
+	 * @param string $html       The HTML being parsed.
+	 * @param int    $doc_length Length of the HTML string.
+	 * @return int|false Position of the closing '>' or false if incomplete.
+	 */
+	private function skip_attributes_and_find_closer( string $html, int $doc_length ) {
+		$at = $this->bytes_already_parsed;
+
+		while ( true ) {
+			// Skip whitespace and slashes.
+			$at += strspn( $html, " \t\f\r\n/", $at );
+			if ( $at >= $doc_length ) {
+				$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+				return false;
+			}
+
+			/*
+			 * Treat the equal sign as a part of the attribute
+			 * name if it is the first encountered byte.
+			 */
+			$name_length = '=' === $html[ $at ]
+				? 1 + strcspn( $html, "=/> \t\f\r\n", $at + 1 )
+				: strcspn( $html, "=/> \t\f\r\n", $at );
+
+			// No attribute name means we've reached the tag closer.
+			if ( 0 === $name_length ) {
+				return $at;
+			}
+
+			if ( $at + $name_length >= $doc_length ) {
+				$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+				return false;
+			}
+
+			$at += $name_length;
+			if ( $at >= $doc_length ) {
+				$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+				return false;
+			}
+
+			// Skip whitespace after attribute name.
+			$at += strspn( $html, " \t\f\r\n", $at );
+			if ( $at >= $doc_length ) {
+				$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+				return false;
+			}
+
+			// No value, boolean attribute.
+			if ( '=' !== $html[ $at ] ) {
+				continue;
+			}
+
+			++$at;
+			// Skip whitespace after '='.
+			$at += strspn( $html, " \t\f\r\n", $at );
+			if ( $at >= $doc_length ) {
+				$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+				return false;
+			}
+
+			switch ( $html[ $at ] ) {
+				case "'":
+				case '"':
+					$end_quote_at = strpos( $html, $html[ $at ], $at + 1 );
+					if ( false === $end_quote_at ) {
+						$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+						return false;
+					}
+					$at = $end_quote_at + 1;
+					break;
+
+				default:
+					$at += strcspn( $html, "> \t\f\r\n", $at );
+					break;
+			}
+
+			if ( $at >= $doc_length ) {
+				$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+				return false;
+			}
+		}
+	}
+
+	/**
+	 * Ensures attributes have been parsed and stored for the current tag.
+	 *
+	 * Attributes are lazily parsed on first access to avoid the overhead
+	 * of creating WP_HTML_Attribute_Token objects when they aren't needed.
+	 *
+	 * @since 6.9.0
+	 * @ignore
+	 */
+	private function ensure_attributes_parsed(): void {
+		if ( $this->attributes_parsed ) {
+			return;
+		}
+
+		$this->attributes_parsed = true;
+
+		if ( null === $this->attribute_scan_from || $this->is_closing_tag ) {
+			return;
+		}
+
+		$saved_at                   = $this->bytes_already_parsed;
+		$this->bytes_already_parsed = $this->attribute_scan_from;
+
+		while ( $this->parse_next_attribute() ) {
+			continue;
+		}
+
+		$this->bytes_already_parsed = $saved_at;
+	}
+
 	/**
 	 * Move the internal cursor past any immediate successive whitespace.
 	 *
@@ -2346,6 +2469,8 @@ private function after_tag(): void {
 		$this->comment_type             = null;
 		$this->text_node_classification = self::TEXT_IS_GENERIC;
 		$this->duplicate_attributes     = null;
+		$this->attribute_scan_from      = null;
+		$this->attributes_parsed        = true;
 	}
 
 	/**
@@ -2363,6 +2488,7 @@ private function class_name_updates_to_attributes_updates(): void {
 			return;
 		}
 
+		$this->ensure_attributes_parsed();
 		$existing_class = $this->get_enqueued_attribute_value( 'class' );
 		if ( null === $existing_class || true === $existing_class ) {
 			$existing_class = '';
@@ -2790,6 +2916,7 @@ public function get_attribute( $name ) {
 			return null;
 		}
 
+		$this->ensure_attributes_parsed();
 		$comparable = strtolower( $name );
 
 		/*
@@ -2873,6 +3000,7 @@ public function get_attribute_names_with_prefix( $prefix ): ?array {
 			return null;
 		}
 
+		$this->ensure_attributes_parsed();
 		$comparable = strtolower( $prefix );
 
 		$matches = array();
@@ -4331,6 +4459,7 @@ public function set_attribute( $name, $value ): bool {
 			return false;
 		}
 
+		$this->ensure_attributes_parsed();
 		$name_length = strlen( $name );
 
 		/**
@@ -4482,6 +4611,8 @@ public function remove_attribute( $name ): bool {
 			return false;
 		}
 
+		$this->ensure_attributes_parsed();
+
 		/*
 		 * > There must never be two or more attributes on
 		 * > the same start tag whose names are an ASCII

From 635378f477787030e7dddb4e03d0a9378d4df04b Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 16:13:28 +0100
Subject: [PATCH 15/43] Perf: Minor hot-path optimizations in tag processor

- Use local $html variable instead of $this->html for array access in parse_next_tag()
- Avoid property read for text_length by computing $at - $was_at directly
- Use truthiness check instead of count() > 0 for empty array detection in after_tag()
- Move attributes/duplicate_attributes reset from after_tag() to ensure_attributes_parsed()
  to avoid empty array allocation on every token during read-only tokenization
---
 .../html-api/class-wp-html-tag-processor.php       | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index ecbd1f7e5b9ad..13d7e18290179 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -1745,16 +1745,16 @@ private function parse_next_tag(): bool {
 
 				$this->parser_state         = self::STATE_TEXT_NODE;
 				$this->token_starts_at      = $was_at;
-				$this->token_length         = $at - $was_at;
 				$this->text_starts_at       = $was_at;
-				$this->text_length          = $this->token_length;
+				$this->token_length         = $at - $was_at;
+				$this->text_length          = $at - $was_at;
 				$this->bytes_already_parsed = $at;
 				return true;
 			}
 
 			$this->token_starts_at = $at;
 
-			if ( $at + 1 < $doc_length && '/' === $this->html[ $at + 1 ] ) {
+			if ( $at + 1 < $doc_length && '/' === $html[ $at + 1 ] ) {
 				$this->is_closing_tag = true;
 				++$at;
 			} else {
@@ -2380,7 +2380,9 @@ private function ensure_attributes_parsed(): void {
 			return;
 		}
 
-		$this->attributes_parsed = true;
+		$this->attributes_parsed    = true;
+		$this->attributes           = array();
+		$this->duplicate_attributes = null;
 
 		if ( null === $this->attribute_scan_from || $this->is_closing_tag ) {
 			return;
@@ -2417,7 +2419,7 @@ private function after_tag(): void {
 		 * Skip update processing when no modifications are queued.
 		 * This is the common case for read-only tokenization.
 		 */
-		if ( count( $this->classname_updates ) > 0 || count( $this->lexical_updates ) > 0 ) {
+		if ( $this->classname_updates || $this->lexical_updates ) {
 			/*
 			 * There could be lexical updates enqueued for an attribute that
 			 * also exists on the next tag. In order to avoid conflating the
@@ -2465,10 +2467,8 @@ private function after_tag(): void {
 		$this->text_starts_at           = 0;
 		$this->text_length              = 0;
 		$this->is_closing_tag           = null;
-		$this->attributes               = array();
 		$this->comment_type             = null;
 		$this->text_node_classification = self::TEXT_IS_GENERIC;
-		$this->duplicate_attributes     = null;
 		$this->attribute_scan_from      = null;
 		$this->attributes_parsed        = true;
 	}

From 5b90c801e36924307cf3981fa4f00a54ecaee9c6 Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 16:19:09 +0100
Subject: [PATCH 16/43] Perf: Cache doc_length as instance variable to avoid
 repeated strlen() calls

---
 .../html-api/class-wp-html-tag-processor.php  | 30 +++++++++++++------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 13d7e18290179..043d53ba1a5cc 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -439,6 +439,16 @@ class WP_HTML_Tag_Processor {
 	 */
 	protected $html;
 
+	/**
+	 * Cached byte length of the HTML string.
+	 *
+	 * Updated whenever $this->html is set to avoid repeated strlen() calls.
+	 *
+	 * @since 6.9.0
+	 * @var int
+	 */
+	private $doc_length = 0;
+
 	/**
 	 * The last query passed to next_tag().
 	 *
@@ -856,7 +866,8 @@ public function __construct( $html ) {
 			);
 			$html = '';
 		}
-		$this->html = $html;
+		$this->html       = $html;
+		$this->doc_length = strlen( $html );
 	}
 
 	/**
@@ -984,7 +995,7 @@ private function base_class_next_token(): bool {
 		$this->parser_state = self::STATE_READY;
 
 		$html       = $this->html;
-		$doc_length = strlen( $html );
+		$doc_length = $this->doc_length;
 
 		if ( $this->bytes_already_parsed >= $doc_length ) {
 			$this->parser_state = self::STATE_COMPLETE;
@@ -1416,7 +1427,7 @@ private function skip_rawtext( string $tag_name ): bool {
 	 */
 	private function skip_rcdata( string $tag_name ): bool {
 		$html       = $this->html;
-		$doc_length = strlen( $html );
+		$doc_length = $this->doc_length;
 		$tag_length = strlen( $tag_name );
 
 		$at = $this->bytes_already_parsed;
@@ -1453,7 +1464,7 @@ private function skip_rcdata( string $tag_name ): bool {
 			$at                        += $tag_length;
 			$this->bytes_already_parsed = $at;
 
-			if ( $at >= strlen( $html ) ) {
+			if ( $at >= $doc_length ) {
 				return false;
 			}
 
@@ -1506,7 +1517,7 @@ private function skip_rcdata( string $tag_name ): bool {
 	private function skip_script_data(): bool {
 		$state      = 'unescaped';
 		$html       = $this->html;
-		$doc_length = strlen( $html );
+		$doc_length = $this->doc_length;
 		$at         = $this->bytes_already_parsed;
 
 		while ( false !== $at && $at < $doc_length ) {
@@ -1715,7 +1726,7 @@ private function skip_script_data(): bool {
 	 */
 	private function parse_next_tag(): bool {
 		$html       = $this->html;
-		$doc_length = strlen( $html );
+		$doc_length = $this->doc_length;
 		$was_at     = $this->bytes_already_parsed;
 		$at         = $was_at;
 
@@ -1912,7 +1923,7 @@ private function parse_next_tag(): bool {
 
 				if (
 					'html' !== $this->parsing_namespace &&
-					strlen( $html ) > $at + 8 &&
+					$doc_length > $at + 8 &&
 					'[' === $html[ $at + 2 ] &&
 					'C' === $html[ $at + 3 ] &&
 					'D' === $html[ $at + 4 ] &&
@@ -2135,7 +2146,7 @@ private function parse_next_tag(): bool {
 	 */
 	private function parse_next_attribute( bool $store = true ): bool {
 		$html       = $this->html;
-		$doc_length = strlen( $html );
+		$doc_length = $this->doc_length;
 		$at         = $this->bytes_already_parsed;
 
 		// Skip whitespace and slashes.
@@ -2687,7 +2698,8 @@ private function apply_attributes_updates( int $shift_this_point ): int {
 			$bytes_already_copied = $diff->start + $diff->length;
 		}
 
-		$this->html = $output_buffer . substr( $this->html, $bytes_already_copied );
+		$this->html       = $output_buffer . substr( $this->html, $bytes_already_copied );
+		$this->doc_length = strlen( $this->html );
 
 		/*
 		 * Adjust bookmark locations to account for how the text

From 05f63144a935c998f22f898ee8807b461a18fcef Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 16:21:12 +0100
Subject: [PATCH 17/43] Perf: Add fast path in
 skip_attributes_and_find_closer() for immediate '>'

Tags without attributes (closing tags, simple void tags) have '>' immediately
after the tag name. A direct byte check before entering the attribute-scanning
loop avoids strspn/strcspn overhead for these common cases.
---
 src/wp-includes/html-api/class-wp-html-tag-processor.php | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 043d53ba1a5cc..6027cd8604404 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -2302,6 +2302,11 @@ private function parse_next_attribute( bool $store = true ): bool {
 	private function skip_attributes_and_find_closer( string $html, int $doc_length ) {
 		$at = $this->bytes_already_parsed;
 
+		// Fast path: '>' immediately after tag name (closing tags, void tags).
+		if ( $at < $doc_length && '>' === $html[ $at ] ) {
+			return $at;
+		}
+
 		while ( true ) {
 			// Skip whitespace and slashes.
 			$at += strspn( $html, " \t\f\r\n/", $at );

From 087f0da9410b1fcde294a9231e775e1744972f41 Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 16:22:33 +0100
Subject: [PATCH 18/43] Perf: Replace strspn single-char checks with direct
 byte comparisons in parse_next_tag()

Replace strspn() calls used for single-character validation with direct
character range comparisons. Also simplify tag name length calculation
to use a single strcspn() instead of strspn() + strcspn(), since the
first-character alpha check is now done via direct comparison.
---
 .../html-api/class-wp-html-tag-processor.php         | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 6027cd8604404..7d9577540b7c6 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -1749,7 +1749,11 @@ private function parse_next_tag(): bool {
 				 *
 				 * @see https://html.spec.whatwg.org/#tag-open-state
 				 */
-				if ( 1 !== strspn( $html, '!/?abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', $at + 1, 1 ) ) {
+				$next_byte = $html[ $at + 1 ] ?? '';
+				if (
+					'!' !== $next_byte && '/' !== $next_byte && '?' !== $next_byte &&
+					( $next_byte < 'A' || ( $next_byte > 'Z' && $next_byte < 'a' ) || $next_byte > 'z' )
+				) {
 					++$at;
 					continue;
 				}
@@ -1786,12 +1790,12 @@ private function parse_next_tag(): bool {
 			 * * https://html.spec.whatwg.org/multipage/parsing.html#data-state
 			 * * https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
 			 */
-			$tag_name_prefix_length = strspn( $html, 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', $at + 1 );
-			if ( $tag_name_prefix_length > 0 ) {
+			$first_char = $html[ $at + 1 ] ?? '';
+			if ( ( $first_char >= 'a' && $first_char <= 'z' ) || ( $first_char >= 'A' && $first_char <= 'Z' ) ) {
 				++$at;
 				$this->parser_state         = self::STATE_MATCHED_TAG;
 				$this->tag_name_starts_at   = $at;
-				$this->tag_name_length      = $tag_name_prefix_length + strcspn( $html, " \t\f\r\n/>", $at + $tag_name_prefix_length );
+				$this->tag_name_length      = strcspn( $html, " \t\f\r\n/>", $at );
 				$this->bytes_already_parsed = $at + $this->tag_name_length;
 				return true;
 			}

From 7d1ab37f96312caadb36988a033d75d5f065705f Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 16:24:30 +0100
Subject: [PATCH 19/43] Perf: Inline after_tag() into base_class_next_token()
 and reduce property resets

Inline the single-callsite after_tag() method into base_class_next_token()
to eliminate method call overhead in the hot tokenization loop. Also remove
4 property resets (token_starts_at, token_length, is_closing_tag, comment_type)
that are always overwritten before use or guarded by parser_state checks.
---
 .../html-api/class-wp-html-tag-processor.php  | 36 ++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 7d9577540b7c6..3a07c5cd0381d 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -978,7 +978,41 @@ public function next_token(): bool {
 	 */
 	private function base_class_next_token(): bool {
 		$was_at = $this->bytes_already_parsed;
-		$this->after_tag();
+
+		/*
+		 * Apply attribute updates and clean up the previous tag.
+		 * Inlined from after_tag() to avoid method call overhead
+		 * in the hot tokenization loop.
+		 */
+		if ( $this->classname_updates || $this->lexical_updates ) {
+			$this->class_name_updates_to_attributes_updates();
+
+			if ( 1000 < count( $this->lexical_updates ) ) {
+				$this->get_updated_html();
+			}
+
+			foreach ( $this->lexical_updates as $name => $update ) {
+				if ( $update->start >= $this->bytes_already_parsed ) {
+					$this->get_updated_html();
+					break;
+				}
+
+				if ( is_int( $name ) ) {
+					continue;
+				}
+
+				$this->lexical_updates[] = $update;
+				unset( $this->lexical_updates[ $name ] );
+			}
+		}
+
+		$this->tag_name_starts_at       = null;
+		$this->tag_name_length          = null;
+		$this->text_starts_at           = 0;
+		$this->text_length              = 0;
+		$this->text_node_classification = self::TEXT_IS_GENERIC;
+		$this->attribute_scan_from      = null;
+		$this->attributes_parsed        = true;
 
 		// Don't proceed if there's nothing more to scan.
 		if (

From f05977567656f7306b63bcb28a23d46f6dfa1487 Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 16:27:05 +0100
Subject: [PATCH 20/43] Perf: Inline common fast paths for text nodes and
 regular tags

Inline the most common token paths (text nodes and regular tags) directly
into base_class_next_token(), eliminating the parse_next_tag() function
call for ~95% of tokens. Complex tokens (comments, DOCTYPE, CDATA) still
use the full parse_next_tag() method.

Also integrates the skip_attributes_and_find_closer fast path for tags
with '>' immediately after the tag name, avoiding an additional method
call for closing tags and attribute-less tags.
---
 .../html-api/class-wp-html-tag-processor.php  | 111 +++++++++++++++---
 1 file changed, 97 insertions(+), 14 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 3a07c5cd0381d..ee511276618c4 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -1031,12 +1031,103 @@ private function base_class_next_token(): bool {
 		$html       = $this->html;
 		$doc_length = $this->doc_length;
 
-		if ( $this->bytes_already_parsed >= $doc_length ) {
+		$at = $this->bytes_already_parsed;
+
+		if ( $at >= $doc_length ) {
 			$this->parser_state = self::STATE_COMPLETE;
 			return false;
 		}
 
-		// Find the next tag if it exists.
+		/*
+		 * Fast path: handle the two most common token types inline.
+		 *
+		 *  1. Text nodes: text between tags (strpos finds next '<').
+		 *  2. Regular tags: '<' followed by alpha or '/'+alpha.
+		 *
+		 * Complex tokens (comments, DOCTYPE, CDATA, etc.) fall through
+		 * to the full parse_next_tag() method.
+		 */
+		$at = strpos( $html, '<', $at );
+
+		// No '<' found: the rest of the document is a text node.
+		if ( false === $at ) {
+			$this->parser_state         = self::STATE_TEXT_NODE;
+			$this->token_starts_at      = $was_at;
+			$this->text_starts_at       = $was_at;
+			$this->token_length         = $doc_length - $was_at;
+			$this->text_length          = $doc_length - $was_at;
+			$this->bytes_already_parsed = $doc_length;
+			return true;
+		}
+
+		// Text before the '<': return it as a text node.
+		if ( $at > $was_at ) {
+			$next_byte = $html[ $at + 1 ] ?? '';
+			if (
+				'!' !== $next_byte && '/' !== $next_byte && '?' !== $next_byte &&
+				( $next_byte < 'A' || ( $next_byte > 'Z' && $next_byte < 'a' ) || $next_byte > 'z' )
+			) {
+				/*
+				 * The '<' doesn't start a valid token. Fall through to
+				 * the full parse_next_tag() which handles continuation.
+				 */
+				goto full_parse;
+			}
+
+			$this->parser_state         = self::STATE_TEXT_NODE;
+			$this->token_starts_at      = $was_at;
+			$this->text_starts_at       = $was_at;
+			$this->token_length         = $at - $was_at;
+			$this->text_length          = $at - $was_at;
+			$this->bytes_already_parsed = $at;
+			return true;
+		}
+
+		// At '<': try to match a regular tag.
+		$first_char = $html[ $at + 1 ] ?? '';
+		$is_closer  = '/' === $first_char;
+		if ( $is_closer ) {
+			$first_char = $html[ $at + 2 ] ?? '';
+		}
+
+		if ( ( $first_char >= 'a' && $first_char <= 'z' ) || ( $first_char >= 'A' && $first_char <= 'Z' ) ) {
+			$tag_at = $at + 1 + ( $is_closer ? 1 : 0 );
+
+			$this->token_starts_at    = $at;
+			$this->is_closing_tag     = $is_closer;
+			$this->parser_state       = self::STATE_MATCHED_TAG;
+			$this->tag_name_starts_at = $tag_at;
+			$this->tag_name_length    = strcspn( $html, " \t\f\r\n/>", $tag_at );
+
+			$after_name = $tag_at + $this->tag_name_length;
+
+			// Fast-scan past all attributes and find the tag-closing '>'.
+			$this->attribute_scan_from = $after_name;
+			$this->attributes_parsed   = false;
+
+			// Fast path: '>' immediately after tag name.
+			if ( $after_name < $doc_length && '>' === $html[ $after_name ] ) {
+				$tag_ends_at = $after_name;
+			} else {
+				$this->bytes_already_parsed = $after_name;
+				$tag_ends_at = $this->skip_attributes_and_find_closer( $html, $doc_length );
+				if ( false === $tag_ends_at ) {
+					$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
+					$this->bytes_already_parsed = $was_at;
+					return false;
+				}
+			}
+
+			$this->parser_state         = self::STATE_MATCHED_TAG;
+			$this->bytes_already_parsed = $tag_ends_at + 1;
+			$this->token_length         = $this->bytes_already_parsed - $at;
+
+			goto after_tag_match;
+		}
+
+		// Complex token: fall through to full parse_next_tag().
+		full_parse:
+
 		if ( false === $this->parse_next_tag() ) {
 			if ( self::STATE_INCOMPLETE_INPUT === $this->parser_state ) {
 				$this->bytes_already_parsed = $was_at;
@@ -1045,21 +1136,11 @@ private function base_class_next_token(): bool {
 			return false;
 		}
 
-		/*
-		 * For legacy reasons the rest of this function handles tags and their
-		 * attributes. If the processor has reached the end of the document
-		 * or if it matched any other token then it should return here to avoid
-		 * attempting to process tag-specific syntax.
-		 */
-		if (
-			self::STATE_INCOMPLETE_INPUT !== $this->parser_state &&
-			self::STATE_COMPLETE !== $this->parser_state &&
-			self::STATE_MATCHED_TAG !== $this->parser_state
-		) {
+		if ( self::STATE_MATCHED_TAG !== $this->parser_state ) {
 			return true;
 		}
 
-		// Fast-scan past all attributes and find the tag-closing '>'.
+		// Tag found by parse_next_tag — scan attributes.
 		$this->attribute_scan_from = $this->bytes_already_parsed;
 		$this->attributes_parsed   = false;
 		$tag_ends_at = $this->skip_attributes_and_find_closer( $html, $doc_length );
@@ -1073,6 +1154,8 @@ private function base_class_next_token(): bool {
 		$this->bytes_already_parsed = $tag_ends_at + 1;
 		$this->token_length         = $this->bytes_already_parsed - $this->token_starts_at;
 
+		after_tag_match:
+
 		/*
 		 * Certain tags require additional processing. The first-letter pre-check
 		 * avoids unnecessary string allocation when comparing the tag names.

From 7277e2e5e5018623afc2b922ba8aa5ce534eac11 Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 16:30:20 +0100
Subject: [PATCH 21/43] Perf: Defer property resets to type-specific return
 paths in base_class_next_token()

Instead of resetting all token properties at the top of every iteration,
only reset the properties relevant to each token type at its return point.
This eliminates ~3.4M unnecessary property writes per benchmark run.
---
 .../html-api/class-wp-html-tag-processor.php  | 84 ++++++++++---------
 1 file changed, 45 insertions(+), 39 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index ee511276618c4..24684698c3186 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -1006,14 +1006,6 @@ private function base_class_next_token(): bool {
 			}
 		}
 
-		$this->tag_name_starts_at       = null;
-		$this->tag_name_length          = null;
-		$this->text_starts_at           = 0;
-		$this->text_length              = 0;
-		$this->text_node_classification = self::TEXT_IS_GENERIC;
-		$this->attribute_scan_from      = null;
-		$this->attributes_parsed        = true;
-
 		// Don't proceed if there's nothing more to scan.
 		if (
 			self::STATE_COMPLETE === $this->parser_state ||
@@ -1022,12 +1014,6 @@ private function base_class_next_token(): bool {
 			return false;
 		}
 
-		/*
-		 * The next step in the parsing loop determines the parsing state;
-		 * clear it so that state doesn't linger from the previous step.
-		 */
-		$this->parser_state = self::STATE_READY;
-
 		$html       = $this->html;
 		$doc_length = $this->doc_length;
 
@@ -1051,12 +1037,16 @@ private function base_class_next_token(): bool {
 
 		// No '<' found: the rest of the document is a text node.
 		if ( false === $at ) {
-			$this->parser_state         = self::STATE_TEXT_NODE;
-			$this->token_starts_at      = $was_at;
-			$this->text_starts_at       = $was_at;
-			$this->token_length         = $doc_length - $was_at;
-			$this->text_length          = $doc_length - $was_at;
-			$this->bytes_already_parsed = $doc_length;
+			$this->parser_state             = self::STATE_TEXT_NODE;
+			$this->token_starts_at          = $was_at;
+			$this->text_starts_at           = $was_at;
+			$this->token_length             = $doc_length - $was_at;
+			$this->text_length              = $doc_length - $was_at;
+			$this->tag_name_starts_at       = null;
+			$this->tag_name_length          = null;
+			$this->text_node_classification = self::TEXT_IS_GENERIC;
+			$this->attributes_parsed        = true;
+			$this->bytes_already_parsed     = $doc_length;
 			return true;
 		}
 
@@ -1074,12 +1064,16 @@ private function base_class_next_token(): bool {
 				goto full_parse;
 			}
 
-			$this->parser_state         = self::STATE_TEXT_NODE;
-			$this->token_starts_at      = $was_at;
-			$this->text_starts_at       = $was_at;
-			$this->token_length         = $at - $was_at;
-			$this->text_length          = $at - $was_at;
-			$this->bytes_already_parsed = $at;
+			$this->parser_state             = self::STATE_TEXT_NODE;
+			$this->token_starts_at          = $was_at;
+			$this->text_starts_at           = $was_at;
+			$this->token_length             = $at - $was_at;
+			$this->text_length              = $at - $was_at;
+			$this->tag_name_starts_at       = null;
+			$this->tag_name_length          = null;
+			$this->text_node_classification = self::TEXT_IS_GENERIC;
+			$this->attributes_parsed        = true;
+			$this->bytes_already_parsed     = $at;
 			return true;
 		}
 
@@ -1091,19 +1085,19 @@ private function base_class_next_token(): bool {
 		}
 
 		if ( ( $first_char >= 'a' && $first_char <= 'z' ) || ( $first_char >= 'A' && $first_char <= 'Z' ) ) {
-			$tag_at = $at + 1 + ( $is_closer ? 1 : 0 );
-
-			$this->token_starts_at    = $at;
-			$this->is_closing_tag     = $is_closer;
-			$this->parser_state       = self::STATE_MATCHED_TAG;
-			$this->tag_name_starts_at = $tag_at;
-			$this->tag_name_length    = strcspn( $html, " \t\f\r\n/>", $tag_at );
-
-			$after_name = $tag_at + $this->tag_name_length;
-
-			// Fast-scan past all attributes and find the tag-closing '>'.
-			$this->attribute_scan_from = $after_name;
-			$this->attributes_parsed   = false;
+			$tag_at     = $at + 1 + ( $is_closer ? 1 : 0 );
+			$tag_length = strcspn( $html, " \t\f\r\n/>", $tag_at );
+			$after_name = $tag_at + $tag_length;
+
+			$this->token_starts_at          = $at;
+			$this->is_closing_tag           = $is_closer;
+			$this->tag_name_starts_at       = $tag_at;
+			$this->tag_name_length          = $tag_length;
+			$this->text_starts_at           = 0;
+			$this->text_length              = 0;
+			$this->text_node_classification = self::TEXT_IS_GENERIC;
+			$this->attribute_scan_from      = $after_name;
+			$this->attributes_parsed        = false;
 
 			// Fast path: '>' immediately after tag name.
 			if ( $after_name < $doc_length && '>' === $html[ $after_name ] ) {
@@ -1128,6 +1122,18 @@ private function base_class_next_token(): bool {
 		// Complex token: fall through to full parse_next_tag().
 		full_parse:
 
+		/*
+		 * Reset state for the full parse path.
+		 */
+		$this->parser_state             = self::STATE_READY;
+		$this->tag_name_starts_at       = null;
+		$this->tag_name_length          = null;
+		$this->text_starts_at           = 0;
+		$this->text_length              = 0;
+		$this->text_node_classification = self::TEXT_IS_GENERIC;
+		$this->attribute_scan_from      = null;
+		$this->attributes_parsed        = true;
+
 		if ( false === $this->parse_next_tag() ) {
 			if ( self::STATE_INCOMPLETE_INPUT === $this->parser_state ) {
 				$this->bytes_already_parsed = $was_at;

From 0c9f3065e0d1f9e9da6fecabba480b9ba428f18d Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 16:35:41 +0100
Subject: [PATCH 22/43] Perf: Add tag name length filter before special element
 check

Special elements (SCRIPT, STYLE, TEXTAREA, etc.) have name lengths of
3, 5, 6, 7, or 8 characters. Adding a quick length check before calling
get_tag() avoids substr() + strtoupper() allocations for common tags
such as p, li, span, and link, which pass the first-letter check but
whose name lengths (1, 2, or 4) rule them out as special elements.
---
 src/wp-includes/html-api/class-wp-html-tag-processor.php | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 24684698c3186..cb9735086cd60 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -1185,6 +1185,15 @@ private function base_class_next_token(): bool {
 			return true;
 		}
 
+		/*
+		 * Quick length filter: special elements have name lengths 3, 5, 6, 7, or 8.
+		 * Tags with other lengths can be returned immediately.
+		 */
+		$special_tag_name_length = $this->tag_name_length;
+		if ( $special_tag_name_length < 3 || $special_tag_name_length > 8 || 4 === $special_tag_name_length ) {
+			return true;
+		}
+
 		$tag_name = $this->get_tag();
 
 		/*

From 13a7d3f52c2b2c9f33ae23ccd723ead27204021c Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 16:36:50 +0100
Subject: [PATCH 23/43] Perf: Reorder special element checks to put length
 filter before strspn

Move the tag name length filter before the strspn first-letter check.
This avoids a function call for the many common opening tags whose name
lengths cannot match a special element (a, p, li, span, etc.): they get
past the closing-tag and namespace checks, and previously still paid for
the strspn() call before being returned.
---
 .../html-api/class-wp-html-tag-processor.php          | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index cb9735086cd60..0fe0450b70de0 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -1179,18 +1179,21 @@ private function base_class_next_token(): bool {
 		 */
 		if (
 			$this->is_closing_tag ||
-			'html' !== $this->parsing_namespace ||
-			1 !== strspn( $html, 'iIlLnNpPsStTxX', $this->tag_name_starts_at, 1 )
+			'html' !== $this->parsing_namespace
 		) {
 			return true;
 		}
 
 		/*
 		 * Quick length filter: special elements have name lengths 3, 5, 6, 7, or 8.
-		 * Tags with other lengths can be returned immediately.
+		 * Checking length before the first-letter strspn avoids a function call for
+		 * the many common tags (a, p, li, div, span, etc.) with non-matching lengths.
 		 */
 		$special_tag_name_length = $this->tag_name_length;
-		if ( $special_tag_name_length < 3 || $special_tag_name_length > 8 || 4 === $special_tag_name_length ) {
+		if (
+			$special_tag_name_length < 3 || $special_tag_name_length > 8 || 4 === $special_tag_name_length ||
+			1 !== strspn( $html, 'iIlLnNpPsStTxX', $this->tag_name_starts_at, 1 )
+		) {
 			return true;
 		}
 

From f2c0421d7eb6def947bb98f779d2f1ad2b6e2a64 Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 16:39:42 +0100
Subject: [PATCH 24/43] Perf: Optimize attribute scanner for common
 name="value" pattern

Restructure skip_attributes_and_find_closer() to check for '=' and
quote characters directly after the attribute name, avoiding two
strspn() calls per attribute that almost always return 0. Well-formed
attributes like class="foo" now go through a fast path with only
byte-level comparisons + a single strpos for the closing quote.
---
 .../html-api/class-wp-html-tag-processor.php  | 107 ++++++++++++++----
 1 file changed, 83 insertions(+), 24 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 0fe0450b70de0..378d95ffe89ea 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -2478,42 +2478,101 @@ private function skip_attributes_and_find_closer( string $html, int $doc_length
 				return false;
 			}
 
-			// Skip whitespace after attribute name.
-			$at += strspn( $html, " \t\f\r\n", $at );
-			if ( $at >= $doc_length ) {
-				$this->parser_state = self::STATE_INCOMPLETE_INPUT;
-				return false;
-			}
+			/*
+			 * Fast path for the most common pattern: name="value"
+			 * where '=' immediately follows the name and a quote
+			 * immediately follows '='. Avoids two strspn calls
+			 * that typically return 0.
+			 */
+			if ( '=' === $html[ $at ] ) {
+				++$at;
+				if ( $at >= $doc_length ) {
+					$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+					return false;
+				}
+
+				$quote_char = $html[ $at ];
+				if ( '"' === $quote_char || "'" === $quote_char ) {
+					$end_quote_at = strpos( $html, $quote_char, $at + 1 );
+					if ( false === $end_quote_at ) {
+						$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+						return false;
+					}
+					$at = $end_quote_at + 1;
+					if ( $at >= $doc_length ) {
+						$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+						return false;
+					}
+					continue;
+				}
+
+				// Whitespace after '=': skip and read value.
+				if ( ' ' === $quote_char || "\t" === $quote_char || "\f" === $quote_char || "\r" === $quote_char || "\n" === $quote_char ) {
+					$at += strspn( $html, " \t\f\r\n", $at );
+					if ( $at >= $doc_length ) {
+						$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+						return false;
+					}
+
+					if ( '"' === $html[ $at ] || "'" === $html[ $at ] ) {
+						$end_quote_at = strpos( $html, $html[ $at ], $at + 1 );
+						if ( false === $end_quote_at ) {
+							$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+							return false;
+						}
+						$at = $end_quote_at + 1;
+						if ( $at >= $doc_length ) {
+							$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+							return false;
+						}
+						continue;
+					}
+				}
 
-			// No value, boolean attribute.
-			if ( '=' !== $html[ $at ] ) {
+				// Unquoted value.
+				$at += strcspn( $html, "> \t\f\r\n", $at );
 				continue;
 			}
 
-			++$at;
-			// Skip whitespace after '='.
-			$at += strspn( $html, " \t\f\r\n", $at );
-			if ( $at >= $doc_length ) {
-				$this->parser_state = self::STATE_INCOMPLETE_INPUT;
-				return false;
-			}
+			// No '=' immediately: check for whitespace before '='.
+			if ( ' ' === $html[ $at ] || "\t" === $html[ $at ] || "\f" === $html[ $at ] || "\r" === $html[ $at ] || "\n" === $html[ $at ] ) {
+				$at += strspn( $html, " \t\f\r\n", $at );
+				if ( $at >= $doc_length ) {
+					$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+					return false;
+				}
 
-			switch ( $html[ $at ] ) {
-				case "'":
-				case '"':
-					$end_quote_at = strpos( $html, $html[ $at ], $at + 1 );
-					if ( false === $end_quote_at ) {
+				// After whitespace, check for '=' to distinguish value from next attribute.
+				if ( '=' === $html[ $at ] ) {
+					++$at;
+					$at += strspn( $html, " \t\f\r\n", $at );
+					if ( $at >= $doc_length ) {
 						$this->parser_state = self::STATE_INCOMPLETE_INPUT;
 						return false;
 					}
-					$at = $end_quote_at + 1;
-					break;
 
-				default:
+					if ( '"' === $html[ $at ] || "'" === $html[ $at ] ) {
+						$end_quote_at = strpos( $html, $html[ $at ], $at + 1 );
+						if ( false === $end_quote_at ) {
+							$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+							return false;
+						}
+						$at = $end_quote_at + 1;
+						if ( $at >= $doc_length ) {
+							$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+							return false;
+						}
+						continue;
+					}
+
+					// Unquoted value.
 					$at += strcspn( $html, "> \t\f\r\n", $at );
-					break;
+					continue;
+				}
 			}
 
+			// Boolean attribute (no value).
+
 			if ( $at >= $doc_length ) {
 				$this->parser_state = self::STATE_INCOMPLETE_INPUT;
 				return false;

From ac24e7de4319e5d51ad7a20a38b2640c98a06104 Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 16:43:52 +0100
Subject: [PATCH 25/43] Perf: Inline single-space and '>' checks in attribute
 scanner loop

Replace the strspn() call at the top of the attribute scanning loop
with direct byte comparisons for the two most common cases: a single
space separator between attributes and the '>' tag closer. strspn() is
now only called when the next byte is one of the less common separators
(tab, carriage return, newline, form feed, or slash).
---
 .../html-api/class-wp-html-tag-processor.php  | 36 +++++++++++++------
 1 file changed, 25 insertions(+), 11 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 378d95ffe89ea..8a38fd89807ec 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -2447,25 +2447,39 @@ private function skip_attributes_and_find_closer( string $html, int $doc_length
 		}
 
 		while ( true ) {
-			// Skip whitespace and slashes.
-			$at += strspn( $html, " \t\f\r\n/", $at );
-			if ( $at >= $doc_length ) {
-				$this->parser_state = self::STATE_INCOMPLETE_INPUT;
-				return false;
+			/*
+			 * Skip whitespace and slashes between attributes.
+			 * Fast path: single space (most common) or '>' (tag closer).
+			 */
+			$c = $html[ $at ] ?? '';
+			if ( '>' === $c ) {
+				return $at;
+			}
+			if ( ' ' === $c ) {
+				++$at;
+				if ( $at >= $doc_length ) {
+					$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+					return false;
+				}
+			} elseif ( "\t" === $c || "\r" === $c || "\n" === $c || "\f" === $c || '/' === $c ) {
+				$at += strspn( $html, " \t\f\r\n/", $at );
+				if ( $at >= $doc_length ) {
+					$this->parser_state = self::STATE_INCOMPLETE_INPUT;
+					return false;
+				}
 			}
 
 			/*
 			 * Treat the equal sign as a part of the attribute
 			 * name if it is the first encountered byte.
 			 */
-			$name_length = '=' === $html[ $at ]
-				? 1 + strcspn( $html, "=/> \t\f\r\n", $at + 1 )
-				: strcspn( $html, "=/> \t\f\r\n", $at );
-
-			// No attribute name means we've reached the tag closer.
-			if ( 0 === $name_length ) {
+			$c = $html[ $at ];
+			if ( '>' === $c ) {
 				return $at;
 			}
+			$name_length = '=' === $c
+				? 1 + strcspn( $html, "=/> \t\f\r\n", $at + 1 )
+				: strcspn( $html, "=/> \t\f\r\n", $at );
 
 			if ( $at + $name_length >= $doc_length ) {
 				$this->parser_state = self::STATE_INCOMPLETE_INPUT;

From 387be39f4800f262d3b4ed8e1f609e1d9204809f Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 16:45:45 +0100
Subject: [PATCH 26/43] Perf: Remove redundant state check and property write
 from hot path

Remove the STATE_COMPLETE check from base_class_next_token() since the
$at >= $doc_length bounds check already handles this case. Also remove
the text_node_classification property write from the tag fast path since
this value is never read for tag tokens.
---
 .../html-api/class-wp-html-tag-processor.php  | 22 ++++++++-----------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 8a38fd89807ec..811de3a1636d5 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -1007,10 +1007,7 @@ private function base_class_next_token(): bool {
 		}
 
 		// Don't proceed if there's nothing more to scan.
-		if (
-			self::STATE_COMPLETE === $this->parser_state ||
-			self::STATE_INCOMPLETE_INPUT === $this->parser_state
-		) {
+		if ( self::STATE_INCOMPLETE_INPUT === $this->parser_state ) {
 			return false;
 		}
 
@@ -1089,15 +1086,14 @@ private function base_class_next_token(): bool {
 			$tag_length = strcspn( $html, " \t\f\r\n/>", $tag_at );
 			$after_name = $tag_at + $tag_length;
 
-			$this->token_starts_at          = $at;
-			$this->is_closing_tag           = $is_closer;
-			$this->tag_name_starts_at       = $tag_at;
-			$this->tag_name_length          = $tag_length;
-			$this->text_starts_at           = 0;
-			$this->text_length              = 0;
-			$this->text_node_classification = self::TEXT_IS_GENERIC;
-			$this->attribute_scan_from      = $after_name;
-			$this->attributes_parsed        = false;
+			$this->token_starts_at     = $at;
+			$this->is_closing_tag      = $is_closer;
+			$this->tag_name_starts_at  = $tag_at;
+			$this->tag_name_length     = $tag_length;
+			$this->text_starts_at      = 0;
+			$this->text_length         = 0;
+			$this->attribute_scan_from = $after_name;
+			$this->attributes_parsed   = false;
 
 			// Fast path: '>' immediately after tag name.
 			if ( $after_name < $doc_length && '>' === $html[ $after_name ] ) {

From 901a97c4c7862af57a05c2918706d510acbf5824 Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 16:48:48 +0100
Subject: [PATCH 27/43] Perf: Use null text_starts_at for tags instead of
 setting two properties

Set text_starts_at to null instead of 0 for tag tokens in the fast path,
and remove the text_length = 0 write. get_modifiable_text() already
returns '' when text_starts_at is null, making the text_length write
redundant. Saves one property write per tag token.
---
 src/wp-includes/html-api/class-wp-html-tag-processor.php | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 811de3a1636d5..6212c6d42cdb4 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -1090,8 +1090,7 @@ private function base_class_next_token(): bool {
 			$this->is_closing_tag      = $is_closer;
 			$this->tag_name_starts_at  = $tag_at;
 			$this->tag_name_length     = $tag_length;
-			$this->text_starts_at      = 0;
-			$this->text_length         = 0;
+			$this->text_starts_at      = null;
 			$this->attribute_scan_from = $after_name;
 			$this->attributes_parsed   = false;
 

From 8843ddbcca31d95202210b349d129a4011b1c38e Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 16:51:22 +0100
Subject: [PATCH 28/43] Perf: Avoid redundant bytes_already_parsed property
 read

Cache bytes_already_parsed in a local variable at function entry and
only re-read the property when lexical updates have been applied, as
those may adjust the position. Saves one property read per token in
the common read-only tokenization case.
---
 src/wp-includes/html-api/class-wp-html-tag-processor.php | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 6212c6d42cdb4..e2e60ed5a6bba 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -978,6 +978,7 @@ public function next_token(): bool {
 	 */
 	private function base_class_next_token(): bool {
 		$was_at = $this->bytes_already_parsed;
+		$at     = $was_at;
 
 		/*
 		 * Apply attribute updates and clean up the previous tag.
@@ -1004,6 +1005,8 @@ private function base_class_next_token(): bool {
 				$this->lexical_updates[] = $update;
 				unset( $this->lexical_updates[ $name ] );
 			}
+
+			$at = $this->bytes_already_parsed;
 		}
 
 		// Don't proceed if there's nothing more to scan.
@@ -1014,8 +1017,6 @@ private function base_class_next_token(): bool {
 		$html       = $this->html;
 		$doc_length = $this->doc_length;
 
-		$at = $this->bytes_already_parsed;
-
 		if ( $at >= $doc_length ) {
 			$this->parser_state = self::STATE_COMPLETE;
 			return false;

From 75dd6a3e27f1fbc44663e3f0ac2ef93a10f2aeff Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 17:05:58 +0100
Subject: [PATCH 29/43] Perf: Remove attribute_scan_from property, compute on
 demand

Eliminates the attribute_scan_from property entirely. The scan
position is now computed as tag_name_starts_at + tag_name_length
in ensure_attributes_parsed() on demand, avoiding ~646K property
writes per benchmark iteration.
---
 .../html-api/class-wp-html-tag-processor.php      | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index e2e60ed5a6bba..1ef8d35a75ba1 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -692,13 +692,6 @@ class WP_HTML_Tag_Processor {
 	 */
 	private $is_closing_tag;
 
-	/**
-	 * Byte offset where attribute scanning should start for lazy parsing.
-	 *
-	 * @var int|null
-	 */
-	private $attribute_scan_from = null;
-
 	/**
 	 * Whether attributes have been parsed and stored for the current tag.
 	 *
@@ -1092,7 +1085,6 @@ private function base_class_next_token(): bool {
 			$this->tag_name_starts_at  = $tag_at;
 			$this->tag_name_length     = $tag_length;
 			$this->text_starts_at      = null;
-			$this->attribute_scan_from = $after_name;
 			$this->attributes_parsed   = false;
 
 			// Fast path: '>' immediately after tag name.
@@ -1127,7 +1119,6 @@ private function base_class_next_token(): bool {
 		$this->text_starts_at           = 0;
 		$this->text_length              = 0;
 		$this->text_node_classification = self::TEXT_IS_GENERIC;
-		$this->attribute_scan_from      = null;
 		$this->attributes_parsed        = true;
 
 		if ( false === $this->parse_next_tag() ) {
@@ -1143,7 +1134,6 @@ private function base_class_next_token(): bool {
 		}
 
 		// Tag found by parse_next_tag — scan attributes.
-		$this->attribute_scan_from = $this->bytes_already_parsed;
 		$this->attributes_parsed   = false;
 		$tag_ends_at = $this->skip_attributes_and_find_closer( $html, $doc_length );
 		if ( false === $tag_ends_at ) {
@@ -2608,12 +2598,12 @@ private function ensure_attributes_parsed(): void {
 		$this->attributes           = array();
 		$this->duplicate_attributes = null;
 
-		if ( null === $this->attribute_scan_from || $this->is_closing_tag ) {
+		if ( null === $this->tag_name_starts_at || $this->is_closing_tag ) {
 			return;
 		}
 
 		$saved_at                   = $this->bytes_already_parsed;
-		$this->bytes_already_parsed = $this->attribute_scan_from;
+		$this->bytes_already_parsed = $this->tag_name_starts_at + $this->tag_name_length;
 
 		while ( $this->parse_next_attribute() ) {
 			continue;
@@ -2693,7 +2683,6 @@ private function after_tag(): void {
 		$this->is_closing_tag           = null;
 		$this->comment_type             = null;
 		$this->text_node_classification = self::TEXT_IS_GENERIC;
-		$this->attribute_scan_from      = null;
 		$this->attributes_parsed        = true;
 	}
 

From 8e8dc9a6bcb726f6d6e0c506f9e0d941fdbb8527 Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 17:07:57 +0100
Subject: [PATCH 30/43] Perf: Remove unnecessary attributes_parsed write from
 text nodes

All callers of ensure_attributes_parsed() guard with a
STATE_MATCHED_TAG check, so the attributes_parsed flag is never
read for text nodes. Removing this write saves ~378K property
writes per benchmark iteration.
---
 src/wp-includes/html-api/class-wp-html-tag-processor.php | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 1ef8d35a75ba1..841fb13400c63 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -1036,7 +1036,6 @@ private function base_class_next_token(): bool {
 			$this->tag_name_starts_at       = null;
 			$this->tag_name_length          = null;
 			$this->text_node_classification = self::TEXT_IS_GENERIC;
-			$this->attributes_parsed        = true;
 			$this->bytes_already_parsed     = $doc_length;
 			return true;
 		}
@@ -1063,7 +1062,6 @@ private function base_class_next_token(): bool {
 			$this->tag_name_starts_at       = null;
 			$this->tag_name_length          = null;
 			$this->text_node_classification = self::TEXT_IS_GENERIC;
-			$this->attributes_parsed        = true;
 			$this->bytes_already_parsed     = $at;
 			return true;
 		}

From 3e77efc4729350d27cdbcdfc9bdde322cbfe8006 Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 17:10:49 +0100
Subject: [PATCH 31/43] Perf: Skip tag_name null writes for text nodes,
 restructure get_tag()

Text nodes don't need to null out tag_name_starts_at and
tag_name_length. Instead, restructure get_tag() to check
parser_state first, which correctly returns null for non-tag
tokens without requiring the null sentinel. Saves ~756K property
writes per benchmark iteration.
---
 .../html-api/class-wp-html-tag-processor.php     | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 841fb13400c63..0185d15951b98 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -1033,8 +1033,6 @@ private function base_class_next_token(): bool {
 			$this->text_starts_at           = $was_at;
 			$this->token_length             = $doc_length - $was_at;
 			$this->text_length              = $doc_length - $was_at;
-			$this->tag_name_starts_at       = null;
-			$this->tag_name_length          = null;
 			$this->text_node_classification = self::TEXT_IS_GENERIC;
 			$this->bytes_already_parsed     = $doc_length;
 			return true;
@@ -1059,8 +1057,6 @@ private function base_class_next_token(): bool {
 			$this->text_starts_at           = $was_at;
 			$this->token_length             = $at - $was_at;
 			$this->text_length              = $at - $was_at;
-			$this->tag_name_starts_at       = null;
-			$this->tag_name_length          = null;
 			$this->text_node_classification = self::TEXT_IS_GENERIC;
 			$this->bytes_already_parsed     = $at;
 			return true;
@@ -3252,21 +3248,19 @@ public function get_namespace(): string {
 	 * @return string|null Name of currently matched tag in input HTML, or `null` if none found.
 	 */
 	public function get_tag(): ?string {
-		if ( null === $this->tag_name_starts_at ) {
-			return null;
+		if ( self::STATE_MATCHED_TAG === $this->parser_state ) {
+			return strtoupper( substr( $this->html, $this->tag_name_starts_at, $this->tag_name_length ) );
 		}
 
-		$tag_name = substr( $this->html, $this->tag_name_starts_at, $this->tag_name_length );
-
-		if ( self::STATE_MATCHED_TAG === $this->parser_state ) {
-			return strtoupper( $tag_name );
+		if ( null === $this->tag_name_starts_at ) {
+			return null;
 		}
 
 		if (
 			self::STATE_COMMENT === $this->parser_state &&
 			self::COMMENT_AS_PI_NODE_LOOKALIKE === $this->get_comment_type()
 		) {
-			return $tag_name;
+			return substr( $this->html, $this->tag_name_starts_at, $this->tag_name_length );
 		}
 
 		return null;

From 6a3ac015787a7c0bd47e3871674604c69697b594 Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 17:14:08 +0100
Subject: [PATCH 32/43] Perf: Short-circuit closing tags in fast path, avoid
 after_tag_match

Closing tags never need special element processing, so return
immediately from the fast path instead of goto after_tag_match.
Avoids property reads for is_closing_tag and parsing_namespace
checks for ~300K+ closing tags. Also avoid re-reading
bytes_already_parsed for token_length computation.
---
 src/wp-includes/html-api/class-wp-html-tag-processor.php | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 0185d15951b98..3eaff090b6552 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -1096,7 +1096,11 @@ private function base_class_next_token(): bool {
 
 			$this->parser_state         = self::STATE_MATCHED_TAG;
 			$this->bytes_already_parsed = $tag_ends_at + 1;
-			$this->token_length         = $this->bytes_already_parsed - $at;
+			$this->token_length         = $tag_ends_at + 1 - $at;
+
+			if ( $is_closer ) {
+				return true;
+			}
 
 			goto after_tag_match;
 		}

From 8a00e32e0267e4f3f28740a99a6cd07b0c85afba Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 17:15:18 +0100
Subject: [PATCH 33/43] Perf: Move closer check before after_tag_match for
 full_parse path

Both fast path and full_parse path now return early for closing
tags before reaching after_tag_match, which now only handles
opening tags. Eliminates is_closing_tag property read from the
shared after_tag_match section.
---
 src/wp-includes/html-api/class-wp-html-tag-processor.php | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 3eaff090b6552..fb4afc51808b5 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -1144,6 +1144,10 @@ private function base_class_next_token(): bool {
 		$this->bytes_already_parsed = $tag_ends_at + 1;
 		$this->token_length         = $this->bytes_already_parsed - $this->token_starts_at;
 
+		if ( $this->is_closing_tag ) {
+			return true;
+		}
+
 		after_tag_match:
 
 		/*
@@ -1161,10 +1165,7 @@ private function base_class_next_token(): bool {
 		 *  - TITLE
 		 *  - XMP (deprecated)
 		 */
-		if (
-			$this->is_closing_tag ||
-			'html' !== $this->parsing_namespace
-		) {
+		if ( 'html' !== $this->parsing_namespace ) {
 			return true;
 		}
 

From 0aa5ce2f6977b7c1ec638145b796eddbc63b55ae Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 17:22:07 +0100
Subject: [PATCH 34/43] Perf: Skip strpos when already at '<' for tag tokens
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Check for '<' at current position before calling strpos(). When
tokens alternate text→tag, the tag iteration starts at '<' and
can skip strpos entirely. Since ~63% of tokens are tags, this
eliminates ~646K strpos calls per benchmark iteration.
---
 .../html-api/class-wp-html-tag-processor.php  | 32 +++++++++----------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index fb4afc51808b5..d47187254e0dd 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -1018,28 +1018,28 @@ private function base_class_next_token(): bool {
 		/*
 		 * Fast path: handle the two most common token types inline.
 		 *
-		 *  1. Text nodes: text between tags (strpos finds next '<').
-		 *  2. Regular tags: '<' followed by alpha or '/'+alpha.
+		 *  1. At '<': try to match a regular tag directly (skip strpos).
+		 *  2. Text nodes: text between tags (strpos finds next '<').
 		 *
 		 * Complex tokens (comments, DOCTYPE, CDATA, etc.) fall through
 		 * to the full parse_next_tag() method.
 		 */
-		$at = strpos( $html, '<', $at );
+		if ( '<' !== $html[ $at ] ) {
+			$at = strpos( $html, '<', $at );
 
-		// No '<' found: the rest of the document is a text node.
-		if ( false === $at ) {
-			$this->parser_state             = self::STATE_TEXT_NODE;
-			$this->token_starts_at          = $was_at;
-			$this->text_starts_at           = $was_at;
-			$this->token_length             = $doc_length - $was_at;
-			$this->text_length              = $doc_length - $was_at;
-			$this->text_node_classification = self::TEXT_IS_GENERIC;
-			$this->bytes_already_parsed     = $doc_length;
-			return true;
-		}
+			// No '<' found: the rest of the document is a text node.
+			if ( false === $at ) {
+				$this->parser_state             = self::STATE_TEXT_NODE;
+				$this->token_starts_at          = $was_at;
+				$this->text_starts_at           = $was_at;
+				$this->token_length             = $doc_length - $was_at;
+				$this->text_length              = $doc_length - $was_at;
+				$this->text_node_classification = self::TEXT_IS_GENERIC;
+				$this->bytes_already_parsed     = $doc_length;
+				return true;
+			}
 
-		// Text before the '<': return it as a text node.
-		if ( $at > $was_at ) {
+			// Validate the '<' starts a valid token before returning text.
 			$next_byte = $html[ $at + 1 ] ?? '';
 			if (
 				'!' !== $next_byte && '/' !== $next_byte && '?' !== $next_byte &&

From 073a6ab2c86f65a1f34fc646b7eca529428ea205 Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 17:29:49 +0100
Subject: [PATCH 35/43] Perf: Remove text_starts_at null write for tags, use
 bounds check

Instead of setting text_starts_at = null for every tag to prevent
stale text in get_modifiable_text(), add a bounds check that
detects stale text_starts_at (from previous text nodes) by
comparing against token_starts_at. Saves ~646K property writes.
---
 src/wp-includes/html-api/class-wp-html-tag-processor.php | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index d47187254e0dd..f31d6c7488d91 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -1078,7 +1078,6 @@ private function base_class_next_token(): bool {
 			$this->is_closing_tag      = $is_closer;
 			$this->tag_name_starts_at  = $tag_at;
 			$this->tag_name_length     = $tag_length;
-			$this->text_starts_at      = null;
 			$this->attributes_parsed   = false;
 
 			// Fast path: '>' immediately after tag name.
@@ -4000,7 +3999,7 @@ public function subdivide_text_appropriately(): bool {
 	public function get_modifiable_text(): string {
 		$has_enqueued_update = isset( $this->lexical_updates['modifiable text'] );
 
-		if ( ! $has_enqueued_update && ( null === $this->text_starts_at || 0 === $this->text_length ) ) {
+		if ( ! $has_enqueued_update && ( null === $this->text_starts_at || 0 === $this->text_length || $this->text_starts_at < $this->token_starts_at ) ) {
 			return '';
 		}
 

From ae0586c2039232b95b94b9e6833e21e5a6a01a7d Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 17:30:22 +0100
Subject: [PATCH 36/43] Update autoresearch.md with optimizations 16-22

Current: ~330ms (52.8% faster than 699ms baseline)
---
 autoresearch.md | 73 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)
 create mode 100644 autoresearch.md

diff --git a/autoresearch.md b/autoresearch.md
new file mode 100644
index 0000000000000..b18bd49470442
--- /dev/null
+++ b/autoresearch.md
@@ -0,0 +1,73 @@
+# Autoresearch: HTML Tag Processor Performance
+
+## Objective
+Optimize `WP_HTML_Tag_Processor::next_token()` tokenization throughput on html-standard.html (a large real-world HTML document). The benchmark iterates all tokens with no modifications — purely read-only tokenization speed.
+
+## Metrics
+- **Primary**: mean execution time (ms, lower is better) via `hyperfine`
+- **Secondary**: peak memory (bytes, lower is better) via `/usr/bin/time -l`
+
+## How to Run
+`./autoresearch.sh` — runs hyperfine, outputs `METRIC mean_ms=number` lines.
+
+## Files in Scope
+- `src/wp-includes/html-api/class-wp-html-tag-processor.php` — main parser, all hot path methods
+- `src/wp-includes/html-api/class-wp-html-attribute-token.php` — attribute token object (6 props, allocated per attr)
+- `src/wp-includes/html-api/class-wp-html-span.php` — span object (2 props, allocated on dup attrs)
+- `src/wp-includes/html-api/class-wp-html-text-replacement.php` — text replacement (3 props, not in hot path for read-only)
+
+## Off Limits
+- Test files
+- `bench.php` and `bootstrap-html-api.php`
+- Any file outside `src/wp-includes/html-api/`
+
+## Constraints
+- PHPUnit tests must pass: `./vendor/bin/phpunit -c tests/phpunit/tests/html-api/phpunit.xml --stop-on-error --stop-on-failure --stop-on-warning --stop-on-defect`
+- No new dependencies
+- stddev and outliers from hyperfine must remain acceptable
+- Changes must preserve all existing behavior
+
+## What's Been Tried
+
+### Baseline: ~699ms
+
+### Wins (cumulative, all committed)
+1. **Replace per-attribute function call loop with skip_attributes_and_find_closer()** — eliminates parse_next_attribute(false) calls. Single method scans for `>` handling quoted values.
+2. **Inline after_tag() into base_class_next_token()** — removes method call overhead per token.
+3. **Inline fast paths for text nodes and regular tags** — handles the two most common token types (text ~378K, tags ~646K) directly in base_class_next_token, falling through to full parse_next_tag() only for complex tokens.
+4. **Direct byte comparisons for single-char strspn** — replace strspn for single-character checks with direct `===` comparisons.
+5. **Cache doc_length as instance variable** — avoid strlen() per token.
+6. **Fast path for '>' immediately after tag name** — skip attribute scanning for tags like `</div>`, `<br>`.
+7. **Defer property resets to type-specific return paths** — text nodes only reset tag-related properties, tags only reset text-related properties.
+8. **Tag name length filter before special element check** — special elements have lengths 3,5,6,7,8. Tags of other lengths return immediately without calling get_tag().
+9. **Reorder checks: length before strspn** — many common tags eliminated by cheap integer comparison before the strspn function call.
+10. **Optimize attribute scanner for common name="value" pattern** — check for `=` and quote char directly after attribute name, avoiding two strspn() calls that typically return 0.
+11. **Inline single-space and '>' checks in attribute scanner loop** — replace strspn for whitespace between attributes with direct byte comparisons for single-space (most common) and '>' (tag closer).
+12. **Remove redundant STATE_COMPLETE check** — $at >= $doc_length bounds check handles this case.
+13. **Remove text_node_classification write from tag fast path** — never read for tag tokens.
+14. **Use null text_starts_at for tags** — allows removing text_length=0 write. get_modifiable_text() returns '' on null text_starts_at.
+15. **Avoid redundant bytes_already_parsed property read** — use local $was_at for $at when no lexical updates.
+
+16. **Remove attribute_scan_from property** — compute scan position as tag_name_starts_at + tag_name_length on demand in ensure_attributes_parsed(). Eliminates property and 3 writes.
+17. **Remove attributes_parsed write from text nodes** — all callers of ensure_attributes_parsed() guard with STATE_MATCHED_TAG check, so the flag is never read for non-tag tokens.
+18. **Short-circuit closing tags before after_tag_match** — closing tags never need special element processing. Return early using local $is_closer instead of reading property through the shared label.
+19. **Move closer check out of after_tag_match** — both fast path and full_parse path return early for closers. after_tag_match now only handles openers, eliminating is_closing_tag read.
+20. **Skip strpos when at '<'** — check for '<' at current position before calling strpos(). Tags (~63% of tokens) start at '<' and skip the function call entirely.
+21. **Remove text_starts_at null write for tags** — use bounds check (text_starts_at < token_starts_at) in get_modifiable_text() to detect stale text instead of proactively nulling.
+22. **Restructure get_tag() for state-based dispatch** — check STATE_MATCHED_TAG first instead of null check on tag_name_starts_at. Allows skipping tag_name null writes for text nodes (~756K writes eliminated).
+
+### Current: ~330ms (52.8% faster)
+
+### Dead Ends
+- **First-letter bitwise OR + 7 comparisons** — replacing strspn('iIlLnNpPsStTxX',...) was WORSE (605→655ms regression). PHP bitwise string OR creates an allocation; 7 comparisons are slower than one C-level strspn.
+- **substr_compare for special element names** — replacing get_tag()+switch with substr_compare+switch-on-length showed no measurable improvement. The special element check is already rare (filtered by length + first letter). Added code complexity for zero gain.
+- **Simplified closer detection** — removing ternary `$is_closer ? 1 : 0` by computing $tag_at incrementally. Neutral result.
+- **Local vars for after_tag_match** — passing tag_length/tag_at as locals through the goto label. Neutral — property reads are hot in PHP's cache.
+- **Pass $at parameter to skip_attributes_and_find_closer** — extra function parameter overhead cancels the property write/read savings.
+
+### Architecture Notes
+- **Token distribution**: ~646K tags, ~378K text nodes, ~247K attributes across ~1M tokens in html-standard.html
+- **Text-tag alternation**: Most tokens alternate text→tag→text→tag. The strpos skip optimization exploits this — tags start at '<' so no search is needed.
+- **PHP overhead dominates**: At 330ms / 1M tokens = 330ns/token. Property reads (~20-30ns each), property writes (~20-30ns), method dispatch (~50-100ns) are the main costs.
+- **next_token()→base_class_next_token() dispatch**: ~1M extra method calls, but cannot be eliminated because get_updated_html() needs the base implementation.
+- **Remaining writes per token**: text nodes ~7 writes, tags ~8 writes. Total ~8M writes per benchmark run at ~20ns each = ~160ms (48% of total).

From 52190592707c998707cd08c09cfdda755ec0489e Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 17:37:36 +0100
Subject: [PATCH 37/43] Perf: Replace attributes_parsed boolean with
 version-based staleness check

Use attributes_parsed_at integer compared against token_starts_at to detect
stale attribute data, eliminating ~646K attributes_parsed=false writes per
parse iteration. The version check in ensure_attributes_parsed() automatically
invalidates when a new token is parsed.
---
 .../html-api/class-wp-html-tag-processor.php  | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index f31d6c7488d91..0db25b87e6e15 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -693,11 +693,14 @@ class WP_HTML_Tag_Processor {
 	private $is_closing_tag;
 
 	/**
-	 * Whether attributes have been parsed and stored for the current tag.
+	 * The token_starts_at value when attributes were last parsed.
 	 *
-	 * @var bool
+	 * Used to detect whether cached attributes are stale. When this
+	 * doesn't match token_starts_at, attributes need re-parsing.
+	 *
+	 * @var int
 	 */
-	private $attributes_parsed = true;
+	private $attributes_parsed_at = -1;
 
 	/**
 	 * Lazily-built index of attributes found within an HTML tag, keyed by the attribute name.
@@ -1078,7 +1081,6 @@ private function base_class_next_token(): bool {
 			$this->is_closing_tag      = $is_closer;
 			$this->tag_name_starts_at  = $tag_at;
 			$this->tag_name_length     = $tag_length;
-			$this->attributes_parsed   = false;
 
 			// Fast path: '>' immediately after tag name.
 			if ( $after_name < $doc_length && '>' === $html[ $after_name ] ) {
@@ -1116,7 +1118,6 @@ private function base_class_next_token(): bool {
 		$this->text_starts_at           = 0;
 		$this->text_length              = 0;
 		$this->text_node_classification = self::TEXT_IS_GENERIC;
-		$this->attributes_parsed        = true;
 
 		if ( false === $this->parse_next_tag() ) {
 			if ( self::STATE_INCOMPLETE_INPUT === $this->parser_state ) {
@@ -1131,7 +1132,6 @@ private function base_class_next_token(): bool {
 		}
 
 		// Tag found by parse_next_tag — scan attributes.
-		$this->attributes_parsed   = false;
 		$tag_ends_at = $this->skip_attributes_and_find_closer( $html, $doc_length );
 		if ( false === $tag_ends_at ) {
 			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
@@ -2588,11 +2588,11 @@ private function skip_attributes_and_find_closer( string $html, int $doc_length
 	 * @ignore
 	 */
 	private function ensure_attributes_parsed(): void {
-		if ( $this->attributes_parsed ) {
+		if ( $this->attributes_parsed_at === $this->token_starts_at ) {
 			return;
 		}
 
-		$this->attributes_parsed    = true;
+		$this->attributes_parsed_at = $this->token_starts_at;
 		$this->attributes           = array();
 		$this->duplicate_attributes = null;
 
@@ -2681,7 +2681,7 @@ private function after_tag(): void {
 		$this->is_closing_tag           = null;
 		$this->comment_type             = null;
 		$this->text_node_classification = self::TEXT_IS_GENERIC;
-		$this->attributes_parsed        = true;
+		$this->attributes_parsed_at     = -1;
 	}
 
 	/**
@@ -5035,6 +5035,7 @@ public function get_updated_html(): string {
 		 *                 └←─┘ back up by strlen("em") + 1 ==> 3
 		 */
 		$this->bytes_already_parsed = $before_current_tag;
+		$this->attributes_parsed_at = -1;
 		$this->base_class_next_token();
 
 		return $this->html;

From e5df992ff8ba0dcc0d4d7b40126792d038d57354 Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 17:41:41 +0100
Subject: [PATCH 38/43] Perf: Pre-filter special element length in fast path
 before goto

Check tag name length against special element lengths (3,5,6,7,8) before
goto after_tag_match. Common tags with lengths 1,2,4 (a, p, br, li, span,
code, etc.) return immediately, avoiding goto dispatch + namespace read +
property reads for the majority of opener tags.
---
 .../html-api/class-wp-html-tag-processor.php           | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 0db25b87e6e15..d153b7b44c9a6 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -1103,6 +1103,16 @@ private function base_class_next_token(): bool {
 				return true;
 			}
 
+			/*
+			 * Quick length filter for special elements before goto.
+			 * Special element names have lengths 3, 5, 6, 7, or 8.
+			 * Common tags with other lengths (a, p, br, li, span, code, etc.)
+			 * can return immediately without the goto dispatch.
+			 */
+			if ( $tag_length < 3 || $tag_length > 8 || 4 === $tag_length ) {
+				return true;
+			}
+
 			goto after_tag_match;
 		}
 

From fc047b49d394b291dd4acf8700f54fa98d0cb744 Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 17:58:53 +0100
Subject: [PATCH 39/43] Perf: Merge STATE_INCOMPLETE_INPUT check into bounds
 check

Remove the dedicated parser_state read for STATE_INCOMPLETE_INPUT at the
top of the hot loop. Instead, set bytes_already_parsed = doc_length when
incomplete input is detected, so the bounds check ($at >= $doc_length)
handles it. This eliminates one property read per token (~1M reads per pass).
---
 .../html-api/class-wp-html-tag-processor.php    | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index d153b7b44c9a6..009709934f27d 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -1005,16 +1005,13 @@ private function base_class_next_token(): bool {
 			$at = $this->bytes_already_parsed;
 		}
 
-		// Don't proceed if there's nothing more to scan.
-		if ( self::STATE_INCOMPLETE_INPUT === $this->parser_state ) {
-			return false;
-		}
-
 		$html       = $this->html;
 		$doc_length = $this->doc_length;
 
 		if ( $at >= $doc_length ) {
-			$this->parser_state = self::STATE_COMPLETE;
+			if ( self::STATE_INCOMPLETE_INPUT !== $this->parser_state ) {
+				$this->parser_state = self::STATE_COMPLETE;
+			}
 			return false;
 		}
 
@@ -1090,7 +1087,7 @@ private function base_class_next_token(): bool {
 				$tag_ends_at = $this->skip_attributes_and_find_closer( $html, $doc_length );
 				if ( false === $tag_ends_at ) {
 					$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
-					$this->bytes_already_parsed = $was_at;
+					$this->bytes_already_parsed = $doc_length;
 					return false;
 				}
 			}
@@ -1131,7 +1128,7 @@ private function base_class_next_token(): bool {
 
 		if ( false === $this->parse_next_tag() ) {
 			if ( self::STATE_INCOMPLETE_INPUT === $this->parser_state ) {
-				$this->bytes_already_parsed = $was_at;
+				$this->bytes_already_parsed = $doc_length;
 			}
 
 			return false;
@@ -1145,7 +1142,7 @@ private function base_class_next_token(): bool {
 		$tag_ends_at = $this->skip_attributes_and_find_closer( $html, $doc_length );
 		if ( false === $tag_ends_at ) {
 			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
-			$this->bytes_already_parsed = $was_at;
+			$this->bytes_already_parsed = $doc_length;
 
 			return false;
 		}
@@ -1263,7 +1260,7 @@ private function base_class_next_token(): bool {
 
 		if ( ! $found_closer ) {
 			$this->parser_state         = self::STATE_INCOMPLETE_INPUT;
-			$this->bytes_already_parsed = $was_at;
+			$this->bytes_already_parsed = $doc_length;
 			return false;
 		}
 

From 9c08c0e2fde80fdadaab6553b2ecd37124eabaf5 Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 18:41:47 +0100
Subject: [PATCH 40/43] Update autoresearch.md with optimizations 23-25, dead
 ends, and ideas backlog

Document new wins (attributes_parsed_at, fast-path length filter, merged
incomplete check), failed experiments, detailed architecture notes from
profiling data, and create ideas backlog for next session.
---
 autoresearch.ideas.md | 16 +++++++++++++++
 autoresearch.md       | 47 +++++++++++++++++++++++++++++++++----------
 2 files changed, 52 insertions(+), 11 deletions(-)
 create mode 100644 autoresearch.ideas.md

diff --git a/autoresearch.ideas.md b/autoresearch.ideas.md
new file mode 100644
index 0000000000000..3ff482776cbad
--- /dev/null
+++ b/autoresearch.ideas.md
@@ -0,0 +1,16 @@
+# Autoresearch Ideas Backlog
+
+## High Priority (user-suggested)
+- **Stack on_push/on_pop callbacks** — the HTML processor stack operations have push/pop callbacks. If these fire during tokenization (even indirectly), they could be significant overhead. Investigate whether any stack operations happen in the tag processor's read-only path, or whether these only apply to the HTML processor's tree-building.
+- **Bookmark on_destroy callback** — bookmarks may have cleanup behavior that adds overhead. Check if any bookmark operations happen during pure tokenization.
+
+## Medium Priority
+- **Lazy token_length** — derive from bytes_already_parsed - token_starts_at instead of writing per token. Saves ~1M writes/pass. Requires changing all read sites.
+- **Lazy is_closing_tag** — derive from html bytes. Saves 1 write/tag but adds cost to reads.
+- **Deferred property writes with lazy flush** — save all non-essential writes, flush on demand. Big win for read-only, slight overhead for read-write. Protected properties can't be deferred.
+- **Single boolean for modification check** — replace 2 array reads with 1 boolean read in hot loop.
+
+## Low Priority / Speculative
+- **Integer state constants** — replace string comparisons with int. API-breaking for protected parser_state.
+- **Packed tag name properties** — combine tag_name_starts_at + tag_name_length into single int.
+- **Static variable caching** — cache html/doc_length across calls.
diff --git a/autoresearch.md b/autoresearch.md
index b18bd49470442..3636b973f1433 100644
--- a/autoresearch.md
+++ b/autoresearch.md
@@ -47,7 +47,6 @@ Optimize `WP_HTML_Tag_Processor::next_token()` tokenization throughput on html-s
 13. **Remove text_node_classification write from tag fast path** — never read for tag tokens.
 14. **Use null text_starts_at for tags** — allows removing text_length=0 write. get_modifiable_text() returns '' on null text_starts_at.
 15. **Avoid redundant bytes_already_parsed property read** — use local $was_at for $at when no lexical updates.
-
 16. **Remove attribute_scan_from property** — compute scan position as tag_name_starts_at + tag_name_length on demand in ensure_attributes_parsed(). Eliminates property and 3 writes.
 17. **Remove attributes_parsed write from text nodes** — all callers of ensure_attributes_parsed() guard with STATE_MATCHED_TAG check, so the flag is never read for non-tag tokens.
 18. **Short-circuit closing tags before after_tag_match** — closing tags never need special element processing. Return early using local $is_closer instead of reading property through the shared label.
@@ -55,19 +54,45 @@ Optimize `WP_HTML_Tag_Processor::next_token()` tokenization throughput on html-s
 20. **Skip strpos when at '<'** — check for '<' at current position before calling strpos(). Tags (~63% of tokens) start at '<' and skip the function call entirely.
 21. **Remove text_starts_at null write for tags** — use bounds check (text_starts_at < token_starts_at) in get_modifiable_text() to detect stale text instead of proactively nulling.
 22. **Restructure get_tag() for state-based dispatch** — check STATE_MATCHED_TAG first instead of null check on tag_name_starts_at. Allows skipping tag_name null writes for text nodes (~756K writes eliminated).
+23. **Replace attributes_parsed boolean with version-based staleness check** — use attributes_parsed_at integer compared against token_starts_at. Eliminates ~646K attributes_parsed=false writes per parse iteration.
+24. **Pre-filter special element length in fast path before goto** — check tag name length (3,5,6,7,8) before goto after_tag_match. Tags with lengths 1,2,4 (88% of all tags: a, p, br, li, span, code, etc.) return immediately.
+25. **Merge STATE_INCOMPLETE_INPUT check into bounds check** — remove dedicated parser_state read at loop start. Set bytes_already_parsed=doc_length on incomplete input so the existing bounds check handles it. Eliminates 1 property read per token.
 
-### Current: ~330ms (52.8% faster)
+### Current: ~316ms (54.8% faster)
 
 ### Dead Ends
-- **First-letter bitwise OR + 7 comparisons** — replacing strspn('iIlLnNpPsStTxX',...) was WORSE (655→605ms regression). PHP bitwise string OR creates allocation; 7 comparisons slower than one C-level strspn.
-- **substr_compare for special element names** — replacing get_tag()+switch with substr_compare+switch-on-length showed no measurable improvement. The special element check is already rare (filtered by length + first letter). Added code complexity for zero gain.
-- **Simplified closer detection** — removing ternary `$is_closer ? 1 : 0` by computing $tag_at incrementally. Neutral result.
-- **Local vars for after_tag_match** — passing tag_length/tag_at as locals through the goto label. Neutral — property reads are hot in PHP's cache.
-- **Pass $at parameter to skip_attributes_and_find_closer** — extra function parameter overhead cancels the property write/read savings.
+- **First-letter bitwise OR + 7 comparisons** — replacing strspn('iIlLnNpPsStTxX',...) was WORSE. PHP bitwise string OR creates allocation; 7 comparisons slower than one C-level strspn.
+- **substr_compare for special element names** — no measurable improvement. The special element check is already rare.
+- **Simplified closer detection** — removing ternary `$is_closer ? 1 : 0` by computing $tag_at incrementally. Neutral.
+- **Local vars for after_tag_match** — passing tag_length/tag_at as locals through the goto label. Neutral.
+- **Pass $at parameter to skip_attributes_and_find_closer** — extra function parameter overhead cancels savings.
+- **Add strspn first-letter check to fast path filter** — adding strspn('iIlLnNpPsStTxX') alongside the length filter. Neutral — length filter already catches 88% of tags.
+- **Conditional text_node_classification write** — `if (TEXT_IS_GENERIC !== $this->text_node_classification)` before writing. Neutral — the conditional read costs the same as the write.
+- **1-byte text node lookahead** — check `$html[$at+1] === '<'` before calling strpos. WORSE (~15ms regression). The extra branch on every text path hurts; strpos with memchr is already very fast for single bytes.
+- **Length-3 first-letter filter in fast path** — for len=3 tags, check first letter against p/P/x/X (only PRE/XMP are special). Neutral — extra comparisons offset the savings from avoiding after_tag_match for ~74K div tags.
+- **Single boolean has_pending_updates flag** — replace `classname_updates || lexical_updates` (2 reads) with a single boolean. Too invasive: 16+ modification sites need `$this->has_pending_updates = true`. Correctness concerns with clearing the flag.
+- **Defer classname_updates check** — only check lexical_updates in hot loop, defer classname conversion. Incorrect: classname conversion requires current tag's attributes; deferring past cursor advance would use wrong attributes.
 
 ### Architecture Notes
-- **Token distribution**: ~646K tags, ~378K text nodes, ~247K attributes across ~1M tokens in html-standard.html
+- **Token distribution**: ~646K tags (325K openers, 321K closers), ~378K text nodes, and 1 other token, for ~1M tokens total in html-standard.html. The tags carry ~247K attributes (attributes are not counted as tokens).
+- **Tag name length distribution**: len=1: 184K (28%), len=2: 211K (33%), len=3: 75K (12%), len=4: 174K (27%), len=5+: 4K (0.6%). Length filter catches 88% of tags.
+- **Attribute distribution**: ~517K tags without attributes, ~129K with attributes (~20%)
+- **Text node length**: 73K are 1 byte, 22K are 2 bytes, 30K are 3 bytes, etc. Most are short (whitespace between tags).
 - **Text-tag alternation**: Most tokens alternate text→tag→text→tag. The strpos skip optimization exploits this — tags start at '<' so no search is needed.
-- **PHP overhead dominates**: At 330ms / 1M tokens = 330ns/token. Property reads (~20-30ns each), property writes (~20-30ns), method dispatch (~50-100ns) are the main costs.
-- **next_token()→base_class_next_token() dispatch**: ~1M extra method calls, but cannot be eliminated because get_updated_html() needs the base implementation.
-- **Remaining writes per token**: text nodes ~7 writes, tags ~8 writes. Total ~8M writes per benchmark run at ~20ns each = ~160ms (48% of total).
+- **PHP overhead dominates**: At 316ms / 1M tokens = 316ns/token (per pass, 3 passes). Property reads (~5-10ns each), property writes (~10-15ns), method dispatch (~10-20ns for JIT-optimized private calls).
+- **next_token()→base_class_next_token() dispatch**: ~1M extra method calls, cannot be eliminated because get_updated_html() needs the base implementation.
+- **Remaining property reads per token (hot path start)**: bytes_already_parsed, classname_updates, lexical_updates, html, doc_length = 5 reads.
+- **Remaining property writes per token**: text nodes ~7, tags ~7. Total ~7M writes per benchmark pass.
+- **Protected properties constrain optimization**: parser_state and text_node_classification are protected (read directly by WP_HTML_Processor subclass). Cannot defer or version-gate these without changing the subclass, which is off-limits.
+- **after_tag() is dead code**: the method exists but is never called (fully inlined into base_class_next_token). Could be removed, but cosmetic.
+
+### Unexplored Ideas
+- **Stack operations on_push/on_pop callbacks** — the HTML processor's open_elements stack has push/pop callbacks that fire during tree-building. These are not in scope for the tag processor benchmark, but if the benchmark changes to use the HTML processor, these callbacks could be significant overhead.
+- **Bookmark on_destroy callback** — bookmarks have cleanup behavior. Not in hot path for read-only benchmark.
+- **Lazy token_length computation** — token_length = bytes_already_parsed - token_starts_at for all fast-path tokens. Could eliminate 1 write per token (~1M writes/pass). But read sites are numerous and some (special elements, bookmarks) set token_length independently. Would need to change all read sites.
+- **Lazy is_closing_tag computation** — derive from html[token_starts_at+1] === '/'. Saves 1 write per tag but adds 2 property reads + 1 byte access per read (many read sites including subclass).
+- **Integer state constants** — replace string parser_state constants with integers for faster comparison. But parser_state is protected and used by external code with string comparisons.
+- **Packed tag name properties** — store tag_name_starts_at and tag_name_length in a single 64-bit int. Saves 1 write, adds shift/mask to reads. Only useful if reads are rare (true for fast-path-filtered tags).
+- **Static variable caching for $html/$doc_length** — cache across method calls. Saves ~1 property read/call. Shared across instances (problematic for multi-instance usage).
+- **Deferred property writes with lazy flush** — store pending token data, only write to properties when external code reads them. Saves all property writes for read-only benchmark. Requires flush checks in all getter methods. Protected properties can't be deferred.
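+- **Eliminate classname_updates read in hot loop** — both classname_updates and lexical_updates are always empty in the benchmark. Replacing 2 array truthiness checks with a single boolean flag would save 1 read/token, but requires setting the flag in 16+ update methods. This is the same has_pending_updates flag listed under Dead Ends as too invasive; revisit only if a less invasive way to maintain the flag is found.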

From a8527fdec8750bbdbd78a3154e7af7bdf5854e32 Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 18:51:51 +0100
Subject: [PATCH 41/43] Perf: Eliminate token_length property writes by
 computing on demand

token_length = bytes_already_parsed - token_starts_at holds at every
write site. Derive the value on demand at the ~6 read sites instead of
writing it at ~20 write sites (~1M writes/pass eliminated).
---
 .../html-api/class-wp-html-tag-processor.php  | 37 +++++--------------
 1 file changed, 9 insertions(+), 28 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 009709934f27d..7e6254bb5b935 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -1032,7 +1032,6 @@ private function base_class_next_token(): bool {
 				$this->parser_state             = self::STATE_TEXT_NODE;
 				$this->token_starts_at          = $was_at;
 				$this->text_starts_at           = $was_at;
-				$this->token_length             = $doc_length - $was_at;
 				$this->text_length              = $doc_length - $was_at;
 				$this->text_node_classification = self::TEXT_IS_GENERIC;
 				$this->bytes_already_parsed     = $doc_length;
@@ -1055,7 +1054,6 @@ private function base_class_next_token(): bool {
 			$this->parser_state             = self::STATE_TEXT_NODE;
 			$this->token_starts_at          = $was_at;
 			$this->text_starts_at           = $was_at;
-			$this->token_length             = $at - $was_at;
 			$this->text_length              = $at - $was_at;
 			$this->text_node_classification = self::TEXT_IS_GENERIC;
 			$this->bytes_already_parsed     = $at;
@@ -1094,7 +1092,6 @@ private function base_class_next_token(): bool {
 
 			$this->parser_state         = self::STATE_MATCHED_TAG;
 			$this->bytes_already_parsed = $tag_ends_at + 1;
-			$this->token_length         = $tag_ends_at + 1 - $at;
 
 			if ( $is_closer ) {
 				return true;
@@ -1148,7 +1145,6 @@ private function base_class_next_token(): bool {
 		}
 		$this->parser_state         = self::STATE_MATCHED_TAG;
 		$this->bytes_already_parsed = $tag_ends_at + 1;
-		$this->token_length         = $this->bytes_already_parsed - $this->token_starts_at;
 
 		if ( $this->is_closing_tag ) {
 			return true;
@@ -1218,7 +1214,7 @@ private function base_class_next_token(): bool {
 		 */
 		$tag_name_starts_at   = $this->tag_name_starts_at;
 		$tag_name_length      = $this->tag_name_length;
-		$tag_ends_at          = $this->token_starts_at + $this->token_length;
+		$tag_ends_at          = $this->bytes_already_parsed;
 		$this->ensure_attributes_parsed();
 		$attributes           = $this->attributes;
 		$duplicate_attributes = $this->duplicate_attributes;
@@ -1272,7 +1268,6 @@ private function base_class_next_token(): bool {
 		 * the inner content of the tag.
 		 */
 		$this->token_starts_at      = $was_at;
-		$this->token_length         = $this->bytes_already_parsed - $this->token_starts_at;
 		$this->text_starts_at       = $tag_ends_at;
 		$this->text_length          = $this->tag_name_starts_at - $this->text_starts_at;
 		$this->tag_name_starts_at   = $tag_name_starts_at;
@@ -1495,7 +1490,7 @@ public function set_bookmark( $name ): bool {
 			return false;
 		}
 
-		$this->bookmarks[ $name ] = new WP_HTML_Span( $this->token_starts_at, $this->token_length );
+		$this->bookmarks[ $name ] = new WP_HTML_Span( $this->token_starts_at, $this->bytes_already_parsed - $this->token_starts_at );
 
 		return true;
 	}
@@ -1887,7 +1882,6 @@ private function parse_next_tag(): bool {
 				$this->parser_state         = self::STATE_TEXT_NODE;
 				$this->token_starts_at      = $was_at;
 				$this->text_starts_at       = $was_at;
-				$this->token_length         = $at - $was_at;
 				$this->text_length          = $at - $was_at;
 				$this->bytes_already_parsed = $at;
 				return true;
@@ -1967,7 +1961,6 @@ private function parse_next_tag(): bool {
 						 */
 						$this->parser_state = self::STATE_COMMENT;
 						$this->comment_type = self::COMMENT_AS_ABRUPTLY_CLOSED_COMMENT;
-						$this->token_length = $closer_at + $span_of_dashes + 1 - $this->token_starts_at;
 
 						// Only provide modifiable text if the token is long enough to contain it.
 						if ( $span_of_dashes >= 2 ) {
@@ -1998,7 +1991,6 @@ private function parse_next_tag(): bool {
 						if ( $closer_at + 2 < $doc_length && '>' === $html[ $closer_at + 2 ] ) {
 							$this->parser_state         = self::STATE_COMMENT;
 							$this->comment_type         = self::COMMENT_AS_HTML_COMMENT;
-							$this->token_length         = $closer_at + 3 - $this->token_starts_at;
 							$this->text_starts_at       = $this->token_starts_at + 4;
 							$this->text_length          = $closer_at - $this->text_starts_at;
 							$this->bytes_already_parsed = $closer_at + 3;
@@ -2012,7 +2004,6 @@ private function parse_next_tag(): bool {
 						) {
 							$this->parser_state         = self::STATE_COMMENT;
 							$this->comment_type         = self::COMMENT_AS_HTML_COMMENT;
-							$this->token_length         = $closer_at + 4 - $this->token_starts_at;
 							$this->text_starts_at       = $this->token_starts_at + 4;
 							$this->text_length          = $closer_at - $this->text_starts_at;
 							$this->bytes_already_parsed = $closer_at + 4;
@@ -2044,7 +2035,6 @@ private function parse_next_tag(): bool {
 					}
 
 					$this->parser_state         = self::STATE_DOCTYPE;
-					$this->token_length         = $closer_at + 1 - $this->token_starts_at;
 					$this->text_starts_at       = $this->token_starts_at + 9;
 					$this->text_length          = $closer_at - $this->text_starts_at;
 					$this->bytes_already_parsed = $closer_at + 1;
@@ -2072,7 +2062,6 @@ private function parse_next_tag(): bool {
 					$this->parser_state         = self::STATE_CDATA_NODE;
 					$this->text_starts_at       = $at + 9;
 					$this->text_length          = $closer_at - $this->text_starts_at;
-					$this->token_length         = $closer_at + 3 - $this->token_starts_at;
 					$this->bytes_already_parsed = $closer_at + 3;
 					return true;
 				}
@@ -2091,7 +2080,6 @@ private function parse_next_tag(): bool {
 
 				$this->parser_state         = self::STATE_COMMENT;
 				$this->comment_type         = self::COMMENT_AS_INVALID_HTML;
-				$this->token_length         = $closer_at + 1 - $this->token_starts_at;
 				$this->text_starts_at       = $this->token_starts_at + 2;
 				$this->text_length          = $closer_at - $this->text_starts_at;
 				$this->bytes_already_parsed = $closer_at + 1;
@@ -2112,7 +2100,7 @@ private function parse_next_tag(): bool {
 				 *       and require the proper closing `]]>` in those cases.
 				 */
 				if (
-					$this->token_length >= 10 &&
+					$this->bytes_already_parsed - $this->token_starts_at >= 10 &&
 					'[' === $html[ $this->token_starts_at + 2 ] &&
 					'C' === $html[ $this->token_starts_at + 3 ] &&
 					'D' === $html[ $this->token_starts_at + 4 ] &&
@@ -2149,7 +2137,6 @@ private function parse_next_tag(): bool {
 				}
 
 				$this->parser_state         = self::STATE_PRESUMPTUOUS_TAG;
-				$this->token_length         = $at + 2 - $this->token_starts_at;
 				$this->bytes_already_parsed = $at + 2;
 				return true;
 			}
@@ -2168,7 +2155,6 @@ private function parse_next_tag(): bool {
 
 				$this->parser_state         = self::STATE_COMMENT;
 				$this->comment_type         = self::COMMENT_AS_INVALID_HTML;
-				$this->token_length         = $closer_at + 1 - $this->token_starts_at;
 				$this->text_starts_at       = $this->token_starts_at + 2;
 				$this->text_length          = $closer_at - $this->text_starts_at;
 				$this->bytes_already_parsed = $closer_at + 1;
@@ -2200,8 +2186,8 @@ private function parse_next_tag(): bool {
 				 *
 				 * @see https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PITarget
 				 */
-				if ( $this->token_length >= 5 && '?' === $html[ $closer_at - 1 ] ) {
-					$comment_text     = substr( $html, $this->token_starts_at + 2, $this->token_length - 4 );
+				if ( $closer_at + 1 - $this->token_starts_at >= 5 && '?' === $html[ $closer_at - 1 ] ) {
+					$comment_text     = substr( $html, $this->token_starts_at + 2, $closer_at + 1 - $this->token_starts_at - 4 );
 					$pi_target_length = strspn( $comment_text, 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ:_' );
 
 					if ( 0 < $pi_target_length ) {
@@ -2243,7 +2229,6 @@ private function parse_next_tag(): bool {
 				}
 
 				$this->parser_state         = self::STATE_FUNKY_COMMENT;
-				$this->token_length         = $closer_at + 1 - $this->token_starts_at;
 				$this->text_starts_at       = $this->token_starts_at + 2;
 				$this->text_length          = $closer_at - $this->text_starts_at;
 				$this->bytes_already_parsed = $closer_at + 1;
@@ -2259,9 +2244,8 @@ private function parse_next_tag(): bool {
 		 */
 		$this->parser_state         = self::STATE_TEXT_NODE;
 		$this->token_starts_at      = $was_at;
-		$this->token_length         = $doc_length - $was_at;
 		$this->text_starts_at       = $was_at;
-		$this->text_length          = $this->token_length;
+		$this->text_length          = $doc_length - $was_at;
 		$this->bytes_already_parsed = $doc_length;
 		return true;
 	}
@@ -2680,7 +2664,6 @@ private function after_tag(): void {
 		}
 
 		$this->token_starts_at          = null;
-		$this->token_length             = null;
 		$this->tag_name_starts_at       = null;
 		$this->tag_name_length          = null;
 		$this->text_starts_at           = 0;
@@ -2993,7 +2976,7 @@ public function seek( $bookmark_name ): bool {
 
 		if (
 			$this->token_starts_at === $existing_bookmark->start &&
-			$this->token_length === $existing_bookmark->length
+			$this->bytes_already_parsed - $this->token_starts_at === $existing_bookmark->length
 		) {
 			return true;
 		}
@@ -3692,7 +3675,7 @@ public function has_self_closing_flag(): bool {
 		 *     <figure />
 		 *             ^ this appears one character before the end of the closing ">".
 		 */
-		return '/' === $this->html[ $this->token_starts_at + $this->token_length - 2 ];
+		return '/' === $this->html[ $this->bytes_already_parsed - 2 ];
 	}
 
 	/**
@@ -3933,7 +3916,6 @@ public function subdivide_text_appropriately(): bool {
 		 */
 		$leading_nulls = strspn( $this->html, "\x00", $this->text_starts_at, $this->text_length );
 		if ( $leading_nulls > 0 ) {
-			$this->token_length             = $leading_nulls;
 			$this->text_length              = $leading_nulls;
 			$this->bytes_already_parsed     = $this->token_starts_at + $leading_nulls;
 			$this->text_node_classification = self::TEXT_IS_NULL_SEQUENCE;
@@ -3966,7 +3948,6 @@ public function subdivide_text_appropriately(): bool {
 		if ( $at > $this->text_starts_at ) {
 			$new_length                     = $at - $this->text_starts_at;
 			$this->text_length              = $new_length;
-			$this->token_length             = $new_length;
 			$this->bytes_already_parsed     = $at;
 			$this->text_node_classification = self::TEXT_IS_WHITESPACE;
 			return true;
@@ -5164,7 +5145,7 @@ public function get_doctype_info(): ?WP_HTML_Doctype_Info {
 			return null;
 		}
 
-		return WP_HTML_Doctype_Info::from_doctype_token( substr( $this->html, $this->token_starts_at, $this->token_length ) );
+		return WP_HTML_Doctype_Info::from_doctype_token( substr( $this->html, $this->token_starts_at, $this->bytes_already_parsed - $this->token_starts_at ) );
 	}
 
 	/**

From a89e78d346abdf18f9e71f8d7e9f45eb5119e0be Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 19:05:59 +0100
Subject: [PATCH 42/43] Perf: Derive is_closing_tag from HTML bytes instead of
 storing per token

Eliminate is_closing_tag property writes (~646K per pass) by computing
'/' === html[token_starts_at + 1] at the ~8 read sites. Uses local
variable in parse_next_tag() for the slow path.
---
 .../html-api/class-wp-html-tag-processor.php  | 34 +++++++++----------
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 7e6254bb5b935..f0bdc25990813 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -1073,7 +1073,6 @@ private function base_class_next_token(): bool {
 			$after_name = $tag_at + $tag_length;
 
 			$this->token_starts_at     = $at;
-			$this->is_closing_tag      = $is_closer;
 			$this->tag_name_starts_at  = $tag_at;
 			$this->tag_name_length     = $tag_length;
 
@@ -1146,7 +1145,7 @@ private function base_class_next_token(): bool {
 		$this->parser_state         = self::STATE_MATCHED_TAG;
 		$this->bytes_already_parsed = $tag_ends_at + 1;
 
-		if ( $this->is_closing_tag ) {
+		if ( '/' === $html[ $this->token_starts_at + 1 ] ) {
 			return true;
 		}
 
@@ -1890,10 +1889,10 @@ private function parse_next_tag(): bool {
 			$this->token_starts_at = $at;
 
 			if ( $at + 1 < $doc_length && '/' === $html[ $at + 1 ] ) {
-				$this->is_closing_tag = true;
+				$is_closer = true;
 				++$at;
 			} else {
-				$this->is_closing_tag = false;
+				$is_closer = false;
 			}
 
 			/*
@@ -1934,7 +1933,7 @@ private function parse_next_tag(): bool {
 			 * `<!` transitions to markup declaration open state
 			 * https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
 			 */
-			if ( ! $this->is_closing_tag && '!' === $html[ $at + 1 ] ) {
+			if ( ! $is_closer && '!' === $html[ $at + 1 ] ) {
 				/*
 				 * `<!--` transitions to a comment state – apply further comment rules.
 				 * https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
@@ -2131,7 +2130,7 @@ private function parse_next_tag(): bool {
 			 */
 			if ( '>' === $html[ $at + 1 ] ) {
 				// `<>` is interpreted as plaintext.
-				if ( ! $this->is_closing_tag ) {
+				if ( ! $is_closer ) {
 					++$at;
 					continue;
 				}
@@ -2145,7 +2144,7 @@ private function parse_next_tag(): bool {
 			 * `<?` transitions to a bogus comment state – skip to the nearest >
 			 * See https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
 			 */
-			if ( ! $this->is_closing_tag && '?' === $html[ $at + 1 ] ) {
+			if ( ! $is_closer && '?' === $html[ $at + 1 ] ) {
 				$closer_at = strpos( $html, '>', $at + 2 );
 				if ( false === $closer_at ) {
 					$this->parser_state = self::STATE_INCOMPLETE_INPUT;
@@ -2213,7 +2212,7 @@ private function parse_next_tag(): bool {
 			 *
 			 * See https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name
 			 */
-			if ( $this->is_closing_tag ) {
+			if ( $is_closer ) {
 				// No chance of finding a closer.
 				if ( $at + 3 > $doc_length ) {
 					$this->parser_state = self::STATE_INCOMPLETE_INPUT;
@@ -2350,7 +2349,7 @@ private function parse_next_attribute( bool $store = true ): bool {
 
 		$this->bytes_already_parsed = $at;
 
-		if ( ! $store || $this->is_closing_tag ) {
+		if ( ! $store || '/' === $this->html[ $this->token_starts_at + 1 ] ) {
 			return true;
 		}
 
@@ -2587,7 +2586,7 @@ private function ensure_attributes_parsed(): void {
 		$this->attributes           = array();
 		$this->duplicate_attributes = null;
 
-		if ( null === $this->tag_name_starts_at || $this->is_closing_tag ) {
+		if ( null === $this->tag_name_starts_at || '/' === $this->html[ $this->token_starts_at + 1 ] ) {
 			return;
 		}
 
@@ -2668,7 +2667,6 @@ private function after_tag(): void {
 		$this->tag_name_length          = null;
 		$this->text_starts_at           = 0;
 		$this->text_length              = 0;
-		$this->is_closing_tag           = null;
 		$this->comment_type             = null;
 		$this->text_node_classification = self::TEXT_IS_GENERIC;
 		$this->attributes_parsed_at     = -1;
@@ -3197,7 +3195,7 @@ public function get_attribute( $name ) {
 	public function get_attribute_names_with_prefix( $prefix ): ?array {
 		if (
 			self::STATE_MATCHED_TAG !== $this->parser_state ||
-			$this->is_closing_tag
+			'/' === $this->html[ $this->token_starts_at + 1 ]
 		) {
 			return null;
 		}
@@ -3698,7 +3696,7 @@ public function has_self_closing_flag(): bool {
 	public function is_tag_closer(): bool {
 		return (
 			self::STATE_MATCHED_TAG === $this->parser_state &&
-			$this->is_closing_tag &&
+			'/' === $this->html[ $this->token_starts_at + 1 ] &&
 
 			/*
 			 * The BR tag can only exist as an opening tag. If something like `</br>`
@@ -4652,7 +4650,7 @@ private static function escape_javascript_script_contents( string $sourcecode ):
 	public function set_attribute( $name, $value ): bool {
 		if (
 			self::STATE_MATCHED_TAG !== $this->parser_state ||
-			$this->is_closing_tag
+			'/' === $this->html[ $this->token_starts_at + 1 ]
 		) {
 			return false;
 		}
@@ -4804,7 +4802,7 @@ public function set_attribute( $name, $value ): bool {
 	public function remove_attribute( $name ): bool {
 		if (
 			self::STATE_MATCHED_TAG !== $this->parser_state ||
-			$this->is_closing_tag
+			'/' === $this->html[ $this->token_starts_at + 1 ]
 		) {
 			return false;
 		}
@@ -4884,7 +4882,7 @@ public function remove_attribute( $name ): bool {
 	public function add_class( $class_name ): bool {
 		if (
 			self::STATE_MATCHED_TAG !== $this->parser_state ||
-			$this->is_closing_tag
+			'/' === $this->html[ $this->token_starts_at + 1 ]
 		) {
 			return false;
 		}
@@ -4926,7 +4924,7 @@ public function add_class( $class_name ): bool {
 	public function remove_class( $class_name ): bool {
 		if (
 			self::STATE_MATCHED_TAG !== $this->parser_state ||
-			$this->is_closing_tag
+			'/' === $this->html[ $this->token_starts_at + 1 ]
 		) {
 			return false;
 		}
@@ -5105,7 +5103,7 @@ private function parse_query( $query ) {
 	 * @return bool Whether the given tag and its attribute match the search criteria.
 	 */
 	private function matches(): bool {
-		if ( $this->is_closing_tag && ! $this->stop_on_tag_closers ) {
+		if ( '/' === $this->html[ $this->token_starts_at + 1 ] && ! $this->stop_on_tag_closers ) {
 			return false;
 		}
 

From 1ca3e9b9b4e5880712f7d167a7011500a53dde8a Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Fri, 13 Mar 2026 19:40:25 +0100
Subject: [PATCH 43/43] Perf: Use ctype_alpha() for ASCII letter checks

Replace inline range comparisons with ctype_alpha(), which performs the
check in a single C-level function call. Applies to all four sites: fast
path tag check, fast path text validation, parse_next_tag() tag check,
and parse_next_tag() text validation.
---
 src/wp-includes/html-api/class-wp-html-tag-processor.php | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index f0bdc25990813..9368c72b8c380 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -1042,7 +1042,7 @@ private function base_class_next_token(): bool {
 			$next_byte = $html[ $at + 1 ] ?? '';
 			if (
 				'!' !== $next_byte && '/' !== $next_byte && '?' !== $next_byte &&
-				( $next_byte < 'A' || ( $next_byte > 'Z' && $next_byte < 'a' ) || $next_byte > 'z' )
+				! ctype_alpha( $next_byte )
 			) {
 				/*
 				 * The '<' doesn't start a valid token. Fall through to
@@ -1067,7 +1067,7 @@ private function base_class_next_token(): bool {
 			$first_char = $html[ $at + 2 ] ?? '';
 		}
 
-		if ( ( $first_char >= 'a' && $first_char <= 'z' ) || ( $first_char >= 'A' && $first_char <= 'Z' ) ) {
+		if ( ctype_alpha( $first_char ) ) {
 			$tag_at     = $at + 1 + ( $is_closer ? 1 : 0 );
 			$tag_length = strcspn( $html, " \t\f\r\n/>", $tag_at );
 			$after_name = $tag_at + $tag_length;
@@ -1872,7 +1872,7 @@ private function parse_next_tag(): bool {
 				$next_byte = $html[ $at + 1 ] ?? '';
 				if (
 					'!' !== $next_byte && '/' !== $next_byte && '?' !== $next_byte &&
-					( $next_byte < 'A' || ( $next_byte > 'Z' && $next_byte < 'a' ) || $next_byte > 'z' )
+					! ctype_alpha( $next_byte )
 				) {
 					++$at;
 					continue;
@@ -1910,7 +1910,7 @@ private function parse_next_tag(): bool {
 			 * * https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
 			 */
 			$first_char = $html[ $at + 1 ] ?? '';
-			if ( ( $first_char >= 'a' && $first_char <= 'z' ) || ( $first_char >= 'A' && $first_char <= 'Z' ) ) {
+			if ( ctype_alpha( $first_char ) ) {
 				++$at;
 				$this->parser_state         = self::STATE_MATCHED_TAG;
 				$this->tag_name_starts_at   = $at;