From ae846193a1b1f44d46b908220303b62e97e3a919 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Thu, 25 Sep 2025 13:39:10 -0500 Subject: [PATCH 1/2] HTML API: Introduce wp_split_css_class_list(). This patch introduces a new CSS helper module containing a new function, `wp_split_css_class_list()`. This function wraps some code to rely on the HTML API to take an HTML `class` attribute value and return a `Generator` to iterate over the classes in that value. Many existing functions perform ad-hoc parsing of CSS class names, usually by splitting on a space character. However, there are issues with this approach: - There is no decoding of HTML character references, which is normative inside HTML attributes. - There is no handling of null bytes. - Class names can be split by more than just the space character. - There is no handling of duplicates, and while mostly benign, code forgetting to account for duplicates can lead to defects. The new function handles the nuances to let developers focus on reading CSS class names, adding new class names, and removing class names. This serves as a middle ground between legacy code interacting with CSS class names in isolation and code processing full HTML documents. --- src/wp-includes/html-api/css-helpers.php | 58 ++++++++++++++++++++++++ src/wp-settings.php | 1 + 2 files changed, 59 insertions(+) create mode 100644 src/wp-includes/html-api/css-helpers.php diff --git a/src/wp-includes/html-api/css-helpers.php b/src/wp-includes/html-api/css-helpers.php new file mode 100644 index 0000000000000..d07ae4576290e --- /dev/null +++ b/src/wp-includes/html-api/css-helpers.php @@ -0,0 +1,58 @@ + Use this in a foreach loop to iterate over the class names. + */ +function wp_split_css_class_list( $class_attribute_string ): Generator { + if ( '' === $class_attribute_string || ! is_string( $class_attribute_string ) ) { + return; + } + + // Get these from the HTML API to avoid ad-hoc parsing HTML or CSS class names. 
+ $processor = new WP_HTML_Tag_Processor( '' ); + $processor->next_token(); + $processor->set_attribute( 'class', $class_attribute_string ); + + foreach ( $processor->class_list() as $class_name ) { + yield $class_name; + } +} diff --git a/src/wp-settings.php b/src/wp-settings.php index 8ad02ffe8fd8b..93bdffa013db8 100644 --- a/src/wp-settings.php +++ b/src/wp-settings.php @@ -270,6 +270,7 @@ require ABSPATH . WPINC . '/html-api/class-wp-html-stack-event.php'; require ABSPATH . WPINC . '/html-api/class-wp-html-processor-state.php'; require ABSPATH . WPINC . '/html-api/class-wp-html-processor.php'; +require ABSPATH . WPINC . '/html-api/css-helpers.php'; require ABSPATH . WPINC . '/class-wp-block-processor.php'; require ABSPATH . WPINC . '/class-wp-http.php'; require ABSPATH . WPINC . '/class-wp-http-streams.php'; From e5a3a6282cbfaabdc6fd0453856e565ba5640467 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Thu, 9 Oct 2025 20:19:12 -0700 Subject: [PATCH 2/2] Reimagine as HTML API export. --- .../html-api/class-wp-html-tag-processor.php | 61 ++++++++++++++++--- src/wp-includes/html-api/css-helpers.php | 58 ------------------ src/wp-settings.php | 1 - 3 files changed, 53 insertions(+), 67 deletions(-) delete mode 100644 src/wp-includes/html-api/css-helpers.php diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 31c4bc8a10654..3c9f247790822 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -1187,25 +1187,70 @@ public function class_list() { return; } - $seen = array(); + return self::parse_class_list( $class, $this->compat_mode ); + } - $is_quirks = self::QUIRKS_MODE === $this->compat_mode; + /** + * Generator for a foreach loop to step through each class name in a given class list. + * + * This generator function is designed to be used inside a "foreach" loop. 
+ * + * Example: + * + * $class_list = "free <egg<\tlang-en"; + * foreach ( WP_HTML_Tag_Processor::parse_class_list( $class_list ) as $class_name ) { + * echo "{$class_name} "; + * } + * // Outputs: "free <egg< lang-en " + * + * The default behavior is normative for HTML5 documents in “no-quirks” mode. For + * rare documents with “quirks mode” DOCTYPE declarations, pass {@see self::QUIRKS_MODE} + * as the compatibility mode for ASCII-case-insensitive comparison of class names. Use + * this only when certain that the containing document is in quirks mode. + * + * Example: + * + * $class_list = 'wide naRRow WIDE Wide narrow'; + * $classes = WP_HTML_Tag_Processor::parse_class_list( $class_list ); + * $classes = iterator_to_array( $classes ); + * $classes === array( 'wide', 'naRRow', 'WIDE', 'Wide', 'narrow' ); + * + * $class_list = 'wide WIDE Wide'; + * $classes = WP_HTML_Tag_Processor::parse_class_list( $class_list, WP_HTML_Tag_Processor::QUIRKS_MODE ); + * $classes = iterator_to_array( $classes ); + * $classes === array( 'wide' ); + * + * @since 6.9.0 + * + * @param string $class_list Contains a full decoded HTML `class` attribute, or plain + * list of space-separated CSS class names. + * @param string|null $compat_mode Optional. Specifies how to compare class names, whether + * byte-for-byte or ASCII-case-insensitively. Default is + * NO_QUIRKS_MODE, which compares byte for byte. + * @return Generator Iterates over each unique CSS class name in the given input list in order. + */ + public static function parse_class_list( $class_list, $compat_mode = self::NO_QUIRKS_MODE ) { + if ( '' === $class_list || ! is_string( $class_list ) ) { + return; + } - $at = 0; - while ( $at < strlen( $class ) ) { + $seen = array(); + $is_quirks = self::QUIRKS_MODE === $compat_mode; + $at = 0; + while ( $at < strlen( $class_list ) ) { // Skip past any initial boundary characters. 
- $at += strspn( $class, " \t\f\r\n", $at ); - if ( $at >= strlen( $class ) ) { + $at += strspn( $class_list, " \t\f\r\n", $at ); + if ( $at >= strlen( $class_list ) ) { return; } // Find the byte length until the next boundary. - $length = strcspn( $class, " \t\f\r\n", $at ); + $length = strcspn( $class_list, " \t\f\r\n", $at ); if ( 0 === $length ) { return; } - $name = str_replace( "\x00", "\u{FFFD}", substr( $class, $at, $length ) ); + $name = str_replace( "\x00", "\u{FFFD}", substr( $class_list, $at, $length ) ); if ( $is_quirks ) { $name = strtolower( $name ); } diff --git a/src/wp-includes/html-api/css-helpers.php b/src/wp-includes/html-api/css-helpers.php deleted file mode 100644 index d07ae4576290e..0000000000000 --- a/src/wp-includes/html-api/css-helpers.php +++ /dev/null @@ -1,58 +0,0 @@ - Use this in a foreach loop to iterate over the class names. - */ -function wp_split_css_class_list( $class_attribute_string ): Generator { - if ( '' === $class_attribute_string || ! is_string( $class_attribute_string ) ) { - return; - } - - // Get these from the HTML API to avoid ad-hoc parsing HTML or CSS class names. - $processor = new WP_HTML_Tag_Processor( '' ); - $processor->next_token(); - $processor->set_attribute( 'class', $class_attribute_string ); - - foreach ( $processor->class_list() as $class_name ) { - yield $class_name; - } -} diff --git a/src/wp-settings.php b/src/wp-settings.php index 93bdffa013db8..8ad02ffe8fd8b 100644 --- a/src/wp-settings.php +++ b/src/wp-settings.php @@ -270,7 +270,6 @@ require ABSPATH . WPINC . '/html-api/class-wp-html-stack-event.php'; require ABSPATH . WPINC . '/html-api/class-wp-html-processor-state.php'; require ABSPATH . WPINC . '/html-api/class-wp-html-processor.php'; -require ABSPATH . WPINC . '/html-api/css-helpers.php'; require ABSPATH . WPINC . '/class-wp-block-processor.php'; require ABSPATH . WPINC . '/class-wp-http.php'; require ABSPATH . WPINC . '/class-wp-http-streams.php';