@@ -1518,7 +1518,7 @@ private function consume_ident_start_codepoint( $at ): int {
15181518 }
15191519
15201520 $ codepoint_byte_length = $ new_at - $ at ;
1521- $ codepoint = utf8_ord ( substr ( $ this ->css , $ at , $ codepoint_byte_length ) );
1521+ $ codepoint = self :: utf8_ord ( substr ( $ this ->css , $ at , $ codepoint_byte_length ) );
15221522 if ( null !== $ codepoint && $ codepoint >= 0x80 ) {
15231523 return $ codepoint_byte_length ;
15241524 }
@@ -1661,7 +1661,7 @@ private function decode_escape_at( int $offset, &$bytes_consumed ): string {
16611661
16621662 $ bytes_consumed = $ at - $ offset ;
16631663 // Convert the hex digits to a UTF-8 string.
1664- return codepoint_to_utf8_bytes ( hexdec ( $ hex ) );
1664+ return WP_HTML_Decoder:: code_point_to_utf8_bytes ( hexdec ( $ hex ) );
16651665 }
16661666
16671667 // Anything else.
@@ -1810,4 +1810,32 @@ private function check_if_3_code_points_start_an_ident_sequence( int $offset ):
18101810
18111811 return $ this ->consume_ident_start_codepoint ( $ offset ) > 0 || $ this ->is_valid_escape ( $ offset );
18121812 }
1813+
/**
 * Decodes the bytes of one UTF-8 encoded character into its Unicode code point.
 *
 * The number of bytes in `$character` selects the decoding formula; the payload
 * bits are then masked out of each byte and combined. The bytes themselves are
 * NOT validated: the lead byte is not checked against the sequence length and
 * trailing bytes are not checked for the `10xxxxxx` continuation pattern, so
 * callers should only pass a byte span that is already known to delimit a
 * single UTF-8 character.
 *
 * @param string $character UTF-8 encoded byte sequence representing a single Unicode character.
 *
 * @return int Unicode code point. 0 is returned when `$character` is empty or
 *             longer than four bytes (which is indistinguishable from a
 *             decoded U+0000).
 */
private static function utf8_ord( string $character ): int {
	/*
	 * Read the bytes with strlen()/ord() rather than unpack(): there is no
	 * format string to get subtly wrong (a stray space after `C*` renames the
	 * resulting array keys) and no temporary array is allocated.
	 */
	switch ( strlen( $character ) ) {
		case 1:
			// Single byte: the byte value is the code point.
			return ord( $character[0] );

		case 2:
			// 110xxxxx 10xxxxxx: 5 + 6 payload bits.
			return ( ( ord( $character[0] ) & 0x1F ) << 6 )
				| ( ord( $character[1] ) & 0x3F );

		case 3:
			// 1110xxxx 10xxxxxx 10xxxxxx: 4 + 6 + 6 payload bits.
			return ( ( ord( $character[0] ) & 0x0F ) << 12 )
				| ( ( ord( $character[1] ) & 0x3F ) << 6 )
				| ( ord( $character[2] ) & 0x3F );

		case 4:
			// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx: 3 + 6 + 6 + 6 payload bits.
			return ( ( ord( $character[0] ) & 0x07 ) << 18 )
				| ( ( ord( $character[1] ) & 0x3F ) << 12 )
				| ( ( ord( $character[2] ) & 0x3F ) << 6 )
				| ( ord( $character[3] ) & 0x3F );

		default:
			// Empty input or more than four bytes: nothing to decode.
			return 0;
	}
}
18131841}
0 commit comments