@@ -2129,36 +2129,39 @@ function sanitize_file_name( $filename ) {
21292129}
21302130
21312131/**
2132- * Returns true if the string contains no more than one unicode
2133- * script, and false if it contains two or more. This only considers
2134- * alphabetic characters.
2132+ * Validates that a string contains only characters from a single unicode script.
21352133 *
2136- * This returns true for an empty string.
2134+ * The function only considers alphabetic characters. It returns true if a string
2135+ * contains no more than one unicode script, and false if it contains two or more.
2136+ * An empty string is considered to contain no scripts, and thus returns true.
21372137 *
21382138 * IntlChar does not support returning the script property defined by
2139- * https://www.unicode.org/reports/tr24/, so this implementation uses
2140- * a workaround. Some of the old scripts have several code blocks, but
2141- * the scripts currently being added have only one, since the
2142- * committee has grown better at estimating the necessary size.
2143- *
2144- * This maps the known extension blocks ("latin extended a" etc) to
2145- * the first block for that script, and then checks that the string
2146- * uses only a single block. This works for the scripts currently in
2147- * Unicode, and will work for future scripts as long as the committee
2148- * keeps estimating high enough, so there's only one block for each
2149- * future script.
2139+ * https://www.unicode.org/reports/tr24/, so this implementation uses a workaround.
2140+ * It maps the known extension blocks ("latin extended a" etc) to the first block
2141+ * for that script, and then checks that the string uses only a single block.
2142+ *
2143+ * This works for the scripts currently in Unicode, and should continue to work for
2144+ * future scripts as long as each new script needs a single code block. While older
2145+ * scripts may have multiple blocks, the Unicode committee has grown better at
2146+ * estimating sizes high enough so that only one block is needed.
21502147 *
21512148 * @since 6.9.0
21522149 *
2153- * @param $input A string to check
2154- * @return true if all letters in the string belong to the same unicode script, and false if letters fromm two more more scripts are included.
2150+ * @param string $input A string to check.
2151+ * @return bool True if all letters in the string belong to the same unicode
2152+ * script or if the string is empty.
2153+ * False if letters from two or more scripts are included.
21552154 */
2155+ function uses_single_unicode_script ( string $ input ): bool {
2156+ if ( '' === $ input ) {
2157+ return true ;
2158+ }
21562159
2157- function uses_single_unicode_script ( $ input ) {
21582160 if ( version_compare ( PHP_VERSION , '7.4.0 ' , '< ' ) ) {
2159- /* mb_str_split is new in 7.4; accept ASCII only in 7.2-3 */
2160- return preg_match ( '/^[a-zA-Z0-9 _.\-@]+$/i ' , $ input );
2161+ // Since mb_str_split is not available in PHP < 7.4 we can only check ASCII characters.
2162+ return ( bool ) preg_match ( '/^[a-zA-Z0-9 _.\-@]+$/i ' , $ input );
21612163 }
2164+
21622165 $ block = 0 ;
21632166 // phpcs:ignore PHPCompatibility.FunctionUse.NewFunctions.mb_str_splitFound -- old versions of PHP are handled above
21642167 foreach ( mb_str_split ( $ input ) as $ cp ) {
@@ -2248,6 +2251,7 @@ function uses_single_unicode_script( $input ) {
22482251 }
22492252 }
22502253 }
2254+
22512255 return true ;
22522256}
22532257
0 commit comments