Skip to content

Commit 8107825

Browse files
committed
General: Add support for unicode email addresses in is_email
This adds support for the unicode address extensions in RFC 6532, adds unit tests for that, extends the documentation to explain the relationship between this code and the various specifications, and finally adds unit tests to ensure that the documentation's description of the code remains correct. Fixes #31992.
1 parent 04c4c81 commit 8107825

2 files changed

Lines changed: 22 additions & 3 deletions

File tree

src/wp-includes/formatting.php

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3523,7 +3523,21 @@ function convert_smilies( $text ) {
35233523
/**
35243524
* Verifies that an email is valid.
35253525
*
3526-
* Does not grok i18n domains. Not RFC compliant.
3526+
* This mostly matches what people think is the format of email
3527+
* addresses, and is close to all three current specifications.
3528+
*
3529+
* Email address syntax is specified in RFC 5322 for ASCII-only email
3530+
* and in RFC 6532 for Unicode email (both Unicode domains and
3531+
* localparts). In addition, the HTML WHATWG specification contains a
3532+
* third syntax which is used for HTML form input (except that major
3533+
* browsers deviate a little from the WHATWG specification).
3534+
*
3535+
* This function matches the WHATWG and RFC 6532 specifications fairly
3536+
* well, although there are some differences. " "@example.com (quote
3537+
* space quote at ...) is allowed by the RFCs and rejected by this
3538+
* code, while [email protected] is allowed by this code and prohibited
3539+
* by the RFCs. info@grå.org is allowed by this code and major
3540+
* browsers, but prohibited by WHATWG's regex (as of April 2023).
35273541
*
35283542
* @since 0.71
35293543
*
@@ -3567,7 +3581,7 @@ function is_email( $email, $deprecated = false ) {
35673581
* LOCAL PART
35683582
* Test for invalid characters.
35693583
*/
3570-
if ( ! preg_match( '/^[a-zA-Z0-9!#$%&\'*+\/=?^_`{|}~\.-]+$/', $local ) ) {
3584+
if ( ! ( preg_match( '/^[a-zA-Z0-9\x80-\xff!#$%&\'*+\/=?^_`{|}~\.-]+$/', $local ) && preg_match( '/^\X+$/', $local ) ) ) {
35713585
/** This filter is documented in wp-includes/formatting.php */
35723586
return apply_filters( 'is_email', false, $email, 'local_invalid_chars' );
35733587
}
@@ -3605,7 +3619,7 @@ function is_email( $email, $deprecated = false ) {
36053619
}
36063620

36073621
// Test for invalid characters.
3608-
if ( ! preg_match( '/^[a-z0-9-]+$/i', $sub ) ) {
3622+
if ( ! ( preg_match( '/^[a-z0-9\x80-\xff-]+$/i', $sub ) && preg_match( '/^\X+$/', $sub ) ) ) {
36093623
/** This filter is documented in wp-includes/formatting.php */
36103624
return apply_filters( 'is_email', false, $email, 'sub_invalid_chars' );
36113625
}

tests/phpunit/tests/formatting/isEmail.php

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ public function test_returns_the_email_address_if_it_is_valid() {
1414
1515
1616
17+
'info@grå.org',
18+
'grå@grå.org',
19+
"gr\u{0061}\u{030a}blå@grå.org",
20+
1721
);
1822
foreach ( $data as $datum ) {
1923
$this->assertSame( $datum, is_email( $datum ), $datum );
@@ -28,6 +32,7 @@ public function test_returns_false_if_given_an_invalid_email_address() {
2832
'com.exampleNOSPAMbob',
2933
'bob@your mom',
3034
35+
'" "@b.c',
3136
);
3237
foreach ( $data as $datum ) {
3338
$this->assertFalse( is_email( $datum ), $datum );

0 commit comments

Comments
 (0)