Skip to content

Commit a32ba5c

Browse files
committed
General: Add support for unicode email addresses in is_email
This adds support for the unicode address extensions in RFC 6532, adds unit tests for that, extends the documentation to explain the relationship between this code and the various specifications, and finally adds unit tests to ensure that the documentation's description of the code remains correct. Fixes #31992.
1 parent 654475f commit a32ba5c

2 files changed

Lines changed: 22 additions & 3 deletions

File tree

src/wp-includes/formatting.php

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3513,7 +3513,21 @@ function convert_smilies( $text ) {
35133513
/**
35143514
* Verifies that an email is valid.
35153515
*
3516-
* Does not grok i18n domains. Not RFC compliant.
3516+
* This mostly matches what people think is the format of email
3517+
* addresses, and is close to all three current specifications.
3518+
*
3519+
* Email address syntax is specified in RFC 5322 for ASCII-only email
3520+
* and in RFC 6532 for unicode email (both unicode domains and
3521+
* localparts). In addition, the HTML WHATWG specification contains a
3522+
* third syntax which is used for HTML form input (except that major
3523+
* browsers deviate a little from the WHATWG specification).
3524+
*
3525+
* This function matches the WHATWG and RFC 6532 specifications fairly
3526+
* well, although there are some differences. " "@example.com (quote
3527+
* space quote at ...) is allowed by the RFCs and rejected by this
3528+
* code, while [email protected] is allowed by this code and prohibited
3529+
* by the RFCs. info@grå.org is allowed by this code and major
3530+
* browsers, but prohibited by WHATWG's regex (as of April 2023).
35173531
*
35183532
* @since 0.71
35193533
*
@@ -3557,7 +3571,7 @@ function is_email( $email, $deprecated = false ) {
35573571
* LOCAL PART
35583572
* Test for invalid characters.
35593573
*/
3560-
if ( ! preg_match( '/^[a-zA-Z0-9!#$%&\'*+\/=?^_`{|}~\.-]+$/', $local ) ) {
3574+
if ( ! ( preg_match( '/^[a-zA-Z0-9\x80-\xff!#$%&\'*+\/=?^_`{|}~\.-]+$/', $local ) && preg_match( '/^\X+$/', $local ) ) ) {
35613575
/** This filter is documented in wp-includes/formatting.php */
35623576
return apply_filters( 'is_email', false, $email, 'local_invalid_chars' );
35633577
}
@@ -3595,7 +3609,7 @@ function is_email( $email, $deprecated = false ) {
35953609
}
35963610

35973611
// Test for invalid characters.
3598-
if ( ! preg_match( '/^[a-z0-9-]+$/i', $sub ) ) {
3612+
if ( ! ( preg_match( '/^[a-z0-9\x80-\xff-]+$/i', $sub ) && preg_match( '/^\X+$/', $sub ) ) ) {
35993613
/** This filter is documented in wp-includes/formatting.php */
36003614
return apply_filters( 'is_email', false, $email, 'sub_invalid_chars' );
36013615
}

tests/phpunit/tests/formatting/isEmail.php

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ public function test_returns_the_email_address_if_it_is_valid() {
1414
1515
1616
17+
'info@grå.org',
18+
'grå@grå.org',
19+
"gr\u{0061}\u{030a}blå@grå.org",
20+
1721
);
1822
foreach ( $data as $datum ) {
1923
$this->assertSame( $datum, is_email( $datum ), $datum );
@@ -28,6 +32,7 @@ public function test_returns_false_if_given_an_invalid_email_address() {
2832
'com.exampleNOSPAMbob',
2933
'bob@your mom',
3034
35+
'" "@b.c',
3136
);
3237
foreach ( $data as $datum ) {
3338
$this->assertFalse( is_email( $datum ), $datum );

0 commit comments

Comments
 (0)