Skip to content

Commit 5d21c70

Browse files
committed
General: Add support for unicode email addresses in is_email
This adds support for the unicode address extensions in RFC 6532, adds unit tests for that, extends the documentation to explain the relationship between this code and the various specifications, and finally adds unit tests to ensure that the documentation's description of the code remains correct. Fixes #31992.
1 parent ac8db41 commit 5d21c70

2 files changed

Lines changed: 24 additions & 3 deletions

File tree

src/wp-includes/formatting.php

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3540,7 +3540,21 @@ function convert_smilies( $text ) {
35403540
/**
35413541
* Verifies that an email is valid.
35423542
*
3543-
* Does not grok i18n domains. Not RFC compliant.
3543+
* This mostly matches what people think is the format of email
3544+
* addresses, and is close to all three current specifications.
3545+
*
3546+
* Email address syntax is specified in RFC 5322 for ASCII-only email
3547+
* and in RFC 6532 for unicode email (both unicode domains and
3548+
* localparts). In addition, the HTML WHATWG specification contains a
3549+
* third syntax which is used for HTML form input (except that major
3550+
* browsers deviate a little from the WHATWG specification).
3551+
*
3552+
* This function matches the WHATWG and RFC 6532 specifications fairly
3553+
* well, although there are some differences. " "@example.com (quote
3554+
* space quote at ...) is allowed by the RFCs and rejected by this
3555+
* code, while a..b@example.com is allowed by this code and prohibited
3556+
* by the RFCs. info@grå.org is allowed by this code and major
3557+
* browsers, but prohibited by WHATWG's regex (as of April 2023).
35443558
*
35453559
* @since 0.71
35463560
*
@@ -3584,7 +3598,8 @@ function is_email( $email, $deprecated = false ) {
35843598
* LOCAL PART
35853599
* Test for invalid characters.
35863600
*/
3587-
if ( ! preg_match( '/^[a-zA-Z0-9!#$%&\'*+\/=?^_`{|}~\.-]+$/', $local ) ) {
3601+
if ( ! ( preg_match( '/^[a-zA-Z0-9\x80-\xff!#$%&\'*+\/=?^_`{|}~\.-]+$/', $local ) &&
3602+
preg_match( '/^\X+$/', $local ) ) ) {
35883603
/** This filter is documented in wp-includes/formatting.php */
35893604
return apply_filters( 'is_email', false, $email, 'local_invalid_chars' );
35903605
}
@@ -3622,7 +3637,8 @@ function is_email( $email, $deprecated = false ) {
36223637
}
36233638

36243639
// Test for invalid characters.
3625-
if ( ! preg_match( '/^[a-z0-9-]+$/i', $sub ) ) {
3640+
if ( ! ( preg_match( '/^[a-z0-9\x80-\xff-]+$/i', $sub ) &&
3641+
preg_match( '/^\X+$/', $sub ) ) ) {
36263642
/** This filter is documented in wp-includes/formatting.php */
36273643
return apply_filters( 'is_email', false, $email, 'sub_invalid_chars' );
36283644
}

tests/phpunit/tests/formatting/isEmail.php

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ public function test_returns_the_email_address_if_it_is_valid() {
1414
1515
1616
17+
'info@grå.org',
18+
'grå@grå.org',
19+
"gr\u{0061}\u{030a}blå@grå.org",
20+
1721
);
1822
foreach ( $data as $datum ) {
1923
$this->assertSame( $datum, is_email( $datum ), $datum );
@@ -28,6 +32,7 @@ public function test_returns_false_if_given_an_invalid_email_address() {
2832
'com.exampleNOSPAMbob',
2933
'bob@your mom',
3034
35+
'" "@b.c',
3136
);
3237
foreach ( $data as $datum ) {
3338
$this->assertFalse( is_email( $datum ), $datum );

0 commit comments

Comments
 (0)