|
<?php
/**
 * Class 'WP_Email_Address'.
 *
 * @package WordPress
 * @since 7.0.0
 */

/**
 * Represents a validated email address.
 *
 * Use the static factory method {@see WP_Email_Address::from_string()} to create instances
 * of this class rather than the constructor, which is private.
 *
 * @since 7.0.0
 */
final class WP_Email_Address {

	/**
	 * Regex for the local part when Unicode is not enabled.
	 *
	 * Matches the character set from the WHATWG email specification:
	 * https://html.spec.whatwg.org/multipage/input.html#email-state-(type=email)
	 *
	 * The pattern is anchored with `\z` rather than `$`: without the `D`
	 * modifier, `$` also matches immediately before a trailing newline, which
	 * would let "user\n" through.
	 *
	 * @since 7.0.0
	 * @var string
	 */
	const LOCAL_PART_ASCII_REGEX = '/^[a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]+\z/';

	/**
	 * Regex for the local part when Unicode is enabled.
	 *
	 * Extends the WHATWG character set to allow Unicode letters and numbers,
	 * and applies the same grapheme-cluster structure used for domain labels:
	 * each cluster must open with a non-combining character, optionally
	 * followed by combining marks.
	 *
	 * Anchored with `\z` so that a trailing newline is never accepted.
	 *
	 * @since 7.0.0
	 * @var string
	 */
	const LOCAL_PART_UNICODE_REGEX = '/^([\p{L}\p{N}.!#$%&\'*+\/=?^_`{|}~-]\p{M}*)+\z/u';

	/**
	 * Pattern for a single ASCII domain label (no dot).
	 *
	 * Matches a label from the WHATWG email specification: starts and ends with
	 * a letter or digit; internal characters may include hyphens.
	 *
	 * This is a pattern fragment (no delimiters or anchors), used to assemble
	 * {@see self::DOMAIN_ASCII_REGEX}.
	 *
	 * @since 7.0.0
	 * @var string
	 */
	const DOMAIN_LABEL_ASCII = '[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?';

	/**
	 * Pattern for a single Unicode domain label (no dot).
	 *
	 * Extends the ASCII label pattern to allow Unicode letters and numbers,
	 * with grapheme-cluster structure: each cluster must open with a letter or
	 * digit (not a combining mark), followed by zero or more combining marks.
	 *
	 * This is a pattern fragment (no delimiters or anchors), used to assemble
	 * {@see self::DOMAIN_UNICODE_REGEX}.
	 *
	 * @since 7.0.0
	 * @var string
	 */
	const DOMAIN_LABEL_UNICODE = '[\p{L}\p{N}]\p{M}*(?:(?:[\p{L}\p{N}-]\p{M}*)*[\p{L}\p{N}]\p{M}*)?';

	/**
	 * Regex for the domain when Unicode is not enabled.
	 *
	 * Assembled from {@see self::DOMAIN_LABEL_ASCII}: one label, then zero or
	 * more dot-prefixed labels. Anchored with `\z` so that a trailing newline
	 * is never accepted.
	 *
	 * @since 7.0.0
	 * @var string
	 */
	const DOMAIN_ASCII_REGEX = '/^' . self::DOMAIN_LABEL_ASCII . '(?:\.' . self::DOMAIN_LABEL_ASCII . ')*\z/';

	/**
	 * Regex for the domain when Unicode is enabled.
	 *
	 * Assembled from {@see self::DOMAIN_LABEL_UNICODE}: one label, then zero or
	 * more dot-prefixed labels. Anchored with `\z` so that a trailing newline
	 * is never accepted.
	 *
	 * @since 7.0.0
	 * @var string
	 */
	const DOMAIN_UNICODE_REGEX = '/^' . self::DOMAIN_LABEL_UNICODE . '(?:\.' . self::DOMAIN_LABEL_UNICODE . ')*\z/u';

	/**
	 * The local part of the email address (the portion before the '@').
	 *
	 * @since 7.0.0
	 * @var string
	 */
	private $localpart;

	/**
	 * The domain part of the email address (the portion after the '@').
	 *
	 * When the address was parsed with Unicode enabled and the intl extension
	 * is available, punycode labels are stored in their decoded UTF-8 form.
	 *
	 * @since 7.0.0
	 * @var string
	 */
	private $domain;

	/**
	 * Private constructor. Use {@see WP_Email_Address::from_string()} to create instances.
	 *
	 * Performs no validation of its own; callers inside this class must only
	 * pass parts that have already been validated.
	 *
	 * @since 7.0.0
	 *
	 * @param string $localpart The local part of the email address.
	 * @param string $domain    The domain part of the email address.
	 */
	private function __construct( string $localpart, string $domain ) {
		$this->localpart = $localpart;
		$this->domain    = $domain;
	}

	/**
	 * Creates a WP_Email_Address from a string.
	 *
	 * This method is intended to accept all strings that are considered valid email
	 * addresses by the WHATWG HTML specification for the email input type:
	 *
	 * https://html.spec.whatwg.org/multipage/input.html#email-state-(type=email)
	 *
	 * and some additional addresses, while rejecting strings that
	 * are more likely to be typos, mispastes, or attacks. This class
	 * may reject a few addresses that are valid according to RFC 5322,
	 * but it is intended to accept an address if it's valid according to
	 * WHATWG. Put differently: If users can type an address into
	 * the major browsers of 2026, this class accepts them, if
	 * they can't (in 2026), this class may or may not. (Note that
	 * "<iframe src=...>"@example.com is valid according to the RFC.)
	 *
	 * Validation steps, in order:
	 *
	 * 1. Exactly one '@' must be present.
	 * 2. Every dot-separated domain label must be at most 63 octets.
	 * 3. With Unicode enabled (and `idn_to_utf8()` available), punycode labels
	 *    are decoded and labels with a reserved "??--" prefix are rejected.
	 *    Otherwise any non-ASCII byte in the input is rejected.
	 * 4. Both parts must be valid UTF-8.
	 * 5. The local part must match the allowed character set, unless the
	 *    'is_email' filter overrides the failure.
	 * 6. The domain must contain a dot, unless the 'is_email' filter overrides
	 *    the failure.
	 * 7. The domain must match the allowed label structure.
	 *
	 * @since 7.0.0
	 *
	 * @param string $input   The email address string to parse.
	 * @param bool   $unicode Whether to allow Unicode characters in the address.
	 * @return WP_Email_Address|false A WP_Email_Address instance, or false if the input is invalid.
	 */
	public static function from_string( string $input, bool $unicode ) {
		// There must be exactly one '@' sign: the first and last occurrence must coincide.
		$at_pos = strpos( $input, '@' );
		if ( false === $at_pos || strrpos( $input, '@' ) !== $at_pos ) {
			return false;
		}

		$localpart = substr( $input, 0, $at_pos );
		$domain    = substr( $input, $at_pos + 1 );
		$labels    = explode( '.', $domain );

		foreach ( $labels as $label ) {
			// DNS limits each label to 63 octets. Measured on the label as given, before any punycode decoding.
			if ( strlen( $label ) > 63 ) {
				return false;
			}
		}

		if ( $unicode && function_exists( 'idn_to_utf8' ) ) {
			// Validate each domain label, decode any punycode to UTF-8, and
			// reassemble the decoded labels into the local $domain variable.
			$decoded_labels = array();
			foreach ( $labels as $label ) {
				/*
				 * Decode punycode labels to their Unicode form for further validation.
				 * The ACE prefix is matched case-insensitively because domain names are
				 * case-insensitive; otherwise 'XN--…' would skip decoding and be
				 * rejected by the reserved-prefix check below.
				 */
				if ( 0 === strncasecmp( $label, 'xn--', 4 ) ) {
					$label = idn_to_utf8( $label, IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46 );
					if ( false === $label ) {
						return false;
					}
				}
				// Reject labels with a reserved ACE-like prefix (two chars followed by '--').
				if ( preg_match( '/^..--/u', $label ) ) {
					return false;
				}
				$decoded_labels[] = $label;
			}
			$domain = implode( '.', $decoded_labels );
		} elseif ( preg_match( '/[\x80-\xff]/', $input ) ) {
			// Without Unicode support, reject any non-ASCII byte in either part.
			return false;
		}

		// Both parts must be valid UTF-8, regardless of whether Unicode is requested. (A valid ASCII string is also valid UTF-8.)
		if ( ! wp_is_valid_utf8( $localpart ) || ! wp_is_valid_utf8( $domain ) ) {
			return false;
		}

		// Validate the local part against the allowed character set.
		if ( ! preg_match( $unicode ? self::LOCAL_PART_UNICODE_REGEX : self::LOCAL_PART_ASCII_REGEX, $localpart ) ) {
			/** This filter is documented in wp-includes/formatting.php */
			if ( ! apply_filters( 'is_email', false, $input, 'local_invalid_chars' ) ) {
				return false;
			}
		}

		// The domain must contain at least one dot.
		if ( ! str_contains( $domain, '.' ) ) {
			/** This filter is documented in wp-includes/formatting.php */
			if ( ! apply_filters( 'is_email', false, $input, 'domain_no_periods' ) ) {
				return false;
			}
		}

		// Validate the domain against the allowed structure. This check is not filterable.
		if ( ! preg_match( $unicode ? self::DOMAIN_UNICODE_REGEX : self::DOMAIN_ASCII_REGEX, $domain ) ) {
			return false;
		}

		return new self( $localpart, $domain );
	}

	/**
	 * Returns the local part of the email address (the portion before the '@').
	 *
	 * @since 7.0.0
	 *
	 * @return string The local part of the email address.
	 */
	public function get_localpart(): string {
		return $this->localpart;
	}

	/**
	 * Returns the domain part of the email address (the portion after the '@').
	 *
	 * @since 7.0.0
	 *
	 * @return string The domain part of the email address.
	 */
	public function get_domain(): string {
		return $this->domain;
	}

	/**
	 * Returns the complete email address as a string.
	 *
	 * The returned value is intended to be accepted by
	 * {@see WP_Email_Address::from_string()} (called with the same `$unicode`
	 * setting) and to produce an equivalent WP_Email_Address instance.
	 *
	 * @since 7.0.0
	 *
	 * @return string The complete email address.
	 */
	public function get_address(): string {
		return $this->localpart . '@' . $this->domain;
	}
}
0 commit comments