@@ -71,7 +71,15 @@ class WP_On_This_Day {
7171 * @since 7.1.0
7272 * @var int
7373 */
74- const CACHE_VERSION = 8 ;
74+ const CACHE_VERSION = 10 ;
75+
76+ /**
77+ * Approximate maximum number of characters shown in each post excerpt.
78+ *
79+ * @since 7.1.0
80+ * @var int
81+ */
82+ const EXCERPT_CHAR_COUNT = 160 ;
7583
7684 /**
7785 * Registers the dashboard widget and its supporting hooks and assets.
@@ -258,6 +266,7 @@ public static function render_window_updated_notice() {
258266
259267 wp_admin_notice (
260268 sprintf (
269+ /* translators: %d: Number of days. */
261270 _n (
262271 'On This Day duration updated to %d day. ' ,
263272 'On This Day duration updated to %d days. ' ,
@@ -334,6 +343,60 @@ protected static function clamp_window_days( $window_days ) {
334343 );
335344 }
336345
/**
 * Extracts a plain-text excerpt from HTML source using the HTML API.
 *
 * Walks the input token by token and collects only the contents of
 * `#text` tokens, so markup, comments, and the contents of elements the
 * tag processor does not expose as `#text` (e.g. script and style) are
 * skipped by construction rather than via regex stripping. A space is
 * emitted for every tag token to keep word boundaries between adjacent
 * block elements (e.g. `<p>One</p><p>Two</p>` -> "One Two").
 *
 * Length is measured in Unicode characters via `mb_strlen()`, which
 * is more language-fair than word counting (CJK languages do not
 * separate words with whitespace).
 *
 * `$max_chars` is a stop threshold, not a hard cap: collection ends after
 * the first text token that brings the running total to the limit, and
 * that token is included whole, so the result may be longer than
 * `$max_chars`.
 *
 * All runs of whitespace (spaces, tabs, line breaks, and Unicode
 * separators) are collapsed to a single space in the returned string.
 *
 * @since 7.1.0
 *
 * @param string $source    HTML source to extract text from.
 * @param int    $max_chars Approximate character limit at which collection stops.
 * @return string Plain-text excerpt, or an empty string when there is no text.
 */
protected static function extract_excerpt_text( $source, $max_chars ) {
	$source = strip_shortcodes( (string) $source );

	if ( '' === trim( $source ) ) {
		return '';
	}

	$processor = new WP_HTML_Tag_Processor( $source );
	$parts     = array();
	$length    = 0;

	while ( $processor->next_token() ) {
		$token_type = $processor->get_token_type();

		// Every tag boundary becomes a potential word break.
		if ( '#tag' === $token_type ) {
			$parts[] = ' ';
			continue;
		}

		// Comments, doctypes, and other non-text tokens contribute nothing.
		if ( '#text' !== $token_type ) {
			continue;
		}

		$chunk   = $processor->get_modifiable_text();
		$parts[] = $chunk;
		$length += mb_strlen( $chunk );

		if ( $length >= $max_chars ) {
			break;
		}
	}

	$text = implode( '', $parts );

	/*
	 * `\p{Z}` alone only covers the Unicode separator categories; tabs and
	 * line breaks are control characters, so `\s` is needed as well to
	 * collapse the newlines that normally sit between block elements.
	 */
	$collapsed = _wp_can_use_pcre_u() ? preg_replace( '~[\s\p{Z}]+~u', ' ', $text ) : null;

	/*
	 * preg_replace() returns null on failure, e.g. when the subject holds
	 * invalid UTF-8 and the `u` flag is set. Fall back to the byte-wise
	 * pattern rather than silently dropping the whole excerpt.
	 */
	if ( null === $collapsed ) {
		$collapsed = preg_replace( '~\s+~', ' ', $text );
	}

	return trim( (string) $collapsed );
}
399+
337400 /**
338401 * Builds date query clauses for each day in the active window.
339402 *
@@ -500,10 +563,10 @@ protected static function render_post( $post, $window_days ) {
500563 $ title = __ ( '(no title) ' );
501564 }
502565
503- $ excerpt = has_excerpt ( $ post ) ? $ post -> post_excerpt : $ post -> post_content ;
504- $ excerpt = wp_strip_all_tags ( strip_shortcodes ( $ excerpt ) );
505- $ excerpt = preg_replace ( ' /\s+/ ' , ' ' , $ excerpt );
506- $ excerpt = wp_trim_words ( trim ( $ excerpt ), 24 , ' … ' );
566+ $ excerpt = self :: extract_excerpt_text (
567+ has_excerpt ( $ post ) ? $ post -> post_excerpt : $ post -> post_content ,
568+ self :: EXCERPT_CHAR_COUNT
569+ );
507570
508571 $ date_str = get_the_date ( 'F j ' , $ post );
509572 $ time_str = get_the_time ( get_option ( 'time_format ' ), $ post );
0 commit comments