Skip to content

Commit 38c29c8

Browse files
committed
HTML API: Simplify breadcrumb accounting.
Since the HTML Processor started visiting all nodes in a document, both real and virtual, the breadcrumb accounting became a bit complicated and it's not entirely clear that it is fully reliable. In this patch the breadcrumbs are rebuilt separately from the stack of open elements in order to eliminate the problem of the stateful stack interactions and the post-hoc event queue. Breadcrumbs are greatly simplified as a result, and more verifiably correct, in this construction.
1 parent f90c8bf commit 38c29c8

2 files changed

Lines changed: 61 additions & 76 deletions

File tree

src/wp-includes/html-api/class-wp-html-processor.php

Lines changed: 51 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,15 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
211211
*/
212212
private $element_queue = array();
213213

214+
/**
215+
* Stores the current breadcrumbs.
216+
*
217+
* @since 6.7.0
218+
*
219+
* @var string[]
220+
*/
221+
private $breadcrumbs = array();
222+
214223
/**
215224
* Current stack event, if set, representing a matched token.
216225
*
@@ -310,8 +319,8 @@ public static function create_fragment( $html, $context = '<body>', $encoding =
310319
false
311320
);
312321

313-
$processor->state->stack_of_open_elements->push( $context_node );
314322
$processor->context_node = $context_node;
323+
$processor->breadcrumbs = array( 'HTML', $context_node->node_name );
315324

316325
return $processor;
317326
}
@@ -523,44 +532,46 @@ public function next_token() {
523532
return false;
524533
}
525534

526-
if ( 'done' !== $this->has_seen_context_node && 0 === count( $this->element_queue ) && ! $this->step() ) {
527-
while ( 'context-node' !== $this->state->stack_of_open_elements->current_node()->bookmark_name && $this->state->stack_of_open_elements->pop() ) {
528-
continue;
529-
}
530-
$this->has_seen_context_node = 'done';
531-
return $this->next_token();
535+
/*
536+
* Prime the events if there are none.
537+
*
538+
* @todo In some cases, probably related to the adoption agency
539+
* algorithm, this call to step() doesn't create any new
540+
* events. Calling it again creates them. Figure out why
541+
* this is and if it's inherent or if it's a bug. Looping
542+
* until there are events or until there are no more
543+
* tokens works in the meantime and isn't obviously wrong.
544+
*/
545+
while ( empty( $this->element_queue ) && $this->step() ) {
546+
continue;
532547
}
533548

549+
// Process the next event on the queue.
534550
$this->current_element = array_shift( $this->element_queue );
535-
while ( isset( $this->context_node ) && ! $this->has_seen_context_node ) {
536-
if ( isset( $this->current_element ) ) {
537-
if ( $this->context_node === $this->current_element->token && WP_HTML_Stack_Event::PUSH === $this->current_element->operation ) {
538-
$this->has_seen_context_node = true;
539-
return $this->next_token();
540-
}
541-
}
542-
$this->current_element = array_shift( $this->element_queue );
551+
if ( ! isset( $this->current_element ) ) {
552+
return false;
543553
}
544554

545-
if ( ! isset( $this->current_element ) ) {
546-
if ( 'done' === $this->has_seen_context_node ) {
547-
return false;
548-
} else {
549-
return $this->next_token();
550-
}
555+
$is_pop = WP_HTML_Stack_Event::POP === $this->current_element->operation;
556+
557+
/*
558+
* The root node only exists in the fragment parser, and closing it
559+
* indicates that the parse is complete. Stop before popping if from
560+
* the breadcrumbs.
561+
*/
562+
if ( 'root-node' === $this->current_element->token->bookmark_name ) {
563+
return ! $is_pop && $this->next_token();
551564
}
552565

553-
if ( isset( $this->context_node ) && WP_HTML_Stack_Event::POP === $this->current_element->operation && $this->context_node === $this->current_element->token ) {
554-
$this->element_queue = array();
555-
$this->current_element = null;
556-
return false;
566+
// Adjust the breadcrumbs for this event.
567+
if ( $is_pop ) {
568+
array_pop( $this->breadcrumbs );
569+
} else {
570+
$this->breadcrumbs[] = $this->current_element->token->node_name;
557571
}
558572

559573
// Avoid sending close events for elements which don't expect a closing.
560-
if (
561-
WP_HTML_Stack_Event::POP === $this->current_element->operation &&
562-
! static::expects_closer( $this->current_element->token )
563-
) {
574+
if ( $is_pop && ! static::expects_closer( $this->current_element->token ) ) {
564575
return $this->next_token();
565576
}
566577

@@ -643,10 +654,11 @@ public function matches_breadcrumbs( $breadcrumbs ) {
643654
return false;
644655
}
645656

646-
foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) {
657+
for ( $i = count( $this->breadcrumbs ) - 1; $i >= 0; $i-- ) {
658+
$node = $this->breadcrumbs[ $i ];
647659
$crumb = strtoupper( current( $breadcrumbs ) );
648660

649-
if ( '*' !== $crumb && $node->node_name !== $crumb ) {
661+
if ( '*' !== $crumb && $node !== $crumb ) {
650662
return false;
651663
}
652664

@@ -862,46 +874,7 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
862874
* @return string[]|null Array of tag names representing path to matched node, if matched, otherwise NULL.
863875
*/
864876
public function get_breadcrumbs() {
865-
$breadcrumbs = array();
866-
867-
foreach ( $this->state->stack_of_open_elements->walk_down() as $stack_item ) {
868-
$breadcrumbs[] = $stack_item->node_name;
869-
}
870-
871-
if ( ! $this->is_virtual() ) {
872-
return $breadcrumbs;
873-
}
874-
875-
foreach ( $this->element_queue as $queue_item ) {
876-
if ( $this->current_element->token->bookmark_name === $queue_item->token->bookmark_name ) {
877-
break;
878-
}
879-
880-
if ( 'context-node' === $queue_item->token->bookmark_name ) {
881-
break;
882-
}
883-
884-
if ( 'real' === $queue_item->provenance ) {
885-
break;
886-
}
887-
888-
if ( WP_HTML_Stack_Event::PUSH === $queue_item->operation ) {
889-
$breadcrumbs[] = $queue_item->token->node_name;
890-
} else {
891-
array_pop( $breadcrumbs );
892-
}
893-
}
894-
895-
if ( null !== parent::get_token_name() && ! parent::is_tag_closer() ) {
896-
array_pop( $breadcrumbs );
897-
}
898-
899-
// Add the virtual node we're at.
900-
if ( WP_HTML_Stack_Event::PUSH === $this->current_element->operation ) {
901-
$breadcrumbs[] = $this->current_element->token->node_name;
902-
}
903-
904-
return $breadcrumbs;
877+
return $this->breadcrumbs;
905878
}
906879

907880
/**
@@ -930,9 +903,7 @@ public function get_breadcrumbs() {
930903
* @return int Nesting-depth of current location in the document.
931904
*/
932905
public function get_current_depth() {
933-
return $this->is_virtual()
934-
? count( $this->get_breadcrumbs() )
935-
: $this->state->stack_of_open_elements->count();
906+
return count( $this->breadcrumbs );
936907
}
937908

938909
/**
@@ -2552,7 +2523,6 @@ public function seek( $bookmark_name ) {
25522523
? $this->bookmarks[ $this->state->current_token->bookmark_name ]->start
25532524
: 0;
25542525
$bookmark_starts_at = $this->bookmarks[ $actual_bookmark_name ]->start;
2555-
$bookmark_length = $this->bookmarks[ $actual_bookmark_name ]->length;
25562526
$direction = $bookmark_starts_at > $processor_started_at ? 'forward' : 'backward';
25572527

25582528
/*
@@ -2610,6 +2580,12 @@ public function seek( $bookmark_name ) {
26102580
$this->state->frameset_ok = true;
26112581
$this->element_queue = array();
26122582
$this->current_element = null;
2583+
2584+
if ( isset( $this->context_node ) ) {
2585+
$this->breadcrumbs = array_slice( $this->breadcrumbs, 0, 2 );
2586+
} else {
2587+
$this->breadcrumbs = array();
2588+
}
26132589
}
26142590

26152591
// When moving forwards, reparse the document until reaching the same location as the original bookmark.

tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,16 @@ public function test_in_body_any_other_end_tag_with_unclosed_non_special_element
387387
$this->assertSame( 'CODE', $processor->get_tag(), "Expected to start test on CODE element but found {$processor->get_tag()} instead." );
388388
$this->assertSame( array( 'HTML', 'BODY', 'DIV', 'SPAN', 'CODE' ), $processor->get_breadcrumbs(), 'Failed to produce expected DOM nesting.' );
389389

390-
$this->assertTrue( $processor->next_token(), 'Failed to advance past CODE tag to expected SPAN closer.' );
390+
$this->assertTrue(
391+
$processor->next_tag(
392+
array(
393+
'tag_name' => 'SPAN',
394+
'tag_closers' => 'visit',
395+
)
396+
),
397+
'Failed to advance past CODE tag to expected SPAN closer.'
398+
);
399+
$this->assertSame( 'SPAN', $processor->get_tag() );
391400
$this->assertTrue( $processor->is_tag_closer(), 'Expected to find closing SPAN, but found opener instead.' );
392401
$this->assertSame( array( 'HTML', 'BODY', 'DIV' ), $processor->get_breadcrumbs(), 'Failed to advance past CODE tag to expected DIV opener.' );
393402

0 commit comments

Comments
 (0)