-
Notifications
You must be signed in to change notification settings - Fork 3.4k
HTML API: Ensure bookmark exaustion does not error #10616
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 21 commits
2c9a17d
da53752
0337b13
4101235
e7d649c
1cfeb85
17bbca8
ba480a5
ff7657f
b02d679
9e042f3
c7710aa
1681fed
3f6bffd
aefc5bf
6d1f860
207f04f
cbab9c1
c578125
5e45206
c4476be
b847559
aa06e5e
547d37b
b6df5f7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1042,8 +1042,17 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ): bool { | |
| $token_name = $this->get_token_name(); | ||
|
|
||
| if ( self::REPROCESS_CURRENT_NODE !== $node_to_process ) { | ||
| try { | ||
| $bookmark_name = $this->bookmark_token(); | ||
| } catch ( Exception $e ) { | ||
| if ( self::ERROR_EXCEEDED_MAX_BOOKMARKS === $this->last_error ) { | ||
| return false; | ||
|
sirreal marked this conversation as resolved.
|
||
| } | ||
| throw $e; | ||
| } | ||
|
|
||
| $this->state->current_token = new WP_HTML_Token( | ||
| $this->bookmark_token(), | ||
| $bookmark_name, | ||
| $token_name, | ||
| $this->has_self_closing_flag(), | ||
| $this->release_internal_bookmark_on_destruct | ||
|
|
@@ -1153,6 +1162,12 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ): bool { | |
| * otherwise might involve messier calling and return conventions. | ||
| */ | ||
| return false; | ||
| } catch ( Exception $e ) { | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Exhausted bookmarks throw a generic This block catches the exceptions thrown by |
||
| if ( self::ERROR_EXCEEDED_MAX_BOOKMARKS === $this->last_error ) { | ||
| return false; | ||
| } | ||
| // Rethrow any other exceptions for higher-level handling. | ||
| throw $e; | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -6315,6 +6330,8 @@ private function insert_foreign_element( WP_HTML_Token $token, bool $only_add_to | |
| * | ||
| * @since 6.7.0 | ||
| * | ||
| * @throws Exception When unable to allocate a bookmark for the next token in the input HTML document. | ||
| * | ||
|
Comment on lines
+6333
to
+6334
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| * @param string $token_name Name of token to create and insert into the stack of open elements. | ||
| * @param string|null $bookmark_name Optional. Name to give bookmark for created virtual node. | ||
| * Defaults to auto-creating a bookmark name. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1068,7 +1068,7 @@ public function test_ensure_next_token_method_extensibility( $html, $expected_to | |
| /** | ||
| * Ensure that lowercased tag_name query matches tags case-insensitively. | ||
| * | ||
| * @group 62427 | ||
| * @ticket 62427 | ||
| */ | ||
| public function test_next_tag_lowercase_tag_name() { | ||
| // The upper case <DIV> is irrelevant but illustrates the case-insentivity. | ||
|
|
@@ -1079,4 +1079,67 @@ public function test_next_tag_lowercase_tag_name() { | |
| $processor = WP_HTML_Processor::create_fragment( '<svg><RECT>' ); | ||
| $this->assertTrue( $processor->next_tag( array( 'tag_name' => 'rect' ) ) ); | ||
| } | ||
|
|
||
| /** | ||
| * Ensure that the processor does not throw errors in cases of extreme HTML nesting. | ||
| * | ||
| * @ticket 64394 | ||
| * | ||
| * @expectedIncorrectUsage WP_HTML_Tag_Processor::set_bookmark | ||
| */ | ||
| public function test_deep_nesting_fails_process_without_error() { | ||
| $html = str_repeat( '<i>', WP_HTML_Processor::MAX_BOOKMARKS * 2 ); | ||
| $processor = WP_HTML_Processor::create_fragment( $html ); | ||
|
|
||
| // The fragment parser starts with a few context tokens already bookmarked. | ||
| $reached_tokens = ( fn() => count( $this->bookmarks ) )->call( $processor ); | ||
| while ( $processor->next_token() ) { | ||
| ++$reached_tokens; | ||
| } | ||
| $this->assertSame( WP_HTML_Processor::MAX_BOOKMARKS, $reached_tokens ); | ||
| $this->assertSame( | ||
| WP_HTML_Processor::ERROR_EXCEEDED_MAX_BOOKMARKS, | ||
| $processor->get_last_error(), | ||
| 'Failed to report exceeded-max-bookmarks error.' | ||
| ); | ||
| } | ||
|
|
||
| /** | ||
| * @ticket 64394 | ||
| * | ||
| * @expectedIncorrectUsage WP_HTML_Tag_Processor::set_bookmark | ||
| */ | ||
| public function test_deep_nesting_fails_processing_virtual_tokens_without_error() { | ||
| $html = str_repeat( '<table><td>', WP_HTML_Processor::MAX_BOOKMARKS * 2 ); | ||
| $processor = WP_HTML_Processor::create_fragment( $html ); | ||
|
|
||
| // The fragment parser starts with a few context tokens already bookmarked. | ||
| $reached_tokens = ( fn() => count( $this->bookmarks ) )->call( $processor ); | ||
| while ( $processor->next_token() ) { | ||
| ++$reached_tokens; | ||
| } | ||
|
|
||
| /* | ||
| * This test has some variability depending on how the virtual tokens align. | ||
| * It will produce 1 real, 2 virtual, 1 real. | ||
| * | ||
| * "<table><td><table><td>…" produces: | ||
| * └─TABLE | ||
| * └─TBODY (virtual) | ||
| * └─TR (virtual) | ||
| * └─TD | ||
| * └─TABLE | ||
| * └─TBODY (virtual) | ||
| * └─TR (virtual) | ||
| * └─TD | ||
| * └─… | ||
| */ | ||
| $this->assertGreaterThanOrEqual( WP_HTML_Processor::MAX_BOOKMARKS - 1, $reached_tokens ); | ||
| $this->assertLessThanOrEqual( WP_HTML_Processor::MAX_BOOKMARKS + 1, $reached_tokens ); | ||
| $this->assertSame( | ||
| WP_HTML_Processor::ERROR_EXCEEDED_MAX_BOOKMARKS, | ||
| $processor->get_last_error(), | ||
| 'Failed to report exceeded-max-bookmarks error.' | ||
| ); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this test confuses me. Not sure if you noticed, but we should have but why the variability? and what does “depending on how the tokens align” mean? it feels flakey or not-fully-understood to me, and I get nervous adding an assertion of that in case we’re testing a happenstance detail and not a contract.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test is very tricky, and imperfect as it is. I want to confirm that these don't throw errors during processing, which is the big thing I want to address in this PR. It would be fine to change this to assert that no assertions are thrown. This test in particular was difficult because the virtual tokens had another failure path. Consider:
To make this test more robust, it would probably need to process 3 times:
Since there are pairs of real tokens |
||
| } | ||
|
sirreal marked this conversation as resolved.
|
||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.