Skip to content
Closed
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion src/wp-includes/html-api/class-wp-html-tag-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -3807,7 +3807,14 @@ public function set_modifiable_text( string $plaintext_content ): bool {
return true;
}

if ( self::STATE_MATCHED_TAG !== $this->parser_state ) {
/*
* The rest of this function handles modifiable text for special "atomic" HTML elements.
* Only tags in the HTML namespace should be processed.
*/
if (
self::STATE_MATCHED_TAG !== $this->parser_state ||
'html' !== $this->get_namespace()
Comment thread
sirreal marked this conversation as resolved.
) {
return false;
}

Expand Down
61 changes: 58 additions & 3 deletions tests/phpunit/tests/html-api/wpHtmlProcessorModifiableText.php
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class Tests_HtmlApi_WpHtmlProcessorModifiableText extends WP_UnitTestCase {
* @param string $set_text Text to set.
* @param string $expected_html Expected HTML output.
*/
public function test_modifiable_text_special_textarea( string $set_text, string $expected_html ) {
public function test_modifiable_text_special_textarea( string $set_text, string $expected_html ): void {
$processor = WP_HTML_Processor::create_fragment( '<textarea></textarea>' );
$processor->next_token();
$processor->set_modifiable_text( $set_text );
Expand All @@ -50,9 +50,9 @@ public function test_modifiable_text_special_textarea( string $set_text, string
/**
* Data provider.
*
* @return array[]
* @return array<string, array{0: string, 1: string}>
*/
public static function data_modifiable_text_special_textarea() {
public static function data_modifiable_text_special_textarea(): array {
return array(
'Leading newline' => array(
"\nAFTER NEWLINE",
Expand All @@ -68,4 +68,59 @@ public static function data_modifiable_text_special_textarea() {
),
);
}

/**
* Ensures that `set_modifiable_text()` returns false for elements that are not special "atomic" elements.
*
* This includes atomic-like foreign elements (`<svg><textarea>`) as well as arbitrary HTML
* elements (`<div>`).
*
* @ticket 64751
* @dataProvider data_set_modifiable_fails_non_atomic_tags
*/
public function test_set_modifiable_fails_non_atomic_tags(
string $html,
string $target_tag
): void {
$processor = WP_HTML_Processor::create_fragment( $html );
Comment thread
sirreal marked this conversation as resolved.
$this->assertInstanceOf( WP_HTML_Tag_Processor::class, $processor );
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I’ve been trying to leave helpful notes to people for cases like this where the failure should be unrelated to the unit under test. This is not meant to assert behaviors about creating Tag Processor.

$this->assertInstanceOf( …, $processor, 'Failed to initialize HTML Processor: check test setup.' );

on that note, why are we asserting that the HTML Processor is a Tag Processor?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In a few places I've used assert() to differentiate between test invariants and the unit under test. I don't think that's a common pattern but I do think it has some merits.

I've updated the test to be a non-null assertion with an descriptive message.

$this->assertTrue( $processor->next_tag( $target_tag ), 'Failed to find target tag.' );
$this->assertFalse(
$processor->set_modifiable_text( 'test' ),
"set_modifiable_text() should return false on {$processor->get_namespace()}:{$processor->get_qualified_tag_name()}."
);
$this->assertSame(
$html,
$processor->get_updated_html(),
'HTML should be unchanged after rejected set_modifiable_text().'
);
}

/**
* Data provider.
Comment thread
sirreal marked this conversation as resolved.
*
* @return array<string, array{0: string, 1: string, 2: string}>
*/
public static function data_set_modifiable_fails_non_atomic_tags(): array {
return array(
// Plain HTML tags.
'html DIV' => array( '<div>', 'DIV' ),

// Foreign elements with non-atomic tags.
'svg PATH' => array( '<svg><path></path></svg>', 'PATH' ),
'svg PATH (self-closing)' => array( '<svg><path /></svg>', 'PATH' ),
'math MTEXT' => array( '<math><mtext></mtext></math>', 'MTEXT' ),
'math MSPACE (self-closing)' => array( '<math><mspace /></math>', 'MSPACE' ),

// Foreign elements with atomic-like tags.
'svg TEXTAREA' => array( '<svg><textarea></textarea></svg>', 'TEXTAREA' ),
'svg TITLE' => array( '<svg><title></title></svg>', 'TITLE' ),
'svg STYLE' => array( '<svg><style></style></svg>', 'STYLE' ),
'svg SCRIPT' => array( '<svg><script></script></svg>', 'SCRIPT' ),
'math TEXTAREA' => array( '<math><textarea></textarea></math>', 'TEXTAREA' ),
'math TITLE' => array( '<math><title></title></math>', 'TITLE' ),
'math STYLE' => array( '<math><style></style></math>', 'STYLE' ),
'math SCRIPT' => array( '<math><script></script></math>', 'SCRIPT' ),
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -644,7 +644,7 @@ public function test_json_auto_escaping() {
*
* @ticket 64609
*/
public function test_modifiable_text_special_textarea() {
public function test_modifiable_text_special_textarea(): void {
$processor = new WP_HTML_Tag_Processor( '<textarea></textarea>' );
$processor->next_token();
$processor->set_modifiable_text( "\nAFTER NEWLINE" );
Expand All @@ -664,4 +664,60 @@ public function test_modifiable_text_special_textarea() {
'Should have preserved the leading newline in the content.'
);
}

/**
* Ensures that `set_modifiable_text()` returns false for elements that are
* not special "atomic" elements.
*
* This includes atomic-like foreign elements as well as non-atomic foreign elements.
*
* @ticket 64751
* @dataProvider data_set_modifiable_fails_non_atomic_tags
*/
public function test_set_modifiable_fails_non_atomic_tags(
string $html,
string $parsing_namespace,
string $target_tag
): void {
$processor = new WP_HTML_Tag_Processor( $html );
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is ultimately a test of the Tag Processor and its behavior, no? as in, a failure here is a bug, but a failure in the HTML Processor is a bug in detecting namespace changes.

Does it make sense to run the test in both places? If so, wouldn’t it be quite easy for the tests to diverge given that they are in separate files?

If we want to ensure that the behavior is locked, why not run both classes from this same test runner? Since we have tests already asserting that the HTML Processor detects the namespace changes, we should only need to test the modifiable text behavior here, and therefore we should be fine reading the namespace from the HTML Processor rather than supplying it.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know what you have in mind here, perhaps you could clarify. I'm open to changing or moving things, there is some duplication between these tests.


I will elaborate that the HTML Processor tests have added value. Subtle issues can appear in the interaction between the "smart" HTML processor and the "syntax-only" tag processor.

For example, this change appears equivalent, but it would make the HTML processor tests fail, detecting an issue with svg:TITLE elements:

diff --git c/src/wp-includes/html-api/class-wp-html-tag-processor.php i/src/wp-includes/html-api/class-wp-html-tag-processor.php
index d544f0f49b..cba78fdb51 100644
--- c/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ i/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -3813,7 +3813,7 @@ class WP_HTML_Tag_Processor {
 		 */
 		if (
 			self::STATE_MATCHED_TAG !== $this->parser_state ||
-			'html' !== $this->get_namespace()
+			'html' !== $this->parsing_namespace
 		) {
 			return false;
 		}

That's because svg:title is an HTML integration point. When the svg:title element is put on the stack of open elements, the parsing namespace changes to HTML (this is correct, its children will be HTML elements), but the element's namespace is svg. It's important to use $this->get_namespace() to detect the current namespace and not the namespace to be used for ongoing parsing.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

right, but wouldn’t or shouldn’t this test case be caught by those tests covering the integration point logic, and not slipped under the rug in a seemingly unrelated test?

on the contrary, if we introduced this bug, I would want that to cause a failure in detecting the svg:TITLE namespace, not in some set_modifiable_text() assertion which is testing html:TITLE

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The bug in the diff I shared would be in the implementation of the ::set_modifiable_text() function. I didn't introduce that bug, but could have. I had to consider carefully which was correct and whether there was a difference and I'm intimately familiar with this code.

I'm not sure how else that particular issue could be detected. It's not a problem in any of the other systems. It would be a bug with ::set_modifiable_text(). It would be easy to introduce by assuming that a property check can be trivially swapped for a getter as a micro-optimization. It can't, that would be a mistake.

$processor->change_parsing_namespace( $parsing_namespace );
$this->assertTrue( $processor->next_tag( $target_tag ), 'Failed to find target tag.' );
$this->assertFalse(
$processor->set_modifiable_text( 'test' ),
"set_modifiable_text() should return false for {$parsing_namespace}:{$target_tag}."
);
$this->assertSame(
$html,
$processor->get_updated_html(),
'HTML should be unchanged after rejected set_modifiable_text().'
);
}

/**
* Data provider.
*
* @return array<string, array{0: string, 1: string, 2: string}>
*/
public static function data_set_modifiable_fails_non_atomic_tags(): array {
return array(
// Plain HTML tags.
'html DIV' => array( '<div>', 'html', 'DIV' ),

// Foreign elements with non-atomic tags.
'svg PATH' => array( '<path></path>', 'svg', 'PATH' ),
'svg PATH (self-closing)' => array( '<path />', 'svg', 'PATH' ),
'math MTEXT' => array( '<mtext></mtext>', 'math', 'MTEXT' ),
'math MSPACE (self-closing)' => array( '<mspace />', 'math', 'MSPACE' ),

// Foreign elements with atomic-like tags.
'svg TEXTAREA' => array( '<textarea></textarea>', 'svg', 'TEXTAREA' ),
'svg TITLE' => array( '<title></title>', 'svg', 'TITLE' ),
'svg STYLE' => array( '<style></style>', 'svg', 'STYLE' ),
'svg SCRIPT' => array( '<script></script>', 'svg', 'SCRIPT' ),
'math TEXTAREA' => array( '<textarea></textarea>', 'math', 'TEXTAREA' ),
'math TITLE' => array( '<title></title>', 'math', 'TITLE' ),
'math STYLE' => array( '<style></style>', 'math', 'STYLE' ),
'math SCRIPT' => array( '<script></script>', 'math', 'SCRIPT' ),
);
}
}
Loading