Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
2c9a17d
Fix group -> ticket test annotation
sirreal Dec 10, 2025
da53752
Add failing test for deep nesting
sirreal Dec 10, 2025
0337b13
Handle Exception thrown by bookmark_token()
sirreal Dec 10, 2025
4101235
Add failing test for virtual tokens
sirreal Dec 10, 2025
e7d649c
Handle insert_virtual_node exceptions
sirreal Dec 10, 2025
1cfeb85
Fix php 7.x compat
sirreal Dec 10, 2025
17bbca8
Update test ticket annotation
sirreal Dec 10, 2025
ba480a5
Increase virtual token HTML size
sirreal Dec 10, 2025
ff7657f
Use single-line comment
sirreal Dec 12, 2025
b02d679
Merge branch 'trunk' into html-api/ensure-deep-nesting-no-exception
sirreal Jan 29, 2026
9e042f3
Return false or throw unsupported exception where available
sirreal Jan 29, 2026
c7710aa
Remove general exception catching
sirreal Jan 29, 2026
1681fed
Avoid throwing unsupported exception when tokens exceeded
sirreal Jan 29, 2026
3f6bffd
Merge branch 'trunk' into html-api/ensure-deep-nesting-no-exception
sirreal Jan 29, 2026
aefc5bf
Remove unsupported union return type
sirreal Jan 29, 2026
6d1f860
Use arrow functions ✨
sirreal Jan 30, 2026
207f04f
Merge branch 'trunk' into html-api/ensure-deep-nesting-no-exception
sirreal Jan 30, 2026
cbab9c1
Add assertion message and use class constant
sirreal Jan 30, 2026
c578125
Merge branch 'trunk' into html-api/ensure-deep-nesting-no-exception
sirreal Feb 2, 2026
5e45206
Merge branch 'trunk' into html-api/ensure-deep-nesting-no-exception
sirreal Feb 26, 2026
c4476be
Restore exception-catching approach
sirreal Feb 26, 2026
b847559
Improve tests
sirreal Feb 26, 2026
aa06e5e
Merge branch 'trunk' into html-api/ensure-deep-nesting-no-exception
sirreal Feb 26, 2026
547d37b
Lints
sirreal Feb 26, 2026
b6df5f7
Merge branch 'trunk' into html-api/ensure-deep-nesting-no-exception
sirreal Feb 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -1042,8 +1042,17 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ): bool {
$token_name = $this->get_token_name();

if ( self::REPROCESS_CURRENT_NODE !== $node_to_process ) {
try {
$bookmark_name = $this->bookmark_token();
} catch ( Exception $e ) {
if ( self::ERROR_EXCEEDED_MAX_BOOKMARKS === $this->last_error ) {
return false;
}
throw $e;
}

$this->state->current_token = new WP_HTML_Token(
$this->bookmark_token(),
$bookmark_name,
$token_name,
$this->has_self_closing_flag(),
$this->release_internal_bookmark_on_destruct
Expand Down Expand Up @@ -1153,6 +1162,12 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ): bool {
* otherwise might involve messier calling and return conventions.
*/
return false;
} catch ( Exception $e ) {
if ( self::ERROR_EXCEEDED_MAX_BOOKMARKS === $this->last_error ) {
return false;
}
// Rethrow any other exceptions for higher-level handling.
throw $e;
}
}

Expand Down Expand Up @@ -6315,6 +6330,8 @@ private function insert_foreign_element( WP_HTML_Token $token, bool $only_add_to
*
* @since 6.7.0
*
* @throws Exception When unable to allocate a bookmark for the next token in the input HTML document.
*
* @param string $token_name Name of token to create and insert into the stack of open elements.
* @param string|null $bookmark_name Optional. Name to give bookmark for created virtual node.
* Defaults to auto-creating a bookmark name.
Expand Down
104 changes: 103 additions & 1 deletion tests/phpunit/tests/html-api/wpHtmlProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -1068,7 +1068,7 @@ public function test_ensure_next_token_method_extensibility( $html, $expected_to
/**
* Ensure that lowercased tag_name query matches tags case-insensitively.
*
* @group 62427
* @ticket 62427
*/
public function test_next_tag_lowercase_tag_name() {
// The upper case <DIV> is irrelevant but illustrates the case-insensitivity.
Expand All @@ -1079,4 +1079,106 @@ public function test_next_tag_lowercase_tag_name() {
$processor = WP_HTML_Processor::create_fragment( '<svg><RECT>' );
$this->assertTrue( $processor->next_tag( array( 'tag_name' => 'rect' ) ) );
}

/**
 * Verifies that extremely deep HTML nesting stops processing and reports
 * the exceeded-max-bookmarks error instead of throwing.
 *
 * @ticket 64394
 *
 * @expectedIncorrectUsage WP_HTML_Tag_Processor::set_bookmark
 */
public function test_deep_nesting_fails_process_without_error() {
	// Nest twice MAX_BOOKMARKS opening tags.
	$tag_count = WP_HTML_Processor::MAX_BOOKMARKS * 2;
	$processor = WP_HTML_Processor::create_fragment( str_repeat( '<i>', $tag_count ) );

	// Drain the token stream until the processor reports no further token.
	do {
		$has_token = $processor->next_token();
	} while ( $has_token );

	$last_error = $processor->get_last_error();

	$this->assertSame(
		WP_HTML_Processor::ERROR_EXCEEDED_MAX_BOOKMARKS,
		$last_error,
		'Failed to report exceeded-max-bookmarks error.'
	);
}

/**
* @ticket 64394
*
* @expectedIncorrectUsage WP_HTML_Tag_Processor::set_bookmark
*/
public function test_deep_nesting_fails_processing_virtual_tokens_without_error() {
/*
* This test has some variability depending on how the virtual tokens align.
* In order to ensure that bookmarks are exhausted on a virtual token
* without throwing an error, 3 documents are parsed with different "offsets"
* to ensure that the bookmarks are exhausted on a virtual token in at least one of the runs.
*
* "<table><td><table><td>…" produces:
* └─TABLE (real)
* └─TBODY (virtual)
* └─TR (virtual)
* └─TD (real)
* └─TABLE (real)
* └─TBODY (virtual)
* └─TR (virtual)
* └─TD (real)
* └─…
*/
$html_table_td = str_repeat( '<table><td>', WP_HTML_Processor::MAX_BOOKMARKS * 2 );

// Offset 0
$processor = WP_HTML_Processor::create_fragment( $html_table_td );
while ( $processor->next_token() ) {
// Process tokens.
}
$this->assertSame(
WP_HTML_Processor::ERROR_EXCEEDED_MAX_BOOKMARKS,
$processor->get_last_error(),
'Failed to report exceeded-max-bookmarks error.'
);

// Offset 1
$processor = WP_HTML_Processor::create_fragment( "<div>{$html_table_td}" );
while ( $processor->next_token() ) {
// Process tokens.
}
$this->assertSame(
WP_HTML_Processor::ERROR_EXCEEDED_MAX_BOOKMARKS,
$processor->get_last_error(),
'Failed to report exceeded-max-bookmarks error.'
);

// Offset 2
$processor = WP_HTML_Processor::create_fragment( "<div><div>{$html_table_td}" );
while ( $processor->next_token() ) {
// Process tokens.
}
$this->assertSame(
WP_HTML_Processor::ERROR_EXCEEDED_MAX_BOOKMARKS,
$processor->get_last_error(),
'Failed to report exceeded-max-bookmarks error.'
);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this test confuses me.

Not sure if you noticed, but we should have $this->assertEqualsWithDelta( MAX_BOOKMARKS, $reached_tokens, 1 ); available to say "within 1 of the max"

but why the variability? and what does “depending on how the tokens align” mean? it feels flakey or not-fully-understood to me, and I get nervous adding an assertion of that in case we’re testing a happenstance detail and not a contract.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test is very tricky, and imperfect as it is. I want to confirm that these don't throw errors during processing, which is the big thing I want to address in this PR.

It would be fine to change this to assert that no exceptions are thrown.

This test in particular was difficult because the virtual tokens had another failure path. Consider:

<table><td><table><td> produces:

└─TABLE (real)
  └─TBODY (virtual)
    └─TR (virtual)
      └─TD (real)
        └─TABLE (real)
          └─TBODY (virtual)
            └─TR (virtual)
              └─TD (real)

To make this test more robust, it would probably need to process 3 times:

  • <table><td>…
  • <div><table><td>…
  • <div><div><table><td>…

Since there are pairs of real tokens TD (real) + TABLE (real), running with 0, 1, 2 offsets should ensure that the virtual token lands on the threshold at least once.

}

/**
 * Ensures that the processor refuses to set bookmarks without bound.
 *
 * @ticket 64394
 *
 * @expectedIncorrectUsage WP_HTML_Tag_Processor::set_bookmark
 */
public function test_prevents_unbounded_bookmarking() {
	$processor = WP_HTML_Processor::create_full_parser( '<!DOCTYPE html><html>' );
	$processor->next_tag();

	// This might fail before the MAX_BOOKMARKS limit, which is okay.
	foreach ( range( 0, WP_HTML_Processor::MAX_BOOKMARKS ) as $n ) {
		if ( ! $processor->set_bookmark( "{$n}" ) ) {
			break;
		}
	}

	// Whether or not the loop stopped early, one more bookmark must be rejected.
	$this->assertFalse(
		$processor->set_bookmark( 'beyond the limit' ),
		'Allowed setting a bookmark beyond the limit.'
	);
}
}
Loading