diff --git a/tests/phpunit/tests/html-api/wpHtmlDecoder.php b/tests/phpunit/tests/html-api/wpHtmlDecoder.php
index 82d6a10d349db..2e4671b53e3a4 100644
--- a/tests/phpunit/tests/html-api/wpHtmlDecoder.php
+++ b/tests/phpunit/tests/html-api/wpHtmlDecoder.php
@@ -33,6 +33,8 @@ public function test_edge_cases( $raw_text_node, $decoded_value ) {
public static function data_edge_cases() {
return array(
'Single ampersand' => array( '&', '&' ),
+ 'NULL byte' => array( "\0", "\0" ), // Expected value is identical to the raw input.
+ 'Unknown entity' => array( '&unknown;', '&unknown;' ), // Expected value is identical to the raw input.
);
}
@@ -138,4 +140,171 @@ public static function data_attributes_with_prefix_and_case_sensitive_match() {
array( 'http://wordpress.org', 'https', 'ascii-case-insensitive', false ),
);
}
+
+	/**
+	 * Ensures that named character references are decoded in attribute values.
+	 *
+	 * Each character is checked twice: once with its terminating semicolon and
+	 * once in the legacy form without it. Inside an attribute the legacy form
+	 * is only decoded when the following character is neither `=` nor
+	 * alphanumeric, which holds here because the reference ends the string.
+	 *
+	 * The inputs must remain spelled as character references. If they are
+	 * replaced by the characters they represent, the assertions still pass
+	 * but no longer exercise the decoder.
+	 *
+	 * @ticket 61072
+	 */
+	public function test_decode_attribute_decodes_named_entities() {
+		$this->assertSame( '&', WP_HTML_Decoder::decode_attribute( '&amp;' ) );
+		$this->assertSame( '&', WP_HTML_Decoder::decode_attribute( '&amp' ) );
+		$this->assertSame( '<', WP_HTML_Decoder::decode_attribute( '&lt;' ) );
+		$this->assertSame( '<', WP_HTML_Decoder::decode_attribute( '&lt' ) );
+		$this->assertSame( '>', WP_HTML_Decoder::decode_attribute( '&gt;' ) );
+		$this->assertSame( '>', WP_HTML_Decoder::decode_attribute( '&gt' ) );
+		$this->assertSame( '"', WP_HTML_Decoder::decode_attribute( '&quot;' ) );
+		$this->assertSame( '"', WP_HTML_Decoder::decode_attribute( '&quot' ) );
+		$this->assertSame( '©', WP_HTML_Decoder::decode_attribute( '&copy;' ) );
+		$this->assertSame( '©', WP_HTML_Decoder::decode_attribute( '&copy' ) );
+	}
+
+	/**
+	 * Ensures that decimal numeric character references are decoded in attribute values.
+	 *
+	 * Covers the plain form, forms padded with leading zeros, and the form
+	 * that is missing its terminating semicolon. All of them refer to
+	 * U+0041 (LATIN CAPITAL LETTER A).
+	 *
+	 * @ticket 61072
+	 */
+	public function test_decode_attribute_decodes_decimal_numeric_entities() {
+		$this->assertSame( 'A', WP_HTML_Decoder::decode_attribute( '&#65;' ) );
+		$this->assertSame( 'A', WP_HTML_Decoder::decode_attribute( '&#065;' ) );
+		$this->assertSame( 'A', WP_HTML_Decoder::decode_attribute( '&#0065;' ) );
+		$this->assertSame( 'A', WP_HTML_Decoder::decode_attribute( '&#65' ) );
+	}
+
+	/**
+	 * Ensures that hexadecimal numeric character references are decoded in attribute values.
+	 *
+	 * Covers both spellings of the hex marker (`x` and `X`), leading zeros,
+	 * a missing terminating semicolon, and a code point outside the Basic
+	 * Multilingual Plane, which requires a four-byte UTF-8 sequence.
+	 *
+	 * @ticket 61072
+	 */
+	public function test_decode_attribute_decodes_hex_numeric_entities() {
+		$this->assertSame( 'A', WP_HTML_Decoder::decode_attribute( '&#x41;' ) );
+		$this->assertSame( 'A', WP_HTML_Decoder::decode_attribute( '&#X41;' ) );
+		$this->assertSame( 'A', WP_HTML_Decoder::decode_attribute( '&#x041;' ) );
+		$this->assertSame( 'A', WP_HTML_Decoder::decode_attribute( '&#x0041;' ) );
+		$this->assertSame( 'A', WP_HTML_Decoder::decode_attribute( '&#x41' ) );
+		$this->assertSame( '😀', WP_HTML_Decoder::decode_attribute( '&#x1F600;' ) );
+	}
+
+	/**
+	 * Verifies that a raw reference decodes to the expected character both
+	 * as text-node content and as an attribute value.
+	 *
+	 * @ticket 61072
+	 *
+	 * @dataProvider data_windows_1252_mapped_characters
+	 *
+	 * @param string $raw_text Raw numeric character reference.
+	 * @param string $expected Expected decoded character.
+	 */
+	public function test_decodes_windows_1252_mapped_characters( $raw_text, $expected ) {
+		// Text-node context is checked first, then attribute context.
+		$decoded_as_text = WP_HTML_Decoder::decode_text_node( $raw_text );
+		$this->assertSame( $expected, $decoded_as_text );
+
+		$decoded_as_attribute = WP_HTML_Decoder::decode_attribute( $raw_text );
+		$this->assertSame( $expected, $decoded_as_attribute );
+	}
+
+	/**
+	 * Data provider for Windows-1252 mapped characters.
+	 *
+	 * Numeric character references in the range 0x80–0x9F do not decode to the
+	 * C1 control characters at those code points. HTML remaps them to the
+	 * characters found at the same positions in Windows-1252.
+	 *
+	 * The raw values must stay spelled as character references: if they are
+	 * replaced by literal control characters the decoder has nothing to remap.
+	 *
+	 * @return array[] {
+	 *     @type string $0 Raw numeric character reference.
+	 *     @type string $1 Expected decoded character.
+	 * }
+	 */
+	public static function data_windows_1252_mapped_characters() {
+		return array(
+			'Euro sign'        => array( '&#x80;', '€' ),
+			'Single low-9'     => array( '&#x82;', '‚' ),
+			'F with hook'      => array( '&#x83;', 'ƒ' ),
+			'Double low-9'     => array( '&#x84;', '„' ),
+			'Ellipsis'         => array( '&#x85;', '…' ),
+			'Dagger'           => array( '&#x86;', '†' ),
+			'Double dagger'    => array( '&#x87;', '‡' ),
+			'Circumflex'       => array( '&#x88;', 'ˆ' ),
+			'Per mille'        => array( '&#x89;', '‰' ),
+			'S with caron'     => array( '&#x8A;', 'Š' ),
+			'Less single guil' => array( '&#x8B;', '‹' ),
+			'OE ligature'      => array( '&#x8C;', 'Œ' ),
+			'Z with caron'     => array( '&#x8E;', 'Ž' ),
+			'Left single quot' => array( '&#x91;', '‘' ),
+			'Right single quo' => array( '&#x92;', '’' ),
+			'Left double quot' => array( '&#x93;', '“' ),
+			'Right double quo' => array( '&#x94;', '”' ),
+			'Bullet'           => array( '&#x95;', '•' ),
+			'En dash'          => array( '&#x96;', '–' ),
+			'Em dash'          => array( '&#x97;', '—' ),
+			'Small tilde'      => array( '&#x98;', '˜' ),
+			'Trade mark'       => array( '&#x99;', '™' ),
+			's with caron'     => array( '&#x9A;', 'š' ),
+			'Right single gui' => array( '&#x9B;', '›' ),
+			'oe ligature'      => array( '&#x9C;', 'œ' ),
+			'z with caron'     => array( '&#x9E;', 'ž' ),
+			'Y with diaeresis' => array( '&#x9F;', 'Ÿ' ),
+		);
+	}
+
+	/**
+	 * Verifies how invalid and special numeric character references are
+	 * decoded when they appear in a text node.
+	 *
+	 * @ticket 61072
+	 *
+	 * @dataProvider data_invalid_numeric_references
+	 *
+	 * @param string $raw_text Raw numeric character reference.
+	 * @param string $expected Expected decoded string.
+	 */
+	public function test_decodes_invalid_numeric_references( $raw_text, $expected ) {
+		$decoded = WP_HTML_Decoder::decode_text_node( $raw_text );
+
+		$this->assertSame( $expected, $decoded );
+	}
+
+	/**
+	 * Data provider for invalid numeric references.
+	 *
+	 * References that name U+0000, a surrogate, or a value beyond U+10FFFF,
+	 * and references with more digits than any valid code point needs, are
+	 * expected to decode to U+FFFD (REPLACEMENT CHARACTER). A `&#` or `&#x`
+	 * prefix with no digits at all is not a character reference and is
+	 * expected to be returned unchanged.
+	 *
+	 * @return array[] {
+	 *     @type string $0 Raw numeric character reference.
+	 *     @type string $1 Expected decoded string.
+	 * }
+	 */
+	public static function data_invalid_numeric_references() {
+		// UTF-8 encoding of U+FFFD, written as bytes so it cannot be altered by re-encoding.
+		$replacement = "\xEF\xBF\xBD";
+
+		return array(
+			'Null byte'             => array( '&#0;', $replacement ),
+			'Null byte (hex)'       => array( '&#x0;', $replacement ),
+			'Surrogate low'         => array( '&#xD800;', $replacement ),
+			'Surrogate mid'         => array( '&#xDC00;', $replacement ),
+			'Surrogate high'        => array( '&#xDFFF;', $replacement ),
+			'Out of range'          => array( '&#x110000;', $replacement ),
+			'No digits'             => array( '&#;', '&#;' ),
+			'No digits (hex)'       => array( '&#x;', '&#x;' ),
+			'Too many digits'       => array( '&#12345678;', $replacement ), // Limit is 7.
+			'Too many digits (hex)' => array( '&#x1234567;', $replacement ), // Limit is 6.
+			'Only zeros'            => array( '&#000;', $replacement ),
+		);
+	}
+
+	/**
+	 * Verifies the decoded result for ampersand sequences in a given
+	 * decoding context.
+	 *
+	 * @ticket 61072
+	 *
+	 * @dataProvider data_ambiguous_ampersands
+	 *
+	 * @param string $context  'attribute' or 'data'.
+	 * @param string $raw_text Raw text.
+	 * @param string $expected Expected decoded string.
+	 */
+	public function test_decodes_ambiguous_ampersands( $context, $raw_text, $expected ) {
+		$decoded = WP_HTML_Decoder::decode( $context, $raw_text );
+
+		$this->assertSame( $expected, $decoded );
+	}
+
+	/**
+	 * Data provider for ambiguous ampersands.
+	 *
+	 * A legacy named reference such as `&not` may appear without its
+	 * terminating semicolon. In the 'data' context it is decoded regardless
+	 * of what follows. In the 'attribute' context it is left untouched when
+	 * the next character is `=` or alphanumeric, because such text is far
+	 * more likely to be part of a URL query string than a reference.
+	 *
+	 * @return array[] {
+	 *     @type string $0 Decoding context, 'attribute' or 'data'.
+	 *     @type string $1 Raw text.
+	 *     @type string $2 Expected decoded string.
+	 * }
+	 */
+	public static function data_ambiguous_ampersands() {
+		return array(
+			'Starting with logical AND'           => array( 'data', '&', '&' ),
+			'Starting with logical AND (attr)'    => array( 'attribute', '&', '&' ),
+			'Ambiguous with equals'               => array( 'data', '&not=', '¬=' ),
+			'Ambiguous with equals (attr)'        => array( 'attribute', '&not=', '&not=' ),
+			'Ambiguous with alphanumeric'         => array( 'data', '&notit', '¬it' ),
+			'Ambiguous with alphanumeric (attr)'  => array( 'attribute', '&notit', '&notit' ),
+			'Not ambiguous (semicolon)'           => array( 'data', '&not;', '¬' ),
+			'Not ambiguous (semicolon) (attr)'    => array( 'attribute', '&not;', '¬' ),
+			'Not ambiguous (non-alphanum)'        => array( 'data', '&not ', '¬ ' ),
+			'Not ambiguous (non-alphanum) (attr)' => array( 'attribute', '&not ', '¬ ' ),
+		);
+	}
}