Skip to content

Commit 50eca61

Browse files
craigfrancis authored and nikic committed
Use ENT_QUOTES|ENT_SUBSTITUTE default for HTML encoding and decoding functions
htmlspecialchars() etc now use ENT_QUOTES | ENT_SUBSTITUTE rather than ENT_COMPAT by default. Closes GH-6583.
1 parent 496e474 commit 50eca61

14 files changed

+41
-35
lines changed

UPGRADING

+6
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,12 @@ PHP 8.1 UPGRADE NOTES
6969

7070
- Standard:
7171
. version_compare() no longer accepts undocumented operator abbreviations.
72+
. htmlspecialchars(), htmlentities(), htmlspecialchars_decode(),
73+
html_entity_decode() and get_html_translation_table() now use
74+
ENT_QUOTES | ENT_SUBSTITUTE rather than ENT_COMPAT by default. This means
75+
that ' is escaped to &#039; while previously it was left alone.
76+
Additionally, malformed UTF-8 will be replaced by a Unicode substitution
77+
character, instead of resulting in an empty string.
7278

7379
========================================
7480
2. New Features

ext/standard/basic_functions.stub.php

+5-5
Original file line numberDiff line numberDiff line change
@@ -512,15 +512,15 @@ function headers_list(): array {}
512512

513513
/* {{{ html.c */
514514

515-
function htmlspecialchars(string $string, int $flags = ENT_COMPAT, ?string $encoding = null, bool $double_encode = true): string {}
515+
function htmlspecialchars(string $string, int $flags = ENT_QUOTES | ENT_SUBSTITUTE, ?string $encoding = null, bool $double_encode = true): string {}
516516

517-
function htmlspecialchars_decode(string $string, int $flags = ENT_COMPAT): string {}
517+
function htmlspecialchars_decode(string $string, int $flags = ENT_QUOTES | ENT_SUBSTITUTE): string {}
518518

519-
function html_entity_decode(string $string, int $flags = ENT_COMPAT, ?string $encoding = null): string {}
519+
function html_entity_decode(string $string, int $flags = ENT_QUOTES | ENT_SUBSTITUTE, ?string $encoding = null): string {}
520520

521-
function htmlentities(string $string, int $flags = ENT_COMPAT, ?string $encoding = null, bool $double_encode = true): string {}
521+
function htmlentities(string $string, int $flags = ENT_QUOTES | ENT_SUBSTITUTE, ?string $encoding = null, bool $double_encode = true): string {}
522522

523-
function get_html_translation_table(int $table = HTML_SPECIALCHARS, int $flags = ENT_COMPAT, string $encoding = "UTF-8"): array {}
523+
function get_html_translation_table(int $table = HTML_SPECIALCHARS, int $flags = ENT_QUOTES | ENT_SUBSTITUTE, string $encoding = "UTF-8"): array {}
524524

525525
/* }}} */
526526

ext/standard/basic_functions_arginfo.h

+4-4
Original file line numberDiff line numberDiff line change
@@ -765,27 +765,27 @@ ZEND_END_ARG_INFO()
765765

766766
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_htmlspecialchars, 0, 1, IS_STRING, 0)
767767
ZEND_ARG_TYPE_INFO(0, string, IS_STRING, 0)
768-
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "ENT_COMPAT")
768+
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "ENT_QUOTES | ENT_SUBSTITUTE")
769769
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 1, "null")
770770
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, double_encode, _IS_BOOL, 0, "true")
771771
ZEND_END_ARG_INFO()
772772

773773
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_htmlspecialchars_decode, 0, 1, IS_STRING, 0)
774774
ZEND_ARG_TYPE_INFO(0, string, IS_STRING, 0)
775-
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "ENT_COMPAT")
775+
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "ENT_QUOTES | ENT_SUBSTITUTE")
776776
ZEND_END_ARG_INFO()
777777

778778
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_html_entity_decode, 0, 1, IS_STRING, 0)
779779
ZEND_ARG_TYPE_INFO(0, string, IS_STRING, 0)
780-
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "ENT_COMPAT")
780+
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "ENT_QUOTES | ENT_SUBSTITUTE")
781781
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 1, "null")
782782
ZEND_END_ARG_INFO()
783783

784784
#define arginfo_htmlentities arginfo_htmlspecialchars
785785

786786
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_get_html_translation_table, 0, 0, IS_ARRAY, 0)
787787
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, table, IS_LONG, 0, "HTML_SPECIALCHARS")
788-
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "ENT_COMPAT")
788+
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "ENT_QUOTES | ENT_SUBSTITUTE")
789789
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 0, "\"UTF-8\"")
790790
ZEND_END_ARG_INFO()
791791

ext/standard/html.c

+4-4
Original file line numberDiff line numberDiff line change
@@ -1316,7 +1316,7 @@ PHPAPI zend_string *php_escape_html_entities_ex(const unsigned char *old, size_t
13161316
static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all)
13171317
{
13181318
zend_string *str, *hint_charset = NULL;
1319-
zend_long flags = ENT_COMPAT;
1319+
zend_long flags = ENT_QUOTES|ENT_SUBSTITUTE;
13201320
zend_string *replaced;
13211321
bool double_encode = 1;
13221322

@@ -1367,7 +1367,7 @@ PHP_FUNCTION(htmlspecialchars)
13671367
PHP_FUNCTION(htmlspecialchars_decode)
13681368
{
13691369
zend_string *str;
1370-
zend_long quote_style = ENT_COMPAT;
1370+
zend_long quote_style = ENT_QUOTES|ENT_SUBSTITUTE;
13711371
zend_string *replaced;
13721372

13731373
ZEND_PARSE_PARAMETERS_START(1, 2)
@@ -1385,7 +1385,7 @@ PHP_FUNCTION(htmlspecialchars_decode)
13851385
PHP_FUNCTION(html_entity_decode)
13861386
{
13871387
zend_string *str, *hint_charset = NULL;
1388-
zend_long quote_style = ENT_COMPAT;
1388+
zend_long quote_style = ENT_QUOTES|ENT_SUBSTITUTE;
13891389
zend_string *replaced;
13901390

13911391
ZEND_PARSE_PARAMETERS_START(1, 3)
@@ -1468,7 +1468,7 @@ static inline void write_s3row_data(
14681468
PHP_FUNCTION(get_html_translation_table)
14691469
{
14701470
zend_long all = HTML_SPECIALCHARS,
1471-
flags = ENT_COMPAT;
1471+
flags = ENT_QUOTES|ENT_SUBSTITUTE;
14721472
int doctype;
14731473
entity_table_opt entity_table;
14741474
const enc_to_uni *to_uni_table = NULL;

ext/standard/tests/strings/bug53021.phpt

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,4 @@ single quotes variations:
3838
'
3939
'
4040
'
41-
'
41+
'

ext/standard/tests/strings/bug61116.phpt

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ Function [ <internal:standard> function htmlspecialchars ] {
1010

1111
- Parameters [4] {
1212
Parameter #0 [ <required> string $string ]
13-
Parameter #1 [ <optional> int $flags = ENT_COMPAT ]
13+
Parameter #1 [ <optional> int $flags = ENT_QUOTES | ENT_SUBSTITUTE ]
1414
Parameter #2 [ <optional> ?string $encoding = null ]
1515
Parameter #3 [ <optional> bool $double_encode = true ]
1616
}
@@ -21,7 +21,7 @@ Function [ <internal:standard> function get_html_translation_table ] {
2121

2222
- Parameters [3] {
2323
Parameter #0 [ <optional> int $table = HTML_SPECIALCHARS ]
24-
Parameter #1 [ <optional> int $flags = ENT_COMPAT ]
24+
Parameter #1 [ <optional> int $flags = ENT_QUOTES | ENT_SUBSTITUTE ]
2525
Parameter #2 [ <optional> string $encoding = "UTF-8" ]
2626
}
2727
- Return [ array ]

ext/standard/tests/strings/html_entity_decode3.phpt

+1-1
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,7 @@ echo "\nDone.\n";
218218
&#x0E; NOT DECODED
219219
&#x1F; NOT DECODED
220220
&#x20; DECODED
221-
&#x27; NOT DECODED
221+
&#x27; DECODED
222222
&#x7F; NOT DECODED
223223
&#x80; NOT DECODED
224224
&#x9F; NOT DECODED

ext/standard/tests/strings/htmlentities24.phpt

+1-1
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,7 @@ string(198) "&sbquo;&dagger;&trade;&Yuml;&euro;&sbquo;&dagger;&bdquo;&euro;&perm
310310
string(42) "&lt;html&gt; This is a test! &lt;/html&gt;"
311311

312312
*** Testing htmlentites() on a quote ***
313-
string(36) "A 'quote' is &lt;b&gt;bold&lt;/b&gt;"
313+
string(46) "A &#039;quote&#039; is &lt;b&gt;bold&lt;/b&gt;"
314314
string(46) "A &#039;quote&#039; is &lt;b&gt;bold&lt;/b&gt;"
315315
string(36) "A 'quote' is &lt;b&gt;bold&lt;/b&gt;"
316316
string(36) "A 'quote' is &lt;b&gt;bold&lt;/b&gt;"

ext/standard/tests/strings/htmlspecialchars.phpt

+1-1
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,7 @@ string(187) "&lt;br&gt;Testing&lt;p&gt;New file.&lt;/p&gt;&lt;p&gt;&lt;br&gt;Fil
306306
string(46) "&lt;br&gt;Testing&lt;p&gt;New file.&lt;/p&gt; "
307307

308308
*** Testing htmlspecialchars() on a quote...
309-
string(36) "A 'quote' is &lt;b&gt;bold&lt;/b&gt;"
309+
string(46) "A &#039;quote&#039; is &lt;b&gt;bold&lt;/b&gt;"
310310
string(46) "A &#039;quote&#039; is &lt;b&gt;bold&lt;/b&gt;"
311311
string(36) "A 'quote' is &lt;b&gt;bold&lt;/b&gt;"
312312
string(36) "A 'quote' is &lt;b&gt;bold&lt;/b&gt;"

ext/standard/tests/strings/htmlspecialchars_basic.phpt

+1-1
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ Basic tests
5656
Test 1: abc&lt;&gt;&quot;&amp;
5757
Test 2: &amp;&amp;abc&lt;&gt;&quot;&amp;
5858
Test 3: a&gt;,\&lt;bc&lt;&gt;&quot;&amp;
59-
Test 4: a\'\'&amp;bc&lt;&gt;&quot;&amp;
59+
Test 4: a\&#039;\&#039;&amp;bc&lt;&gt;&quot;&amp;
6060
Test 5: &amp;amp;&amp;lt;
6161
Test 6: abc&lt;&gt;"&amp;
6262
Test 7: &amp;&amp;abc&lt;&gt;"&amp;

ext/standard/tests/strings/htmlspecialchars_decode_basic.phpt

+2-2
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ echo "Done";
2626
?>
2727
--EXPECT--
2828
*** Testing htmlspecialchars_decode() : basic functionality ***
29-
string(92) "Roy&#039;s height > Sam&#039;s height. 13 < 25. 1111 & 0000 = 0000. " double quoted string ""
30-
string(92) "Roy&#039;s height > Sam&#039;s height. 13 < 25. 1111 & 0000 = 0000. " double quoted string ""
29+
string(82) "Roy's height > Sam's height. 13 < 25. 1111 & 0000 = 0000. " double quoted string ""
30+
string(82) "Roy's height > Sam's height. 13 < 25. 1111 & 0000 = 0000. " double quoted string ""
3131
string(92) "Roy&#039;s height > Sam&#039;s height. 13 < 25. 1111 & 0000 = 0000. " double quoted string ""
3232
string(92) "Roy&#039;s height > Sam&#039;s height. 13 < 25. 1111 & 0000 = 0000. " double quoted string ""
3333
string(102) "Roy&#039;s height > Sam&#039;s height. 13 < 25. 1111 & 0000 = 0000. &quot; double quoted string &quot;"

ext/standard/tests/strings/htmlspecialchars_decode_variation3.phpt

+4-4
Original file line numberDiff line numberDiff line change
@@ -73,12 +73,12 @@ string(0) ""
7373
-- Iteration 2 --
7474
string(0) ""
7575
-- Iteration 3 --
76-
string(103) "<html>Roy&#039;s height > Sam&#039;s height
76+
string(93) "<html>Roy's height > Sam's height
7777
13 < 25
7878
1111 & 0000 = 0000
7979
"This is a double quoted string""
8080
-- Iteration 4 --
81-
string(130) "<html>Roy&#039;s height > Sam &#039;s height
81+
string(120) "<html>Roy's height > Sam 's height
8282
1111 & 0000 = 0000
8383
" heredoc
8484
double quoted string. with different white spaces""
@@ -87,8 +87,8 @@ string(62) "<html>11 < 12. 123 string 4567
8787
"string" 1111 & 0000 = 0000
8888
;"
8989
-- Iteration 6 --
90-
string(153) "<html>< This's a string with quotes:
90+
string(143) "<html>< This's a string with quotes:
9191
"strings in double quote" &
9292
'strings in single quote' "
93-
this\line is &#039;single quoted&#039; /with\slashes </html>"
93+
this\line is 'single quoted' /with\slashes </html>"
9494
Done

ext/standard/tests/strings/htmlspecialchars_decode_variation4.phpt

+4-4
Original file line numberDiff line numberDiff line change
@@ -33,22 +33,22 @@ echo "Done";
3333
--EXPECT--
3434
*** Testing htmlspecialchars_decode() : usage variations ***
3535
-- Iteration 1 --
36-
string(90) "Roy&#039s height > Sam&#039;s \$height... 1111 &ap; 0000 = 0000... " double quote string ""
36+
string(85) "Roy&#039s height > Sam's \$height... 1111 &ap; 0000 = 0000... " double quote string ""
3737
string(90) "Roy&#039s height > Sam&#039;s \$height... 1111 &ap; 0000 = 0000... " double quote string ""
3838
string(100) "Roy&#039s height > Sam&#039;s \$height... 1111 &ap; 0000 = 0000... &quot; double quote string &quot;"
3939
string(85) "Roy&#039s height > Sam's \$height... 1111 &ap; 0000 = 0000... " double quote string ""
4040
-- Iteration 2 --
41-
string(88) "Roy&#039;s height > Sam&#039;s height... \t\t 13 < 15...\n\r " double quote\f\v string ""
41+
string(78) "Roy's height > Sam's height... \t\t 13 < 15...\n\r " double quote\f\v string ""
4242
string(88) "Roy&#039;s height > Sam&#039;s height... \t\t 13 < 15...\n\r " double quote\f\v string ""
4343
string(98) "Roy&#039;s height > Sam&#039;s height... \t\t 13 < 15...\n\r &quot; double quote\f\v string &quot;"
4444
string(78) "Roy's height > Sam's height... \t\t 13 < 15...\n\r " double quote\f\v string ""
4545
-- Iteration 3 --
46-
string(48) "\nRoy&#039;s height &gt\t; Sam&#039;s\v height\f"
46+
string(38) "\nRoy's height &gt\t; Sam's\v height\f"
4747
string(48) "\nRoy&#039;s height &gt\t; Sam&#039;s\v height\f"
4848
string(48) "\nRoy&#039;s height &gt\t; Sam&#039;s\v height\f"
4949
string(38) "\nRoy's height &gt\t; Sam's\v height\f"
5050
-- Iteration 4 --
51-
string(48) "\r\tRoy&#039;s height &gt\r; Sam\t&#039;s height"
51+
string(38) "\r\tRoy's height &gt\r; Sam\t's height"
5252
string(48) "\r\tRoy&#039;s height &gt\r; Sam\t&#039;s height"
5353
string(48) "\r\tRoy&#039;s height &gt\r; Sam\t&#039;s height"
5454
string(38) "\r\tRoy's height &gt\r; Sam\t's height"

ext/standard/tests/strings/htmlspecialchars_decode_variation5.phpt

+5-5
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,12 @@ echo "Done";
3232
--EXPECT--
3333
*** Testing htmlspecialchars_decode() : usage variations ***
3434
-- Iteration 1 --
35-
string(89) "Roy&#039s height > Sam&#039;s $height... 1111 &ap; 0000 = 0000... " double quote string ""
35+
string(84) "Roy&#039s height > Sam's $height... 1111 &ap; 0000 = 0000... " double quote string ""
3636
string(89) "Roy&#039s height > Sam&#039;s $height... 1111 &ap; 0000 = 0000... " double quote string ""
3737
string(99) "Roy&#039s height > Sam&#039;s $height... 1111 &ap; 0000 = 0000... &quot; double quote string &quot;"
3838
string(84) "Roy&#039s height > Sam's $height... 1111 &ap; 0000 = 0000... " double quote string ""
3939
-- Iteration 2 --
40-
string(82) "Roy&#039;s height > Sam&#039;s height... 13 < 15...
40+
string(72) "Roy's height > Sam's height... 13 < 15...
4141
" double quote string ""
4242
string(82) "Roy&#039;s height > Sam&#039;s height... 13 < 15...
4343
" double quote string ""
@@ -46,16 +46,16 @@ string(92) "Roy&#039;s height > Sam&#039;s height... 13 < 15...
4646
string(72) "Roy's height > Sam's height... 13 < 15...
4747
" double quote string ""
4848
-- Iteration 3 --
49-
string(44) "
50-
Roy&#039;s height &gt ; Sam&#039;s height"
49+
string(34) "
50+
Roy's height &gt ; Sam's height"
5151
string(44) "
5252
Roy&#039;s height &gt ; Sam&#039;s height "
5353
string(44) "
5454
Roy&#039;s height &gt ; Sam&#039;s height "
5555
string(34) "
5656
Roy's height &gt ; Sam's height "
5757
-- Iteration 4 --
58-
string(44) " Roy&#039;s height &gt; Sam &#039;s height"
58+
string(34) " Roy's height &gt; Sam 's height"
5959
string(44) " Roy&#039;s height &gt; Sam &#039;s height"
6060
string(44) " Roy&#039;s height &gt; Sam &#039;s height"
6161
string(34) " Roy's height &gt; Sam 's height"

0 commit comments

Comments
 (0)