From 4aedd6be0e3ccb9331671c6704e56d52ba66337a Mon Sep 17 00:00:00 2001 From: Craig Manley <591285+cmanley@users.noreply.github.com> Date: Tue, 14 May 2024 15:26:54 +0200 Subject: [PATCH 01/10] Add 3rd input encoding parameter to json_encode modifier let it default to \Smarty\Smarty::$_CHARSET --- .../Modifier/JsonEncodeModifierCompiler.php | 14 --- src/Extension/DefaultExtension.php | 88 ++++++++++++++++++- 2 files changed, 87 insertions(+), 15 deletions(-) delete mode 100644 src/Compile/Modifier/JsonEncodeModifierCompiler.php diff --git a/src/Compile/Modifier/JsonEncodeModifierCompiler.php b/src/Compile/Modifier/JsonEncodeModifierCompiler.php deleted file mode 100644 index 4f191a31f..000000000 --- a/src/Compile/Modifier/JsonEncodeModifierCompiler.php +++ /dev/null @@ -1,14 +0,0 @@ -modifiers[$modifier] = new \Smarty\Compile\Modifier\IndentModifierCompiler(); break; case 'is_array': $this->modifiers[$modifier] = new \Smarty\Compile\Modifier\IsArrayModifierCompiler(); break; case 'isset': $this->modifiers[$modifier] = new \Smarty\Compile\Modifier\IssetModifierCompiler(); break; - case 'json_encode': $this->modifiers[$modifier] = new \Smarty\Compile\Modifier\JsonEncodeModifierCompiler(); break; case 'lower': $this->modifiers[$modifier] = new \Smarty\Compile\Modifier\LowerModifierCompiler(); break; case 'nl2br': $this->modifiers[$modifier] = new \Smarty\Compile\Modifier\Nl2brModifierCompiler(); break; case 'noprint': $this->modifiers[$modifier] = new \Smarty\Compile\Modifier\NoPrintModifierCompiler(); break; @@ -62,6 +61,7 @@ public function getModifierCallback(string $modifierName) { case 'implode': return [$this, 'smarty_modifier_implode']; case 'in_array': return [$this, 'smarty_modifier_in_array']; case 'join': return [$this, 'smarty_modifier_join']; + case 'json_encode': return [$this, 'smarty_modifier_json_encode']; case 'mb_wordwrap': return [$this, 'smarty_modifier_mb_wordwrap']; case 'number_format': return [$this, 'smarty_modifier_number_format']; case 'regex_replace': return [$this, 'smarty_modifier_regex_replace']; @@ -605,6 +605,92 @@ public function smarty_modifier_join($values, $separator = '') return implode((string) ($separator ?? ''), (array) $values); } + /** + * Smarty json_encode modifier plugin. + * Type: modifier + * Name: json_encode + * Purpose: Returns the JSON representation of the given value or false on error. The resulting string will be UTF-8 encoded. + * + * @param mixed $value + * @param int $flags + * @param string $input_encoding of $value; defaults to \Smarty\Smarty::$_CHARSET + * + * @return string|false + */ + public function smarty_modifier_json_encode($value, $flags = 0, string $input_encoding = null) + { + if (!$input_encoding) { + $input_encoding = \Smarty\Smarty::$_CHARSET; + } + + # json_encode() expects UTF-8 input, so recursively encode $value if necessary into UTF-8 + if (!empty($value) && strcasecmp($input_encoding, 'UTF-8')) { + static $transcoder = null; + if (is_null($transcoder)) { + /** + * Similar to mb_convert_encoding(), but operates on keys and values of arrays, and on objects too. + * Objects implementing \JsonSerializable and unsupported types are returned unchanged. + * + * @param string $from_encoding + * @param string $to_encoding + * @param mixed $data + * @return mixed + */ + $transcoder = function($data, string $to_encoding, string $from_encoding) use(&$transcoder) { + if (empty($data)) { + return $data; + } + elseif (is_string($data)) { + return mb_convert_encoding($data, $to_encoding, $from_encoding); + } + elseif (is_scalar($data)) { + return $data; + } + + # convert object to array if necessary + if (is_object($data)) { + if (is_a($data, '\JsonSerializable')) { # this is the reason why this function is not generic + return $data; # this object should know how to deal with it's internal encoding when fed to json_encode() + } + $data = get_object_vars($data); # public properties as key => value pairs + } + + if (is_array($data)) { + $result = []; + foreach ($data as $k => $v) { + if (is_string($k)) { + $k = mb_convert_encoding($k, $to_encoding, $from_encoding); + if ($k === false) { + return false; + } + } + if (empty($v) || (is_scalar($v) && !is_string($v))) { + $result[$k] = $v; # $v can be false and that's not an error + } + else { + # recurse + $v = $transcoder($v, $to_encoding, $from_encoding); + if ($v === false) { + return false; + } + $result[$k] = $v; + } + } + return $result; + } + + return $data; # anything except string, object, or array + }; + } + + $value = $transcoder($value, 'UTF-8', $input_encoding); + if ($value === false) { + return $value; # failure; this must not be passed to json_encode!; this is part of what the !empty() check is for at the top of this block + } + } + return \json_encode($value, $flags); # string|false + } + /** * Smarty wordwrap modifier plugin * Type: modifier From f4defd72b9510b4b7f68af5cf75bb7c202409866 Mon Sep 17 00:00:00 2001 From: Craig Manley <591285+cmanley@users.noreply.github.com> Date: Thu, 16 May 2024 16:24:25 +0200 Subject: [PATCH 02/10] Add input_encoding parameter to json_encode modifier documentation --- .../language-modifier-json-encode.md | 9 +++++---- .../language-modifiers/language-modifier-round.md | 10 ---------- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/docs/designers/language-modifiers/language-modifier-json-encode.md b/docs/designers/language-modifiers/language-modifier-json-encode.md index 4e70f0c26..c07cba0b5 100644 --- a/docs/designers/language-modifiers/language-modifier-json-encode.md +++ b/docs/designers/language-modifiers/language-modifier-json-encode.md @@ -11,9 +11,10 @@ Depending on the value of `$user` this would return a string in JSON-format, e.g ## Parameters -| Parameter | Type | Required | Description | -|-----------|------|----------|-------------------------------------------------------------------------------------------| -| 1 | int | No | bitmask of flags, directly passed to [PHP's json_encode](https://www.php.net/json_encode) | +| Parameter | Type | Required | Description | +|-----------|--------|----------|-------------------------------------------------------------------------------------------| +| 1 | int | No | bitmask of flags, directly passed to [PHP's json_encode](https://www.php.net/json_encode) | +| 2 | string | No | input encoding; defaults to \Smarty\Smarty::$_CHARSET which defaults to UTF-8 | ## Examples @@ -24,4 +25,4 @@ Without it, an array `$myArray = ["a","b"]` would be formatted as a javascript a ```smarty {$myArray|json_encode} # renders: ["a","b"] {$myArray|json_encode:16} # renders: {"0":"a","1":"b"} -``` \ No newline at end of file +``` diff --git a/docs/designers/language-modifiers/language-modifier-round.md b/docs/designers/language-modifiers/language-modifier-round.md index c05b899a9..635e85b42 100644 --- a/docs/designers/language-modifiers/language-modifier-round.md +++ b/docs/designers/language-modifiers/language-modifier-round.md @@ -23,13 +23,3 @@ If 'precision' is negative, the number is rounded to the nearest power of 10. Se The parameter 'mode' defines how the rounding is done. By default, 2.5 is rounded to 3, whereas 2.45 is rounded to 2. You usually don't need to change this. For more details on rounding modes, see [PHP's documentation on round](https://www.php.net/manual/en/function.round). - -## Examples - -By passing `16` as the second parameter, you can force json_encode to always format the JSON-string as an object. -Without it, an array `$myArray = ["a","b"]` would be formatted as a javascript array: - -```smarty -{$myArray|json_encode} # renders: ["a","b"] -{$myArray|json_encode:16} # renders: {"0":"a","1":"b"} -``` \ No newline at end of file From 2f40db9e9f7346c523bf60d95798bb63ba8dc235 Mon Sep 17 00:00:00 2001 From: Craig Manley <591285+cmanley@users.noreply.github.com> Date: Thu, 16 May 2024 16:25:50 +0200 Subject: [PATCH 03/10] Minor optimization of json_encode modifier to skip creating closure in most common cases --- src/Extension/DefaultExtension.php | 116 +++++++++++++++-------------- 1 file changed, 61 insertions(+), 55 deletions(-) diff --git a/src/Extension/DefaultExtension.php b/src/Extension/DefaultExtension.php index 9fbec2435..583634c38 100644 --- a/src/Extension/DefaultExtension.php +++ b/src/Extension/DefaultExtension.php @@ -624,70 +624,76 @@ public function smarty_modifier_json_encode($value, $flags = 0, string $input_en } # json_encode() expects UTF-8 input, so recursively encode $value if necessary into UTF-8 - if (!empty($value) && strcasecmp($input_encoding, 'UTF-8')) { - static $transcoder = null; - if (is_null($transcoder)) { - /** - * Similar to mb_convert_encoding(), but operates on keys and values of arrays, and on objects too. - * Objects implementing \JsonSerializable and unsupported types are returned unchanged. - * - * @param string $from_encoding - * @param string $to_encoding - * @param mixed $data - * @return mixed - */ - $transcoder = function($data, string $to_encoding, string $from_encoding) use(&$transcoder) { - if (empty($data)) { - return $data; - } - elseif (is_string($data)) { - return mb_convert_encoding($data, $to_encoding, $from_encoding); - } - elseif (is_scalar($data)) { - return $data; - } + if ($value && strcasecmp($input_encoding, 'UTF-8')) { + if (is_string($value)) { # shortcut for the most common case + $value = mb_convert_encoding($value, 'UTF-8', $input_encoding); + } + elseif (is_array($value) || is_object($value)) { + static $transcoder; # this closure will be assigned once, and then persist in memory + if (is_null($transcoder)) { + /** + * Similar to mb_convert_encoding(), but operates on keys and values of arrays, and on objects too. + * Objects implementing \JsonSerializable and unsupported types are returned unchanged. + * + * @param string $from_encoding + * @param string $to_encoding + * @param mixed $data + * @return mixed + */ + $transcoder = function($data, string $to_encoding, string $from_encoding) use(&$transcoder) { + if (empty($data)) { + return $data; + } + elseif (is_string($data)) { + return mb_convert_encoding($data, $to_encoding, $from_encoding); + } + elseif (is_scalar($data)) { + return $data; + } - # convert object to array if necessary - if (is_object($data)) { - if (is_a($data, '\JsonSerializable')) { # this is the reason why this function is not generic - return $data; # this object should know how to deal with it's internal encoding when fed to json_encode() + # convert object to array if necessary + if (is_object($data)) { + if (is_a($data, '\JsonSerializable')) { # this is the only reason why this function is not generic + return $data; + } + $data = get_object_vars($data); # public properties as key => value pairs } - $data = get_object_vars($data); # public properties as key => value pairs - } - if (is_array($data)) { - $result = []; - foreach ($data as $k => $v) { - if (is_string($k)) { - $k = mb_convert_encoding($k, $to_encoding, $from_encoding); - if ($k === false) { - return false; + if (is_array($data)) { + $result = []; + foreach ($data as $k => $v) { + if (is_string($k)) { + $k = mb_convert_encoding($k, $to_encoding, $from_encoding); + if ($k === false) { + return false; + } } - } - if (empty($v) || (is_scalar($v) && !is_string($v))) { - $result[$k] = $v; # $v can be false and that's not an error - } - else { - # recurse - $v = $transcoder($v, $to_encoding, $from_encoding); - if ($v === false) { - return false; + if (empty($v) || (is_scalar($v) && !is_string($v))) { + $result[$k] = $v; # $v can be false and that's not an error + } + else { + # recurse + $v = $transcoder($v, $to_encoding, $from_encoding); + if ($v === false) { + return false; + } + $result[$k] = $v; } - $result[$k] = $v; } + return $result; } - return $result; - } - return $data; # anything except string, object, or array - }; - } + return $data; # anything except string, object, or array + }; # / $transcoder function + } # / if is_null($transcoder) + + $value = $transcoder($value, 'UTF-8', $input_encoding); + if ($value === false) { + return $value; # failure; this must not be passed to json_encode!; this is part of what the !empty() check is for at the top of this block + } + } # / elseif (is_array($value) || is_object($value)) + } # / if input encoding != UTF-8 - $value = $transcoder($value, 'UTF-8', $input_encoding); - if ($value === false) { - return $value; # failure; this must not be passed to json_encode!; this is part of what the !empty() check is for at the top of this block - } - } return \json_encode($value, $flags); # string|false } From 52878fe699b3bfaa060e2707f03fd7d382acbcee Mon Sep 17 00:00:00 2001 From: Craig Manley <591285+cmanley@users.noreply.github.com> Date: Thu, 16 May 2024 16:26:33 +0200 Subject: [PATCH 04/10] Add unit tests to prove that new json_encode modifier works with special characters in both UTF-8 and other encodings --- .../PluginModifierJsonEncodeCp1252Test.php | 76 +++++++++++++++++++ .../PluginModifierJsonEncodeTest.php | 5 +- 2 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeCp1252Test.php diff --git a/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeCp1252Test.php b/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeCp1252Test.php new file mode 100644 index 000000000..339742025 --- /dev/null +++ b/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeCp1252Test.php @@ -0,0 +1,76 @@ +setUpSmarty(__DIR__); + \Smarty\Smarty::$_CHARSET = 'cp1252'; + } + + /** + * @dataProvider dataForDefault + */ + public function testDefault($value, $expected) + { + $tpl = $this->smarty->createTemplate('string:{$v|json_encode}'); + $tpl->assign("v", $value); + $this->assertEquals($expected, $this->smarty->fetch($tpl)); + } + + /** + * @dataProvider dataForDefault + */ + public function testDefaultAsFunction($value, $expected) + { + $tpl = $this->smarty->createTemplate('string:{json_encode($v)}'); + $tpl->assign("v", $value); + $this->assertEquals($expected, $this->smarty->fetch($tpl)); + } + + public function dataForDefault() { + return [ + ["abc", '"abc"'], + [["abc"], '["abc"]'], + [["abc",["a"=>2]], '["abc",{"a":2}]'], + [['€uro',['Schlüssel'=>'Straße']], '["\u20acuro",{"Schl\u00fcssel":"Stra\u00dfe"}]'], # € = x80 in cp1252; ü = xFC in cp1252; ß = xDF in cp1252; + ]; + } + + /** + * @dataProvider dataForForceObject + */ + public function testForceObject($value, $expected) + { + $tpl = $this->smarty->createTemplate('string:{$v|json_encode:16}'); + $tpl->assign("v", $value); + $this->assertEquals($expected, $this->smarty->fetch($tpl)); + } + + /** + * @dataProvider dataForForceObject + */ + public function testForceObjectAsFunction($value, $expected) + { + $tpl = $this->smarty->createTemplate('string:{json_encode($v,16)}'); + $tpl->assign("v", $value); + $this->assertEquals($expected, $this->smarty->fetch($tpl)); + } + + public function dataForForceObject() { + return [ + ["abc", '"abc"'], + [["abc"], '{"0":"abc"}'], + [["abc",["a"=>2]], '{"0":"abc","1":{"a":2}}'], + [['€uro'], '{"0":"\u20acuro"}'], + ]; + } + +} diff --git a/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeTest.php b/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeTest.php index 9a2878122..894242368 100644 --- a/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeTest.php +++ b/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeTest.php @@ -1,6 +1,7 @@ 2]], '["abc",{"a":2}]'], + [['€uro',['Schlüssel'=>'Straße']], '["\u20acuro",{"Schl\u00fcssel":"Stra\u00dfe"}]'], # € = x80 in cp1252; ü = xFC in cp1252; ß = xDF in cp1252; ]; } @@ -66,6 +68,7 @@ public function dataForForceObject() { ["abc", '"abc"'], [["abc"], '{"0":"abc"}'], [["abc",["a"=>2]], '{"0":"abc","1":{"a":2}}'], + [['€uro'], '{"0":"\u20acuro"}'], ]; } From 77cac4edc4af22b054cee0b9609f8c9ddad016ad Mon Sep 17 00:00:00 2001 From: Craig Manley <591285+cmanley@users.noreply.github.com> Date: Thu, 16 May 2024 17:22:38 +0200 Subject: [PATCH 05/10] Remove requirement to save json_encode modifier cp1252 unit test file in that encoding --- .../PluginModifier/PluginModifierJsonEncodeCp1252Test.php | 6 +++--- .../PluginModifier/PluginModifierJsonEncodeTest.php | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeCp1252Test.php b/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeCp1252Test.php index 339742025..1d6e81b20 100644 --- a/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeCp1252Test.php +++ b/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeCp1252Test.php @@ -1,7 +1,7 @@ 2]], '["abc",{"a":2}]'], - [['€uro',['Schlüssel'=>'Straße']], '["\u20acuro",{"Schl\u00fcssel":"Stra\u00dfe"}]'], # € = x80 in cp1252; ü = xFC in cp1252; ß = xDF in cp1252; + [["\x80uro",["Schl\xFCssel"=>"Stra\xDFe"]], '["\u20acuro",{"Schl\u00fcssel":"Stra\u00dfe"}]'], # x80 = € = euro, xFC = ü = uuml, xDF = ß = szlig ]; } @@ -69,7 +69,7 @@ public function dataForForceObject() { ["abc", '"abc"'], [["abc"], '{"0":"abc"}'], [["abc",["a"=>2]], '{"0":"abc","1":{"a":2}}'], - [['€uro'], '{"0":"\u20acuro"}'], + [["\x80uro"], '{"0":"\u20acuro"}'], ]; } diff --git a/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeTest.php b/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeTest.php index 894242368..4eb14843f 100644 --- a/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeTest.php +++ b/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeTest.php @@ -39,7 +39,7 @@ public function dataForDefault() { ["abc", '"abc"'], [["abc"], '["abc"]'], [["abc",["a"=>2]], '["abc",{"a":2}]'], - [['€uro',['Schlüssel'=>'Straße']], '["\u20acuro",{"Schl\u00fcssel":"Stra\u00dfe"}]'], # € = x80 in cp1252; ü = xFC in cp1252; ß = xDF in cp1252; + [["€uro",["Schlüssel"=>"Straße"]], '["\u20acuro",{"Schl\u00fcssel":"Stra\u00dfe"}]'], # \u{20ac} = € = euro, \u{00fc} = ü = uuml, xDF = \u{00df} = szlig ]; } @@ -68,7 +68,7 @@ public function dataForForceObject() { ["abc", '"abc"'], [["abc"], '{"0":"abc"}'], [["abc",["a"=>2]], '{"0":"abc","1":{"a":2}}'], - [['€uro'], '{"0":"\u20acuro"}'], + [["€uro"], '{"0":"\u20acuro"}'], ]; } From e2b1d713918746b025d674124d4fc1a81ea27ad0 Mon Sep 17 00:00:00 2001 From: Simon Wisselink Date: Wed, 29 May 2024 21:37:01 +0200 Subject: [PATCH 06/10] Fix classname of PluginModifierJsonEncodeCp1252Test --- .../PluginModifier/PluginModifierJsonEncodeCp1252Test.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeCp1252Test.php b/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeCp1252Test.php index 1d6e81b20..abc76b800 100644 --- a/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeCp1252Test.php +++ b/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeCp1252Test.php @@ -7,7 +7,7 @@ namespace UnitTests\TemplateSource\TagTests\PluginModifier; use PHPUnit_Smarty; -class PluginModifierJsonEncodeTest extends PHPUnit_Smarty +class PluginModifierJsonEncodeCp1252Test extends PHPUnit_Smarty { public function setUp(): void { @@ -40,7 +40,7 @@ public function dataForDefault() { ["abc", '"abc"'], [["abc"], '["abc"]'], [["abc",["a"=>2]], '["abc",{"a":2}]'], - [["\x80uro",["Schl\xFCssel"=>"Stra\xDFe"]], '["\u20acuro",{"Schl\u00fcssel":"Stra\u00dfe"}]'], # x80 = € = euro, xFC = ü = uuml, xDF = ß = szlig + [["\x80uro",["Schl\xFCssel"=>"Stra\xDFe"]], '["\u20acuro",{"Schl\u00fcssel":"Stra\u00dfe"}]'], # x80 = � = euro, xFC = � = uuml, xDF = � = szlig ]; } From 9316a42aab5e8a741ae924478d1c334bf24044a3 Mon Sep 17 00:00:00 2001 From: Simon Wisselink Date: Wed, 29 May 2024 21:40:28 +0200 Subject: [PATCH 07/10] Add tearDown that resets smarty charset to UTF-8 after each test, to prevent other tests from failing. --- .../PluginModifier/PluginModifierJsonEncodeCp1252Test.php | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeCp1252Test.php b/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeCp1252Test.php index abc76b800..46a5ec0fe 100644 --- a/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeCp1252Test.php +++ b/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeCp1252Test.php @@ -15,6 +15,11 @@ public function setUp(): void \Smarty\Smarty::$_CHARSET = 'cp1252'; } + public function tearDown(): void + { + \Smarty\Smarty::$_CHARSET = 'UTF-8'; + } + /** * @dataProvider dataForDefault */ From ef2fa74fb70626418830d08005064e8d634cb091 Mon Sep 17 00:00:00 2001 From: Simon Wisselink Date: Thu, 30 May 2024 09:04:36 +0200 Subject: [PATCH 08/10] Update DefaultExtension.php --- src/Extension/DefaultExtension.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Extension/DefaultExtension.php b/src/Extension/DefaultExtension.php index 583634c38..1ebba0185 100644 --- a/src/Extension/DefaultExtension.php +++ b/src/Extension/DefaultExtension.php @@ -653,7 +653,7 @@ public function smarty_modifier_json_encode($value, $flags = 0, string $input_en # convert object to array if necessary if (is_object($data)) { - if (is_a($data, '\JsonSerializable')) { # this is the only reason why this function is not generic + if (is_a($data, \JsonSerializable::class)) { # this is the only reason why this function is not generic return $data; } $data = get_object_vars($data); # public properties as key => value pairs From cbbb2444ba3e2e7026a81043071c99b6b3797e62 Mon Sep 17 00:00:00 2001 From: Craig Manley <591285+cmanley@users.noreply.github.com> Date: Sat, 1 Jun 2024 21:19:11 +0200 Subject: [PATCH 09/10] Move recursive transcoder from json_encode() into src/Extension/DefaultExtension/RecursiveTranscoder.php --- src/Extension/DefaultExtension.php | 69 +-------- .../DefaultExtension/RecursiveTranscoder.php | 138 ++++++++++++++++++ .../PluginModifierJsonEncodeCp1252Test.php | 9 +- .../PluginModifierJsonEncodeTest.php | 2 +- 4 files changed, 153 insertions(+), 65 deletions(-) create mode 100644 src/Extension/DefaultExtension/RecursiveTranscoder.php diff --git a/src/Extension/DefaultExtension.php b/src/Extension/DefaultExtension.php index 1ebba0185..984412dbc 100644 --- a/src/Extension/DefaultExtension.php +++ b/src/Extension/DefaultExtension.php @@ -628,71 +628,14 @@ public function smarty_modifier_json_encode($value, $flags = 0, string $input_en if (is_string($value)) { # shortcut for the most common case $value = mb_convert_encoding($value, 'UTF-8', $input_encoding); } - elseif (is_array($value) || is_object($value)) { - static $transcoder; # this closure will be assigned once, and then persist in memory - if (is_null($transcoder)) { - /** - * Similar to mb_convert_encoding(), but operates on keys and values of arrays, and on objects too. - * Objects implementing \JsonSerializable and unsupported types are returned unchanged. - * - * @param string $from_encoding - * @param string $to_encoding - * @param mixed $data - * @return mixed - */ - $transcoder = function($data, string $to_encoding, string $from_encoding) use(&$transcoder) { - if (empty($data)) { - return $data; - } - elseif (is_string($data)) { - return mb_convert_encoding($data, $to_encoding, $from_encoding); - } - elseif (is_scalar($data)) { - return $data; - } - - # convert object to array if necessary - if (is_object($data)) { - if (is_a($data, \JsonSerializable::class)) { # this is the only reason why this function is not generic - return $data; - } - $data = get_object_vars($data); # public properties as key => value pairs - } - - if (is_array($data)) { - $result = []; - foreach ($data as $k => $v) { - if (is_string($k)) { - $k = mb_convert_encoding($k, $to_encoding, $from_encoding); - if ($k === false) { - return false; - } - } - if (empty($v) || (is_scalar($v) && !is_string($v))) { - $result[$k] = $v; # $v can be false and that's not an error - } - else { - # recurse - $v = $transcoder($v, $to_encoding, $from_encoding); - if ($v === false) { - return false; - } - $result[$k] = $v; - } - } - return $result; - } - - return $data; # anything except string, object, or array - }; # / $transcoder function - } # / if is_null($transcoder) - - $value = $transcoder($value, 'UTF-8', $input_encoding); + elseif (DefaultExtension\RecursiveTranscoder::is_transcoding_candidate($value)) { + $value = DefaultExtension\RecursiveTranscoder::transcode($value, 'UTF-8', $input_encoding, ['ignore_JsonSerializable_objects' => true]); if ($value === false) { - return $value; # failure; this must not be passed to json_encode!; this is part of what the !empty() check is for at the top of this block + # If transcode() throws an exception on failure, then the interpreter will never arrive here + return false; # failure } - } # / elseif (is_array($value) || is_object($value)) - } # / if input encoding != UTF-8 + } + } return \json_encode($value, $flags); # string|false } diff --git a/src/Extension/DefaultExtension/RecursiveTranscoder.php b/src/Extension/DefaultExtension/RecursiveTranscoder.php new file mode 100644 index 000000000..438a49111 --- /dev/null +++ b/src/Extension/DefaultExtension/RecursiveTranscoder.php @@ -0,0 +1,138 @@ +jsonSerialize(); # mixed + #if (!is_array($data)) { + # $debug && error_log(__METHOD__ . ' return non-array jsonSerialize() on JsonSerializable object'); + # return $data; + #} + } + $debug && error_log(__METHOD__ . ' convert object to array'); + $data = get_object_vars($data); # public properties as key => value pairs + } + + if (!(is_array($data) && $data)) { + $debug && error_log(__METHOD__ . ' return non-array or empty empty'); + return $data; # any empty array or non-array type as a possible result of object conversion above + } + + # $data is a filled array + $must_transcode_keys = empty($options['ignore_keys']); + $debug && error_log(__METHOD__ . " process filled array (must_transcode_keys == $must_transcode_keys)"); + $result = []; # replacement for $data if keys are transcoded too (i.e. $must_transcode_keys) + $this_func = __FUNCTION__; # for recursion + foreach ($data as $k => &$v) { + if ($must_transcode_keys && is_string($k)) { + $converted_k = mb_convert_encoding($k, $to_encoding, $from_encoding); # string|false + $debug && error_log(__METHOD__ . "\tconverted key \"$k\" to \"$converted_k\""); + if ($converted_k === false) { # this means mb_convert_encoding() failed which should've triggered a warning + # One of three things can be done here: + # 1. throw an Exception + # 2. return false, indicating to caller that mb_convert_encoding() failed + # 3. do nothing and use the original key + #return false; + throw Exception("Failed to encode array key \"$k\" from $from_encoding to $to_encoding"); + } + else { + $k = $converted_k; + } + } + if (static::is_transcoding_candidate($v)) { + $debug && error_log(__METHOD__ . "\trecurse for " . gettype($value) . ' value that is a transcoding candidate'); + # recurse + $converted_v = static::$this_func($v, $to_encoding, $from_encoding, $options); + if ($converted_v === false) { # this means that $v is a string and that mb_convert_encoding() failed, which should've triggered a warning + # One of four things can be done here: + # 1. throw an Exception + # 2. return false, indicating to caller that mb_convert_encoding() failed + # 3. do nothing and use the original value + # 4. replace the original value with false + #return false; + throw Exception('Failed to encode array value' . (is_string($v) ? " \"$k\"" : '') . 'of type ' . gettype($v) . " from $from_encoding to $to_encoding"); + } + else { + $v = $converted_v; + if ($must_transcode_keys) { + $result[$k] = $v; + } + } + } + else { + # $v may be false here, and in this case it is not an error (since no transcoding occurred since it's not a transcoding candidate) + if ($must_transcode_keys) { + $result[$k] = $v; + } + } + unset($v); + } + return $must_transcode_keys ? $result : $data; + } + +} diff --git a/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeCp1252Test.php b/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeCp1252Test.php index 46a5ec0fe..c40aae1c0 100644 --- a/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeCp1252Test.php +++ b/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeCp1252Test.php @@ -41,11 +41,18 @@ public function testDefaultAsFunction($value, $expected) } public function dataForDefault() { + $json_serializable_object = new class() implements \JsonSerializable { + public function jsonSerialize(): mixed { + return ["Schl\xC3\xBCssel" => "Stra\xC3\x9Fe"]; # UTF-8 ready for json_encode(); to prove that transcoding doesn't attempt to transcode this again + #return ['Schlüssel' => 'Straße']; # alternatively, this can be used, but then this file must always be saved in UTF-8 encoding or else the test will fail. + } + }; return [ ["abc", '"abc"'], [["abc"], '["abc"]'], [["abc",["a"=>2]], '["abc",{"a":2}]'], - [["\x80uro",["Schl\xFCssel"=>"Stra\xDFe"]], '["\u20acuro",{"Schl\u00fcssel":"Stra\u00dfe"}]'], # x80 = � = euro, xFC = � = uuml, xDF = � = szlig + [["\x80uro",["Schl\xFCssel"=>"Stra\xDFe"]], '["\u20acuro",{"Schl\u00fcssel":"Stra\u00dfe"}]'], # x80 = € = euro, xFC = ü = uuml, xDF = ß = szlig + [$json_serializable_object, '{"Schl\u00fcssel":"Stra\u00dfe"}'], ]; } diff --git a/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeTest.php b/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeTest.php index 4eb14843f..bfc6cb6b0 100644 --- a/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeTest.php +++ b/tests/UnitTests/TemplateSource/TagTests/PluginModifier/PluginModifierJsonEncodeTest.php @@ -39,7 +39,7 @@ public function dataForDefault() { ["abc", '"abc"'], [["abc"], '["abc"]'], [["abc",["a"=>2]], '["abc",{"a":2}]'], - [["€uro",["Schlüssel"=>"Straße"]], '["\u20acuro",{"Schl\u00fcssel":"Stra\u00dfe"}]'], # \u{20ac} = € = euro, \u{00fc} = ü = uuml, xDF = \u{00df} = szlig + [["€uro",["Schlüssel"=>"Straße"]], '["\u20acuro",{"Schl\u00fcssel":"Stra\u00dfe"}]'], # \u{20ac} = € = euro, \u{00fc} = ü = uuml, \u{00df} = ß = szlig ]; } From 0027e977658c9b265697dc2c9ce4efc32d788315 Mon Sep 17 00:00:00 2001 From: Craig Manley <591285+cmanley@users.noreply.github.com> Date: Sun, 2 Jun 2024 12:42:34 +0200 Subject: [PATCH 10/10] Remove debugging --- .../DefaultExtension/RecursiveTranscoder.php | 21 +------------------ 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/src/Extension/DefaultExtension/RecursiveTranscoder.php b/src/Extension/DefaultExtension/RecursiveTranscoder.php index 438a49111..6614c4b3c 100644 --- a/src/Extension/DefaultExtension/RecursiveTranscoder.php +++ b/src/Extension/DefaultExtension/RecursiveTranscoder.php @@ -35,63 +35,45 @@ public static function is_transcoding_candidate($value): bool { * @return mixed */ public static function transcode($data, string $to_encoding, string $from_encoding = null, array $options = null) { - $debug = !empty($options['debug']); - #$debug = true; - $debug && error_log(__METHOD__ . ' entered with $data type ' . gettype($data)); if (!static::is_transcoding_candidate($data)) { - $debug && error_log(__METHOD__ . ' return $data unchanged since it is not a transcoding candidate'); return $data; } if (!$from_encoding) { $from_encoding = \Smarty\Smarty::$_CHARSET; } if (strcasecmp($to_encoding, $from_encoding) == 0) { - $debug && error_log(__METHOD__ . ' return $data unchanged since to_encoding = from_encoding'); return $data; } # most cases: if (is_string($data)) { - $debug && error_log(__METHOD__ . ' return mb_convert_encoding string $data'); return mb_convert_encoding($data, $to_encoding, $from_encoding); # string|false } # convert object to array to be transcoded as array if (is_object($data)) { if (!empty($options['ignore_objects'])) { - $debug && error_log(__METHOD__ . ' return object unchanged since ignore_objects is true'); return $data; } if (is_a($data, \JsonSerializable::class)) { if (!empty($options['ignore_JsonSerializable_objects'])) { - $debug && error_log(__METHOD__ . ' return JsonSerializable object unchanged since ignore_JsonSerializable_objects is true'); return $data; # \JsonSerializable objects should be trusted to serialize themselves into data that can be consumed by json_encode() no matter what the application's default encoding is. } - #$debug && error_log(__METHOD__ . ' call jsonSerialize() on JsonSerializable object'); - #$data = $data->jsonSerialize(); # mixed - #if (!is_array($data)) { - # $debug && error_log(__METHOD__ . ' return non-array jsonSerialize() on JsonSerializable object'); - # return $data; - #} } - $debug && error_log(__METHOD__ . ' convert object to array'); $data = get_object_vars($data); # public properties as key => value pairs } if (!(is_array($data) && $data)) { - $debug && error_log(__METHOD__ . ' return non-array or empty empty'); return $data; # any empty array or non-array type as a possible result of object conversion above } # $data is a filled array $must_transcode_keys = empty($options['ignore_keys']); - $debug && error_log(__METHOD__ . " process filled array (must_transcode_keys == $must_transcode_keys)"); - $result = []; # replacement for $data if keys are transcoded too (i.e. $must_transcode_keys) + $result = $must_transcode_keys ? [] : null; # replacement for $data if keys are transcoded too (i.e. $must_transcode_keys) $this_func = __FUNCTION__; # for recursion foreach ($data as $k => &$v) { if ($must_transcode_keys && is_string($k)) { $converted_k = mb_convert_encoding($k, $to_encoding, $from_encoding); # string|false - $debug && error_log(__METHOD__ . "\tconverted key \"$k\" to \"$converted_k\""); if ($converted_k === false) { # this means mb_convert_encoding() failed which should've triggered a warning # One of three things can be done here: # 1. throw an Exception @@ -105,7 +87,6 @@ public static function transcode($data, string $to_encoding, string $from_encodi } } if (static::is_transcoding_candidate($v)) { - $debug && error_log(__METHOD__ . "\trecurse for " . gettype($value) . ' value that is a transcoding candidate'); # recurse $converted_v = static::$this_func($v, $to_encoding, $from_encoding, $options); if ($converted_v === false) { # this means that $v is a string and that mb_convert_encoding() failed, which should've triggered a warning