-
Notifications
You must be signed in to change notification settings - Fork 94
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix for issue #33: excessive usage of numeric charset entities when converting utf8 to latin-1
- Loading branch information
Showing
4 changed files
with
114 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
<?php | ||
/** | ||
* @author JoakimLofgren | ||
*/ | ||
|
||
use PhpXmlRpc\Helper\Charset; | ||
|
||
/**
 * Test conversion between encodings.
 *
 * Every test feeds UTF-8 text to Charset::encodeEntities() with ISO-8859-1 as
 * the destination charset and compares the result with a hard-coded expectation.
 *
 * For Windows if you want to test the output use Consolas font
 * and run the following in cmd:
 *     chcp 28591 (latin1)
 *     chcp 65001 (utf8)
 */
class CharsetTest extends PHPUnit_Framework_TestCase
{
    // UTF-8 fixtures; Consolas font should render these properly
    protected $runes = "ᚠᛇᚻ᛫ᛒᛦᚦ᛫ᚠᚱᚩᚠᚢᚱ᛫ᚠᛁᚱᚪ᛫ᚷᛖᚻᚹᛦᛚᚳᚢᛗ";
    protected $greek = "Τὴ γλῶσσα μοῦ ἔδωσαν ἑλληνικὴ";
    protected $russian = "Река неслася; бедный чёлн";
    protected $chinese = "我能吞下玻璃而不伤身体。";

    // Single-byte string built in setUp(): "\n\r\t" followed by bytes 32-126 and 160-255
    protected $latinString;

    /**
     * Builds the single-byte fixture used by testUtf8ToLatin1All().
     */
    protected function setUp()
    {
        // construct a latin string with all chars (except control ones)
        $this->latinString = "\n\r\t";
        for ($i = 32; $i < 127; $i++) {
            $this->latinString .= chr($i);
        }
        for ($i = 160; $i < 256; $i++) {
            $this->latinString .= chr($i);
        }
    }

    /**
     * Runs the conversion under test: UTF-8 input, ISO-8859-1 output.
     *
     * @param string $data UTF-8 encoded text
     * @return string whatever Charset::encodeEntities() returns for that input
     */
    protected function utfToLatin($data)
    {
        return Charset::instance()->encodeEntities(
            $data,
            'UTF-8',
            'ISO-8859-1'
        );
    }

    /**
     * Every byte of the latin fixture must come back unchanged, except for the
     * five XML special characters, which must come back as named entities.
     */
    public function testUtf8ToLatin1All()
    {
        /*$this->assertEquals(
            'ISO-8859-1',
            mb_detect_encoding($this->latinString, 'ISO-8859-1, UTF-8, WINDOWS-1251, ASCII', true),
            'Setup latinString is not ISO-8859-1 encoded...'
        );*/
        $string = utf8_encode($this->latinString);
        $encoded = $this->utfToLatin($string);

        // '&' is replaced first so that the ampersands introduced by the
        // other entities are not escaped a second time.
        $expected = str_replace(
            array('&', '"', "'", '<', '>'),
            array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'),
            $this->latinString
        );
        $this->assertEquals($expected, $encoded);
    }

    /**
     * Characters that exist in ISO-8859-1 (å, ä, ö) must be emitted as single
     * bytes; the euro sign (U+20AC) has no ISO-8859-1 code point and must be
     * emitted as the numeric character reference &#8364;.
     */
    public function testUtf8ToLatin1EuroSymbol()
    {
        $string = 'a.b.c.å.ä.ö.€.';
        $encoded = $this->utfToLatin($string);
        // utf8_decode() turns å/ä/ö into their latin-1 bytes and leaves the ASCII entity untouched
        $this->assertEquals(utf8_decode('a.b.c.å.ä.ö.&#8364;.'), $encoded);
    }

    /**
     * Runic text lies entirely outside ISO-8859-1: every character must become
     * a decimal numeric character reference.
     */
    public function testUtf8ToLatin1Runes()
    {
        $string = $this->runes;
        $encoded = $this->utfToLatin($string);
        $this->assertEquals(
            '&#5792;&#5831;&#5819;&#5867;&#5842;&#5862;&#5798;&#5867;'
            . '&#5792;&#5809;&#5801;&#5792;&#5794;&#5809;&#5867;'
            . '&#5792;&#5825;&#5809;&#5802;&#5867;'
            . '&#5815;&#5846;&#5819;&#5817;&#5862;&#5850;&#5811;&#5794;&#5847;',
            $encoded
        );
    }

    /**
     * Greek (including polytonic) letters must become decimal numeric character
     * references; the ASCII spaces between the words are passed through as-is.
     */
    public function testUtf8ToLatin1Greek()
    {
        $string = $this->greek;
        $encoded = $this->utfToLatin($string);
        $this->assertEquals(
            '&#932;&#8052; &#947;&#955;&#8182;&#963;&#963;&#945; &#956;&#959;&#8166; '
            . '&#7956;&#948;&#969;&#963;&#945;&#957; &#7953;&#955;&#955;&#951;&#957;&#953;&#954;&#8052;',
            $encoded
        );
    }

    /**
     * Cyrillic letters must become decimal numeric character references; the
     * ASCII semicolon and spaces are passed through as-is.
     */
    public function testUtf8ToLatin1Russian()
    {
        $string = $this->russian;
        $encoded = $this->utfToLatin($string);
        $this->assertEquals(
            '&#1056;&#1077;&#1082;&#1072; &#1085;&#1077;&#1089;&#1083;&#1072;&#1089;&#1103;; '
            . '&#1073;&#1077;&#1076;&#1085;&#1099;&#1081; &#1095;&#1105;&#1083;&#1085;',
            $encoded
        );
    }

    /**
     * CJK ideographs and the ideographic full stop must become decimal numeric
     * character references.
     */
    public function testUtf8ToLatin1Chinese()
    {
        $string = $this->chinese;
        $encoded = $this->utfToLatin($string);
        $this->assertEquals(
            '&#25105;&#33021;&#21534;&#19979;&#29627;&#29827;&#32780;&#19981;&#20260;&#36523;&#20307;&#12290;',
            $encoded
        );
    }
}