Skip to content

Commit

Permalink
add test
Browse files Browse the repository at this point in the history
  • Loading branch information
danny50610 committed Aug 21, 2023
1 parent a53038a commit 51fcfc3
Showing 1 changed file with 20 additions and 0 deletions.
20 changes: 20 additions & 0 deletions tests/EncodingTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
use Danny50610\BpeTokeniser\EncodingFactory;
use InvalidArgumentException;
use PHPUnit\Framework\TestCase;
use ValueError;

class EncodingTest extends TestCase
{
Expand Down Expand Up @@ -164,4 +165,23 @@ public function testEncodeOrdinaryWithDisallowedSpecial()
$this->assertSame([9468, 104, 94, 9468, 235, 96, 14167, 237, 88435], $tokens1);
$this->assertSame($tokens1, $tokens2);
}

/**
 * Encoding text that consists of a special-token marker, using only the
 * default arguments of encode(), is expected to raise a ValueError with
 * the "disallowed special token" message.
 */
public function testEncodeSpecialCheck()
{
    // Both expectations must be registered before the call that throws.
    $this->expectExceptionMessage('Encountered text corresponding to disallowed special token');
    $this->expectException(ValueError::class);

    EncodingFactory::createByEncodingName('cl100k_base')->encode('<|endoftext|>');
}

/**
 * Encodes text holding two special-token markers while passing
 * ['<|fim_prefix|>'] and [] as the second and third arguments of encode(),
 * then checks the exact list of token ids that comes back.
 *
 * NOTE(review): presumably the second argument is the allowed-special list
 * and the third the disallowed-special list - confirm against the encode()
 * signature.
 */
public function testEncodeSpecialWithAllow()
{
    $expected = [27, 91, 8862, 728, 428, 91, 29, 9906, 100258];
    $encoding = EncodingFactory::createByEncodingName('cl100k_base');

    $actual = $encoding->encode('<|endoftext|>Hello<|fim_prefix|>', ['<|fim_prefix|>'], []);

    // assertSame: order, values and integer types must all match exactly.
    $this->assertSame($expected, $actual);
}
}

0 comments on commit 51fcfc3

Please sign in to comment.