From 51fcfc3301791771495b909723db2fe7f2be9130 Mon Sep 17 00:00:00 2001
From: "Chan, Danny"
Date: Mon, 21 Aug 2023 12:37:00 +0000
Subject: [PATCH] add test

---
Reviewer notes (free text after the "---" separator; not part of the commit):

  * Imports the global ValueError class into tests/EncodingTest.php and appends
    two test methods to EncodingTest.
  * testEncodeSpecialCheck: builds the 'cl100k_base' encoding via
    EncodingFactory::createByEncodingName() and calls encode('<|endoftext|>')
    with no further arguments; the test expects a ValueError whose message
    matches 'Encountered text corresponding to disallowed special token'.
  * testEncodeSpecialWithAllow: calls
    encode('<|endoftext|>Hello<|fim_prefix|>', ['<|fim_prefix|>'], []) and
    asserts the exact token list
    [27, 91, 8862, 728, 428, 91, 29, 9906, 100258].
    NOTE(review): the second and third arguments are presumably the allowed /
    disallowed special-token lists -- confirm against the encode() signature,
    which is not visible in this patch.

 tests/EncodingTest.php | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/tests/EncodingTest.php b/tests/EncodingTest.php
index 73d9db8..ed0e280 100644
--- a/tests/EncodingTest.php
+++ b/tests/EncodingTest.php
@@ -6,6 +6,7 @@
 use Danny50610\BpeTokeniser\EncodingFactory;
 use InvalidArgumentException;
 use PHPUnit\Framework\TestCase;
+use ValueError;
 
 class EncodingTest extends TestCase
 {
@@ -164,4 +165,23 @@ public function testEncodeOrdinaryWithDisallowedSpecial()
         $this->assertSame([9468, 104, 94, 9468, 235, 96, 14167, 237, 88435], $tokens1);
         $this->assertSame($tokens1, $tokens2);
     }
+
+    public function testEncodeSpecialCheck()
+    {
+        $this->expectException(ValueError::class);
+        $this->expectExceptionMessage('Encountered text corresponding to disallowed special token');
+
+        $enc = EncodingFactory::createByEncodingName('cl100k_base');
+
+        $enc->encode('<|endoftext|>');
+    }
+
+    public function testEncodeSpecialWithAllow()
+    {
+        $enc = EncodingFactory::createByEncodingName('cl100k_base');
+
+        $tokens = $enc->encode('<|endoftext|>Hello<|fim_prefix|>', ['<|fim_prefix|>'], []);
+
+        $this->assertSame([27, 91, 8862, 728, 428, 91, 29, 9906, 100258], $tokens);
+    }
 }