From ed6f62271ec769989f40c7329732d163beaabb6b Mon Sep 17 00:00:00 2001 From: "Chan, Danny" Date: Sun, 20 Aug 2023 06:36:23 +0000 Subject: [PATCH] add register --- README.md | 4 +++- src/EncodingFactory.php | 34 ++++++++++++++++++++++++++++++++-- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 87515d2..91923bc 100644 --- a/README.md +++ b/README.md @@ -33,4 +33,6 @@ $enc = EncodingFactory::createByModelName('gpt-3.5-turbo'); var_dump($enc->decode($enc->encode("hello world"))); // output: string(11) "hello world" -``` \ No newline at end of file +``` + +For available encodings, see `src/EncodingFactory.php` diff --git a/src/EncodingFactory.php b/src/EncodingFactory.php index 2b8c1ce..af267c6 100644 --- a/src/EncodingFactory.php +++ b/src/EncodingFactory.php @@ -2,6 +2,7 @@ namespace Danny50610\BpeTokeniser; +use Closure; use Exception; use SplFileObject; @@ -63,10 +64,39 @@ class EncodingFactory "gpt-35-turbo" => "cl100k_base", # Azure deployment name ]; - protected static $encodingInstance = []; - protected static $encodingConstructors = null; + public static function registerModelToEncoding(string $modelName, string $encodingName) + { + if (array_key_exists($modelName, self::$modelToEncoding)) { + throw new Exception("{$modelName} already exists"); + } + + self::$modelToEncoding[$modelName] = $encodingName; + } + + public static function registerModelPrefixToEncoding(string $modelPrefix, string $encodingName) + { + if (array_key_exists($modelPrefix, self::$modelPrefixToEncoding)) { + throw new Exception("{$modelPrefix} already exists"); + } + + self::$modelPrefixToEncoding[$modelPrefix] = $encodingName; + } + + public static function registerEncoding(string $encodingName, Closure $constructor) + { + static::initConstructor(); + + if (array_key_exists($encodingName, self::$encodingConstructors)) { + throw new Exception("{$encodingName} already exists"); + } + + self::$encodingConstructors[$encodingName] = $constructor; + } + + protected static $encodingInstance = []; + public static function createByModelName(string $modelName): Encoding { $encodingName = null;