Commit
Add adepter selection argument.
cryscan committed Aug 2, 2023
1 parent efdf98a commit 3120154
Showing 4 changed files with 41 additions and 10 deletions.
11 changes: 10 additions & 1 deletion README.md

````diff
@@ -96,7 +96,16 @@ QQ Group for communication: 30920262
 * `--model`: Model path
 * `--tokenizer`: Tokenizer path
 * `--port`: Running port
-* `--quant`: Specify number of layers to quantize
+* `--quant`: Specify the number of quantization layers
+* `--adapter`: Adapter (GPU and backend) selection options
+
+### Example
+
+The server listens on port 3000, loads the fully quantized (32 > 24 layers) 0.4B model, and selects adapter 0. (To find a specific adapter number, first run without this argument; the program will show an adapter selection page.)
+
+```bash
+$ cargo run --release -- --model assets/models/RWKV-4-World-0.4B-v1-20230529-ctx4096.st --port 3000 --quant 32 --adapter 0
+```
 
 ## 📙Currently Available APIs
 
````
13 changes: 11 additions & 2 deletions README_jp.md

````diff
@@ -92,9 +92,18 @@ Compatible with OpenAI's ChatGPT API interface.
 ## 📝Supported Launch Parameters
 
 * `--model`: Model path
-* `--tokenizer`: Tokenizer
+* `--tokenizer`: Tokenizer path
 * `--port`: Running port
-* `--quant`: Specify the number of layers to quantize
+* `--quant`: Specify the number of quantization layers
+* `--adapter`: Adapter (GPU and backend) selection options
+
+### Example
+
+The server listens on port 3000, loads the fully quantized (32 > 24 layers) 0.4B model, and selects adapter 0. (To find a specific adapter number, first run without this argument; the program will show an adapter selection page.)
+
+```bash
+$ cargo run --release -- --model assets/models/RWKV-4-World-0.4B-v1-20230529-ctx4096.st --port 3000 --quant 32 --adapter 0
+```
 
 ## 📙Currently Available APIs
 
````
8 changes: 8 additions & 0 deletions README_zh.md

````diff
@@ -104,6 +104,14 @@
 - `--tokenizer`: Tokenizer path
 - `--port`: Running port
 - `--quant`: Specify the number of quantization layers
+- `--adapter`: Adapter (GPU and backend) selection option
+
+### Example
+
+The server listens on port 3000, loads the fully quantized (32 > 24 layers) 0.4B model, and selects adapter 0. (To find a specific adapter number, first run without this argument; the program will show a selection page.)
+```bash
+$ cargo run --release -- --model assets/models/RWKV-4-World-0.4B-v1-20230529-ctx4096.st --port 3000 --quant 32 --adapter 0
+```
 
 ## 📙Currently Available APIs
 
````
19 changes: 12 additions & 7 deletions src/main.rs

```diff
@@ -119,14 +119,17 @@ pub struct ReloadRequest {
     pub quantized_layers: Vec<usize>,
 }
 
-async fn create_environment() -> Result<Environment> {
+async fn create_environment(selection: Option<usize>) -> Result<Environment> {
     let instance = Instance::new();
     let adapters = instance.adapters();
-    let selection = Select::with_theme(&ColorfulTheme::default())
-        .with_prompt("Please select an adapter")
-        .default(0)
-        .items(&adapters)
-        .interact()?;
+    let selection = match selection {
+        Some(selection) => selection,
+        None => Select::with_theme(&ColorfulTheme::default())
+            .with_prompt("Please select an adapter")
+            .default(0)
+            .items(&adapters)
+            .interact()?,
+    };
 
     let adapter = instance.select_adapter(selection)?;
     let env = Environment::new(adapter).await?;
@@ -347,6 +350,8 @@ fn model_task(model: Model, tokenizer: Tokenizer, receiver: Receiver<ThreadRequest
 #[derive(Parser, Debug, Clone)]
 #[command(author, version, about, long_about = None)]
 struct Args {
+    #[arg(long, short)]
+    adepter: Option<usize>,
     #[arg(long, short, value_name = "FILE")]
     model: Option<String>,
     #[arg(long, short, value_name = "FILE")]
@@ -385,7 +390,7 @@ async fn main() -> Result<()> {
     );
 
     let (sender, receiver) = flume::unbounded::<ThreadRequest>();
-    let env = create_environment().await?;
+    let env = create_environment(args.adepter).await?;
     let tokenizer = load_tokenizer(&tokenizer_path)?;
 
     log::info!("{:#?}", env.adapter.get_info());
```
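The pattern this commit introduces — take the adapter index from the CLI when given, otherwise fall back to the interactive picker — can be sketched in isolation. The names below are illustrative, not from the repository; the real code calls `dialoguer::Select` where this sketch accepts a closure:

```rust
/// Hypothetical stand-alone version of the selection fallback:
/// an explicit index from the command line wins; otherwise the
/// supplied picker (interactive in the real program) decides.
fn resolve_selection(arg: Option<usize>, pick: impl FnOnce() -> usize) -> usize {
    match arg {
        Some(selection) => selection, // `--adapter N` was given
        None => pick(),               // fall back to interactive selection
    }
}

fn main() {
    // With the argument present, the picker is never consulted.
    let chosen = resolve_selection(Some(2), || unreachable!("no prompt expected"));
    assert_eq!(chosen, 2);

    // Without it, the (here simulated) picker supplies the index.
    let chosen = resolve_selection(None, || 0);
    assert_eq!(chosen, 0);
}
```

Passing `FnOnce` rather than a concrete prompt keeps the decision logic testable without a terminal, which is also why the committed `create_environment` takes `Option<usize>` instead of reading the flag itself.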
