diff --git a/Cargo.toml b/Cargo.toml index 4c7c8ec6..02214b45 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,9 @@ itertools = "0.13" memmap2 = "0.9" safetensors = "0.4" +[workspace.dependencies.ai00-core] +path = "crates/ai00-core" + [workspace.dependencies.web-rwkv] # path = "../web-rwkv" default-features = false diff --git a/crates/ai00-server/Cargo.toml b/crates/ai00-server/Cargo.toml index 0f490167..4fbb4d61 100644 --- a/crates/ai00-server/Cargo.toml +++ b/crates/ai00-server/Cargo.toml @@ -26,6 +26,9 @@ tokio = { version = "1", features = ["full"] } toml = "0.8.6" zip-extract = "0.1" +[dependencies.ai00-core] +workspace = true + [dependencies.anyhow] workspace = true @@ -61,6 +64,3 @@ features = [ "sse", ] version = "0.67" - -[dependencies.ai00-core] -path = "../ai00-core" diff --git a/crates/ai00-server/src/api/mod.rs b/crates/ai00-server/src/api/mod.rs index 354d6a6a..078f76e3 100644 --- a/crates/ai00-server/src/api/mod.rs +++ b/crates/ai00-server/src/api/mod.rs @@ -32,12 +32,12 @@ pub async fn request_info(sender: Sender, sleep: Duration) -> Run pub async fn request_info_stream( sender: Sender, - info_sender: Sender, + stream: Sender, sleep: Duration, ) { loop { if let Ok(_info) = try_request_info(sender.clone()).await { - if info_sender.send(_info).is_err() { + if stream.send(_info).is_err() { break; } } diff --git a/crates/ai00-server/src/api/oai/chat.rs b/crates/ai00-server/src/api/oai/chat.rs index 45294e0e..eafed885 100644 --- a/crates/ai00-server/src/api/oai/chat.rs +++ b/crates/ai00-server/src/api/oai/chat.rs @@ -184,7 +184,7 @@ struct PartialChatResponse { async fn respond_one(depot: &mut Depot, request: ChatRequest, res: &mut Response) { let ThreadState { sender, .. } = depot.obtain::().unwrap(); - let info = request_info(sender.clone(), Duration::from_secs(1)).await; + let info = request_info(sender.clone(), Duration::from_millis(500)).await; let model_name = info.reload.model_path.to_string_lossy().into_owned(); let (token_sender, token_receiver) = flume::unbounded(); @@ -233,7 +233,7 @@ async fn respond_one(depot: &mut Depot, request: ChatRequest, res: &mut Response async fn respond_stream(depot: &mut Depot, request: ChatRequest, res: &mut Response) { let ThreadState { sender, .. } = depot.obtain::().unwrap(); - let info = request_info(sender.clone(), Duration::from_secs(1)).await; + let info = request_info(sender.clone(), Duration::from_millis(500)).await; let model_name = info.reload.model_path.to_string_lossy().into_owned(); let (token_sender, token_receiver) = flume::unbounded(); diff --git a/crates/ai00-server/src/api/oai/completion.rs b/crates/ai00-server/src/api/oai/completion.rs index 1359796b..140eed6f 100644 --- a/crates/ai00-server/src/api/oai/completion.rs +++ b/crates/ai00-server/src/api/oai/completion.rs @@ -131,7 +131,7 @@ pub struct PartialCompletionResponse { async fn respond_one(depot: &mut Depot, request: CompletionRequest, res: &mut Response) { let ThreadState { sender, .. } = depot.obtain::().unwrap(); - let info = request_info(sender.clone(), Duration::from_secs(1)).await; + let info = request_info(sender.clone(), Duration::from_millis(500)).await; let model_name = info.reload.model_path.to_string_lossy().into_owned(); let (token_sender, token_receiver) = flume::unbounded(); @@ -177,7 +177,7 @@ async fn respond_one(depot: &mut Depot, request: CompletionRequest, res: &mut Re async fn respond_stream(depot: &mut Depot, request: CompletionRequest, res: &mut Response) { let ThreadState { sender, .. } = depot.obtain::().unwrap(); - let info = request_info(sender.clone(), Duration::from_secs(1)).await; + let info = request_info(sender.clone(), Duration::from_millis(500)).await; let model_name = info.reload.model_path.to_string_lossy().into_owned(); let (token_sender, token_receiver) = flume::unbounded(); diff --git a/crates/ai00-server/src/api/oai/embedding.rs b/crates/ai00-server/src/api/oai/embedding.rs index b0d07fb4..d4dbd744 100644 --- a/crates/ai00-server/src/api/oai/embedding.rs +++ b/crates/ai00-server/src/api/oai/embedding.rs @@ -57,7 +57,7 @@ pub async fn embeddings( ) -> Json { let request = req.to_owned(); // req.parse_json::().await.unwrap(); let ThreadState { sender, .. } = depot.obtain::().unwrap(); - let info = request_info(sender.clone(), Duration::from_secs(1)).await; + let info = request_info(sender.clone(), Duration::from_millis(500)).await; let model_name = info.reload.model_path.to_string_lossy().into_owned(); let (token_sender, token_receiver) = flume::unbounded(); diff --git a/crates/ai00-server/src/api/oai/info.rs b/crates/ai00-server/src/api/oai/info.rs index 9ebd88ac..0a8d7bcd 100644 --- a/crates/ai00-server/src/api/oai/info.rs +++ b/crates/ai00-server/src/api/oai/info.rs @@ -23,7 +23,7 @@ pub struct ModelResponse { #[endpoint] pub async fn models(depot: &mut Depot) -> Json { let ThreadState { sender, .. } = depot.obtain::().unwrap(); - let info = request_info(sender.to_owned(), Duration::from_secs(1)).await; + let info = request_info(sender.to_owned(), Duration::from_millis(500)).await; let model_name = info .reload .model_path