Skip to content

Commit

Permalink
Return elapsed time in response.
Browse files: browse the repository at this point in the history
  • Loading branch information
cryscan committed May 14, 2024
1 parent 9d4398e commit c870f69
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 15 deletions.
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ zip-extract = "0.1"
# path = "../web-rwkv"
default-features = false
features = ["native"]
version = "0.8.8"
version = "0.8.9"

[dependencies.salvo]
default-features = true
Expand Down
2 changes: 2 additions & 0 deletions src/middleware.rs
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,7 @@ pub struct TokenCounter {
pub prompt_tokens: usize,
pub completion_tokens: usize,
pub total_tokens: usize,
pub duration: Duration,
}

#[derive(Clone)]
Expand Down Expand Up @@ -645,6 +646,7 @@ pub async fn model_route(receiver: Receiver<ThreadRequest>) -> Result<()> {
buffer: Default::default(),
model_tokens: Default::default(),
bnf_sampler: None,
instant: None,
request,
sender: token_sender,
};
Expand Down
29 changes: 17 additions & 12 deletions src/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,8 @@ pub struct GenerateContext {
pub model_tokens: Vec<u16>,
/// Compiled BNF schema, if any.
pub bnf_sampler: Option<Arc<RwLock<BnfSampler>>>,
/// Set the first time this request is processed; used to measure elapsed time.
pub instant: Option<Instant>,
/// Generate request provided by the caller.
pub request: GenerateRequest,
/// To send back generated tokens.
Expand Down Expand Up @@ -809,6 +811,7 @@ impl Runtime {
continue;
};

let instant = context.instant.get_or_insert(Instant::now());
let prefix = std::mem::take(&mut context.prefix);
let suffix = std::mem::take(&mut context.suffix);
let model_tokens = [prefix.0, suffix.0].concat();
Expand Down Expand Up @@ -853,20 +856,22 @@ impl Runtime {
context.buffer.append(&mut word);
context.model_tokens.push(token);

let count_tokens = || {
let prompt_tokens = context.prompt_tokens.len();
let completion_tokens = context.model_tokens.len();
let total_tokens = prompt_tokens + completion_tokens;
TokenCounter {
prompt_tokens,
completion_tokens,
total_tokens,
}
};

let mut done = false;
let mut finish = |reason| {
let _ = context.sender.send(Token::Stop(reason, count_tokens()));
let counter = {
let prompt_tokens = context.prompt_tokens.len();
let completion_tokens = context.model_tokens.len();
let total_tokens = prompt_tokens + completion_tokens;
let duration = instant.elapsed();
TokenCounter {
prompt_tokens,
completion_tokens,
total_tokens,
duration,
}
};

let _ = context.sender.send(Token::Stop(reason, counter));
let _ = context.sender.send(Token::Done);
done = true;
};
Expand Down

0 comments on commit c870f69

Please sign in to comment.