From 222a131cf9679cf933327904e06f1e0debc6e207 Mon Sep 17 00:00:00 2001
From: anastasiia
Date: Tue, 23 Apr 2024 17:39:31 +0100
Subject: [PATCH] update the model from gpt-4-1106-preview to gpt-4-turbo

---
 server/bleep/src/agent/model.rs      | 2 +-
 server/bleep/src/llm/call.rs         | 2 +-
 server/bleep/src/webserver/studio.rs | 8 ++++----
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/server/bleep/src/agent/model.rs b/server/bleep/src/agent/model.rs
index 917941e931..3ac0c249b7 100644
--- a/server/bleep/src/agent/model.rs
+++ b/server/bleep/src/agent/model.rs
@@ -41,7 +41,7 @@ const HEADROOM_CORRECTION: usize = GPT_4_TURBO_MAX_TOKENS - ACTUAL_MAX_TOKENS;
 // PS: when we want to fully utilize the model max context window, the correction is 0
 pub const GPT_4_TURBO_24K: LLMModel = LLMModel {
     tokenizer: "gpt-4-1106-preview",
-    model_name: "gpt-4-1106-preview",
+    model_name: "gpt-4-turbo",
     answer_headroom: 1024 + HEADROOM_CORRECTION,
     prompt_headroom: 2500 + HEADROOM_CORRECTION,
     history_headroom: 2048 + HEADROOM_CORRECTION,
diff --git a/server/bleep/src/llm/call.rs b/server/bleep/src/llm/call.rs
index 02231ddef3..56aa658493 100644
--- a/server/bleep/src/llm/call.rs
+++ b/server/bleep/src/llm/call.rs
@@ -82,7 +82,7 @@ pub async fn llm_call(
 ) -> anyhow::Result>> {
     let model = match req.model.as_deref() {
         Some(model) => model.to_owned(),
-        None => "gpt-4-turbo-preview".into(),
+        None => "gpt-4-turbo".into(),
     };
 
     let builder = {
diff --git a/server/bleep/src/webserver/studio.rs b/server/bleep/src/webserver/studio.rs
index 82a7662582..d74e56c926 100644
--- a/server/bleep/src/webserver/studio.rs
+++ b/server/bleep/src/webserver/studio.rs
@@ -31,7 +31,7 @@ use crate::{
 mod diff;
 
-const LLM_GATEWAY_MODEL: &str = "gpt-4-1106-preview";
+const LLM_GATEWAY_MODEL: &str = "gpt-4-turbo";
 
 fn studio_not_found() -> Error {
     Error::not_found("unknown code studio ID")
 }
@@ -491,7 +491,7 @@ async fn token_counts(
         })
         .collect::<Vec<_>>();
 
-    let core_bpe = tiktoken_rs::get_bpe_from_model("gpt-4-1106-preview").unwrap();
+    let core_bpe = tiktoken_rs::get_bpe_from_model("gpt-4-turbo").unwrap();
     let per_doc_file = stream::iter(doc_context)
         .map(|file| async {
             if file.hidden {
@@ -652,14 +652,14 @@ pub async fn get_doc_file_token_count(
         .map(|sr| sr.text)
         .collect::<String>();
 
-    let core_bpe = tiktoken_rs::get_bpe_from_model("gpt-4-1106-preview").unwrap();
+    let core_bpe = tiktoken_rs::get_bpe_from_model("gpt-4-turbo").unwrap();
     let token_count = core_bpe.encode_ordinary(&content).len();
 
     Ok(Json(token_count))
 }
 
 fn count_tokens_for_file(path: &str, body: &str, ranges: &[Range<usize>]) -> usize {
-    let core_bpe = tiktoken_rs::get_bpe_from_model("gpt-4-1106-preview").unwrap();
+    let core_bpe = tiktoken_rs::get_bpe_from_model("gpt-4-turbo").unwrap();
 
     let mut chunks = Vec::new();