File tree Expand file tree Collapse file tree 6 files changed +446
-168
lines changed
Expand file tree Collapse file tree 6 files changed +446
-168
lines changed Original file line number Diff line number Diff line change @@ -309,6 +309,8 @@ def run(cls):
309309 parser .add_argument ("--limit_worker_concurrency" , type = int , default = 1024 )
310310 # port
311311 parser .add_argument ("--port" , type = int , default = None )
312+ # model_type
313+ parser .add_argument ("--model_type" , type = str , default = "auto" )
312314 args = parser .parse_args ()
313315 os .environ ["num_gpus" ] = str (args .num_gpus )
314316 if args .backend == "vllm" :
@@ -331,6 +333,7 @@ def run(cls):
331333 if args .punc_model :
332334 os .environ ["punc_model" ] = args .punc_model
333335
336+ os .environ ["model_type" ] = args .model_type
334337 os .environ ["enable_prefix_caching" ] = args .enable_prefix_caching
335338 os .environ ["gpu_memory_utilization" ] = args .gpu_memory_utilization
336339 os .environ ["kv_cache_quant_policy" ] = args .kv_cache_quant_policy
Original file line number Diff line number Diff line change 2323 start_api_server ,
2424 start_model_worker ,
2525 delete_log ,
26+ pre_processing ,
2627)
2728
2829
29- def delete_flash_attn ():
30- "删除 flash_attn,避免报错"
31- import shutil
32- import os
33- from pathlib import Path
34- from loguru import logger
35-
36- root_path = Path (__file__ ).parent .parent .parent
37- flash_attn_path = root_path .joinpath (
38- ".venv/lib/python3.11/site-packages/flash_attn"
39- )
40-
41- try :
42- # 检查路径是否存在
43- if os .path .exists (flash_attn_path ):
44- # 删除整个目录树
45- shutil .rmtree (flash_attn_path )
46- logger .info (f"成功删除: { flash_attn_path } " )
47-
48- except PermissionError :
49- logger .error ("权限不足,无法删除 flash_attn" )
50- except Exception as e :
51- logger .error (f"删除 flash_attn 失败: { e } " )
52-
53-
54- # 删除日志
55- delete_log ()
56-
57- delete_flash_attn ()
30+ pre_processing ()
5831
5932config_path = os .path .join (root_dir , "gpt_server/script/config.yaml" )
6033env = os .getenv ("ENV" )
Original file line number Diff line number Diff line change 1818STATIC_DIR = root_dir / "static"
1919
2020
def clear_flashinfer_cache():
    """Run ``flashinfer clear-cache`` as a best-effort start-up step.

    The command is executed without a shell, using an argument list. If the
    ``flashinfer`` executable is not on ``PATH`` or cannot be launched, the
    step is skipped; the command's exit status is ignored.

    Returns:
        None
    """
    import shutil
    import subprocess

    executable = shutil.which("flashinfer")
    if executable is None:
        # Nothing to clear when the flashinfer CLI is not installed.
        return

    try:
        # check=False: a non-zero exit status must not abort start-up.
        subprocess.run([executable, "clear-cache"], check=False)
    except OSError:
        # The executable vanished or is not runnable; keep this best-effort.
        pass
23+
24+
def delete_flash_attn():
    """Delete the ``flash_attn`` package from the project's ``.venv`` to avoid errors.

    The ``.venv`` directory is looked up in the parent of this file's
    directory. Every ``lib/python*/site-packages/flash_attn`` directory found
    there is removed, so the clean-up is not tied to one interpreter version.
    Failures are logged and never raised.

    Returns:
        None
    """
    import shutil
    from pathlib import Path
    from loguru import logger

    root_path = Path(__file__).parent.parent
    # Match any Python minor version instead of hard-coding python3.11.
    candidates = sorted(
        root_path.glob(".venv/lib/python*/site-packages/flash_attn")
    )

    for flash_attn_path in candidates:
        if not flash_attn_path.is_dir():
            continue
        try:
            # Remove the whole package directory tree.
            shutil.rmtree(flash_attn_path)
            logger.info(f"成功删除: {flash_attn_path}")
        except PermissionError:
            logger.error("权限不足,无法删除 flash_attn")
        except Exception as e:
            logger.error(f"删除 flash_attn 失败: {e}")
48+
49+
def pre_processing():
    """Run the start-up pre-processing steps in a fixed order."""
    housekeeping_steps = (
        delete_log,  # delete logs
        delete_flash_attn,  # delete the unwanted flash_attn package
        clear_flashinfer_cache,  # clear the flashinfer cache
    )
    for step in housekeeping_steps:
        step()
58+
59+
2160def kill_child_processes (parent_pid , including_parent = False ):
2261 "杀死子进程/僵尸进程"
2362 try :
@@ -263,6 +302,7 @@ def start_model_worker(config: dict):
263302 + f" --log_level { log_level } " # 日志水平
264303 + f" --task_type { task_type } " # 日志水平
265304 + f" --limit_worker_concurrency { limit_worker_concurrency } " # 限制worker并发数
305+ + f" --model_type { model_type } " # 默认类型
266306 )
267307 # 处理为 None的情况
268308 if port :
Original file line number Diff line number Diff line change 11[project ]
22name = " gpt_server"
3- version = " 0.6.4 "
3+ version = " 0.6.5 "
44description = " gpt_server是一个用于生产级部署LLMs、Embedding、Reranker、ASR和TTS的开源框架。"
55readme = " README.md"
66license = { text = " Apache 2.0" }
@@ -16,16 +16,16 @@ dependencies = [
1616 " loguru>=0.7.2" ,
1717 " openai==1.99.1" ,
1818 " setuptools==75.2.0" ,
19- " streamlit==1.39 .0" ,
19+ " streamlit>=1.50 .0" ,
2020 " torch==2.8.0" ,
2121 " torchvision==0.23.0" ,
22- " vllm==0.10.2 " ,
22+ " vllm==0.11.0 " ,
2323 " qwen_vl_utils" ,
2424 " evalscope[perf,rag]==0.16.1" ,
2525 " modelscope==1.26.0" ,
2626 " edge-tts>=7.0.0" ,
2727 " funasr>=1.2.6" ,
28- " sglang[all]>=0.5.2 " ,
28+ " sglang[all]>=0.5.3.post1 " ,
2929 " flashinfer-python" ,
3030 " flashtts>=0.1.7" ,
3131 " diffusers>=0.35.1" ,
@@ -35,9 +35,10 @@ dependencies = [
3535
3636[tool .uv ]
3737default-groups = [] # 默认只安装dependencies中的库
38+ prerelease = " allow"
3839override-dependencies = [
3940 " setuptools==77.0.3" ,
40- " transformers==4.56.1 " , # infinity-emb
41+ " transformers==4.57.0 " , # infinity-emb
4142 " soundfile==0.13.1" , # infinity
4243 " xgrammar==0.1.24" , # sglang[all]==0.4.5 depends on xgrammar==0.1.17
4344 " outlines-core==0.2.11" , # sglang 和 vllm 的冲突
You can’t perform that action at this time.
0 commit comments