diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..01afcfc
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,64 @@
+FROM swr.cn-south-1.myhuaweicloud.com/ascendhub/ascend-infer-310b:24.0.RC1-dev-arm
+
+# Set the time zone
+# ENV TZ=Asia/Shanghai
+# RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+
+# Point pip at the Tsinghua mirror
+RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
+
+# Install the Python modules that CANN depends on
+RUN pip install "numpy==1.26.3" jinja2 "attrs==23.2.0" \
+    "decorator==5.1.1" psutil cloudpickle "scipy==1.12.0" \
+    "tornado==6.4" "synr==0.5.0" absl-py sympy \
+    ml-dtypes --no-cache-dir
+
+# Install torch 2.1.0
+RUN pip install torch==2.1.0 --no-cache-dir
+
+# Create the qwen_ascend_llm directory and its output subdirectory
+RUN mkdir /home/AscendWork/qwen_ascend_llm/ && \
+    mkdir /home/AscendWork/qwen_ascend_llm/output
+WORKDIR /home/AscendWork/qwen_ascend_llm/
+
+# Install torch_npu (this image ships Python 3.9, so use the cp39 build of torch_npu 2.1.0)
+RUN curl -A "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3" -L https://gitee.com/ascend/pytorch/releases/download/v6.0.rc2-pytorch2.1.0/torch_npu-2.1.0.post6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl \
+    -o torch_npu-2.1.0.post6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
+RUN pip install ./torch_npu-2.1.0.post6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl --no-cache-dir
+
+# Copy the Python dependency list
+COPY ./requirements.txt .
+
+# Install the Python modules required by this project
+RUN pip install -r requirements.txt --no-cache-dir
+
+# Copy the model files
+COPY download ./download
+COPY output/model ./output/model
+
+# Copy the source code
+COPY client ./client
+COPY export ./export
+COPY image ./image
+COPY utils ./utils
+COPY ./api.py .
+COPY ./cli_chat.py .
+COPY ./config.py .
+COPY ./README.md .
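+
+# (Optional sketch) A HEALTHCHECK would let Docker poll the API once the container is up.
+# Left commented out because the probe path below is an assumption; point it at a route
+# that api.py actually serves before enabling it (curl is already present in this image).
+# HEALTHCHECK --interval=30s --timeout=5s --start-period=120s \
+#     CMD curl -sf http://localhost:8000/docs || exit 1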
+
+# Remove the downloaded torch_npu wheel
+RUN rm ./*.whl
+
+# Expose the default port 8000 for the API
+EXPOSE 8000
+
+# Switch to root to fix file ownership (just in case)
+USER root
+RUN chown -R HwHiAiUser:HwHiAiUser ./*
+
+USER HwHiAiUser
+
+# Launch the program; the API is started by default
+CMD ["bash", "-c", "/home/AscendWork/run.sh && python3 api.py"]
\ No newline at end of file
diff --git a/Dockerfile_dev b/Dockerfile_dev
new file mode 100644
index 0000000..f71d219
--- /dev/null
+++ b/Dockerfile_dev
@@ -0,0 +1,50 @@
+FROM swr.cn-south-1.myhuaweicloud.com/ascendhub/ascend-infer-310b:24.0.RC1-dev-arm
+
+# Set the time zone
+# ENV TZ=Asia/Shanghai
+# RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+
+# Point pip at the Tsinghua mirror
+RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
+
+# Install the Python modules that CANN depends on
+RUN pip install "numpy==1.26.3" jinja2 "attrs==23.2.0" \
+    "decorator==5.1.1" psutil cloudpickle "scipy==1.12.0" \
+    "tornado==6.4" "synr==0.5.0" absl-py sympy \
+    ml-dtypes --no-cache-dir
+
+# Install torch 2.1.0
+RUN pip install torch==2.1.0 --no-cache-dir
+
+# Create the qwen_ascend_llm directory and its output subdirectory
+RUN mkdir /home/AscendWork/qwen_ascend_llm/ && \
+    mkdir /home/AscendWork/qwen_ascend_llm/output
+WORKDIR /home/AscendWork/qwen_ascend_llm/
+
+# Install torch_npu (this image ships Python 3.9, so use the cp39 build of torch_npu 2.1.0)
+RUN curl -A "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3" -L https://gitee.com/ascend/pytorch/releases/download/v6.0.rc2-pytorch2.1.0/torch_npu-2.1.0.post6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl \
+    -o torch_npu-2.1.0.post6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
+RUN pip install ./torch_npu-2.1.0.post6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl --no-cache-dir
+
+# Copy the Python dependency list
+COPY ./requirements.txt .
+
+# Install the Python modules required by this project
+RUN pip install -r requirements.txt --no-cache-dir
+
+# Remove the downloaded torch_npu wheel
+RUN rm ./*.whl
+
+# Expose the default port 8000 for the API
+EXPOSE 8000
+
+# Switch to root to fix file ownership (just in case)
+USER root
+RUN chown -R HwHiAiUser:HwHiAiUser ./*
+
+USER HwHiAiUser
+
+# Launch the program; drop into a shell by default
+CMD ["bash", "-c", "/home/AscendWork/run.sh && /bin/bash"]
\ No newline at end of file
diff --git a/README.md b/README.md
index 78b3479..55c6b1e 100644
--- a/README.md
+++ b/README.md
@@ -13,11 +13,42 @@
 2. Download a qwen1.5/qwen2 model (pick a chat or instruct variant) and place it in the download folder. Only models downloaded from huggingface are supported; if your network is poor, use the mirror site: https://hf-mirror.com/Qwen
 
-### Quick start
-- Not available yet
+### Running with Docker
+- (Optional) Build the deployment docker image. You first need to follow [this tutorial](https://www.hiascend.com/forum/thread-0286157793000580492-1-1.html) to log in and pull the base image (it is recommended to work through all the steps below and obtain the .om file before building the docker image).
+  ```bash
+  docker build . -t qwen_ascend_llm
+  ```
+
+- (Optional) Build the development docker image. If you want to use docker to compile and run a custom chip target or a custom model, run the command below to build the image. Likewise, you first need to follow [this tutorial](https://www.hiascend.com/forum/thread-0286157793000580492-1-1.html) to log in and pull the base image.
+  ```bash
+  docker build -f Dockerfile_dev . -t qwen_ascend_llm_dev
+  ```
+
+- Pull the prebuilt image (only for the Ascend 310B1, e.g. the Orange Pi AIpro 20T edition). The image ships with a Qwen2-1.5B-Instruct model and its corresponding .om file.
+  ```bash
+  docker pull registry.cn-guangzhou.aliyuncs.com/tlntin/qwen_ascend_llm:v0.0.1_310B_arm64
+  docker tag registry.cn-guangzhou.aliyuncs.com/tlntin/qwen_ascend_llm:v0.0.1_310B_arm64 qwen_ascend_llm
+  ```
+
+- Start the deployment container (for a development container, adapt this script slightly: for example, replace the `python api.py` command at the bottom with `sleep 8640000` so the container stays up for 100 days, and add `-v` flags to mount the download/output directories).
+  ```bash
+  ./run_container.sh
+  ```
+
+- Check the container logs; once `INFO: Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)` appears, the server has started successfully.
+  ```bash
+  docker logs qwen_ascend_llm
+  ```
+
+- Call the API served by the container. Go to this project's client directory and run the scripts there to query the server (or smoke-test the endpoint with curl; see the sketch below).
+  ```bash
+  # openai_stream_client.py   streaming request, typewriter-like output
+  # openai_normal_client.py   non-streaming request, returns only after inference completes
+  # openai_function_call.py   tests function_call; with this feature enabled it is recommended to increase max_input_length and kv_cache_length.
+  ```
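+
+- (Optional) A minimal curl sketch against the OpenAI-compatible API. The `/v1/chat/completions` route is assumed from the standard OpenAI scheme, and the `model` value is illustrative; check api.py for the exact route and model name.
+  ```bash
+  curl http://localhost:8000/v1/chat/completions \
+    -H "Content-Type: application/json" \
+    -d '{
+      "model": "qwen2-1.5b-instruct",
+      "messages": [{"role": "user", "content": "Hello!"}],
+      "stream": false
+    }'
+  ```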
 
-### Step-by-step run
+### Detailed run steps
 ##### Step 1: Compile the model (Qwen2-1.5B-Instruct as the example).
 1. In addition to the CANN environment installation mentioned above, some extra python modules are required.
 ```bash
@@ -58,7 +89,7 @@
 ```
 
-##### Step 2: Run the model
+##### Step 2: Run the model in a terminal to chat
 - Run the model directly with the command below; `--max_prefill_length` must match the value used when compiling above.
 ```bash
 python3 ./cli_chat.py \
@@ -101,4 +132,4 @@
 - [x] OpenAI-compatible API server
 - [x] Support function call
 - [ ] Support model quantization, e.g. weight only, smooth quant
-- [ ] Support quick Docker deployment
\ No newline at end of file
+- [x] Support quick Docker deployment
\ No newline at end of file
diff --git a/run_container.sh b/run_container.sh
new file mode 100755
index 0000000..b4f3245
--- /dev/null
+++ b/run_container.sh
@@ -0,0 +1,35 @@
+docker run -d \
+    -p 8000:8000 \
+    --device=/dev/upgrade:/dev/upgrade \
+    --device=/dev/davinci0:/dev/davinci0 \
+    --device=/dev/davinci_manager \
+    --device=/dev/vdec:/dev/vdec \
+    --device=/dev/vpc:/dev/vpc \
+    --device=/dev/pngd:/dev/pngd \
+    --device=/dev/venc:/dev/venc \
+    --device=/dev/sys:/dev/sys \
+    --device=/dev/svm0 \
+    --device=/dev/ts_aisle:/dev/ts_aisle \
+    --device=/dev/dvpp_cmdlist:/dev/dvpp_cmdlist \
+    -v /etc/sys_version.conf:/etc/sys_version.conf:ro \
+    -v /etc/hdcBasic.cfg:/etc/hdcBasic.cfg:ro \
+    -v /usr/lib64/libaicpu_processer.so:/usr/lib64/libaicpu_processer.so:ro \
+    -v /usr/lib64/libaicpu_prof.so:/usr/lib64/libaicpu_prof.so:ro \
+    -v /usr/lib64/libaicpu_sharder.so:/usr/lib64/libaicpu_sharder.so:ro \
+    -v /usr/lib64/libadump.so:/usr/lib64/libadump.so:ro \
+    -v /usr/lib64/libtsd_eventclient.so:/usr/lib64/libtsd_eventclient.so:ro \
+    -v /usr/lib64/libaicpu_scheduler.so:/usr/lib64/libaicpu_scheduler.so:ro \
+    -v /usr/lib/aarch64-linux-gnu/libcrypto.so.1.1:/usr/lib/aarch64-linux-gnu/libcrypto.so.1.1:ro \
+    -v /usr/lib/aarch64-linux-gnu/libyaml-0.so.2:/usr/lib/aarch64-linux-gnu/libyaml-0.so.2:ro \
+    -v /usr/lib64/libdcmi.so:/usr/lib64/libdcmi.so:ro \
+    -v /usr/lib64/libmpi_dvpp_adapter.so:/usr/lib64/libmpi_dvpp_adapter.so:ro \
+    -v /usr/lib64/aicpu_kernels/:/usr/lib64/aicpu_kernels/:ro \
+    -v /usr/local/sbin/npu-smi:/usr/local/sbin/npu-smi:ro \
+    -v /usr/lib64/libstackcore.so:/usr/lib64/libstackcore.so:ro \
+    -v /usr/local/Ascend/driver/lib64:/usr/local/Ascend/driver/lib64:ro \
+    -v /var/slogd:/var/slogd:ro \
+    -v /var/dmp_daemon:/var/dmp_daemon:ro \
+    -v /etc/slog.conf:/etc/slog.conf:ro \
+    --name qwen_ascend_llm \
+    qwen_ascend_llm \
+    bash -c "/home/AscendWork/run.sh && python3 api.py"
\ No newline at end of file