Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docker/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ popd >/dev/null
if [[ "${CUDA_VERSION_SHORT}" = "cu118" ]]; then
apt-get install -y --no-install-recommends cuda-minimal-build-11-8
elif [[ "${CUDA_VERSION_SHORT}" = "cu124" ]]; then
apt-get install -y --no-install-recommends cuda-minimal-build-12-4
apt-get install -y --no-install-recommends cuda-minimal-build-12-4 cuda-cuobjdump-12-4 cuda-nvdisasm-12-4
elif [[ "${CUDA_VERSION_SHORT}" = "cu128" ]]; then
apt-get install -y --no-install-recommends cuda-minimal-build-12-8
apt-get install -y --no-install-recommends cuda-minimal-build-12-8 cuda-cuobjdump-12-8 cuda-nvdisasm-12-8
fi

apt-get clean -y
Expand Down
5 changes: 3 additions & 2 deletions docker/prepare_wheel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,14 @@ fi
if [[ "${CUDA_VERSION_SHORT}" != "cu118" ]]; then

if [[ "${CUDA_VERSION_SHORT}" = "cu124" ]]; then
DEEP_GEMM_VERSION=03d0be3
FLASH_MLA_VERSION=9edee0c
else
DEEP_GEMM_VERSION=79f48ee
FLASH_MLA_VERSION=c759027
fi

# The current EP implementation uses dlblas, which is incompatible with the latest deep_gemm APIs.
# To ensure compatibility, we pin deep_gemm to an older version.
DEEP_GEMM_VERSION=03d0be3
DEEP_EP_VERSION=26cf250
pip install nvidia-nvshmem-cu12

Expand Down
7 changes: 7 additions & 0 deletions lmdeploy/pytorch/backends/cuda/token_dispatcher.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
# Copyright (c) OpenMMLab. All rights reserved.
try:
from deep_ep import Buffer

from lmdeploy.pytorch.envs import env_to_int

# default value refers to DeepEP code
# https://github.com/deepseek-ai/DeepEP/blob/bfded34800dfec415b71503f8205181de90b2480/deep_ep/buffer.py#L30
deep_ep_buffer_num_sms = env_to_int(env_var='DEEPEP_BUFFER_NUM_SMS', default=20)
Buffer.set_num_sms(deep_ep_buffer_num_sms)
use_deepep = True
except ImportError:
use_deepep = False
Expand Down
1 change: 1 addition & 0 deletions lmdeploy/pytorch/envs.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ def _patched_get_env(
# we don't need to read this, it would be passed to ray workers
# If Ray is launched from outside, it may fail to access the environment variables.
os.getenv('DEEPEP_MAX_BATCH_SIZE', None)
os.getenv('DEEPEP_BUFFER_NUM_SMS', None)

# deepgemm
os.getenv('DG_JIT_DEBUG', '0')
Expand Down
Loading