Skip to content

Commit

Permalink
update nccl-plugin and rxdm versions
Browse files Browse the repository at this point in the history
  • Loading branch information
akiki-liang0 committed Dec 3, 2024
1 parent b538966 commit 91ab62f
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions tools/prologs-epilogs/receive-data-path-manager-mega
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ fi
# ensure that the dmabuf import-helper kernel module is loaded
modprobe import-helper

NCCL_PLUGIN_IMAGE=us-docker.pkg.dev/gce-ai-infra/gpudirect-tcpxo/nccl-plugin-gpudirecttcpx-dev:v1.0.4
RXDM_IMAGE=us-docker.pkg.dev/gce-ai-infra/gpudirect-tcpxo/tcpgpudmarxd-dev:v1.0.10
NCCL_PLUGIN_IMAGE=us-docker.pkg.dev/gce-ai-infra/gpudirect-tcpxo/nccl-plugin-gpudirecttcpx-dev:v1.0.7
RXDM_IMAGE=us-docker.pkg.dev/gce-ai-infra/gpudirect-tcpxo/tcpgpudmarxd-dev:v1.0.13
RXDM_CONTAINER=receive-datapath-manager-"${SLURM_JOB_ID}"
if [[ ${SLURM_SCRIPT_CONTEXT} == "prolog_slurmd" ]]; then
docker container list --filter "name=receive-datapath-manager-*" --quiet | xargs --no-run-if-empty docker container stop
Expand All @@ -44,7 +44,7 @@ if [[ ${SLURM_SCRIPT_CONTEXT} == "prolog_slurmd" ]]; then
--network=host \
-v /var/lib:/var/lib \
${NCCL_PLUGIN_IMAGE} \
install
install --install-nccl

# Modify NCCL env vars for Debian 12.
# /var/lib/tcpxo/lib64/nccl-env-profile.sh is written by the nccl-installer container
Expand Down

0 comments on commit 91ab62f

Please sign in to comment.