Skip to content

Commit

Permalink
Merge pull request #27 from OguzPastirmaci/main
Browse files Browse the repository at this point in the history
Update BM.GPU.H100.8-nccl-test.yaml
  • Loading branch information
arnaudfroidmont authored Jun 25, 2024
2 parents 5b790c6 + 60473d1 commit b9e38d7
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions manifests/BM.GPU.H100.8-nccl-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,19 +51,19 @@ spec:
-x IB_RX_QUEUE_LEN=8192 \
-x NCCL_SOCKET_IFNAME=eth0 \
-x NCCL_IGNORE_CPU_AFFINITY=1 \
/workspace/nccl-tests/build/alltoall_perf -b 8 -f 2 -g 1 -e 4G -c 1
/workspace/nccl-tests/build/all_reduce_perf -b 8 -f 2 -g 1 -e 4G -c 1
while :; do { [[ $exit ]] && break; }; sleep 1; done
ports:
- { name: mpijob-port, containerPort: 2222, protocol: TCP }
image: ord.ocir.io/hpc_limited_availability/nccl-tests:pytorch-24.02-nccl-2.20.5-1
name: mpimaster
resources:
limits:
ephemeral-storage: 32Gi
ephemeral-storage: 16Gi
requests:
cpu: 128
ephemeral-storage: 32Gi
memory: 512Gi
cpu: 4
ephemeral-storage: 16Gi
memory: 1Gi
securityContext:
privileged: true
capabilities:
Expand Down Expand Up @@ -121,4 +121,4 @@ spec:
- { key: nvidia.com/gpu, operator: Exists }
volumes:
- { name: devinf, hostPath: { path: /dev/infiniband }}
- { name: shm, emptyDir: { medium: Memory, sizeLimit: 32Gi }}
- { name: shm, emptyDir: { medium: Memory, sizeLimit: 32Gi }}

0 comments on commit b9e38d7

Please sign in to comment.