Skip to content

Commit dfb3fbe

Browse files
authored
fix(num_devices): fix num_shard/num device auto compute when NVIDIA_VISIBLE_DEVICES == "all" or "void" (#3346)
* fix(num_devices): fix num_shard/num devices auto compute when NVIDIA_VISIBLE_DEVICES == "all": the computed num_shards was always 1 in this case, no matter what
* fix(num_devices): make TGI shard auto compute compliant with nvidia-container-toolkit in cdi mode
1 parent 3498847 commit dfb3fbe

File tree

1 file changed

+12
-8
lines changed

1 file changed

+12
-8
lines changed

launcher/src/main.rs

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,11 @@ fn vram_maximum(
7272
if let Some(vram) = available.checked_sub(model) {
7373
let tokens_allowed = vram / token_vram;
7474
tracing::debug!(
75-
"Available vram {}: model needs {}, every tokens requires {}, maximum allocatable tokens {tokens_allowed}",
76-
human_size(available, "B"),
77-
human_size(model, "B"),
78-
human_size(token_vram, "B"),
79-
);
75+
"Available vram {}: model needs {}, every tokens requires {}, maximum allocatable tokens {tokens_allowed}",
76+
human_size(available, "B"),
77+
human_size(model, "B"),
78+
human_size(token_vram, "B"),
79+
);
8080
Some(tokens_allowed)
8181
} else {
8282
tracing::warn!(
@@ -1264,7 +1264,12 @@ fn num_cuda_devices() -> Option<usize> {
12641264
Ok(devices) => devices,
12651265
Err(_) => match env::var("NVIDIA_VISIBLE_DEVICES") {
12661266
Ok(devices) => {
1267-
if devices.trim() == "all" {
1267+
// When the nvidia container runtime is in (jit-)cdi mode (since 1.14), NVIDIA_VISIBLE_DEVICES
1268+
// is always set, even when the user did not specify it
1269+
// nvidia container runtime default mode switched from legacy to cdi mode from 1.18 on
1270+
// Let's handle the void case as all here
1271+
// See: https://github.com/NVIDIA/nvidia-container-toolkit
1272+
if ["all", "void"].contains(&devices.trim()) {
12681273
// Count the number of all GPUs via nvidia-smi
12691274
let output = Command::new("nvidia-smi")
12701275
.args(["--query-gpu=uuid", "--format=csv,noheader"])
@@ -1274,8 +1279,7 @@ fn num_cuda_devices() -> Option<usize> {
12741279
String::from_utf8_lossy(&output.stdout)
12751280
.lines()
12761281
.filter(|line| !line.trim().is_empty())
1277-
.count()
1278-
.to_string()
1282+
.collect::<Vec<_>>().join(",")
12791283
} else {
12801284
devices
12811285
}

0 commit comments

Comments
 (0)