diff --git a/src/device/readers/nvidia.rs b/src/device/readers/nvidia.rs index 86e5c0b..1f01c55 100644 --- a/src/device/readers/nvidia.rs +++ b/src/device/readers/nvidia.rs @@ -18,14 +18,13 @@ use crate::device::process_list::{get_all_processes, merge_gpu_processes}; use crate::device::readers::common_cache::{DetailBuilder, DeviceStaticInfo, MAX_DEVICES}; use crate::device::types::{GpuInfo, ProcessInfo}; use crate::device::GpuReader; -use crate::utils::get_hostname; +use crate::utils::{get_hostname, with_global_system}; use chrono::Local; use nvml_wrapper::enums::device::UsedGpuMemory; use nvml_wrapper::error::NvmlError; use nvml_wrapper::{cuda_driver_version_major, cuda_driver_version_minor, Nvml}; use std::collections::{HashMap, HashSet}; use std::sync::{Mutex, OnceLock}; -use sysinfo::System; // Global status for NVML error messages static NVML_STATUS: Mutex> = Mutex::new(None); @@ -39,8 +38,6 @@ pub struct NvidiaGpuReader { device_static_info: OnceLock>, /// Cached NVML handle (initialized once, reused across calls) nvml: Mutex>, - /// Cached System instance for process info (reused across calls) - system: Mutex, } impl Default for NvidiaGpuReader { @@ -56,7 +53,6 @@ impl NvidiaGpuReader { cuda_version: OnceLock::new(), device_static_info: OnceLock::new(), nvml: Mutex::new(Nvml::init().ok()), - system: Mutex::new(System::new()), } } @@ -231,20 +227,21 @@ impl GpuReader for NvidiaGpuReader { fn get_process_info(&self) -> Vec { use sysinfo::{ProcessRefreshKind, ProcessesToUpdate, UpdateKind}; - // Reuse the cached System instance - let mut system = self.system.lock().unwrap_or_else(|e| e.into_inner()); - system.refresh_processes_specifics( - ProcessesToUpdate::All, - true, - ProcessRefreshKind::everything().with_user(UpdateKind::Always), - ); - system.refresh_memory(); - // Get GPU processes and PIDs using cached NVML handle let (gpu_processes, gpu_pids) = self.get_gpu_processes_cached(); - // Get all system processes - let mut all_processes = get_all_processes(&system, &gpu_pids); + // Use global system instance to avoid file descriptor leak + let mut all_processes = with_global_system(|system| { + system.refresh_processes_specifics( + ProcessesToUpdate::All, + true, + ProcessRefreshKind::everything().with_user(UpdateKind::Always), + ); + system.refresh_memory(); + + // Get all system processes + get_all_processes(system, &gpu_pids) + }); // Merge GPU information into the process list merge_gpu_processes(&mut all_processes, gpu_processes); diff --git a/src/device/readers/nvidia_jetson.rs b/src/device/readers/nvidia_jetson.rs index 49e7efd..4bbf9fb 100644 --- a/src/device/readers/nvidia_jetson.rs +++ b/src/device/readers/nvidia_jetson.rs @@ -17,12 +17,11 @@ use crate::device::process_list::{get_all_processes, merge_gpu_processes}; use crate::device::readers::common_cache::{DetailBuilder, DeviceStaticInfo}; use crate::device::types::{GpuInfo, ProcessInfo}; use crate::device::GpuReader; -use crate::utils::{get_hostname, hz_to_mhz, millicelsius_to_celsius}; +use crate::utils::{get_hostname, hz_to_mhz, millicelsius_to_celsius, with_global_system}; use chrono::Local; use std::collections::HashSet; use std::fs; use std::sync::OnceLock; -use sysinfo::System; pub struct NvidiaJetsonGpuReader { /// Cached static device information (fetched only once) @@ -171,22 +170,23 @@ impl GpuReader for NvidiaJetsonGpuReader { } fn get_process_info(&self) -> Vec { - // Create a lightweight system instance and only refresh what we need use sysinfo::{ProcessRefreshKind, ProcessesToUpdate, UpdateKind}; - let mut system = System::new(); - // Refresh processes with user information - system.refresh_processes_specifics( - ProcessesToUpdate::All, - true, - ProcessRefreshKind::everything().with_user(UpdateKind::Always), - ); - system.refresh_memory(); // Get GPU processes and PIDs let (gpu_processes, gpu_pids) = get_gpu_processes(); - // Get all system processes - let mut all_processes = get_all_processes(&system, &gpu_pids); + // Use global system instance to avoid file descriptor leak + let mut all_processes = with_global_system(|system| { + system.refresh_processes_specifics( + ProcessesToUpdate::All, + true, + ProcessRefreshKind::everything().with_user(UpdateKind::Always), + ); + system.refresh_memory(); + + // Get all system processes + get_all_processes(system, &gpu_pids) + }); // Merge GPU information into the process list merge_gpu_processes(&mut all_processes, gpu_processes); @@ -258,41 +258,42 @@ fn get_gpu_processes() -> (Vec, HashSet) { "cuda", ]; - let mut system = System::new(); - system.refresh_memory(); - for (pid, process) in system.processes() { - let process_name = process.name().to_string_lossy().to_lowercase(); - for gpu_name in &gpu_process_names { - if process_name.contains(gpu_name) { - let pid_u32 = pid.as_u32(); - gpu_pids.insert(pid_u32); - - gpu_processes.push(ProcessInfo { - device_id: 0, - device_uuid: "JetsonGPU".to_string(), - pid: pid_u32, - process_name: String::new(), // Will be filled by sysinfo - used_memory: 0, // Can't determine GPU memory usage without nvidia-smi - cpu_percent: 0.0, // Will be filled by sysinfo - memory_percent: 0.0, // Will be filled by sysinfo - memory_rss: 0, // Will be filled by sysinfo - memory_vms: 0, // Will be filled by sysinfo - user: String::new(), // Will be filled by sysinfo - state: String::new(), // Will be filled by sysinfo - start_time: String::new(), // Will be filled by sysinfo - cpu_time: 0, // Will be filled by sysinfo - command: String::new(), // Will be filled by sysinfo - ppid: 0, // Will be filled by sysinfo - threads: 0, // Will be filled by sysinfo - uses_gpu: true, - priority: 0, // Will be filled by sysinfo - nice_value: 0, // Will be filled by sysinfo - gpu_utilization: 0.0, // Can't determine per-process GPU utilization - }); - break; + with_global_system(|system| { + system.refresh_memory(); + for (pid, process) in system.processes() { + let process_name = process.name().to_string_lossy().to_lowercase(); + for gpu_name in &gpu_process_names { + if process_name.contains(gpu_name) { + let pid_u32 = pid.as_u32(); + gpu_pids.insert(pid_u32); + + gpu_processes.push(ProcessInfo { + device_id: 0, + device_uuid: "JetsonGPU".to_string(), + pid: pid_u32, + process_name: String::new(), // Will be filled by sysinfo + used_memory: 0, // Can't determine GPU memory usage without nvidia-smi + cpu_percent: 0.0, // Will be filled by sysinfo + memory_percent: 0.0, // Will be filled by sysinfo + memory_rss: 0, // Will be filled by sysinfo + memory_vms: 0, // Will be filled by sysinfo + user: String::new(), // Will be filled by sysinfo + state: String::new(), // Will be filled by sysinfo + start_time: String::new(), // Will be filled by sysinfo + cpu_time: 0, // Will be filled by sysinfo + command: String::new(), // Will be filled by sysinfo + ppid: 0, // Will be filled by sysinfo + threads: 0, // Will be filled by sysinfo + uses_gpu: true, + priority: 0, // Will be filled by sysinfo + nice_value: 0, // Will be filled by sysinfo + gpu_utilization: 0.0, // Can't determine per-process GPU utilization + }); + break; + } } } - } + }); } (gpu_processes, gpu_pids) diff --git a/src/device/readers/tenstorrent.rs b/src/device/readers/tenstorrent.rs index cd21269..f9ce6e8 100644 --- a/src/device/readers/tenstorrent.rs +++ b/src/device/readers/tenstorrent.rs @@ -16,7 +16,7 @@ use crate::device::process_list::{get_all_processes, merge_gpu_processes}; use crate::device::readers::common_cache::{DetailBuilder, DeviceStaticInfo}; use crate::device::types::{GpuInfo, ProcessInfo}; use crate::device::GpuReader; -use crate::utils::get_hostname; +use crate::utils::{get_hostname, with_global_system}; use all_smi_luwen_core; use all_smi_luwen_if::chip::{Chip, ChipImpl, Telemetry}; use all_smi_luwen_if::ChipDetectOptions; @@ -25,7 +25,6 @@ use chrono::Local; use once_cell::sync::Lazy; use std::collections::{HashMap, HashSet}; use std::sync::Mutex; -use sysinfo::System; /// Collection method for Tenstorrent NPU metrics #[derive(Debug, Clone, Copy)] @@ -196,21 +195,23 @@ impl GpuReader for TenstorrentReader { } fn get_process_info(&self) -> Vec { - // Create system instance and refresh processes use sysinfo::{ProcessRefreshKind, ProcessesToUpdate, UpdateKind}; - let mut system = System::new(); - system.refresh_processes_specifics( - ProcessesToUpdate::All, - true, - ProcessRefreshKind::everything().with_user(UpdateKind::Always), - ); - system.refresh_memory(); // Get NPU processes (currently empty for Tenstorrent) let (npu_processes, npu_pids) = self.get_npu_processes(); - // Get all system processes - let mut all_processes = get_all_processes(&system, &npu_pids); + // Use global system instance to avoid file descriptor leak + let mut all_processes = with_global_system(|system| { + system.refresh_processes_specifics( + ProcessesToUpdate::All, + true, + ProcessRefreshKind::everything().with_user(UpdateKind::Always), + ); + system.refresh_memory(); + + // Get all system processes + get_all_processes(system, &npu_pids) + }); // Merge NPU information merge_gpu_processes(&mut all_processes, npu_processes);