forked from serycjon/gpu-status
-
Notifications
You must be signed in to change notification settings - Fork 0
/
gpu-status.py
182 lines (154 loc) · 5.75 KB
/
gpu-status.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
#!/usr/bin/python
import subprocess as sp
import xml.etree.ElementTree
import os
import pwd
import argparse
import psutil
import socket
import getpass
# Login name of the user running this script; get_status() compares each GPU
# process owner against it to highlight the caller's own processes.
my_username = getpass.getuser()
# ANSI SGR escape sequences used to colour terminal output.
# Each pair is (normal intensity "0;NN", bold intensity "1;NN").
red_c, red_cb = '\x1b[0;31m', '\x1b[1;31m'
green_c, green_cb = '\x1b[0;32m', '\x1b[1;32m'
yellow_c, yellow_cb = '\x1b[0;33m', '\x1b[1;33m'
blue_c, blue_cb = '\x1b[0;34m', '\x1b[1;34m'
magenta_c, magenta_cb = '\x1b[0;35m', '\x1b[1;35m'
cyan_c, cyan_cb = '\x1b[0;36m', '\x1b[1;36m'

# Normal-intensity colour 37, used for process owners other than the caller.
gray_c = '\x1b[0;37m'
def owner(pid):
    """Return the login name of the user owning process ``pid``.

    The owner is read from the ownership of the ``/proc/<pid>`` directory,
    so a ``/proc`` filesystem is required for a real answer.

    Args:
        pid: Process ID to look up.

    Returns:
        The username, or ``'unknown'`` when ``/proc/<pid>`` cannot be
        stat-ed or its UID has no entry in the password database.
    """
    try:
        # The UID owning /proc/<pid> identifies the process owner.
        uid = os.stat("/proc/{}".format(pid)).st_uid
        # Translate the numeric UID into a login name.
        return pwd.getpwuid(uid)[0]
    except (OSError, KeyError):
        # OSError: the process (or /proc itself) does not exist.
        # KeyError: the UID is not present in the password database.
        # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
        return 'unknown'
def get_status():
    """Collect per-GPU statistics and the processes using each GPU.

    ``nvidia-smi`` is invoked twice: once with ``-q -x`` for the XML report
    (GPU utilization and per-process data) and once with ``--query-gpu`` for
    CSV values (index, memory and temperature). Entries of the two outputs
    are matched by position.

    Returns:
        dict mapping the GPU index (int) to a dict with the keys

        * ``gpu_util``  - utilization as a fraction (percentage / 100)
        * ``mem_free``  - leading number of the ``memory.free`` CSV field
        * ``mem_total`` - leading number of the ``memory.total`` CSV field
        * ``gpu_temp``  - leading number of the ``temperature.gpu`` CSV field
        * ``proc``      - list of dicts with ``user`` (colourised owner
          name), ``mem``, ``pid`` and ``command`` (empty string when the
          command line could not be read)
    """
    color_out = '\x1b[0m'

    smi_cmd = ['nvidia-smi', '-q', '-x']  # get XML output
    proc = sp.Popen(smi_cmd, stdout=sp.PIPE, stderr=sp.PIPE)
    xml_stdout, _ = proc.communicate()

    gpu_info_cmd = ['nvidia-smi',
                    '--query-gpu=index,memory.total,memory.used,memory.free,utilization.gpu,temperature.gpu',
                    '--format=csv,noheader']
    proc = sp.Popen(gpu_info_cmd, stdout=sp.PIPE, stderr=sp.PIPE)
    gpu_stdout, _ = proc.communicate()

    # communicate() yields bytes on Python 3; decode so the text can be split
    # on str separators (also valid on Python 2, where it yields unicode).
    gpu_text = gpu_stdout.decode('utf-8')

    fields = ('index', 'mem_total', 'mem_used', 'mem_free',
              'gpu_util', 'gpu_temp')
    # One dict per non-blank CSV line, keyed by the queried field names.
    gpu_infos = [dict(zip(fields, line.split(', ')))
                 for line in gpu_text.strip().split('\n')
                 if line.strip()]

    # The XML report is parsed from the raw bytes so the parser handles
    # the encoding itself.
    root = xml.etree.ElementTree.fromstring(xml_stdout)

    status = {}
    for position, gpu in enumerate(root.findall('gpu')):
        info = gpu_infos[position]
        index = int(info['index'])

        gpu_util = gpu.find('utilization').find('gpu_util').text
        gpu_stat = {
            # Values look like "<number> <unit>"; keep only the number.
            'gpu_util': float(gpu_util.split()[0]) / 100,
            'mem_free': int(info['mem_free'].split()[0]),
            'mem_total': int(info['mem_total'].split()[0]),
            'gpu_temp': int(info['gpu_temp'].split()[0]),
        }

        gpu_procs = []
        procs = gpu.find('processes')
        # Guard against a report that has no <processes> element for a GPU.
        proc_infos = procs.iter('process_info') if procs is not None else []
        for procinfo in proc_infos:
            pid = int(procinfo.find('pid').text)
            mem_num = int(procinfo.find('used_memory').text.split()[0])

            user = owner(pid)
            # Highlight the caller's own processes, dim everybody else's.
            user_color = blue_cb if user == my_username else gray_c

            entry = {'user': user_color + user + color_out,
                     'mem': mem_num,
                     'pid': pid,
                     # Always present so verbose printing never hits KeyError.
                     'command': '',
                     }
            try:
                entry['command'] = ' '.join(psutil.Process(pid).cmdline())
            except psutil.Error:
                # Best effort: the process may have exited or be inaccessible.
                pass
            gpu_procs.append(entry)

        gpu_stat['proc'] = gpu_procs
        status[index] = gpu_stat

    return status
def get_color_memory(value, max_value=1.0):
    """Choose a colour escape for ``value`` expressed as a share of ``max_value``.

    The share is converted to a percentage and mapped to a colour:
    above 95 -> ``green_cb``, above 80 -> ``blue_cb``, above 40 ->
    ``yellow_cb``, anything else -> ``red_cb``.

    Args:
        value: Quantity to rate (converted with ``float``).
        max_value: Quantity corresponding to 100 percent.

    Returns:
        One of the module's bold colour escape strings.
    """
    ratio = float(value) / max_value
    percent = 100 * ratio

    # Thresholds are checked from highest to lowest; the first one exceeded wins.
    thresholds = ((95, green_cb), (80, blue_cb), (40, yellow_cb))
    for lower_bound, color in thresholds:
        if percent > lower_bound:
            return color
    return red_cb
def pretty_print(status, verbose=False):
    """Print the GPU summary table followed by the per-process table.

    Args:
        status: dict mapping a GPU index (int) to a dict with the keys
            ``mem_free``, ``mem_total``, ``gpu_util``, ``gpu_temp`` and
            ``proc`` (a list of dicts with ``user``, ``pid``, ``mem`` and
            optionally ``command``).
        verbose: When true, each process row is followed by the process
            command line and a blank line.

    GPUs are printed in ascending index order. GPUs without processes are
    omitted from the second table.
    """
    color_out = '\x1b[0m'
    # items() works on both Python 2 and 3 (iteritems() is Python 2 only);
    # sorting makes the row order deterministic.
    gpus = sorted(status.items())

    line_separator = '+-----+------+--------------------+----------+'
    print(line_separator)
    print('| GPU | TEMP | Memory-Usage | GPU-Util |')
    print('|=====+======+====================+==========|')
    for gpu_id, stats in gpus:
        # GPU Memory: coloured by the share of memory that is still free.
        mem_free = stats['mem_free']
        mem_total = stats['mem_total']
        mem_color = get_color_memory(mem_free, mem_total)
        # GPU Proc: coloured by the idle share (1 - utilization).
        gpu_util = stats['gpu_util']
        gpu_color = get_color_memory(1.0 - gpu_util)
        # GPU Temp
        temp = stats['gpu_temp']
        header = '| {:2d} | {:3d}C | {}{:6d}{} /{:6d} MiB | {}{:7d}{}% |'.format(
            gpu_id,
            temp,
            mem_color,
            mem_free,
            color_out,
            mem_total,
            gpu_color,
            int(100 * gpu_util),
            color_out)
        print(header)
        print(line_separator)

    line_separator = '+-----+---------------------+---------+----------------+'
    print('')
    print(line_separator)
    print('| GPU | PROCESS OWNER | PID | MEMORY |')
    print('|=====+=====================+=========+================|')
    for gpu_id, stats in gpus:
        if not stats['proc']:
            # Nothing running on this GPU: skip its rows and separator.
            continue
        for proc in stats['proc']:
            # The owner field is wider than its column because the name
            # carries invisible colour escape sequences.
            line = '| {:2d} | {:30s} | {:7d} | {:10} MiB |'.format(
                gpu_id, proc['user'], proc['pid'], proc['mem'])
            print(line)
            if verbose:
                # The command may be absent when it could not be determined.
                print(proc.get('command', ''))
                print('')
        print(line_separator)
if __name__ == '__main__':
    # Script entry point: read the command-line flags, then print the report.
    cli = argparse.ArgumentParser()
    cli.add_argument('-v', action='store_true', help='show commands')
    show_commands = cli.parse_args().v

    # Site-specific reminder shown only on the host named 'halmos'.
    if socket.gethostname() == 'halmos':
        print('!!! Halmos has GPU 0 and GPU 3 switched !!!\n')

    pretty_print(get_status(), show_commands)