Skip to content

Commit 0efdf39

Browse files
author
Scott Wales
committed
Add qmonitor
1 parent 89629cd commit 0efdf39

File tree

3 files changed

+184
-38
lines changed

3 files changed

+184
-38
lines changed

Diff for: qmonitor

+97
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
#!/g/data/hh5/public/apps/nci_scripts/python-analysis3
2+
# Copyright 2020 Scott Wales
3+
# author: Scott Wales <[email protected]>
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
"""
18+
Monitor a PBS queue job
19+
"""
20+
21+
import argparse
22+
import tqdm
23+
import subprocess
24+
import json
25+
import time
26+
import pandas
27+
import importlib
28+
from qtools import qstat, decode_bytes
29+
30+
31+
def metrics(stat):
    """
    Compute usage figures for one job from its qstat record

    Args:
        stat: A single job's entry from the qstat output. ``job_state`` is
            always read; for running jobs the ``Resource_List`` and
            ``resources_used`` entries are read as well.

    Returns:
        Tuple ``(cpu, mem, wall)``: cpu time as a percentage of
        ``walltime * ncpus``, memory used as a percentage of the memory
        request, and walltime used in whole seconds. All three are 0
        unless ``job_state`` is 'R'.
    """
    cpu = 0
    mem = 0
    wall = 0

    if stat['job_state'] == 'R':
        requested = stat['Resource_List']
        used = stat['resources_used']

        mem_request = decode_bytes(requested['mem'])
        mem_used = decode_bytes(used['mem'])
        mem = mem_used / mem_request * 100

        cpu_time = pandas.to_timedelta(used['cput'])
        wall_used = pandas.to_timedelta(used['walltime'])
        ncpus = requested['ncpus']

        # Timedelta.seconds only holds the sub-day remainder, so use
        # total_seconds() to keep jobs running longer than a day correct
        wall_seconds = wall_used.total_seconds()

        # A zero walltime would make the efficiency ratio divide by zero;
        # leave cpu at 0 in that case
        if wall_seconds > 0:
            cpu = cpu_time / wall_used / ncpus * 100

        wall = int(wall_seconds)

    return cpu, mem, wall
50+
51+
52+
def monitor(jobid):
    """
    Show live CPU, memory and walltime bars for a PBS job

    Queries qstat for the job, then redraws three tqdm bars every 10
    seconds for as long as the job state is 'Q' or 'R'. The bars are
    closed when the loop ends, including when it is interrupted.

    Args:
        jobid: Job id passed on to qstat
    """
    # NOTE(review): show_finished adds '-x' to the qstat call. Without it
    # qstat is expected to fail once the job has finished, which check=True
    # turns into an exception instead of a clean exit from the loop below -
    # confirm against the PBS installation in use.
    jobid, stat = list(qstat([jobid], show_finished=True).items())[0]

    # total_seconds() rather than .seconds, which would truncate requests
    # of 24 hours or more to their sub-day remainder
    wall_request = int(
        pandas.to_timedelta(stat['Resource_List']['walltime']).total_seconds()
    )

    print(f"{jobid} monitor")

    bar_format = '{l_bar}{bar}'
    cpu = tqdm.tqdm(unit="%", desc="CPU", total=100.0, position=0, bar_format=bar_format, leave=True)
    mem = tqdm.tqdm(unit="%", desc="MEM", total=100.0, position=1, bar_format=bar_format, leave=True)
    wall = tqdm.tqdm(unit='s', desc="TIME", total=wall_request, position=2, bar_format=bar_format, leave=True)

    try:
        while stat["job_state"] in ["Q", "R"]:
            c, m, w = metrics(stat)

            # The bars show an absolute level rather than accumulated
            # progress, so each one is reset before the new value is set
            for progress, value in ((cpu, c), (mem, m), (wall, w)):
                progress.reset()
                progress.update(value)
                progress.refresh()

            time.sleep(10)
            stat = list(qstat([jobid], show_finished=True).values())[0]
    finally:
        # Close the bars even on Ctrl-C or a qstat failure so the terminal
        # is left in a sane state
        cpu.close()
        mem.close()
        wall.close()
86+
87+
88+
def main():
    """
    Command line entry point: read the job id argument and monitor that job
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("jobid")
    options = cli.parse_args()

    monitor(options.jobid)


if __name__ == "__main__":
    main()

Diff for: qtools.py

+86
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
#!/g/data/hh5/public/apps/nci_scripts/python-analysis3
2+
# Copyright 2020 Scott Wales
3+
# author: Scott Wales <[email protected]>
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
18+
import typing as T
19+
import pandas
20+
import re
21+
import subprocess
22+
import json
23+
24+
def decode_bytes(s):
    """
    Convert a formatted size to number of bytes

    Accepts strings made of an integer, an optional binary multiplier
    (k, m, g, t or p) and a trailing 'b', e.g. ``"512b"``, ``"4kb"`` or
    ``"2gb"``.

    Args:
        s: The size string. Null values (as judged by ``pandas.isnull``)
            are returned unchanged.

    Returns:
        The size in bytes as an int, or ``s`` itself when it is null.

    Raises:
        ValueError: If ``s`` does not end in 'b', uses an unknown
            multiplier, or has no valid integer part.
    """
    if pandas.isnull(s):
        return s

    scales = {
        'k': 1024,
        'm': 1024 ** 2,
        'g': 1024 ** 3,
        't': 1024 ** 4,
        'p': 1024 ** 5,
    }
    if not s.endswith('b'):
        raise ValueError(f"{s} doesn't look like a size")

    scale = 1
    number = s[:-1]

    # A non-digit directly before the 'b' is the multiplier
    if number and not number[-1].isdigit():
        try:
            scale = scales[number[-1]]
        except KeyError:
            raise ValueError(f"{s} doesn't look like a size") from None
        number = number[:-1]

    if not number:
        # Nothing left to parse, e.g. the input was just "b" or "kb"
        raise ValueError(f"{s} doesn't look like a size")

    return int(number) * scale
45+
46+
47+
def clean_qstat_json(stream):
    """
    Clean up the improperly escaped JSON returned by qstat

    Every line of the form ``"key":"value"`` (optionally followed by a
    comma) has its value re-encoded with ``json.dumps`` so that characters
    such as embedded quotes and backslashes are escaped. All other lines
    are passed through unchanged, and the result is parsed as JSON.

    Args:
        stream: The full text output of ``qstat -F json``

    Returns:
        The parsed JSON document
    """
    # The key group stops at the first quote so the split happens at the
    # first '":"' on the line; a greedy key would swallow part of any
    # value that itself contains '":"' and produce invalid JSON
    string_entry_re = re.compile(r'^\s*"(?P<key>[^"]+)":"(?P<value>.+)"(?P<comma>,?)$')

    lines = []

    for line in stream.splitlines():
        match = string_entry_re.match(line)
        if match is not None:
            fixed_value = json.dumps(match.group('value'))
            line = f'"{match.group("key")}":{fixed_value}{match.group("comma")}'

        lines.append(line)

    return json.loads(''.join(lines))
64+
65+
66+
def qstat(jobids: T.List[str], show_finished: bool=False):
    """
    Run qstat for the given jobs and return the "Jobs" entry of its output

    Args:
        jobids: Job ids appended to the qstat command line
        show_finished: When true, '-x' is added to the qstat arguments

    Returns:
        The value stored under "Jobs" in the cleaned-up JSON output

    Raises:
        subprocess.CalledProcessError: If qstat exits with a non-zero status
    """
    command = ["/opt/pbs/default/bin/qstat"]
    if show_finished:
        command.append('-x')
    command += ["-f", "-F", "json"]
    command.extend(jobids)

    completed = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        check=True,
        text=True,
    )

    parsed = clean_qstat_json(completed.stdout)
    return parsed["Jobs"]
85+
86+

Diff for: uqstat

+1-38
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import re
88
import sys
99
import pymunge
1010
import requests
11+
from qtools import *
1112

1213
# Base charge rates in SU/hour for 1 cpu with minimal memory
1314
charge_rates = {
@@ -37,25 +38,6 @@ base_mem = {
3738
'normalsl': 192/32,
3839
}
3940

40-
def decode_bytes(s):
41-
if pandas.isnull(s):
42-
return s
43-
44-
scales = {
45-
'k': 1024,
46-
}
47-
if not s.endswith('b'):
48-
raise Exception(f"{s} doesn't look like a size")
49-
50-
scale = 1
51-
s = s[:-1]
52-
53-
if not s[-1].isdigit():
54-
scale = scales[s[-1]]
55-
s = s[:-1]
56-
57-
return int(s) * scale
58-
5941

6042
def maybe_get(x, key):
6143
if pandas.isnull(x):
@@ -72,25 +54,6 @@ def get_list(s, lst, key):
7254
except AttributeError:
7355
return None
7456

75-
76-
def clean_qstat_json(stream):
77-
"""
78-
Clean up the improperly escaped JSON returned by qstat
79-
"""
80-
string_entry_re = re.compile(r'^\s*"(?P<key>.+)":"(?P<value>.+)"(?P<comma>,?)$')
81-
82-
lines = []
83-
84-
for line in stream.splitlines():
85-
match = string_entry_re.match(line)
86-
if match is not None:
87-
fixed_value = json.dumps(match.group('value'))
88-
line = f'"{match.group("key")}":{fixed_value}{match.group("comma")}'
89-
90-
lines.append(line)
91-
92-
return json.loads(''.join(lines))
93-
9457
def qstat_df(historical=False):
9558
command= ['/opt/pbs/default/bin/qstat','-f','-F','json']
9659

0 commit comments

Comments
 (0)