-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheck_oom
85 lines (69 loc) · 3.19 KB
/
check_oom
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/env python3
#-------------------------------------------------------------------------------
import argparse
import sys
import os
import subprocess
import platform
import json
#-------------------------------------------------------------------------------
# Module-level debug flag: read through candebug(), written through setcandebug().
_candebug = False
def candebug() -> bool:
    """Return the current value of the module-level debug flag."""
    # Reading a module global needs no ``global`` declaration.
    return _candebug
def setcandebug(value: bool) -> None:
    """Store *value* in the module-level debug flag.

    Args:
        value: New state of the flag; it is stored as given.
    """
    global _candebug
    _candebug = value
def infomsg(msg):
    """Print *msg* to stdout when debugging is enabled.

    Args:
        msg: Any printable object; it is passed straight to ``print``.
    """
    # Use truthiness rather than ``== True`` so that any truthy flag value
    # enables the debug output.
    if candebug():
        print(msg, flush=True)
def exitnagios(status, message):
    """Print a Nagios status line and terminate the process.

    The line printed is ``"<status>: <message>"``.

    Args:
        status: Status label, normally one of "OK", "WARNING", "CRITICAL"
            or "UNKNOWN".
        message: Text printed after the status label (may carry perfdata
            after a ``|``).

    Raises:
        SystemExit: Always. The exit code is 0, 1, 2 or 3 for the four
            labels above; any other label exits with 3.
    """
    exit_codes = {"OK": 0, "WARNING": 1, "CRITICAL": 2, "UNKNOWN": 3}
    # The plugin API only defines return codes 0-3, so an unrecognised label
    # is reported as UNKNOWN (3) instead of an out-of-range code.
    exitcode = exit_codes.get(status, 3)
    print(status + ": " + message, flush=True)
    sys.exit(exitcode)
#-------------------------------------------------------------------------------
def oom_check(criticallevel, warninglevel, short=False):
    """Count OOM-killer kills in the kernel journal and exit with a Nagios status.

    Runs ``journalctl`` on the kernel messages of the current boot, counts the
    entries reporting a killed process and terminates the program through
    ``exitnagios``.

    Args:
        criticallevel: CRITICAL threshold (int or numeric string). The status
            is raised when the count is strictly greater than this value.
            ``""`` or ``None`` disables the critical check.
        warninglevel: WARNING threshold, same rules as ``criticallevel``.
        short: When true, only entries from the last 24 hours are examined.
    """
    # Validate thresholds up front so a typo yields UNKNOWN, not a traceback.
    thresholds = {}
    for label, raw in (("critical", criticallevel), ("warning", warninglevel)):
        if raw is None or raw == "":
            thresholds[label] = None
            continue
        try:
            thresholds[label] = int(raw)
        except (TypeError, ValueError):
            exitnagios("UNKNOWN", "invalid " + label + " threshold " + repr(raw) + " | counter=-1")

    cmdline = ["/usr/bin/journalctl", "--utc", "--no-pager", "-kq", "-o", "json", "-b", "-0", "--output-fields=MESSAGE"]
    if short:
        # Relative timestamp: restrict the query to the last 24 hours.
        cmdline.append("--since=-24h")
    infomsg(cmdline)

    try:
        completedproc = subprocess.run(cmdline, capture_output=True)
    except OSError as exc:
        # journalctl missing or not executable.
        infomsg(exc)
        exitnagios("CRITICAL", "error retrieving the information from journalctl | counter=-1")

    output = completedproc.stdout.decode("utf-8", errors="replace").strip()
    stderr_text = completedproc.stderr.decode("utf-8", errors="replace").strip()
    infomsg(output)
    if completedproc.returncode != 0:
        infomsg(stderr_text)
        exitnagios("CRITICAL", "error retrieving the information from journalctl | counter=-1")

    counter = 0
    for line in output.splitlines():
        try:
            parsed = json.loads(line)
        except ValueError:
            # Not a JSON record; nothing to inspect.
            continue
        infomsg(parsed)
        message = parsed.get("MESSAGE") if isinstance(parsed, dict) else None
        if isinstance(message, list):
            # journalctl emits an array instead of a string for some fields
            # (e.g. raw byte values when the text is not valid UTF-8).
            try:
                message = bytes(message).decode("utf-8", errors="replace")
            except (TypeError, ValueError):
                message = " ".join(str(part) for part in message)
        # The kernel logs "Killed process <pid> (<name>)" with a capital K,
        # so the comparison has to be case-insensitive.
        if isinstance(message, str) and "killed process" in message.lower():
            counter += 1

    perfdata = " | counter=" + str(counter)
    critical = thresholds["critical"]
    warning = thresholds["warning"]
    if critical is not None and counter > critical:
        exitnagios("CRITICAL", str(counter) + " processes was killed by OOM" + perfdata)
    elif warning is not None and counter > warning:
        exitnagios("WARNING", str(counter) + " processes was killed by OOM" + perfdata)
    elif counter:
        # Kills were seen but stay within the thresholds: say so and report
        # the real count rather than claiming there was no activity.
        exitnagios("OK", str(counter) + " processes was killed by OOM (within thresholds)" + perfdata)
    else:
        exitnagios("OK", "No OOM killer activity found in journalctl" + perfdata)
#-------------------------------------------------------------------------------
def main():
    """Parse the command line and run the OOM check.

    Options:
        -w/--warning   warning threshold (default: unset)
        -c/--critical  critical threshold (default: "1")
        -s/--short     passed to ``oom_check`` as its ``short`` argument
        -d/--debug     enable debug output
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-w", "--warning", default="", help="Number of occurrences to trigger a warning")
    parser.add_argument("-c", "--critical", default="1", help="Number of occurrences to trigger a critical")
    parser.add_argument("-s", "--short", action="store_true", help="If this option is specified, check ignores dmesg OOM problems older than 24 hours")
    # "-d" is the typeable short form; "-®" is kept so existing invocations
    # that use the original spelling keep working.
    parser.add_argument("-d", "-®", "--debug", action="store_true", dest="debug", default=False, help="be more verbose")
    args = parser.parse_args()
    setcandebug(args.debug)
    oom_check(args.critical, args.warning, args.short)
# Run the check only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
#-------------------------------------------------------------------------------