diff --git a/controller/config.go b/controller/config.go index 80009b5..e88acbe 100644 --- a/controller/config.go +++ b/controller/config.go @@ -25,6 +25,7 @@ const ( DEFAULT_WORKER_COUNT = 5 DEFAULT_ACTION_TIMEOUT = 60 DEFAULT_SEND_NOTIFICATIONS_DIR = "./scripts/notifications/" + DEFAULT_SEND_ATIONS_DIR = "./scripts/actions/" DEFAULT_SEND_MAIL_SCRIPT = "send_mail.py" DEFAULT_SEND_SMS_SCRIPT = "send_sms.py" DEFAULT_SEND_WECHAT_SCRIPT = "send_wechat.py" @@ -75,7 +76,9 @@ type Config struct { SEND_CALL_SCRIPT string //打电话的脚本 - SEND_NOTIFICATIONS_DIR string //自定义脚本的目录 + SEND_NOTIFICATIONS_DIR string //原有默认发送通知脚本目录 + + SEND_ATIONS_DIR string //自定义脚本的目录 SEND_INTERVAL int //发送消息间隔时间单位毫秒 @@ -97,7 +100,7 @@ func InitGlobalConfig() error { MYSQL_PASSWORD: cfg.MustValue(goconfig.DEFAULT_SECTION, "mysql_password", DEFAULT_MYSQL_PASSWORD), MYSQL_MAX_CONN: cfg.MustInt(goconfig.DEFAULT_SECTION, "mysql_max_conn", DEFAULT_MAX_CONN), MYSQL_MAX_IDLE_CONN: cfg.MustInt(goconfig.DEFAULT_SECTION, "mysql_max_idle_conn", DEFAULT_MAX_IDLE_CONN), - LOG_FILE: cfg.MustValue(goconfig.DEFAULT_SECTION, "log_file", "./logs/controller.log", DEFAULT_LOG_FILE), + LOG_FILE: cfg.MustValue(goconfig.DEFAULT_SECTION, "log_file", DEFAULT_LOG_FILE), LOG_EXPIRE_DAYS: cfg.MustInt(goconfig.DEFAULT_SECTION, "log_expire_days", DEFAULT_LOG_EXPIRE_DAYS), LOG_LEVEL: cfg.MustInt(goconfig.DEFAULT_SECTION, "log_level", DEFAULT_LOG_LEVEL), MAX_PACKET_SIZE: cfg.MustInt(goconfig.DEFAULT_SECTION, "max_packet_size", DEFAULT_MAX_PACKET_SIZE), @@ -113,6 +116,7 @@ func InitGlobalConfig() error { SEND_WECHAT_SCRIPT: cfg.MustValue(goconfig.DEFAULT_SECTION, "send_wechat_script", DEFAULT_SEND_WECHAT_SCRIPT), SEND_CALL_SCRIPT: cfg.MustValue(goconfig.DEFAULT_SECTION, "send_call_script", DEFAULT_SEND_CALL_SCRIPT), SEND_NOTIFICATIONS_DIR: cfg.MustValue(goconfig.DEFAULT_SECTION, "send_notifications_dir", DEFAULT_SEND_NOTIFICATIONS_DIR), + SEND_ATIONS_DIR: cfg.MustValue(goconfig.DEFAULT_SECTION, "send_ations_dir", 
DEFAULT_SEND_ATIONS_DIR), SEND_INTERVAL: cfg.MustInt(goconfig.DEFAULT_SECTION, "send_interval", DEFAULT_SEND_INTERVAL), SEND_SWITCH: cfg.MustBool(goconfig.DEFAULT_SECTION, "send_switch", DEFAULT_SEND_SWITCH), SEND_MAX: cfg.MustInt(goconfig.DEFAULT_SECTION, "send_max", DEFAULT_SEND_MAX), diff --git a/controller/controller.go b/controller/controller.go index d2f0b22..29623ec 100644 --- a/controller/controller.go +++ b/controller/controller.go @@ -271,7 +271,7 @@ func (this *Controller) sendToQueue(strategy_event_id int64, subject, content st params = append(params, user.Phone) this.callQueue.PutNoWait(&QueueTask{strategy_event_id, file_path, params, action, user}) case SEND_ACTION: - file_path := filepath.Join(GlobalConfig.SEND_NOTIFICATIONS_DIR, action.FilePath) + file_path := filepath.Join(GlobalConfig.SEND_ATIONS_DIR, action.FilePath) user_info, err := json.Marshal(user) if err != nil { lg.Error(err.Error()) @@ -552,6 +552,9 @@ func generateEvent(strategy_event *StrategyEvent, strategy_result *StrategyResul merged_strategy_event.ID = strategy_event.ID merged_strategy_event.Count = strategy_event.Count merged_strategy_event.Status = strategy_event.Status + merged_strategy_event.ProcessUser = strategy_event.ProcessUser + merged_strategy_event.ProcessComments = strategy_event.ProcessComments + merged_strategy_event.ProcessTime = strategy_event.ProcessTime } return diff --git a/controller/scripts/notifications/send_call.py b/controller/scripts/notifications/send_call.py index 843cda2..5bd95b8 100755 --- a/controller/scripts/notifications/send_call.py +++ b/controller/scripts/notifications/send_call.py @@ -1,51 +1,139 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- +#! 
/usr/bin/env python2 +# encoding:utf-8 +import httplib import json +import random +import time +import hashlib import argparse -import urllib2 -import sys -reload(sys) -sys.setdefaultencoding('utf-8') - -SERVICE_TOKEN = "" - - -def send(subject, content, receiver): - api = "http://www.linkedsee.com/alarm/channel" - data = dict() - data["receiver"] = receiver - data["type"] = "phone" - data["title"] = subject - data["content"] = content - try: - data = json.dumps(data, ensure_ascii=False).encode("utf-8") - req = urllib2.Request(api, data) - req.add_header("Servicetoken", SERVICE_TOKEN) - res = urllib2.urlopen(req) - result = json.loads(res.read(), encoding="utf-8") - if "status" in result and not result["status"]: - return False, json.dumps(result, ensure_ascii=False).encode("utf-8") - except Exception as e: - return False, e - - return True, json.dumps(result, ensure_ascii=False).encode("utf-8") +'''语音通知发送''' +class VoicePromptSender: + appid = 0 + appkey = "" + url = "/v5/tlsvoicesvr/sendvoiceprompt" + template = "电话报警:" + def __init__(self, appid, appkey): + self.appid = appid + self.appkey = appkey + self.util = SmsSenderUtil() + + """ 语音验证码发送 + Returns: + 请求包体 + { + "tel": { + "nationcode": "86", //国家码 + "mobile": "13788888888" //手机号码 + }, + "prompttype": 2, //语音类型,目前固定为2 + "promptfile": "语音内容文本", //通知内容,utf8编码,支持中文英文、数字及组合,需要和语音内容模版相匹配 + "playtimes": 2, //播放次数,可选,最多3次,默认2次 + "sig": "30db206bfd3fea7ef0db929998642c8ea54cc7042a779c5a0d9897358f6e9505", //app凭证,具体计算方式见下注 + "time": 1457336869, //unix时间戳,请求发起时间,如果和系统时间相差超过10分钟则会返回失败 + "ext": "" //用户的session内容,腾讯server回包中会原样返回,可选字段,不需要就填空。 + } + 应答包体 + { + "result": 0, //0表示成功,非0表示失败 + "errmsg": "OK", //result非0时的具体错误信息 + "ext": "", //用户的session内容,腾讯server回包中会原样返回 + "callid": "xxxx" //标识本次发送id,标识一次下发记录 + } + 参数说明: + nation_code: 国家码,如 86 为中国 + phone_number: 不带国家码的手机号 + msg: 信息内容,必须与申请的模板格式一致,否则将返回错误 + ext: 服务端原样返回的参数,可填空串 + """ + def send(self,nation_code, phone_number,playtimes,msg, ext): + rnd = 
self.util.get_random() + cur_time = self.util.get_cur_time() + + data = {} + tel = {"nationcode": nation_code, "mobile": phone_number} + data["tel"] = tel + data["prompttype"] = 2 + data["promptfile"] = msg + data["playtimes"] = playtimes + data["sig"] = hashlib.sha256("appkey=" + self.appkey + "&random=" + str(rnd) + + "&time=" + str(cur_time) + "&mobile=" + phone_number).hexdigest() + data["time"] = cur_time + data["ext"] = ext + + whole_url = self.url + "?sdkappid=" + str(self.appid) + "&random=" + str(rnd) + result = self.util.send_post_request("yun.tim.qq.com", whole_url, data) + obj = json.loads(result) + if obj["result"] == 0 and obj["errmsg"] == "OK": # "result" decodes to a JSON integer, never the string "0" + return True, result + else: + return False, result + +class SmsSenderUtil: + """ 工具类定义 """ + + def get_random(self): + return random.randint(100000, 999999) + + def get_cur_time(self): + return long(time.time()) + + def calculate_sig(self, appkey, rnd, cur_time, phone_numbers): + phone_numbers_string = phone_numbers[0] + for i in range(1, len(phone_numbers)): + phone_numbers_string += "," + phone_numbers[i] + return hashlib.sha256("appkey=" + appkey + "&random=" + str(rnd) + "&time=" + str(cur_time) + + "&mobile=" + phone_numbers_string).hexdigest() + + def calculate_sig_for_templ_phone_numbers(self, appkey, rnd, cur_time, phone_numbers): + """ 计算带模板和手机号列表的 sig """ + phone_numbers_string = phone_numbers[0] + for i in range(1, len(phone_numbers)): + phone_numbers_string += "," + phone_numbers[i] + return hashlib.sha256("appkey=" + appkey + "&random=" + str(rnd) + "&time=" + + str(cur_time) + "&mobile=" + phone_numbers_string).hexdigest() + + def calculate_sig_for_templ(self, appkey, rnd, cur_time, phone_number): + phone_numbers = [phone_number] + return self.calculate_sig_for_templ_phone_numbers(appkey, rnd, cur_time, phone_numbers) + + def phone_numbers_to_list(self, nation_code, phone_numbers): + tel = [] + for phone_number in phone_numbers: + tel.append({"nationcode": nation_code, "mobile":phone_number}) 
+ return tel + + def send_post_request(self, host, url, data): + con = None + try: + con = httplib.HTTPSConnection(host) + con.request('POST', url, json.dumps(data)) + response = con.getresponse() + if '200' != str(response.status): + obj = {} + obj["result"] = -1 + obj["errmsg"] = "connect failed:\t"+str(response.status) + " " + response.reason + result = json.dumps(obj) + else: + result = response.read() + except Exception,e: + obj = {} + obj["result"] = -2 + obj["errmsg"] = "connect failed:\t" + str(e) + result = json.dumps(obj) + finally: + if con: + con.close() + return result if __name__ == "__main__": - parser = argparse.ArgumentParser(description="script for sending alarm by linkedsee") + parser = argparse.ArgumentParser(description="script for sending alarm call") parser.add_argument("subject", help="the subject of the alarm call") parser.add_argument("content", help="the content of the alarm call") - parser.add_argument("receiver", help="the alarm to send by linkedsee") + parser.add_argument("receiver", help="the phone number who receive the call") args = parser.parse_args() - retry = 3 - while retry: - status, response = send(args.subject, args.content, args.receiver) - if status: - break - retry -= 1 - if not status: - sys.exit("{0} {1}".format(status, response)) - + vps = VoicePromptSender(VoicePromptSender.appid, VoicePromptSender.appkey) + status, response = vps.send(86, args.receiver, 2, "{0}{1}".format(VoicePromptSender.template, args.content), "") print status, response diff --git a/controller/scripts/notifications/send_tencent_sms.py b/controller/scripts/notifications/send_tencent_sms.py deleted file mode 100755 index 5202063..0000000 --- a/controller/scripts/notifications/send_tencent_sms.py +++ /dev/null @@ -1,148 +0,0 @@ -#! 
/usr/bin/env python2 -# encoding:utf-8 -# python 2.7 测试通过 -# python 3 更换适当的开发库就能使用,在此我们不额外提供 - -import httplib -import json -import hashlib -import random -import time -import argparse - -class SmsSingleSender: - """ 单发类定义""" - appid = 0 - appkey = "" - url = "https://yun.tim.qq.com/v5/tlssmssvr/sendsms" - template = "" - - def __init__(self, appid, appkey): - self.appid = appid - self.appkey = appkey - self.util = SmsSenderUtil() - - def send(self, sms_type, nation_code, phone_number, msg, extend, ext): - """ 普通群发接口 - 明确指定内容,如果有多个签名,请在内容中以【】的方式添加到信息内容中,否则系统将使用默认签名 - - Args: - sms_type: 短信类型,0 为普通短信,1 为营销短信 - nation_code: 国家码,如 86 为中国 - phone_number: 不带国家码的手机号 - msg: 信息内容,必须与申请的模板格式一致,否则将返回错误 - extend: 扩展码,可填空串 - ext: 服务端原样返回的参数,可填空串 - - Returns: - json string { "result": xxxx, "errmsg": "xxxxx" ... },被省略的内容参见协议文档 - 请求包体 - { - "tel": { - "nationcode": "86", - "mobile": "13788888888" - }, - "type": 0, - "msg": "你的验证码是1234", - "sig": "fdba654e05bc0d15796713a1a1a2318c", - "time": 1479888540, - "extend": "", - "ext": "" - } - 应答包体 - { - "result": 0, - "errmsg": "OK", - "ext": "", - "sid": "xxxxxxx", - "fee": 1 - } - """ - rnd = self.util.get_random() - cur_time = self.util.get_cur_time() - - data = {} - - tel = {"nationcode": nation_code, "mobile": phone_number} - data["tel"] = tel - data["type"] = sms_type - data["msg"] = msg - data["sig"] = hashlib.sha256("appkey=" + self.appkey + "&random=" + str(rnd) - + "&time=" + str(cur_time) + "&mobile=" + phone_number).hexdigest() - data["time"] = cur_time - data["extend"] = extend - data["ext"] = ext - - whole_url = self.url + "?sdkappid=" + str(self.appid) + "&random=" + str(rnd) - return self.util.send_post_request("yun.tim.qq.com", whole_url, data) - - -class SmsSenderUtil: - """ 工具类定义 """ - - def get_random(self): - return random.randint(100000, 999999) - - def get_cur_time(self): - return long(time.time()) - - def calculate_sig(self, appkey, rnd, cur_time, phone_numbers): - phone_numbers_string = phone_numbers[0] - for i 
in range(1, len(phone_numbers)): - phone_numbers_string += "," + phone_numbers[i] - return hashlib.sha256("appkey=" + appkey + "&random=" + str(rnd) + "&time=" + str(cur_time) - + "&mobile=" + phone_numbers_string).hexdigest() - - # def calculate_sig_for_templ_phone_numbers(self, appkey, rnd, cur_time, phone_numbers): - # """ 计算带模板和手机号列表的 sig """ - # phone_numbers_string = phone_numbers[0] - # for i in range(1, len(phone_numbers)): - # phone_numbers_string += "," + phone_numbers[i] - # return hashlib.sha256("appkey=" + appkey + "&random=" + str(rnd) + "&time=" - # + str(cur_time) + "&mobile=" + phone_numbers_string).hexdigest() - - # def calculate_sig_for_templ(self, appkey, rnd, cur_time, phone_number): - # phone_numbers = [phone_number] - # return self.calculate_sig_for_templ_phone_numbers(appkey, rnd, cur_time, phone_numbers) - - # def phone_numbers_to_list(self, nation_code, phone_numbers): - # tel = [] - # for phone_number in phone_numbers: - # tel.append({"nationcode": nation_code, "mobile":phone_number}) - # return tel - - def send_post_request(self, host, url, data): - con = None - try: - con = httplib.HTTPSConnection(host) - con.request('POST', url, json.dumps(data)) - response = con.getresponse() - if '200' != str(response.status): - obj = {} - obj["result"] = -1 - obj["errmsg"] = "connect failed:\t"+str(response.status) + " " + response.reason - result = json.dumps(obj) - else: - result = response.read() - except Exception,e: - obj = {} - obj["result"] = -2 - obj["errmsg"] = "connect failed:\t" + str(e) - result = json.dumps(obj) - return False, result - finally: - if con: - con.close() - return True, result - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="script for sending alarm sms_type") - parser.add_argument("subject", help="the subject of the alarm sms") - parser.add_argument("content", help="the content of the alarm sms") - parser.add_argument("receiver", help="the phone number who receive the sms") - args = 
parser.parse_args() - - ss = SmsSingleSender(SmsSingleSender.appid, SmsSingleSender.appkey) - receiver = json.loads(args.receiver) - status, response = ss.send(0, 86, receiver["phone"], "{0}{1}".format(SmsSingleSender.template, args.content), "", "") - print status, response diff --git a/inspector/README.md b/inspector/README.md index 04afa64..f0eae6f 100644 --- a/inspector/README.md +++ b/inspector/README.md @@ -2,7 +2,7 @@ #### 报警方法 -​ 监控系统本身要监控许多种服务指标以及系统指标,而且各种指标的变化和监控的重点也是不一样的,针对不同的指标采用合适的报警算法,可以大大提高监控的准确性,降低误报率。目前我们应用的几种算法都是比较普遍的,主要有最大值,最小值,环比,Top, Bottom, Nodata, Last, Diff 下面我分别介绍一下这几种算法的具体实现和应用场景。 +​ 监控系统本身要监控许多种服务指标以及系统指标,而且各种指标的变化和监控的重点也是不一样的,针对不同的指标采用合适的报警算法,可以大大提高监控报警的准确性,降低误报率。目前我们应用的几种算法都是比较普遍的,主要有最大值,最小值,环比,Top, Bottom, Nodata, Last, Diff,平均值。下面分别介绍一下这几种算法的具体实现和应用场景。 ##### 最大值 @@ -14,7 +14,7 @@ ##### 环比 -​ 当前时间段的数据集的平均值(data2)与之前某一段时间段的数据集的平均值(data1)进行差值然后除以之前数据集的平均值,公式是:(data2 – data1 / data1) * 100 ,之前的的数据平均值需要依赖Number参数,它的单位为天,例如输入1,则是与1天前同一时间点的数据进行比较, 此种算法的具体应用场景是针对那些平时指标曲线比较稳定坡度不是很大服务,但当某一个段时间的数据坡度明显增高或者降低时,说明服务一定有很大的波动,那么我们就要触发相应的报警提示。 +​ 当前时间段的数据集的平均值(data2)与之前某一段时间段的数据集的平均值(data1)进行差值然后除以之前数据集的平均值,公式是:((data2 – data1) / data1) * 100,之前的数据平均值需要依赖Number参数,它的单位为分钟,例如输入1,则是与1分钟前同一时间点的数据进行比较。此种算法的具体应用场景是针对那些平时指标曲线比较稳定、坡度不是很大的服务,但当某一段时间的数据坡度明显增高或者降低时,说明服务一定有很大的波动,那么我们就要触发相应的报警提示。 ##### Top @@ -30,12 +30,16 @@ ##### Last -​ 采集数据的数据集中选自然的前Number个点和阈值进行比较,所有点都满足阈值比对时才触发报警。 +​ 采集数据集中所有的点并选前Number个自然点和阈值进行比较,所有点都满足阈值比对时才触发报警。 ##### Diff ​ 采集数据集中的所有点,若这些点的值有不一样的时候,返回1,否则返回0。 +##### 平均值 +​ 计算采集数据集中所有点的平均值,并将该平均值与阈值进行比较。当需要针对某时间段内所有数据点的平均值进行报警时可以使用此方法。 + + diff --git a/inspector/compute.go b/inspector/compute.go index a3ce1d3..89dc78c 100644 --- a/inspector/compute.go +++ b/inspector/compute.go @@ -20,6 +20,7 @@ const ( LAST_METHOD = "last" DIFF_METHOD = "diff" NODATA_METHOD = "nodata" + AVG_METHOD = "avg" ) func maxMethod(host_id string, cycle int, trigger *types.Trigger) (*types.TriggerResultSet, error) { @@ -408,6 +409,40 @@ func nodataMethod(host_id string, cycle int, 
trigger *types.Trigger) (*types.Tri return trigger_result_set, nil } +func avgMethod(host_id string, cycle int, trigger *types.Trigger) (*types.TriggerResultSet, error) { + trigger_result_set := &types.TriggerResultSet{TriggerResults: make([]*types.TriggerResult, 0), Triggered: false} + + params := NewQueryParams(host_id, fmt.Sprintf("%dm-ago", cycle), "", trigger.Tags, "sum", trigger.Metric) + results, err := tsdbClient.Query(params) + if err != nil { + return nil, err + } + + for _, result := range results { + if len(result.Dps) == 0 { + continue + } + + parameters := make(map[string]interface{}, 8) + current_threshold := avg(result.Dps) + parameters["current_threshold"] = current_threshold + parameters["threshold"] = trigger.Threshold + expression := fmt.Sprintf("current_threshold %s threshold", trigger.Symbol) + trigger_result, err := compute(parameters, expression) + if err != nil { + return trigger_result_set, err + } + + if !trigger_result_set.Triggered && trigger_result { + trigger_result_set.Triggered = trigger_result + } + + trigger_result_set.TriggerResults = append(trigger_result_set.TriggerResults, types.NewTriggerResult(trigger.Index, result.Tags, result.AggregateTags, current_threshold, trigger_result)) + } + + return trigger_result_set, nil +} + func compute(params map[string]interface{}, express string) (bool, error) { expression, err := govaluate.NewEvaluableExpression(express) if err != nil { diff --git a/inspector/inspector.go b/inspector/inspector.go index 52401f5..177c219 100644 --- a/inspector/inspector.go +++ b/inspector/inspector.go @@ -133,6 +133,8 @@ func (this *Inspector) processTask(task *types.AlarmTask) { trigger_result_set, err = diffMethod(task.Host.ID, task.Strategy.Cycle, trigger) case NODATA_METHOD: trigger_result_set, err = nodataMethod(task.Host.ID, task.Strategy.Cycle, trigger) + case AVG_METHOD: + trigger_result_set, err = avgMethod(task.Host.ID, task.Strategy.Cycle, trigger) default: err = errors.New(fmt.Sprintf("Trigger 
method %v not found", trigger.Method)) } diff --git a/inspector/tsdb.go b/inspector/tsdb.go index 959e8ae..d27bf8a 100644 --- a/inspector/tsdb.go +++ b/inspector/tsdb.go @@ -106,7 +106,7 @@ func NewQueryParams(host_id, start, end string, rawTags string, aggregator strin if _, ok := tags["hostname"]; !ok { tags["uuid"] = host_id } - queries := []Query{{Aggregator: aggregator, Metric: metric, Tags: tags}} + queries := []Query{Query{Aggregator: aggregator, Metric: metric, Tags: tags}} return &QueryParams{Start: start, End: end, Queries: queries} }