Skip to content

Commit

Permalink
add new options to keep services running as long as they are ok.
Browse files Browse the repository at this point in the history
The issue with the options:

- host_down_disable_service_checks
- service_skip_check_dependency_status
- service_skip_check_host_down_status
- host_skip_check_dependency_status

is that reports break because hosts/services suddenly stop executing and
keep their OK state, which makes those options pretty much unusable.

So in order to keep reporting correct, you need to keep services running, even
if the host is down. With these new options, hosts/services keep on running as
long as they are up. As soon as the service is down, it stops running until
the host comes back up. That way naemon has to do fewer checks, especially fewer
checks which run into timeouts and the like, while reporting is still correct.

The option service_skip_check_dependency_status=-2 will also be used for service parents.

Recommended settings are:

host_down_disable_service_checks=1       ; disable service checks if host is down
service_skip_check_host_down_status=-2   ; but keep running as long as they are ok
service_skip_check_dependency_status=-2  ; same, but for dependency checks.
host_skip_check_dependency_status=-2     ; and for host checks
  • Loading branch information
sni committed Mar 18, 2024
1 parent 7031173 commit 58a6f2c
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 4 deletions.
3 changes: 2 additions & 1 deletion sample-config/naemon.cfg.in
Original file line number Diff line number Diff line change
Expand Up @@ -1042,7 +1042,6 @@ allow_empty_hostgroup_assignment=0
#
# While desirable in some environments, enabling this value can distort report
# values as the expected quantity of checks will not have been performed

#host_down_disable_service_checks=0

# SET SERVICE/HOST STATUS WHEN SERVICE CHECK SKIPPED
Expand All @@ -1053,6 +1052,7 @@ allow_empty_hostgroup_assignment=0
# is set to 1.
# Valid values for the service* options are:
# -1 Do not change the service status (default)
# -2 Keep service running as long as it is ok/warning (also used for service parents).
# 0 Set the service status to STATE_OK
# 1 Set the service status to STATE_WARNING
# 2 Set the service status to STATE_CRITICAL
Expand All @@ -1064,6 +1064,7 @@ allow_empty_hostgroup_assignment=0
# status of a host when its check is skipped due to a failed dependency check.
# Valid values for the host_dependency_skip_check_status are:
# -1 Do not change the host status (default)
# -2 Keep host running as long as it is up.
# 0 Set the host status to STATE_UP
# 1 Set the host status to STATE_DOWN
# 2 Set the host status to STATE_UNREACHABLE
Expand Down
18 changes: 16 additions & 2 deletions src/naemon/checks_service.c
Original file line number Diff line number Diff line change
Expand Up @@ -218,9 +218,23 @@ static void handle_service_check_event(struct nm_event_execution_properties *evp
if ((temp_host = temp_service->host_ptr) == NULL) {
log_debug_info(DEBUGL_CHECKS, 2, "Host pointer NULL in handle_service_check_event().\n");
return;
} else {
if (temp_host->current_state != STATE_UP) {
}
if (temp_host->current_state != STATE_UP) {
int keep_running = TRUE;
switch (service_skip_check_host_down_status) {
/* only keep running if service is up or host_down_disable_service_checks is disabled */
case SKIP_CHECK_KEEP_RUNNING_WHEN_UP:
if (temp_service->current_state > STATE_WARNING) {
log_debug_info(DEBUGL_CHECKS, 2, "Host and service state not UP, so service check will not be performed - will be rescheduled as normal.\n");
keep_running = FALSE;
}
break;
default:
log_debug_info(DEBUGL_CHECKS, 2, "Host state not UP, so service check will not be performed - will be rescheduled as normal.\n");
keep_running = FALSE;
break;
}
if(!keep_running) {
if (service_skip_check_host_down_status >= 0) {
temp_service->current_state = service_skip_check_host_down_status;
if (strstr(temp_service->plugin_output, "(host is down)") == NULL) {
Expand Down
2 changes: 1 addition & 1 deletion src/naemon/configuration.c
Original file line number Diff line number Diff line change
Expand Up @@ -1076,7 +1076,7 @@ read_config_file(const char *main_config_file, nagios_macros *mac)
}
} else if (!strcmp(variable, "service_skip_check_host_down_status")) {
service_skip_check_host_down_status = atoi(value);
if (service_skip_check_host_down_status < -1 || service_skip_check_host_down_status > 3) {
if (service_skip_check_host_down_status < -2 || service_skip_check_host_down_status > 3) {
nm_asprintf(&error_message, "Illegal value for service_skip_check_host_down_status");
error = TRUE;
break;
Expand Down
1 change: 1 addition & 0 deletions src/naemon/defaults.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@
#define DEFAULT_ALLOW_CIRCULAR_DEPENDENCIES 0 /* Allow circular dependencies */
#define DEFAULT_HOST_DOWN_DISABLE_SERVICE_CHECKS 0 /* run service checks if the host is down */
#define DEFAULT_SKIP_CHECK_STATUS -1 /* do not change status by default */
#define SKIP_CHECK_KEEP_RUNNING_WHEN_UP -2 /* instead of skipping, keep running checks for as long as the checked object itself is still up (services: ok/warning) */

#define DEFAULT_HOST_PERFDATA_FILE_TEMPLATE "[HOSTPERFDATA]\t$TIMET$\t$HOSTNAME$\t$HOSTEXECUTIONTIME$\t$HOSTOUTPUT$\t$HOSTPERFDATA$"
#define DEFAULT_SERVICE_PERFDATA_FILE_TEMPLATE "[SERVICEPERFDATA]\t$TIMET$\t$HOSTNAME$\t$SERVICEDESC$\t$SERVICEEXECUTIONTIME$\t$SERVICELATENCY$\t$SERVICEOUTPUT$\t$SERVICEPERFDATA$"
Expand Down

0 comments on commit 58a6f2c

Please sign in to comment.