From ddc4d76831fc21ec3367ad4b4d8065ef304cb60a Mon Sep 17 00:00:00 2001
From: Sven Nierlein
Date: Thu, 4 Jul 2024 15:47:52 +0200
Subject: [PATCH] launch command worker earlier

Since the command worker is forked from the main naemon process, it
inherits all open files, e.g. the pid file, log files, etc. It keeps
those references open even if the main process rotates and reopens
those files.

This patch closes the query handler and pid file references inside the
command worker right after it is started, and also moves starting the
command worker before initializing the neb modules, so it won't inherit
open log files from neb modules.

references:

- https://github.com/ConSol-Monitoring/omd/issues/146

Signed-off-by: Sven Nierlein
---
Review notes (ignored by git am, not part of the commit message):
- qh_close_socket() and close_lockfile_fd() now test for ">= 0" instead
  of "> 0": -1 is the "closed" sentinel and 0 is a valid descriptor.
- Both definitions use "(void)" to match their prototypes.
- query-handler.c now ends with a newline.
- The edits keep the number of added lines, so hunk headers and the
  diffstat are unchanged; the post-image blob ids in the "index" lines
  of query-handler.c and utils.c are stale after this hand edit.
- This patch was restored from a flattened copy: indentation and blank
  context lines were re-created and should be checked against the tree.
  Angle-bracket tokens (e-mail addresses, the system header names in
  the commands.c context lines) were missing in that copy and are left
  untouched.

 src/naemon/commands.c      |  8 +++++++
 src/naemon/naemon.c        | 46 +++++++++++++++++++-------------------
 src/naemon/query-handler.c |  8 ++++++-
 src/naemon/query-handler.h |  1 +
 src/naemon/utils.c         | 31 +++++++++++++++----------
 src/naemon/utils.h         |  1 +
 6 files changed, 59 insertions(+), 36 deletions(-)

diff --git a/src/naemon/commands.c b/src/naemon/commands.c
index 2dd93e7b2..a842f3c20 100644
--- a/src/naemon/commands.c
+++ b/src/naemon/commands.c
@@ -19,6 +19,7 @@
 #include "globals.h"
 #include "logging.h"
 #include "nm_alloc.h"
+#include "query-handler.h"
 #include "lib/libnaemon.h"
 #include
 #include
@@ -388,6 +389,13 @@ int launch_command_file_worker(void)
 
 		/* make our own process-group so we can be traced into and stuff */
 		setpgid(0, 0);
+
+		// close inherited file handles
+		close_log_file();
+		close_standard_fds();
+		qh_close_socket();
+		close_lockfile_fd();
+
 		str = nm_strdup(command_file);
 		free_memory(get_global_macros());
 		command_file = str;
diff --git a/src/naemon/naemon.c b/src/naemon/naemon.c
index 72979036a..059154c42 100644
--- a/src/naemon/naemon.c
+++ b/src/naemon/naemon.c
@@ -543,22 +543,6 @@ int main(int argc, char **argv)
 	nerd_init();
 	timing_point("Initialized NERD\n");
 
-	/* initialize check workers */
-	timing_point("Spawning %u workers\n", wproc_num_workers_spawned);
-	if (init_workers(num_check_workers) < 0) {
-		nm_log(NSLOG_RUNTIME_ERROR, "Failed to spawn workers. Aborting\n");
-		exit(EXIT_FAILURE);
-	}
-	timing_point("Spawned %u workers\n", wproc_num_workers_spawned);
-
-	timing_point("Connecting %u workers\n", wproc_num_workers_online);
-	i = 0;
-	while (i < 50 && wproc_num_workers_online < wproc_num_workers_spawned) {
-		iobroker_poll(nagios_iobs, 50);
-		i++;
-	}
-	timing_point("Connected %u workers\n", wproc_num_workers_online);
-
 	/* read in all object config data */
 	if (result == OK) {
 		timing_point("Reading all object data\n");
@@ -576,6 +560,29 @@ int main(int argc, char **argv)
 	init_event_queue();
 	timing_point("Initialized Event queue\n");
 
+	registered_commands_init(200);
+	register_core_commands();
+	/* fire up command file worker */
+	timing_point("Launching command file worker\n");
+	launch_command_file_worker();
+	timing_point("Launched command file worker\n");
+
+	/* initialize check workers */
+	timing_point("Spawning %u workers\n", wproc_num_workers_spawned);
+	if (init_workers(num_check_workers) < 0) {
+		nm_log(NSLOG_RUNTIME_ERROR, "Failed to spawn workers. Aborting\n");
+		exit(EXIT_FAILURE);
+	}
+	timing_point("Spawned %u workers\n", wproc_num_workers_spawned);
+
+	timing_point("Connecting %u workers\n", wproc_num_workers_online);
+	i = 0;
+	while (i < 50 && wproc_num_workers_online < wproc_num_workers_spawned) {
+		iobroker_poll(nagios_iobs, 50);
+		i++;
+	}
+	timing_point("Connected %u workers\n", wproc_num_workers_online);
+
 	/* load modules */
 	timing_point("Loading modules\n");
 	if (neb_load_all_modules() != OK) {
@@ -680,13 +687,6 @@ int main(int argc, char **argv)
 	log_service_states(INITIAL_STATES, NULL);
 	timing_point("Logged initial states\n");
 
-	registered_commands_init(200);
-	register_core_commands();
-	/* fire up command file worker */
-	timing_point("Launching command file worker\n");
-	launch_command_file_worker();
-	timing_point("Launched command file worker\n");
-
 	broker_program_state(NEBTYPE_PROCESS_EVENTLOOPSTART, NEBFLAG_NONE, NEBATTR_NONE);
 
 	/* get event start time and save as macro */
diff --git a/src/naemon/query-handler.c b/src/naemon/query-handler.c
index e0481e33c..2d1b75fd1 100644
--- a/src/naemon/query-handler.c
+++ b/src/naemon/query-handler.c
@@ -394,7 +394,7 @@ int qh_init(const char *path)
 	result = iobroker_register(nagios_iobs, qh_listen_sock, NULL, qh_registration_input);
 	if (result < 0) {
 		g_hash_table_destroy(qh_table);
-		close(qh_listen_sock);
+		qh_close_socket();
 		nm_log(NSLOG_RUNTIME_ERROR, "qh: Failed to register socket with io broker: %s\n", iobroker_strerror(result));
 		return ERROR;
 	}
@@ -408,3 +408,9 @@ int qh_init(const char *path)
 
 	return 0;
 }
+
+void qh_close_socket(void) {
+	if (qh_listen_sock >= 0)
+		close(qh_listen_sock);
+	qh_listen_sock = -1;
+}
diff --git a/src/naemon/query-handler.h b/src/naemon/query-handler.h
index e599ff908..7fda0dbc4 100644
--- a/src/naemon/query-handler.h
+++ b/src/naemon/query-handler.h
@@ -20,6 +20,7 @@ int qh_init(const char *path);
 void qh_deinit(const char *path);
 int qh_register_handler(const char *name, const char *description, unsigned int options, qh_handler handler);
 const char *qh_strerror(int code);
+void qh_close_socket(void);
 
 NAGIOS_END_DECL
 
diff --git a/src/naemon/utils.c b/src/naemon/utils.c
index 1c101f44d..f10c535e8 100644
--- a/src/naemon/utils.c
+++ b/src/naemon/utils.c
@@ -172,6 +172,8 @@ int host_skip_check_dependency_status = DEFAULT_SKIP_CHECK_STATUS;
 
 static long long check_file_size(char *, unsigned long, struct rlimit);
 
+static int lock_file_fd = -1; /* the file handle of the lockfile */
+
 time_t max_check_result_file_age = DEFAULT_MAX_CHECK_RESULT_AGE;
 
 check_stats check_statistics[MAX_CHECK_STATS_TYPES];
@@ -498,7 +500,6 @@ int signal_parent(int sig)
 int daemon_init(void)
 {
 	int pid = 0;
-	int lockfile = 0;
 	int val = 0;
 	char buf[256];
 	struct flock lock;
@@ -509,16 +510,16 @@ int daemon_init(void)
 
 	umask(S_IWGRP | S_IWOTH);
 
-	lockfile = open(lock_file, O_RDWR | O_CREAT, S_IWUSR | S_IRUSR | S_IRGRP | S_IROTH);
+	lock_file_fd = open(lock_file, O_RDWR | O_CREAT, S_IWUSR | S_IRUSR | S_IRGRP | S_IROTH);
 
-	if (lockfile < 0) {
+	if (lock_file_fd < 0) {
 		nm_log(NSLOG_RUNTIME_ERROR, "Failed to obtain lock on file %s: %s\n", lock_file, strerror(errno));
 		nm_log(NSLOG_PROCESS_INFO | NSLOG_RUNTIME_ERROR, "Bailing out due to errors encountered while attempting to daemonize... (PID=%d)", (int)getpid());
 		return (ERROR);
 	}
 
 	/* see if we can read the contents of the lockfile */
-	if ((val = read(lockfile, buf, (size_t)10)) < 0) {
+	if ((val = read(lock_file_fd, buf, (size_t)10)) < 0) {
 		nm_log(NSLOG_RUNTIME_ERROR, "Lockfile exists but cannot be read");
 		return (ERROR);
 	}
@@ -540,7 +541,7 @@ int daemon_init(void)
 		lock.l_start = 0;
 		lock.l_whence = SEEK_SET;
 		lock.l_len = 0;
-		if (fcntl(lockfile, F_GETLK, &lock) == -1) {
+		if (fcntl(lock_file_fd, F_GETLK, &lock) == -1) {
 			nm_log(NSLOG_RUNTIME_ERROR, "Failed to access lockfile '%s'. %s. Bailing out...", lock_file, strerror(errno));
 			return (ERROR);
 		}
@@ -609,9 +610,9 @@ int daemon_init(void)
 	lock.l_whence = SEEK_SET;
 	lock.l_len = 0;
 	lock.l_pid = getpid();
-	if (fcntl(lockfile, F_SETLK, &lock) == -1) {
+	if (fcntl(lock_file_fd, F_SETLK, &lock) == -1) {
 		if (errno == EACCES || errno == EAGAIN) {
-			fcntl(lockfile, F_GETLK, &lock);
+			fcntl(lock_file_fd, F_GETLK, &lock);
 			nm_log(NSLOG_RUNTIME_ERROR, "Lockfile '%s' looks like its already held by another instance of Naemon (PID %d). Bailing out, post-fork...", lock_file, (int)lock.l_pid);
 		} else
 			nm_log(NSLOG_RUNTIME_ERROR, "Cannot lock lockfile '%s': %s. Bailing out...", lock_file, strerror(errno));
@@ -620,28 +621,34 @@ int daemon_init(void)
 	}
 
 	/* write PID to lockfile... */
-	lseek(lockfile, 0, SEEK_SET);
-	if (ftruncate(lockfile, 0) != 0) {
+	lseek(lock_file_fd, 0, SEEK_SET);
+	if (ftruncate(lock_file_fd, 0) != 0) {
 		nm_log(NSLOG_RUNTIME_ERROR, "Cannot truncate lockfile '%s': %s. Bailing out...", lock_file, strerror(errno));
 		return (ERROR);
 	}
 	sprintf(buf, "%d\n", (int)getpid());
 
-	if (nsock_write_all(lockfile, buf, strlen(buf)) != 0) {
+	if (nsock_write_all(lock_file_fd, buf, strlen(buf)) != 0) {
 		nm_log(NSLOG_RUNTIME_ERROR, "Cannot write PID to lockfile '%s': %s. Bailing out...", lock_file, strerror(errno));
 		return (ERROR);
 	}
 
 	/* make sure lock file stays open while program is executing... */
-	val = fcntl(lockfile, F_GETFD, 0);
+	val = fcntl(lock_file_fd, F_GETFD, 0);
 	val |= FD_CLOEXEC;
-	fcntl(lockfile, F_SETFD, val);
+	fcntl(lock_file_fd, F_SETFD, val);
 
 	broker_program_state(NEBTYPE_PROCESS_DAEMONIZE, NEBFLAG_NONE, NEBATTR_NONE);
 
 	return OK;
 }
 
+void close_lockfile_fd(void) {
+	if (lock_file_fd >= 0)
+		close(lock_file_fd);
+	lock_file_fd = -1;
+}
+
 /******************************************************************/
 /************************* FILE FUNCTIONS *************************/
 /******************************************************************/
diff --git a/src/naemon/utils.h b/src/naemon/utils.h
index f7106a8b6..c9105c6b3 100644
--- a/src/naemon/utils.h
+++ b/src/naemon/utils.h
@@ -37,6 +37,7 @@ void signal_react(void); /* General signal reaction routines */
 void handle_sigxfsz(void); /* handle SIGXFSZ */
 int signal_parent(int); /* signal parent when daemonizing */
 int daemon_init(void); /* switches to daemon mode */
+void close_lockfile_fd(void); /* close lock_file file handle */
 
 int init_check_stats(void);
 int update_check_stats(int, time_t);