diff --git a/app/models/miq_server/worker_management/kubernetes.rb b/app/models/miq_server/worker_management/kubernetes.rb index 3aff19f85ce8..7f63c08c1dbe 100644 --- a/app/models/miq_server/worker_management/kubernetes.rb +++ b/app/models/miq_server/worker_management/kubernetes.rb @@ -12,10 +12,6 @@ def sync_from_system # we only have to sync the list of pods and deployments once ensure_kube_monitors_started if my_server_is_primary? - # Before syncing the workers check for any orphaned worker rows that don't have - # a current pod and delete them - cleanup_orphaned_worker_rows - # Update worker deployments with updated settings such as cpu/memory limits sync_deployment_settings end @@ -50,6 +46,14 @@ def cleanup_orphaned_worker_rows end end + def cleanup_orphaned_workers + orphaned_pods = current_pods.keys - miq_workers.pluck(:system_uid) + return if orphaned_pods.empty? + + # TODO destroy orphaned pods + orphaned_pods.each { |_pod| } + end + def cleanup_failed_workers super diff --git a/app/models/miq_server/worker_management/monitor.rb b/app/models/miq_server/worker_management/monitor.rb index ee8a374a0c4f..ec7a1dd5d4f7 100644 --- a/app/models/miq_server/worker_management/monitor.rb +++ b/app/models/miq_server/worker_management/monitor.rb @@ -18,6 +18,12 @@ def monitor_workers # Cache a list of the native objects backing the miq_workers (e.g.: pods, services, or processes) sync_from_system + # Cleanup any worker rows that don't have running workers + cleanup_orphaned_worker_rows + + # Cleanup any workers that don't have corresponding miq_workers rows + cleanup_orphaned_workers + sync_monitor # Sync the workers after sync'ing the child worker settings @@ -48,6 +54,14 @@ def sync_workers end end + def cleanup_orphaned_worker_rows + raise NotImplementedError, "cleanup_orphaned_worker_rows must be implemented in a subclass" + end + + def cleanup_orphaned_workers + raise NotImplementedError, "cleanup_orphaned_workers must be implemented in a subclass" + end + def cleanup_failed_workers check_pending_stop clean_worker_records diff --git a/app/models/miq_server/worker_management/process.rb b/app/models/miq_server/worker_management/process.rb index 6c36e163f4e5..a7bb8687d009 100644 --- a/app/models/miq_server/worker_management/process.rb +++ b/app/models/miq_server/worker_management/process.rb @@ -1,13 +1,29 @@ class MiqServer::WorkerManagement::Process < MiqServer::WorkerManagement def sync_from_system require "sys/proctable" - self.miq_processes = Sys::ProcTable.ps.select { |proc| proc.ppid == my_server.pid } + @miq_processes_by_pid = Sys::ProcTable.ps.select { |proc| proc.ppid == my_server.pid }.index_by(&:pid) end def sync_starting_workers MiqWorker.find_all_starting.to_a end + def cleanup_orphaned_worker_rows + orphaned_rows = miq_workers.where.not(:pid => miq_pids) + return if orphaned_rows.empty? + + _log.warn("Removing orphaned worker rows without corresponding processes: #{orphaned_rows.collect(&:pid).inspect}") + orphaned_rows.destroy_all + end + + def cleanup_orphaned_workers + orphaned_workers = miq_pids - miq_workers.pluck(:pid) + return if orphaned_workers.empty? + + _log.warn("Removing orphaned processes without corresponding worker rows: #{orphaned_workers.inspect}") + orphaned_workers.each { |pid| ::Process.kill(9, pid) } + end + def monitor_workers super @@ -70,5 +86,13 @@ def validate_worker(worker) private - attr_accessor :miq_processes + attr_reader :miq_processes_by_pid + + def miq_processes + miq_processes_by_pid.values + end + + def miq_pids + miq_processes_by_pid.keys + end end diff --git a/app/models/miq_server/worker_management/systemd.rb b/app/models/miq_server/worker_management/systemd.rb index 41cd20b68910..a1b553c27461 100644 --- a/app/models/miq_server/worker_management/systemd.rb +++ b/app/models/miq_server/worker_management/systemd.rb @@ -20,6 +20,12 @@ def sync_starting_workers starting end + def cleanup_orphaned_worker_rows + end + + def cleanup_orphaned_workers + end + def cleanup_failed_workers super