Skip to content

Commit

Permalink
Cleanup orphan worker rows
Browse files Browse the repository at this point in the history
  • Loading branch information
agrare committed Oct 25, 2023
1 parent 883525a commit 625f118
Show file tree
Hide file tree
Showing 7 changed files with 67 additions and 29 deletions.
4 changes: 0 additions & 4 deletions app/models/miq_server/worker_management/kubernetes.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,6 @@ def sync_from_system
# we only have to sync the list of pods and deployments once
ensure_kube_monitors_started if my_server_is_primary?

# Before syncing the workers check for any orphaned worker rows that don't have
# a current pod and delete them
cleanup_orphaned_worker_rows

# Update worker deployments with updated settings such as cpu/memory limits
sync_deployment_settings
end
Expand Down
4 changes: 4 additions & 0 deletions app/models/miq_server/worker_management/monitor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ def monitor_workers
# Cache a list of the native objects backing the miq_workers (e.g.: pods, services, or processes)
sync_from_system

# Before syncing the workers check for any orphaned worker rows that don't have
# a current resource and delete them
cleanup_orphaned_worker_rows

sync_monitor

# Sync the workers after sync'ing the child worker settings
Expand Down
9 changes: 9 additions & 0 deletions app/models/miq_server/worker_management/process.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,15 @@ def monitor_workers
do_system_limit_exceeded if kill_workers_due_to_resources_exhausted?
end

# Destroys every row in miq_workers whose pid is not among the pids reported
# by miq_processes, logging the affected pids first.
#
# Returns nil when nothing is orphaned, otherwise the result of destroy_all.
def cleanup_orphaned_worker_rows
  live_pids     = miq_processes.map(&:pid)
  stale_workers = miq_workers.where.not(:pid => live_pids)

  unless stale_workers.empty?
    stale_pids = stale_workers.map(&:pid)
    _log.warn("Removing orphaned worker rows without processes: #{stale_pids.inspect}")

    stale_workers.destroy_all
  end
end

def monitor_active_workers
# Monitor all remaining current worker records
miq_workers.find_current_or_starting.each do |worker|
Expand Down
3 changes: 3 additions & 0 deletions app/models/miq_server/worker_management/systemd.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ def sync_starting_workers
starting
end

# Intentionally does nothing and returns nil.
# NOTE(review): the systemd spec in this change expects worker rows to be
# deleted by this call -- confirm the empty override is intended.
def cleanup_orphaned_worker_rows
  nil
end

def cleanup_failed_workers
super

Expand Down
47 changes: 22 additions & 25 deletions spec/models/miq_server/worker_management/kubernetes_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -120,42 +120,39 @@

# Checks that calling sync_from_system on the worker manager invokes
# ensure_kube_monitors_started.
# NOTE(review): the two consecutive `it` headers and the
# cleanup_orphaned_worker_rows expectation look like the removed and added
# sides of the diff shown together -- confirm which lines are current.
context "#sync_from_system" do
context "#ensure_kube_monitors_started" do
it "podified, ensures pod monitor started and orphaned rows are removed" do
it "podified, ensures pod monitor started" do
expect(server.worker_manager).to receive(:ensure_kube_monitors_started)
expect(server.worker_manager).to receive(:cleanup_orphaned_worker_rows)
server.worker_manager.sync_from_system
end
end
end

# Specs for cleanup_orphaned_worker_rows on the worker manager: a worker row
# belonging to this server whose system_uid is not a key of current_pods is
# expected to be deleted, while rows belonging to another server are kept.
# NOTE(review): most statements below appear twice (once under the removed
# "podified" context, once without it); this looks like both sides of the
# diff rendered together -- confirm which copy is current.
context "#cleanup_orphaned_worker_rows" do
context "podified" do
let(:server2) { EvmSpecHelper.remote_miq_server }
let(:worker) do
FactoryBot.create(:miq_worker, :type => "MiqGenericWorker", :miq_server => server, :last_heartbeat => 5.minutes.ago)
end
let(:server2) { EvmSpecHelper.remote_miq_server }
let(:worker) do
FactoryBot.create(:miq_worker, :type => "MiqGenericWorker", :miq_server => server, :last_heartbeat => 5.minutes.ago)
end

# Pretend exactly one pod, "1-generic-active", currently exists.
before do
server.worker_manager.current_pods = {"1-generic-active" => {}}
end
before do
server.worker_manager.current_pods = {"1-generic-active" => {}}
end

# Empty current_pods again after each example.
after do
server.worker_manager.current_pods.clear
end
after do
server.worker_manager.current_pods.clear
end

# Two rows on this server: one orphan uid, one matching the current pod.
# Only the matching row should remain.
it "removes this server's orphaned rows" do
worker.update(:system_uid => "1-generic-orphan")
FactoryBot.create(:miq_worker, :type => "MiqGenericWorker", :miq_server => server, :system_uid => "1-generic-active")
server.worker_manager.cleanup_orphaned_worker_rows
expect(MiqWorker.count).to eq(1)
end
it "removes this server's orphaned rows" do
worker.update(:system_uid => "1-generic-orphan")
FactoryBot.create(:miq_worker, :type => "MiqGenericWorker", :miq_server => server, :system_uid => "1-generic-active")
server.worker_manager.cleanup_orphaned_worker_rows
expect(MiqWorker.count).to eq(1)
end

# Both rows belong to server2, so cleanup run for `server` must keep them.
it "skips orphaned rows for other servers" do
worker.update(:miq_server => server2, :system_uid => "1-generic-orphan")
FactoryBot.create(:miq_worker, :type => "MiqGenericWorker", :miq_server => server2, :system_uid => "1-generic-active")
server.worker_manager.cleanup_orphaned_worker_rows
expect(MiqWorker.count).to eq(2)
end
it "skips orphaned rows for other servers" do
worker.update(:miq_server => server2, :system_uid => "1-generic-orphan")
FactoryBot.create(:miq_worker, :type => "MiqGenericWorker", :miq_server => server2, :system_uid => "1-generic-active")
server.worker_manager.cleanup_orphaned_worker_rows
expect(MiqWorker.count).to eq(2)
end
end

Expand Down
15 changes: 15 additions & 0 deletions spec/models/miq_server/worker_management/process_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,19 @@
end
end
end

# Verifies that cleanup_orphaned_worker_rows deletes a worker row belonging
# to this server when the worker manager's cached process list is cleared.
context "#cleanup_orphaned_worker_rows" do
  # Eagerly created worker row owned by this server.
  let!(:worker) do
    FactoryBot.create(:miq_worker, :type => "MiqGenericWorker", :miq_server => server, :status => MiqWorker::STATUS_CREATING, :last_heartbeat => 5.minutes.ago)
  end

  # Reset the cached @miq_processes on the worker manager. An earlier hook
  # that set it to [] was immediately overwritten by this one, so only the
  # effective assignment is kept.
  # NOTE(review): presumably nil forces the process list to be recomputed --
  # verify against the worker manager's memoization.
  before { server.worker_manager.instance_variable_set(:@miq_processes, nil) }

  it "removes this server's orphaned rows" do
    server.worker_manager.cleanup_orphaned_worker_rows
    expect(MiqWorker.count).to be_zero
  end
end
end
14 changes: 14 additions & 0 deletions spec/models/miq_server/worker_management/systemd_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,20 @@
end
end

# Verifies that cleanup_orphaned_worker_rows leaves no MiqWorker rows behind
# when the worker manager's cached process list is cleared.
# NOTE(review): the systemd cleanup_orphaned_worker_rows shown in this change
# is an empty method -- confirm this expectation holds for the worker manager
# under test.
context "#cleanup_orphaned_worker_rows" do
  # Eagerly created worker row owned by this server.
  let!(:worker) do
    FactoryBot.create(:miq_worker, :type => "MiqGenericWorker", :miq_server => server, :status => MiqWorker::STATUS_CREATING, :last_heartbeat => 5.minutes.ago)
  end

  # Reset the cached @miq_processes on the worker manager. An earlier hook
  # that set it to [] was immediately overwritten by this one, so only the
  # effective assignment is kept.
  before { server.worker_manager.instance_variable_set(:@miq_processes, nil) }

  it "removes this server's orphaned rows" do
    server.worker_manager.cleanup_orphaned_worker_rows
    expect(MiqWorker.count).to be_zero
  end
end

context "#failed_miq_services (private)" do
before { server.worker_manager.sync_from_system }

Expand Down

0 comments on commit 625f118

Please sign in to comment.