Skip to content

Commit

Permalink
Merge pull request ManageIQ#22674 from kbrock/speed_delete
Browse files Browse the repository at this point in the history
Add deep delete functionality
  • Loading branch information
Fryguy authored Nov 1, 2023
2 parents 6e66c89 + 43c2652 commit b9698d8
Show file tree
Hide file tree
Showing 2 changed files with 250 additions and 0 deletions.
195 changes: 195 additions & 0 deletions lib/manageiq/deep_delete.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
# USAGE:
#
# toggle_console_sql_logging
# ManageIQ::DeepDelete.delete(ExtManagementSystem.find(2))
#
# currently only tested on ContainerManager / ems
module ManageIQ
class DeepDelete
include Vmdb::Logging
# list of callbacks that we have reviewed and can safely ignore
IGNORE_CALLBACKS = {
# Storages#all_relationships(ems_metadata) - we're deleting all children anyway
"Storages" => 1,
# EmsFolders#all_relationships(ems_metadata)
"EmsFolders" => 1,
# EmsClusters#all_relationships(ems_metadata)
"EmsClusters" => 1,
# ResourcePools#all_relationships(ems_metadata) - TODO: do we want ancestry here?
"ResourcePools" => 1,
# MonitorManager => ExtManagementSystem
"ExtManagementSystem" => 1,
# ContainerManager#monitoring_manager (apply_orphan_strategy?)
# MonitorManager#endpoints: endpoint_destroyed (using this/destroy_queue instead of dependent => destroy)
# ContainerManager#persistent_volumes (?)
# InfraManager#orchestration_templates (check_not_in_use (before destroy)
# InfraManager#orchestration_stacks apply_orphan_strategy
# BlacklistedEvent#reload_all_server_settings, BlacklistedEvent#audit_deletion
"BlacklistedEvent" => 2,
# EventStream.after_commit :emit_notifications, :on => :create
"EventStream" => 1
# VmOrTemplate#apply_orphan_strategy (/via ancesty - needed to destroy tree)
# Relationship#apply_orphan_strategy (/via ancestry - needed to destroy tree)
# Endpoint#endpoint_destroyed (using this/destroy_queue instead of dependent => destroy)
}.freeze

IGNORE_CONSTRAINTS = {
# Hardware.disks is an order clause
"Hardware.disks" => true,
# MiqAlertStatus.miq_alert_status_actions is an order clause
"MiqAlertStatus.miq_alert_status_actions" => true,
# BinaryBlob.binary_blob_parts is an order clause
"BinaryBlob.binary_blob_parts" => true
}.freeze

# true to recurse into associations that have no values and would be skipped
# Useful for developers to ensure all relationships could be reached
attr_accessor :visit_skips

def initialize(visit_skips: false)
@visit_skips = visit_skips
end

# @param scope the record(s) to be destroyed - only a single child class/type can be passed in
def self.delete(scope, **options)
if scope.kind_of?(ActiveRecord::Base)
# convert an individual record into a scope
scope = scope.class.where(:id => scope.id)
else
# update scope.klass to a more specific class.
# Ems child classes have additional relationships defined
record = scope.first
if record.nil?
_log.error("DeepDelete found no records")
return
end
scope = record.class.merge(scope)
end

name = scope.klass.name.split(":").last
_, timing = Benchmark.realtime_block(:deep_delete) do
new(**options).recursive_delete(scope, :name => name)
end
_log.info("Finished DeepDelete in #{timing[:deep_delete]} seconds")

self
end

# @param scope [Relation] scope used to reach this relation
# @param klass [Class] class for this relation (default to scope's klass)
# @param name [String] name of this relation relative to the root node
# @param mode [:delete_all, :destroy] what we are to do with this node
# If we are deleting this node, then we do not traverse into children to destroy them
# @return [Numeric] number of outstanding callbacks (so others know not to delete this object)
def recursive_delete(scope, klass = scope.klass, name: klass.name)
_log.debug { "=> deep_delete #{name} begin" }
# TODO: fetch distinct.pluck(:type).map(&:constantize).map { |k| refs_callbacks(k)} - to handle STI?
# current code does not require this but may be more future proof
refs, callbacks = refs_callbacks(scope.klass)

has_record = scope.exists?
if has_record || visit_skips
refs.each do |n, relation|
rscope = rel_scope(klass, relation, scope)
# rscope = select * from hardwares where host_id in (select id from hosts where ems_id in (select id from ems where id = 2))
rname = "#{name}.#{n}"
case relation.options[:dependent]
when :destroy
# If we recurse to self without ensuring a record exists, then this will be an infinite loop
if has_record || relation.klass != scope.klass
recursive_delete(rscope, :name => rname)
end
when :delete_all
run(rname, "delete") { batch(rscope) { |subset| subset.delete_all } }
when :nullify
run(rname, "null") { rscope.update_all(relation.foreign_key => nil) } # rubocop:disable Rails/SkipsModelValidations
else
raise "unknown relation dependent option #{rname} :dependent => #{relation.options[:dependent]}"
end
end
end

if !has_record
skip_run(name, callbacks ? "destroy" : "delete")
elsif callbacks
# issue - this works for after destroy, but may have problems with a before destroy hook
# they can reference children
run(name, "destroy") { batch(scope) { |subset| subset.destroy_all.count } }
else
run(name, "delete") { batch(scope) { |subset| subset.delete_all } }
end
_log.debug { "/> deep_delete #{name}" }
end

private

# @param scope starting table
# @param relation foreign table (i.e.: relation = scope.klass.send(relation.name))
def rel_scope(klass, relation, scope)
# Hardware#disks MiqAlertStatus#miq_alert_status_action have order constraint - so are false positives
# TODO: TEST harnes if there is a constraint, don't have dependent destroy
if !relation.constraints.empty? && !IGNORE_CONSTRAINTS["#{scope.klass.name}.#{relation.name}"]
_log.warn("CONSTRAINT: #{scope.klass.name}.#{relation.name} not handled")
end

# For a :through, point to the link record not the target record
# ASIDE: chances are the :through relation also has a destroy, so this will be a no-op
if relation.options[:through]
_log.warn("RELATION: #{scope.klass.name}.#{relation.name} has a :through. move destroy to linking record")
relation = klass.reflections[relation.options[:through].to_s]
end

relation.chain.reverse.inject(scope) do |sc, ar|
ret = ar.klass.where(ar.join_primary_key => sc.select(ar.join_foreign_key))
ar.type ? ret.where(ar.type => sc.klass.polymorphic_name) : ret
end
end

# @param klass [Class] class of model that has reflections
# @return [Array<Reflection>, Boolean]
def refs_callbacks(klass)
dependent_refs = klass.reflections.select { |_, v| v.options[:dependent] }
callback_count = klass._destroy_callbacks.count + klass._commit_callbacks.count
ruby_callback_count = callback_count - dependent_refs.count

ruby_callbacks_we_can_ignore = IGNORE_CALLBACKS[klass.name]&.to_i || IGNORE_CALLBACKS[klass.base_class.name].to_i
need_to_call_ruby_callbacks = ruby_callback_count > ruby_callbacks_we_can_ignore
[dependent_refs, need_to_call_ruby_callbacks]
end

# in_batches for delete or destroy (not nullify)
# similar to:
# scope.in_batches(of: batch_size, :load => true).destroy_all.count
#
# @block takes a subscope and returns a count
def batch(scope, batch_size: 1000, &block)
pk = scope.primary_key
total = 0

loop do
if (id = scope.order(pk).limit(1).offset(batch_size).pluck(pk).first)
total += block.call(scope.where("#{pk} < ?", id))
else
return total += block.call(scope)
end
end
end

def run(name, action, &block)
count, timings = Benchmark.realtime_block(action, &block)
_log.info("%-7{action} %{name} %{count} records in %.6<timings>d seconds" % {
:action => action,
:count => count,
:name => name,
:timings => timings[action]
})
end

def skip_run(name, action)
_log.debug("%-7{action} %{name} skipped" % {
:action => action,
:name => name
})
end
end
end
55 changes: 55 additions & 0 deletions spec/lib/manageiq/deep_delete_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
RSpec.describe ManageIQ::DeepDelete do
let(:ems) { FactoryBot.create(:ems_container) }
let(:ems_other) { FactoryBot.create(:ems_container) }

it "calls destroy for objects with ruby callbacks" do
expect_any_instance_of(ems.class).to receive(:destroy)

described_class.delete(ems)
end

it "deletes Ems#has_many" do
child1 = FactoryBot.create(:storage, :ext_management_system => ems)
child2 = FactoryBot.create(:storage, :ext_management_system => ems_other)
described_class.delete(ems)

expect(child1).to be_deleted
expect(child2).not_to be_deleted
end

it "nulls out Ems#has_many" do
child1 = FactoryBot.create(:vm, :ext_management_system => ems)
child2 = FactoryBot.create(:vm, :ext_management_system => ems_other)
described_class.delete(ems)

expect(child1.reload.ext_management_system).to eq(nil)
expect(child2.reload.ext_management_system).to eq(ems_other)
end

it "deletes (child STI class) ContainerManager#has_many" do
child1 = FactoryBot.create(:container_service, :ext_management_system => ems)
child2 = FactoryBot.create(:container_service, :ext_management_system => ems_other)

described_class.delete(ExtManagementSystem.where(:id => ems.id))
expect(child1).to be_deleted
expect(child2).not_to be_deleted
end

it "deletes has_many :through deletes the join record not the target record" do
ems = FactoryBot.create(:ems_cloud)
net_ems = ems.network_manager
subnet = FactoryBot.create(:cloud_subnet, :ext_management_system => net_ems)
# has_many :cloud_subnet_network_port, :dependent => destroy
# has_many :network_ports, :through => :cloud_subnet_network_ports
# we've since moved the :dependent => :destroy from network_ports, to cloud_subnet_network_port
# but it would have the same outcome
net_port = FactoryBot.create(:network_port, :ext_management_system => net_ems)
subnet_port = FactoryBot.create(:cloud_subnet_network_port, :cloud_subnet => subnet, :network_port => net_port)

described_class.delete(subnet)

expect(subnet).to be_deleted
expect(net_port).not_to be_deleted
expect(subnet_port).to be_deleted
end
end

0 comments on commit b9698d8

Please sign in to comment.