From 7f8c91033921e0ac3ed925612645ec45df2fff79 Mon Sep 17 00:00:00 2001 From: Martin Matyas Date: Wed, 10 Jul 2024 09:19:14 +0200 Subject: [PATCH] Add possibility to change node drain duration (#2102) Add possibility to change duration of node drain litmus chaos test. This is needed for CNFs with longer startup/shutdown. Additionally, fix litmus waiter code and timeout module. Slight refactor of LitmusManager module. Refs: #2098 Signed-off-by: Martin Matyas --- USAGE.md | 1 + src/tasks/litmus_setup.cr | 65 ++++++++++++------------------ src/tasks/utils/chaos_templates.cr | 4 +- src/tasks/utils/timeouts.cr | 6 +-- 4 files changed, 33 insertions(+), 43 deletions(-) diff --git a/USAGE.md b/USAGE.md index d705d6e01..1b4476b59 100644 --- a/USAGE.md +++ b/USAGE.md @@ -157,6 +157,7 @@ CNF_TESTSUITE_RESOURCE_CREATION_TIMEOUT=120 CNF_TESTSUITE_NODE_READINESS_TIMEOUT=240 CNF_TESTSUITE_POD_READINESS_TIMEOUT=180 CNF_TESTSUITE_LITMUS_CHAOS_TEST_TIMEOUT=1800 +CNF_TESTSUITE_NODE_DRAIN_TOTAL_CHAOS_DURATION=90 ``` #### Running The Linter in Developer Mode diff --git a/src/tasks/litmus_setup.cr b/src/tasks/litmus_setup.cr index ce108019a..4774910c0 100644 --- a/src/tasks/litmus_setup.cr +++ b/src/tasks/litmus_setup.cr @@ -58,6 +58,7 @@ module LitmusManager DOWNLOADED_LITMUS_FILE = "litmus-operator-downloaded.yaml" MODIFIED_LITMUS_FILE = "litmus-operator-modified.yaml" LITMUS_NAMESPACE = "litmus" + LITMUS_K8S_DOMAIN = "litmuschaos.io" @@ -85,60 +86,46 @@ module LitmusManager appNodeName_response.to_s end + private def self.get_status_info(chaos_resource, test_name, output_format, namespace) : {Int32, String} + status_cmd = "kubectl get #{chaos_resource}.#{LITMUS_K8S_DOMAIN} #{test_name} -n #{namespace} -o '#{output_format}'" + Log.info { "Getting litmus status info: #{status_cmd}" } + status_code = Process.run("#{status_cmd}", shell: true, output: status_response = IO::Memory.new, error: stderr = IO::Memory.new).exit_status + status_response = status_response.to_s + Log.info { "status_code: #{status_code}, response: #{status_response}" } + {status_code, status_response} + end + + private def self.get_status_info_until(chaos_resource, test_name, output_format, timeout, namespace, &block) + repeat_with_timeout(timeout: timeout, errormsg: "Litmus response timed-out") do + status_code, status_response = get_status_info(chaos_resource, test_name, output_format, namespace) + status_code == 0 && yield status_response + end + end + ## wait_for_test will wait for the completion of litmus test def self.wait_for_test(test_name, chaos_experiment_name, args, namespace : String = "default") chaos_result_name = "#{test_name}-#{chaos_experiment_name}" - - experimentStatus_cmd = "kubectl get chaosengine.litmuschaos.io #{test_name} -n #{namespace} -o jsonpath='{.status.engineStatus}'" - Log.for("wait_for_test").info { "Checking experiment status #{experimentStatus_cmd}" } if check_verbose(args) - - ## Wait for completion of chaosengine which indicates the completion of chaos - repeat_with_timeout(timeout: LITMUS_CHAOS_TEST_TIMEOUT, errormsg: "Litmus test has timed-out") do - status_code = Process.run("#{experimentStatus_cmd}", - shell: true, - output: experimentStatus_response = IO::Memory.new, - error: stderr = IO::Memory.new).exit_status - Log.for("wait_for_test").info { "#{chaos_experiment_name} status_code: #{status_code}" } if check_verbose(args) - experimentStatus = experimentStatus_response.to_s - Log.for("wait_for_test").info {"#{chaos_experiment_name} experiment status: " + experimentStatus} - if (experimentStatus != "Waiting for Job Creation" && experimentStatus != "Running" && experimentStatus != "Completed") - true - else - status_code == 0 && experimentStatus == "Completed" - end + Log.info { "wait_for_test: #{chaos_result_name}" } + + get_status_info_until("chaosengine", test_name, "jsonpath={.status.engineStatus}", LITMUS_CHAOS_TEST_TIMEOUT, namespace) do |engineStatus| + ["completed", "stopped"].includes?(engineStatus) end - verdict_cmd = "kubectl get chaosresults.litmuschaos.io #{chaos_result_name} -n #{namespace} -o jsonpath='{.status.experimentStatus.verdict}'" - Log.for("wait_for_test").info { "Checking experiment verdict #{verdict_cmd}" } if check_verbose(args) - ## Check the chaosresult verdict - repeat_with_timeout(timeout: GENERIC_OPERATION_TIMEOUT, errormsg: "Litmus verdict aquiring has timed-out") do - status_code = Process.run("#{verdict_cmd}", - shell: true, - output: verdict_response = IO::Memory.new, - error: stderr = IO::Memory.new).exit_status - Log.for("wait_for_test").info { "status_code: #{status_code}" } if check_verbose(args) - Log.for("wait_for_test").info { "verdict: #{verdict_response.to_s}" } if check_verbose(args) - verdict = verdict_response.to_s - status_code == 0 && verdict != "Awaited" + get_status_info_until("chaosresults", chaos_result_name, "jsonpath={.status.experimentStatus.verdict}", GENERIC_OPERATION_TIMEOUT, namespace) do |verdict| + verdict != "Awaited" end end ## check_chaos_verdict will check the verdict of chaosexperiment def self.check_chaos_verdict(chaos_result_name, chaos_experiment_name, args, namespace : String = "default") : Bool - verdict_cmd = "kubectl get chaosresults.litmuschaos.io #{chaos_result_name} -n #{namespace} -o jsonpath='{.status.experimentStatus.verdict}'" - Log.for("LitmusManager.check_chaos_verdict").debug { "Checking experiment verdict with command: #{verdict_cmd}" } - status_code = Process.run("#{verdict_cmd}", shell: true, output: verdict_response = IO::Memory.new, error: stderr = IO::Memory.new).exit_status - Log.for("LitmusManager.check_chaos_verdict").debug { "status_code: #{status_code}; verdict: #{verdict_response.to_s}" } - verdict = verdict_response.to_s + _, verdict = get_status_info("chaosresult", chaos_result_name, "jsonpath={.status.experimentStatus.verdict}", namespace) - emoji_test_failed= "🗡️💀♻️" if verdict == "Pass" return true else Log.for("LitmusManager.check_chaos_verdict#details").debug do - verdict_details_cmd = "kubectl get chaosresults.litmuschaos.io #{chaos_result_name} -n #{namespace} -o json" - status_code = Process.run("#{verdict_details_cmd}", shell: true, output: verdict_details_response = IO::Memory.new, error: stderr = IO::Memory.new).exit_status - "#{verdict_details_response.to_s}" + status_code, verdict_details_response = get_status_info("chaosresult", chaos_result_name, "json", namespace) + "#{verdict_details_response}" end Log.for("LitmusManager.check_chaos_verdict").info {"#{chaos_experiment_name} chaos test failed: #{chaos_result_name}, verdict: #{verdict}"} diff --git a/src/tasks/utils/chaos_templates.cr b/src/tasks/utils/chaos_templates.cr index e19e3644b..b130373a2 100644 --- a/src/tasks/utils/chaos_templates.cr +++ b/src/tasks/utils/chaos_templates.cr @@ -1,3 +1,5 @@ +NODE_DRAIN_TOTAL_CHAOS_DURATION = ENV.has_key?("CNF_TESTSUITE_NODE_DRAIN_TOTAL_CHAOS_DURATION") ? ENV["CNF_TESTSUITE_NODE_DRAIN_TOTAL_CHAOS_DURATION"].to_i : 90 + class ChaosTemplates class PodIoStress def initialize( @@ -113,7 +115,7 @@ class ChaosTemplates @deployment_label : String, @deployment_label_value : String, @app_nodename : String, - @total_chaos_duration : String = "90" + @total_chaos_duration : String = "#{NODE_DRAIN_TOTAL_CHAOS_DURATION}" ) end ECR.def_to_s("src/templates/chaos_templates/node_drain.yml.ecr") diff --git a/src/tasks/utils/timeouts.cr b/src/tasks/utils/timeouts.cr index ae8dd2594..fdeef69f0 100644 --- a/src/tasks/utils/timeouts.cr +++ b/src/tasks/utils/timeouts.cr @@ -9,7 +9,7 @@ LITMUS_CHAOS_TEST_TIMEOUT = ENV.has_key?("CNF_TESTSUITE_LITMUS_CHAOS_TEST_TIMEOU def repeat_with_timeout(timeout, errormsg, reset_on_nil=false, delay=2, &block) start_time = Time.utc - while (Time.utc - start_time).seconds < timeout + while (Time.utc - start_time).to_i < timeout result = yield if result.nil? if reset_on_nil @@ -21,8 +21,8 @@ def repeat_with_timeout(timeout, errormsg, reset_on_nil=false, delay=2, &block) return true end sleep delay - Log.for("verbose").info { "Time left: #{timeout - (Time.utc - start_time).seconds} seconds" } + Log.for("verbose").info { "Time left: #{timeout - (Time.utc - start_time).to_i} seconds" } end Log.error { errormsg } false -end \ No newline at end of file +end