From be31d108bf7c0c15fed692eacc806936fdfa09fd Mon Sep 17 00:00:00 2001
From: Alex-Welsh
Date: Tue, 13 Aug 2024 16:23:17 +0100
Subject: [PATCH 1/5] Add RabbitMQ Quorum queue migration playbook

---
Review notes (commentary after '---' is not part of the commit message):
  - "Fail if the clock is not synchronized" asserts that 'synchronized: yes'
    is NOT in the timedatectl output, so the assertion passes only when that
    string is absent. This contradicts the task name and its fail_msg.
  - "Ensure that no queues exist" calls `docker exec rabbitmq` directly
    rather than using rabbitmq_container_name, and registers `queues`, which
    "Inspect RabbitMQ queues" registers again before the value is read.

 .../ansible/migrate-rabbitmq-queues.yml       | 114 ++++++++++++++++++
 ...t-migration-playbook-4721c2e37ac9d3c0.yaml |   4 +
 2 files changed, 118 insertions(+)
 create mode 100644 etc/kayobe/ansible/migrate-rabbitmq-queues.yml
 create mode 100644 releasenotes/notes/rabbit-migration-playbook-4721c2e37ac9d3c0.yaml

diff --git a/etc/kayobe/ansible/migrate-rabbitmq-queues.yml b/etc/kayobe/ansible/migrate-rabbitmq-queues.yml
new file mode 100644
index 000000000..745346038
--- /dev/null
+++ b/etc/kayobe/ansible/migrate-rabbitmq-queues.yml
@@ -0,0 +1,114 @@
+---
+# Migrate RabbitMQ queues from HA to Quorum
+# This is primarily used in CI workflows
+
+- name: Migrate RabbitMQ queues
+  hosts: localhost
+  gather_facts: no
+  vars:
+    - rabbitmq_container_name: "rabbitmq"
+    - services_to_restart: "barbican,blazar,cinder,cloudkitty,designate,heat,ironic,keystone,magnum,manila,neutron,nova,octavia"
+  tasks:
+    - name: Checking timedatectl status
+      command: timedatectl status
+      register: timedatectl_status
+      changed_when: false
+
+    - name: Generate kolla configuration
+      shell:
+        cmd: >
+          kayobe overcloud service configuration generate --node-config-dir /tmp/rabbit-migration --kolla-tags none
+        executable: /bin/bash
+      run_once: true
+      delegate_to: localhost
+      changed_when: false
+
+    - name: Fail if HA is set or Quorum is unset
+      shell:
+        cmd: >
+          grep 'om_enable_rabbitmq_quorum_queues: true' $KOLLA_CONFIG_PATH/globals.yml &&
+          ! grep 'om_enable_rabbitmq_high_availability: true' $KOLLA_CONFIG_PATH/globals.yml
+        executable: /bin/bash
+      run_once: true
+      delegate_to: localhost
+      changed_when: false
+
+    - name: Fail if the clock is not synchronized
+      assert:
+        that:
+          - "'synchronized: yes' not in timedatectl_status.stdout"
+        fail_msg: >
+          timedatectl sees the system clock as unsynchronized.
+          You may need to force synchronisation using `chronyc makestep`.
+          Otherwise, please wait for synchronization.
+
+    - name: Inspect the {{ rabbitmq_container_name }} container
+      shell:
+        cmd: "docker container inspect --format '{{ '{{' }} .State.Running {{ '}}' }}' {{ rabbitmq_container_name }}"
+      register: inspection
+      become: true
+
+    - name: Ensure the {{ rabbitmq_container_name }} container is running
+      command: "systemctl start kolla-{{ rabbitmq_container_name }}-container.service"
+      when: inspection.stdout == 'false'
+      become: true
+
+    - name: Wait for the {{ rabbitmq_container_name }} container to reach state 'Running'
+      shell:
+        cmd: "docker container inspect --format '{{ '{{' }} .State.Running {{ '}}' }}' {{ rabbitmq_container_name }}"
+      register: result
+      until: result.stdout == 'true'
+      retries: 10
+      delay: 6
+      become: true
+
+    - name: Wait for the rabbitmq node to automatically start on container start
+      command: "docker exec {{ rabbitmq_container_name }} /bin/bash -c 'rabbitmqctl wait /var/lib/rabbitmq/mnesia/rabbitmq.pid --timeout 60'"
+      when: inspection.stdout == 'false'
+      become: true
+
+    - name: Generate new configuration and stop services
+      shell:
+        cmd: >
+          kayobe overcloud service configuration generate --node-config-dir /etc/kolla --kolla-skip-tags rabbitmq-ha-precheck &&
+          kayobe kolla ansible run "stop --yes-i-really-really-mean-it" -kt {{ services_to_restart }} &&
+          kayobe kolla ansible run rabbitmq-reset-state
+        executable: /bin/bash
+      run_once: true
+      delegate_to: localhost
+      tags: rabbit-queue-migration
+
+    - name: Ensure that no queues exist
+      shell:
+        cmd: >
+          docker exec rabbitmq rabbitmqctl list_queues name --silent &&
+          docker exec rabbitmq rabbitmqctl list_exchanges name --silent
+          | grep -v '^$'
+          | (! grep -v 'amq.')
+        executable: /bin/bash
+      become: true
+      register: queues
+
+    - name: Redeploy services with quorum queues
+      shell:
+        cmd: >
+          kayobe kolla ansible run deploy -kt {{ services_to_restart }}
+        executable: /bin/bash
+      run_once: true
+      delegate_to: localhost
+      tags: rabbit-queue-migration
+
+    - name: Inspect RabbitMQ queues
+      shell:
+        cmd: "docker exec {{ rabbitmq_container_name }} rabbitmqctl list_queues type"
+      run_once: true
+      delegate_to: localhost
+      register: queues
+      become: true
+
+    - name: Assert that queues have been migrated
+      assert:
+        that: "{{ 'quorum' in queues.stdout }}"
+        fail_msg: Queue migration has failed. Run the migration manually.
+      run_once: true
+      delegate_to: localhost
diff --git a/releasenotes/notes/rabbit-migration-playbook-4721c2e37ac9d3c0.yaml b/releasenotes/notes/rabbit-migration-playbook-4721c2e37ac9d3c0.yaml
new file mode 100644
index 000000000..dd5aefe10
--- /dev/null
+++ b/releasenotes/notes/rabbit-migration-playbook-4721c2e37ac9d3c0.yaml
@@ -0,0 +1,4 @@
+---
+features:
+  - |
+    Added a new playbook to automate RabbitMQ Quorum queue migrations.

From 4e5e88dbe556e3cc2fac69c50bf993efa4700c7e Mon Sep 17 00:00:00 2001
From: Alex-Welsh
Date: Wed, 14 Aug 2024 14:49:19 +0100
Subject: [PATCH 2/5] Explicitly set RabbitMQ queue types in AIO

---
 etc/kayobe/environments/ci-aio/kolla/globals.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/etc/kayobe/environments/ci-aio/kolla/globals.yml b/etc/kayobe/environments/ci-aio/kolla/globals.yml
index 3967a5075..99d48d4e5 100644
--- a/etc/kayobe/environments/ci-aio/kolla/globals.yml
+++ b/etc/kayobe/environments/ci-aio/kolla/globals.yml
@@ -14,3 +14,7 @@ opensearch_heap_size: 200m

 # Increase Grafana timeout
 grafana_start_first_node_retries: 20
+
+# Ensure Rabbit is deployed with HA rather than Quorum queues (to test migrations)
+om_enable_rabbitmq_high_availability: true
+om_enable_rabbitmq_quorum_queues: false

From ad8aa2a64990b02ec0f770a02119c54ff7e98850 Mon Sep 17 00:00:00 2001
From: Alex-Welsh
Date: Thu, 15 Aug 2024 14:33:08 +0100
Subject: [PATCH 3/5] Fix rabbit migration playbook

---
Review notes (commentary after '---' is not part of the commit message):
  - This patch retargets the play at `controllers` and moves the clock
    assertion earlier; the assertion's condition is carried over verbatim,
    i.e. it still requires 'synchronized: yes' to be absent from the
    timedatectl output.

 .../ansible/migrate-rabbitmq-queues.yml       | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/etc/kayobe/ansible/migrate-rabbitmq-queues.yml b/etc/kayobe/ansible/migrate-rabbitmq-queues.yml
index 745346038..4842cad4d 100644
--- a/etc/kayobe/ansible/migrate-rabbitmq-queues.yml
+++ b/etc/kayobe/ansible/migrate-rabbitmq-queues.yml
@@ -3,7 +3,7 @@
 # This is primarily used in CI workflows

 - name: Migrate RabbitMQ queues
-  hosts: localhost
+  hosts: controllers
   gather_facts: no
   vars:
     - rabbitmq_container_name: "rabbitmq"
@@ -14,6 +14,15 @@
       register: timedatectl_status
       changed_when: false

+    - name: Fail if the clock is not synchronized
+      assert:
+        that:
+          - "'synchronized: yes' not in timedatectl_status.stdout"
+        fail_msg: >
+          timedatectl sees the system clock as unsynchronized.
+          You may need to force synchronisation using `chronyc makestep`.
+          Otherwise, please wait for synchronization.
+
     - name: Generate kolla configuration
       shell:
         cmd: >
@@ -33,15 +42,6 @@
       delegate_to: localhost
       changed_when: false

-    - name: Fail if the clock is not synchronized
-      assert:
-        that:
-          - "'synchronized: yes' not in timedatectl_status.stdout"
-        fail_msg: >
-          timedatectl sees the system clock as unsynchronized.
-          You may need to force synchronisation using `chronyc makestep`.
-          Otherwise, please wait for synchronization.
-
     - name: Inspect the {{ rabbitmq_container_name }} container
       shell:
         cmd: "docker container inspect --format '{{ '{{' }} .State.Running {{ '}}' }}' {{ rabbitmq_container_name }}"

From eedc6fababd1f345c983275c4e0d0fd0238ef322 Mon Sep 17 00:00:00 2001
From: Alex-Welsh
Date: Fri, 16 Aug 2024 11:14:36 +0100
Subject: [PATCH 4/5] Refactor RabbitMQ migration playbook into script

---
Review notes (commentary after '---' is not part of the commit message):
  - The new script only validates $KAYOBE_CONFIG_PATH, but its quorum/HA
    precheck greps $KOLLA_CONFIG_PATH/globals.yml; that variable is never
    checked and is expanded unquoted.
  - The exchange check filters with grep -v '^amq.', where the unescaped
    '.' matches any character, not just a literal dot.

 .../ansible/migrate-rabbitmq-queues.yml       | 114 ------------------
 ...t-migration-playbook-4721c2e37ac9d3c0.yaml |   4 -
 ...bit-migration-script-4721c2e37ac9d3c0.yaml |   4 +
 tools/rabbitmq-quorum-migration.sh            |  60 +++++++++
 4 files changed, 64 insertions(+), 118 deletions(-)
 delete mode 100644 etc/kayobe/ansible/migrate-rabbitmq-queues.yml
 delete mode 100644 releasenotes/notes/rabbit-migration-playbook-4721c2e37ac9d3c0.yaml
 create mode 100644 releasenotes/notes/rabbit-migration-script-4721c2e37ac9d3c0.yaml
 create mode 100755 tools/rabbitmq-quorum-migration.sh

diff --git a/etc/kayobe/ansible/migrate-rabbitmq-queues.yml b/etc/kayobe/ansible/migrate-rabbitmq-queues.yml
deleted file mode 100644
index 4842cad4d..000000000
--- a/etc/kayobe/ansible/migrate-rabbitmq-queues.yml
+++ /dev/null
@@ -1,114 +0,0 @@
----
-# Migrate RabbitMQ queues from HA to Quorum
-# This is primarily used in CI workflows
-
-- name: Migrate RabbitMQ queues
-  hosts: controllers
-  gather_facts: no
-  vars:
-    - rabbitmq_container_name: "rabbitmq"
-    - services_to_restart: "barbican,blazar,cinder,cloudkitty,designate,heat,ironic,keystone,magnum,manila,neutron,nova,octavia"
-  tasks:
-    - name: Checking timedatectl status
-      command: timedatectl status
-      register: timedatectl_status
-      changed_when: false
-
-    - name: Fail if the clock is not synchronized
-      assert:
-        that:
-          - "'synchronized: yes' not in timedatectl_status.stdout"
-        fail_msg: >
-          timedatectl sees the system clock as unsynchronized.
-          You may need to force synchronisation using `chronyc makestep`.
-          Otherwise, please wait for synchronization.
-
-    - name: Generate kolla configuration
-      shell:
-        cmd: >
-          kayobe overcloud service configuration generate --node-config-dir /tmp/rabbit-migration --kolla-tags none
-        executable: /bin/bash
-      run_once: true
-      delegate_to: localhost
-      changed_when: false
-
-    - name: Fail if HA is set or Quorum is unset
-      shell:
-        cmd: >
-          grep 'om_enable_rabbitmq_quorum_queues: true' $KOLLA_CONFIG_PATH/globals.yml &&
-          ! grep 'om_enable_rabbitmq_high_availability: true' $KOLLA_CONFIG_PATH/globals.yml
-        executable: /bin/bash
-      run_once: true
-      delegate_to: localhost
-      changed_when: false
-
-    - name: Inspect the {{ rabbitmq_container_name }} container
-      shell:
-        cmd: "docker container inspect --format '{{ '{{' }} .State.Running {{ '}}' }}' {{ rabbitmq_container_name }}"
-      register: inspection
-      become: true
-
-    - name: Ensure the {{ rabbitmq_container_name }} container is running
-      command: "systemctl start kolla-{{ rabbitmq_container_name }}-container.service"
-      when: inspection.stdout == 'false'
-      become: true
-
-    - name: Wait for the {{ rabbitmq_container_name }} container to reach state 'Running'
-      shell:
-        cmd: "docker container inspect --format '{{ '{{' }} .State.Running {{ '}}' }}' {{ rabbitmq_container_name }}"
-      register: result
-      until: result.stdout == 'true'
-      retries: 10
-      delay: 6
-      become: true
-
-    - name: Wait for the rabbitmq node to automatically start on container start
-      command: "docker exec {{ rabbitmq_container_name }} /bin/bash -c 'rabbitmqctl wait /var/lib/rabbitmq/mnesia/rabbitmq.pid --timeout 60'"
-      when: inspection.stdout == 'false'
-      become: true
-
-    - name: Generate new configuration and stop services
-      shell:
-        cmd: >
-          kayobe overcloud service configuration generate --node-config-dir /etc/kolla --kolla-skip-tags rabbitmq-ha-precheck &&
-          kayobe kolla ansible run "stop --yes-i-really-really-mean-it" -kt {{ services_to_restart }} &&
-          kayobe kolla ansible run rabbitmq-reset-state
-        executable: /bin/bash
-      run_once: true
-      delegate_to: localhost
-      tags: rabbit-queue-migration
-
-    - name: Ensure that no queues exist
-      shell:
-        cmd: >
-          docker exec rabbitmq rabbitmqctl list_queues name --silent &&
-          docker exec rabbitmq rabbitmqctl list_exchanges name --silent
-          | grep -v '^$'
-          | (! grep -v 'amq.')
-        executable: /bin/bash
-      become: true
-      register: queues
-
-    - name: Redeploy services with quorum queues
-      shell:
-        cmd: >
-          kayobe kolla ansible run deploy -kt {{ services_to_restart }}
-        executable: /bin/bash
-      run_once: true
-      delegate_to: localhost
-      tags: rabbit-queue-migration
-
-    - name: Inspect RabbitMQ queues
-      shell:
-        cmd: "docker exec {{ rabbitmq_container_name }} rabbitmqctl list_queues type"
-      run_once: true
-      delegate_to: localhost
-      register: queues
-      become: true
-
-    - name: Assert that queues have been migrated
-      assert:
-        that: "{{ 'quorum' in queues.stdout }}"
-        fail_msg: Queue migration has failed. Run the migration manually.
-      run_once: true
-      delegate_to: localhost
diff --git a/releasenotes/notes/rabbit-migration-playbook-4721c2e37ac9d3c0.yaml b/releasenotes/notes/rabbit-migration-playbook-4721c2e37ac9d3c0.yaml
deleted file mode 100644
index dd5aefe10..000000000
--- a/releasenotes/notes/rabbit-migration-playbook-4721c2e37ac9d3c0.yaml
+++ /dev/null
@@ -1,4 +0,0 @@
----
-features:
-  - |
-    Added a new playbook to automate RabbitMQ Quorum queue migrations.
diff --git a/releasenotes/notes/rabbit-migration-script-4721c2e37ac9d3c0.yaml b/releasenotes/notes/rabbit-migration-script-4721c2e37ac9d3c0.yaml
new file mode 100644
index 000000000..8aa993885
--- /dev/null
+++ b/releasenotes/notes/rabbit-migration-script-4721c2e37ac9d3c0.yaml
@@ -0,0 +1,4 @@
+---
+features:
+  - |
+    Added a script to automate RabbitMQ Quorum queue migrations.
diff --git a/tools/rabbitmq-quorum-migration.sh b/tools/rabbitmq-quorum-migration.sh
new file mode 100755
index 000000000..2b4e973dd
--- /dev/null
+++ b/tools/rabbitmq-quorum-migration.sh
@@ -0,0 +1,60 @@
+#! /usr/bin/bash
+
+set -ex
+
+RABBITMQ_SERVICES_TO_RESTART=barbican,blazar,cinder,cloudkitty,designate,heat,ironic,keystone,magnum,manila,neutron,nova,octavia
+RABBITMQ_CONTAINER_NAME=rabbitmq
+
+if [[ ! $KAYOBE_CONFIG_PATH ]]; then
+    echo "Environment variable \$KAYOBE_CONFIG_PATH is not defined"
+    echo "Ensure your environment is set up to run kayobe commands"
+    exit 2
+fi
+
+if [[ ! "$1" = "--skip-checks" ]]; then
+    # Fail if clocks are not synced
+    if ! kayobe overcloud host command run -l controllers -b --command "timedatectl status | grep 'synchronized: yes'"; then
+        echo "Failed precheck: Time not synced on controllers"
+        echo "Use 'timedatectl status' to check sync state"
+        echo "Either wait for sync or use 'chronyc makestep'"
+        exit 1
+    fi
+    kayobe overcloud service configuration generate --node-config-dir /tmp/rabbit-migration --kolla-tags none
+    # Fail if HA is set or Quorum is not
+    if ! grep 'om_enable_rabbitmq_quorum_queues: true' $KOLLA_CONFIG_PATH/globals.yml || grep 'om_enable_rabbitmq_high_availability: true' $KOLLA_CONFIG_PATH/globals.yml; then
+        echo "Failed precheck: om_enable_rabbitmq_quorum_queues must be enabled, om_enable_rabbitmq_high_availability must be disabled"
+        exit 1
+    fi
+fi
+
+# Generate new config, stop services using rabbit, and reset rabbit state
+kayobe overcloud service configuration generate --node-config-dir /etc/kolla --kolla-skip-tags rabbitmq-ha-precheck &&
+kayobe kolla ansible run "stop --yes-i-really-really-mean-it" -kt $RABBITMQ_SERVICES_TO_RESTART &&
+kayobe kolla ansible run rabbitmq-reset-state
+
+if [[ ! "$1" = "--skip-checks" ]]; then
+    # Fail if any queues still exist
+    sleep 20
+    if kayobe overcloud host command run -l controllers -b --command "docker exec $RABBITMQ_CONTAINER_NAME rabbitmqctl list_queues name --silent | grep -v '^$'"; then
+        echo "Failed check: RabbitMQ has not stopped properly, queues still exist"
+        exit 1
+    fi
+    # Fail if any exchanges still exist (excluding those starting with 'amq.')
+    if kayobe overcloud host command run -l controllers -b --command "docker exec $RABBITMQ_CONTAINER_NAME rabbitmqctl list_exchanges name --silent | grep -v '^$' | grep -v '^amq.'"; then
+        echo "Failed check: RabbitMQ has not stopped properly, exchanges still exist"
+        exit 1
+    fi
+fi
+
+# Redeploy with Quorum Queues enabled
+kayobe kolla ansible run deploy-containers -kt $RABBITMQ_SERVICES_TO_RESTART
+
+if [[ ! "$1" = "--skip-checks" ]]; then
+    sleep 20
+    # Assert that at least one quorum queue exists on each controller
+    if kayobe overcloud host command run -l controllers -b --command "docker exec $RABBITMQ_CONTAINER_NAME rabbitmqctl list_queues type | grep quorum"; then
+        echo "Queues migrated successfully"
+    else
+        echo "Failed post-check: A controller does not have any Quorum queues"
+    fi
+fi

From 9cfefa6ab485cb5aae43b9699bfbc5628279113b Mon Sep 17 00:00:00 2001
From: Alex-Welsh
Date: Tue, 20 Aug 2024 10:33:04 +0100
Subject: [PATCH 5/5] Address RabbitMQ quorum PR nits

---
Review notes (commentary after '---' is not part of the commit message):
  - The last hunk of tools/rabbitmq-quorum-migration.sh additionally adds
    `exit 1` to the failed post-check branch. Without it the script only
    echoes "Failed post-check" and still finishes with status 0, so a
    caller such as CI cannot detect that the migration failed.
  - The hunk header (+55,7) and the diffstat below account for that extra
    line. The blob ids on the script's `index` line have not been
    regenerated for the amended content.

 etc/kayobe/environments/ci-aio/kolla/globals.yml  |  2 +-
 .../rabbit-migration-script-4721c2e37ac9d3c0.yaml |  2 +-
 tools/rabbitmq-quorum-migration.sh                | 11 ++++++-----
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/etc/kayobe/environments/ci-aio/kolla/globals.yml b/etc/kayobe/environments/ci-aio/kolla/globals.yml
index 99d48d4e5..9f058ed19 100644
--- a/etc/kayobe/environments/ci-aio/kolla/globals.yml
+++ b/etc/kayobe/environments/ci-aio/kolla/globals.yml
@@ -15,6 +15,6 @@ opensearch_heap_size: 200m
 # Increase Grafana timeout
 grafana_start_first_node_retries: 20
 
-# Ensure Rabbit is deployed with HA rather than Quorum queues (to test migrations)
+# Ensure Rabbit is deployed with HA rather than quorum queues (to test migrations)
 om_enable_rabbitmq_high_availability: true
 om_enable_rabbitmq_quorum_queues: false
diff --git a/releasenotes/notes/rabbit-migration-script-4721c2e37ac9d3c0.yaml b/releasenotes/notes/rabbit-migration-script-4721c2e37ac9d3c0.yaml
index 8aa993885..f019551ae 100644
--- a/releasenotes/notes/rabbit-migration-script-4721c2e37ac9d3c0.yaml
+++ b/releasenotes/notes/rabbit-migration-script-4721c2e37ac9d3c0.yaml
@@ -1,4 +1,4 @@
 ---
 features:
   - |
-    Added a script to automate RabbitMQ Quorum queue migrations.
+    Added a script to automate RabbitMQ quorum queue migrations.
diff --git a/tools/rabbitmq-quorum-migration.sh b/tools/rabbitmq-quorum-migration.sh
index 2b4e973dd..b24e0d446 100755
--- a/tools/rabbitmq-quorum-migration.sh
+++ b/tools/rabbitmq-quorum-migration.sh
@@ -20,7 +20,7 @@ if [[ ! "$1" = "--skip-checks" ]]; then
         exit 1
     fi
     kayobe overcloud service configuration generate --node-config-dir /tmp/rabbit-migration --kolla-tags none
-    # Fail if HA is set or Quorum is not
+    # Fail if HA is set or quorum is not
     if ! grep 'om_enable_rabbitmq_quorum_queues: true' $KOLLA_CONFIG_PATH/globals.yml || grep 'om_enable_rabbitmq_high_availability: true' $KOLLA_CONFIG_PATH/globals.yml; then
         echo "Failed precheck: om_enable_rabbitmq_quorum_queues must be enabled, om_enable_rabbitmq_high_availability must be disabled"
         exit 1
@@ -28,8 +28,8 @@ if [[ ! "$1" = "--skip-checks" ]]; then
 fi
 
 # Generate new config, stop services using rabbit, and reset rabbit state
-kayobe overcloud service configuration generate --node-config-dir /etc/kolla --kolla-skip-tags rabbitmq-ha-precheck &&
-kayobe kolla ansible run "stop --yes-i-really-really-mean-it" -kt $RABBITMQ_SERVICES_TO_RESTART &&
+kayobe overcloud service configuration generate --node-config-dir /etc/kolla --kolla-skip-tags rabbitmq-ha-precheck
+kayobe kolla ansible run "stop --yes-i-really-really-mean-it" -kt $RABBITMQ_SERVICES_TO_RESTART
 kayobe kolla ansible run rabbitmq-reset-state
 
 if [[ ! "$1" = "--skip-checks" ]]; then
@@ -46,7 +46,7 @@ if [[ ! "$1" = "--skip-checks" ]]; then
     fi
 fi
 
-# Redeploy with Quorum Queues enabled
+# Redeploy with quorum queues enabled
 kayobe kolla ansible run deploy-containers -kt $RABBITMQ_SERVICES_TO_RESTART
 
 if [[ ! "$1" = "--skip-checks" ]]; then
@@ -55,6 +55,7 @@ if [[ ! "$1" = "--skip-checks" ]]; then
     if kayobe overcloud host command run -l controllers -b --command "docker exec $RABBITMQ_CONTAINER_NAME rabbitmqctl list_queues type | grep quorum"; then
         echo "Queues migrated successfully"
     else
-        echo "Failed post-check: A controller does not have any Quorum queues"
+        echo "Failed post-check: A controller does not have any quorum queues"
+        exit 1
     fi
 fi