Skip to content

Commit

Permalink
some ansible
Browse files Browse the repository at this point in the history
  • Loading branch information
gdoteof committed Jun 10, 2023
1 parent 2fedee9 commit aaf6180
Show file tree
Hide file tree
Showing 11 changed files with 198 additions and 5 deletions.
24 changes: 24 additions & 0 deletions .taskfiles/AnsibleTasks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,30 @@ tasks:
- ansible-playbook -i {{.ANSIBLE_INVENTORY_DIR}}/hosts.yml {{.ANSIBLE_PLAYBOOK_DIR}}/cluster-nuke.yml
- task: force-reboot

# Runs the ceph-nuke.yml playbook against the hosts.yml inventory and then
# hands off to the force-reboot task.
ceph-nuke:
  desc: Remove All Ceph Data
  # Marked interactive because the playbook asks for a typed confirmation.
  interactive: true
  dir: '{{.ANSIBLE_DIR}}'
  cmds:
    - cmd: >-
        ansible-playbook
        -i {{.ANSIBLE_INVENTORY_DIR}}/hosts.yml
        {{.ANSIBLE_PLAYBOOK_DIR}}/ceph-nuke.yml
    - task: force-reboot

# Runs the rancher-nuke.yml playbook against the hosts.yml inventory and then
# hands off to the force-reboot task.
rancher-nuke:
  desc: Remove All Rancher Data (necessary to fully reset ceph)
  # Marked interactive because the playbook asks for a typed confirmation.
  interactive: true
  dir: '{{.ANSIBLE_DIR}}'
  cmds:
    - cmd: >-
        ansible-playbook
        -i {{.ANSIBLE_INVENTORY_DIR}}/hosts.yml
        {{.ANSIBLE_PLAYBOOK_DIR}}/rancher-nuke.yml
    - task: force-reboot


# Runs the ceph-list.yml playbook against the hosts.yml inventory.
# NOTE(review): ceph-list.yml is added as an empty file in the same commit,
# so this task has nothing to execute yet — confirm before relying on it.
ceph-list:
  desc: List All Ceph Data
  interactive: true
  dir: '{{.ANSIBLE_DIR}}'
  cmds:
    - cmd: >-
        ansible-playbook
        -i {{.ANSIBLE_INVENTORY_DIR}}/hosts.yml
        {{.ANSIBLE_PLAYBOOK_DIR}}/ceph-list.yml

ping:
desc: Ping all the hosts
dir: "{{.ANSIBLE_DIR}}"
Expand Down
2 changes: 1 addition & 1 deletion Taskfile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,4 +56,4 @@ tasks:
# NOTE(review): the diff header for this file reports 1 addition & 1 deletion,
# so one of the two commands below is presumably the removed line — confirm
# against the committed Taskfile.yml before applying.
configure:
  desc: Configure repository from env settings
  cmds:
    - ./configure
    # The message is double-quoted for the shell: the apostrophe in "don't"
    # would otherwise open a single-quoted string that is never closed and the
    # command would fail to parse.
    - echo "we don't run ./configure because ceph is not configured for it"
1 change: 1 addition & 0 deletions ansible.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ nocows = True
executable = /bin/bash
stdout_callback = yaml
force_valid_group_names = ignore
forks = 10
# File/Directory settings
log_path = ~/.ansible/ansible.log
inventory = ./ansible/inventory
Expand Down
2 changes: 2 additions & 0 deletions ansible/inventory/group_vars/kubernetes/os.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,5 @@ ubuntu:
- python3-kubernetes
- python3-yaml
- ufw
- ceph
- util-linux
File renamed without changes.
File renamed without changes.
8 changes: 4 additions & 4 deletions ansible/inventory/hosts.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ kubernetes:
hosts:
ctrl-k8s-0:
ansible_host: 10.10.1.7
ctrl-k8s-1:
ansible_host: 10.10.1.14
ctrl-k8s-2:
ansible_host: 10.10.1.15
ctrl-k8s-4:
ansible_host: 10.10.1.13
worker:
hosts:
k8s-2:
ansible_host: 10.10.1.15
k8s-1:
ansible_host: 10.10.1.17
k8s-3:
ansible_host: 10.10.1.11
Empty file added ansible/playbooks/ceph-list.yml
Empty file.
130 changes: 130 additions & 0 deletions ansible/playbooks/ceph-nuke.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
---
# Destructive playbook: after a typed confirmation it stops and unmounts the
# Ceph OSDs, reboots every node, removes dm-crypt mappings that sit on top of
# partition-less NVMe disks, and finally wipes those disks
# (sgdisk --zap-all, blkdiscard, partprobe).
- hosts:
    - master
    - worker
  become: true
  gather_facts: true
  any_errors_fatal: true
  vars_prompt:
    - name: nuke
      prompt: |-
        Are you sure you want to nuke these ceph disks and the cluster?
        @@@
        @@@ THIS WILL DESTROY ANY DATA THAT IS ATTACHED TO ANY DEVICE IN YOUR cluster
        @@@
        Type 'YES I FUCKED UP' to proceed
      default: "n"
      private: false
  pre_tasks:
    - name: Check for confirmation
      ansible.builtin.fail:
        msg: Aborted nuking the cluster
      when: nuke != 'YES I FUCKED UP'

    # The pause now matches the 5 seconds announced in the task name
    # (it used to wait only 2).
    - name: Pausing for 5 seconds...
      ansible.builtin.pause:
        seconds: 5

  tasks:
    # A single device-fact refresh; the result is both merged into the host
    # facts (used by the debug task) and registered (used to build `disks`).
    - name: Gather facts
      ansible.builtin.setup:
        filter: ansible_devices
      register: disk_facts

    - name: Display NVMe disks
      ansible.builtin.debug:
        msg: "Host: {{ inventory_hostname }}, NVMe Disks: {{ ansible_devices.keys() | select('match', 'nvme.*') | list }}"

    # -P keeps every filesystem on one line so the last line is always the
    # entry for "/".
    - name: Get root device
      ansible.builtin.command: df -P /
      register: root_df
      changed_when: false

    # The first line of df output is the column header, so the source device
    # is the first field of the LAST line (taking the first word of the whole
    # output only ever yields "Filesystem").
    - name: Parse root device
      ansible.builtin.set_fact:
        root_device: "{{ root_df.stdout_lines[-1].split()[0] }}"

    # Builds one record per NVMe device. Note that 'mounted' really means
    # "has at least one partition"; it is the flag that protects a disk from
    # being wiped further down.
    - name: Get disk information
      ansible.builtin.set_fact:
        disks: >-
          {{
            disks | default([]) +
            [{
              'hostname': ansible_hostname,
              'host_ip': ansible_default_ipv4.address,
              'device': item.key,
              'mounted': (item.value.partitions | length > 0),
              'root_device': (item.key in root_device),
              'size': item.value.size
            }]
          }}
      when: "'nvme' in item.key"
      loop: "{{ disk_facts.ansible_facts.ansible_devices | dict2items }}"
      loop_control:
        label: "{{ item.key }}"

    # Failures are tolerated on purpose: a node without a reachable Ceph
    # cluster must still get its disks wiped.
    - name: Check if Ceph is active
      ansible.builtin.command: ceph health
      register: ceph_health
      changed_when: false
      ignore_errors: true

    # run_once registers the same result for every host in the play.
    - name: Get Ceph OSD IDs
      ansible.builtin.command: ceph osd ls
      register: osd_ids
      changed_when: false
      run_once: true
      ignore_errors: true

    # Only touch OSD services when Ceph answered on this host and the OSD
    # list was actually obtained (the previous condition referenced a
    # variable that was never registered).
    - name: Stop and unmount Ceph OSDs
      when:
        - ceph_health.rc | default(1) == 0
        - osd_ids.rc | default(1) == 0
      block:
        - name: Stop the Ceph OSD service
          ansible.builtin.systemd:
            name: ceph-osd@{{ item }}
            state: stopped
          loop: "{{ osd_ids.stdout_lines }}"

        - name: Unmount the Ceph OSD
          ansible.posix.mount:
            path: "/var/lib/ceph/osd/ceph-{{ item }}"
            state: unmounted
          loop: "{{ osd_ids.stdout_lines }}"

    - name: Reboot
      ansible.builtin.reboot:
        msg: Rebooting nodes
        reboot_timeout: 3600

    # Wipe candidates: NVMe disks without partitions. `default([])` covers
    # hosts that have no NVMe device at all, and the root device is excluded
    # explicitly as a second safety net.
    - name: Get all unmounted disks
      ansible.builtin.set_fact:
        unmounted_disks: >-
          {{
            disks | default([])
            | selectattr('mounted', 'equalto', false)
            | rejectattr('root_device')
            | list
          }}

    - name: Remove Crypts
      block:
        # lsblk lists the device together with everything stacked on it;
        # rows of TYPE "crypt" are the dm-crypt mappings that must be removed
        # before the disk can be wiped. No match is not an error.
        - name: Find Crypt Fingerprints
          ansible.builtin.shell: >-
            set -o pipefail;
            lsblk --noheadings --raw --output NAME,TYPE /dev/{{ item.device }}
            | awk '$2 == "crypt" { print $1 }'
          args:
            executable: /bin/bash
          register: latent_crypts
          changed_when: false
          loop: "{{ unmounted_disks }}"
          loop_control:
            label: "{{ item.device }}"

        # A looped register stores one entry per item under `.results`, so
        # the mapping names are collected from every entry's stdout_lines.
        - name: Remove Crypts
          ansible.builtin.command:
            argv:
              - dmsetup
              - remove
              - "{{ item }}"
          loop: >-
            {{
              latent_crypts.results
              | map(attribute='stdout_lines')
              | flatten
              | unique
            }}

    - name: Clean unmounted disks
      block:
        - name: Zap disk
          ansible.builtin.command: "sgdisk --zap-all /dev/{{ item.device }}"
          loop: "{{ unmounted_disks }}"
          loop_control:
            label: "{{ item.device }}"

        - name: Blkdiscard disk
          ansible.builtin.command: "blkdiscard /dev/{{ item.device }}"
          loop: "{{ unmounted_disks }}"
          loop_control:
            label: "{{ item.device }}"

        - name: Partprobe disk
          ansible.builtin.command: "partprobe /dev/{{ item.device }}"
          loop: "{{ unmounted_disks }}"
          loop_control:
            label: "{{ item.device }}"
7 changes: 7 additions & 0 deletions ansible/playbooks/cluster-nuke.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,10 @@
path: "{{ item.path }}"
state: absent
loop: "{{ directory_contents.files }}"

# Removes the Rancher data directory with a single call.
# `state: absent` already deletes a directory together with its contents;
# `recurse` is only valid with `state: directory`, and the former loop never
# used `item` (it repeated the same delete, and skipped it entirely when the
# looped list was empty).
- name: Delete Old Rancher
  ansible.builtin.file:
    path: "/var/lib/rancher"
    state: absent
29 changes: 29 additions & 0 deletions ansible/playbooks/rancher-nuke.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
---
# Deletes /var/lib/rancher on every master and worker host once the operator
# has typed the exact confirmation phrase at the prompt.
- hosts:
    - master
    - worker
  any_errors_fatal: true
  gather_facts: true
  become: true

  vars_prompt:
    - name: nuke
      private: false
      default: 'n'
      prompt: |-
        Are you sure you want to nuke this rancher?
        Type 'YES I WANT TO DESTROY THIS RANCHER' to proceed

  pre_tasks:
    # Anything other than the exact phrase aborts the play.
    - name: Check for confirmation
      when: "nuke != 'YES I WANT TO DESTROY THIS RANCHER'"
      ansible.builtin.fail:
        msg: Aborted nuking the cluster

    # Short grace period before the delete runs.
    - name: Pausing for 5 seconds...
      ansible.builtin.pause:
        seconds: 5

  tasks:
    - name: Delete Old Rancher
      ansible.builtin.file:
        state: absent
        path: '/var/lib/rancher'

0 comments on commit aaf6180

Please sign in to comment.