diff --git a/ansible/cloud_providers/redfish_baremetal_destroy_env.yml b/ansible/cloud_providers/redfish_baremetal_destroy_env.yml
new file mode 100644
index 00000000000..70212189aef
--- /dev/null
+++ b/ansible/cloud_providers/redfish_baremetal_destroy_env.yml
@@ -0,0 +1,92 @@
+---
+- name: Step 000.0 Restore output directory archive
+  hosts: localhost
+  connection: local
+  gather_facts: false
+  become: false
+  tasks:
+    - name: Restore output_dir archive
+      when: agnosticd_save_output_dir_archive is defined
+      ansible.builtin.include_role:
+        name: agnosticd_restore_output_dir
+
+- name: Step 001.1 Gather Baremetal Server Information
+  hosts: localhost
+  connection: local
+  gather_facts: false
+  become: false
+  tasks:
+    - name: Validate IBM Cloud server ID is provided
+      ansible.builtin.fail:
+        msg: "ibm_cloud_server_id is required. Please provide it as an extra var: -e ibm_cloud_server_id=YOUR_SERVER_ID"
+      when: ibm_cloud_server_id is not defined or ibm_cloud_server_id == ""
+
+    - name: Run baremetal info role
+      ansible.builtin.include_role:
+        name: "infra-ibm-cloud-classic-bm-info"
+      vars:
+        ibm_cloud_server_id: "{{ ibm_cloud_server_id }}"
+        save_to_file: false
+        display_results: false
+
+- name: Step 001.2 BMC Operations run from Bastion
+  hosts: bastions[0]
+  gather_facts: false
+  become: false
+  vars:
+    ansible_python_interpreter: /usr/bin/python3
+  tasks:
+    - name: Remove console user
+      ansible.builtin.include_role:
+        name: "infra-redfish-user-management"
+      vars:
+        # BMC connection parameters
+        bmc_hostname: "{{ hostvars['localhost']['bm_server_info']['remote_mgmt_ip'] }}"
+        bmc_username: "{{ hostvars['localhost']['bm_server_info']['remote_mgmt_user'] }}"
+        bmc_password: "{{ hostvars['localhost']['bm_server_info']['remote_mgmt_password'] }}"
+        # User management parameters (no recursion)
+        target_username: "console"
+        user_action: "delete"
+        user_role: "PowerUser"
+        enable_user: false
+      when:
+        - cleanup_users | default(true) | bool
+        - hostvars['localhost']['bm_server_info'] is defined
+        - hostvars['localhost']['bm_server_info']['remote_mgmt_ip'] != ""
+
+    - name: Force power off servers before cleanup
+      ansible.builtin.include_role:
+        name: "infra-redfish-power-management"
+      vars:
+        # BMC connection parameters
+        bmc_hostname: "{{ hostvars['localhost']['bm_server_info']['remote_mgmt_ip'] }}"
+        bmc_username: "{{ hostvars['localhost']['bm_server_info']['remote_mgmt_user'] }}"
+        bmc_password: "{{ hostvars['localhost']['bm_server_info']['remote_mgmt_password'] }}"
+        # Force power off during destroy
+        power_action: "force_off"
+        enable_pxe_boot_and_reset: false
+      when:
+        - hostvars['localhost']['bm_server_info'] is defined
+        - hostvars['localhost']['bm_server_info']['remote_mgmt_ip'] != ""
+
+- name: Step 001.3 Destroy IBM Cloud Classic VM Resources
+  hosts: localhost
+  connection: local
+  gather_facts: false
+  become: false
+  tasks:
+    - name: Destroy IBM Cloud Classic VMs and Security Groups
+      ansible.builtin.include_role:
+        name: "infra-ibm-cloud-classic-manage-vms"
+      vars:
+        ACTION: "destroy"
+        ibm_cloud_api_key: "{{ ibm_cloud_api_key }}"
+        output_dir: "{{ output_dir }}"
+        guid: "{{ guid }}"
+
+    - name: Destroy SSH key in IBM Cloud Classic
+      ansible.builtin.include_role:
+        name: "infra-ibm-cloud-classic-ssh-key"
+      vars:
+        ACTION: "destroy"
+        ibm_cloud_classic_api_key: "{{ ibm_cloud_api_key }}"
\ No newline at end of file
diff --git a/ansible/cloud_providers/redfish_baremetal_infrastructure_deployment.yml b/ansible/cloud_providers/redfish_baremetal_infrastructure_deployment.yml
new file mode 100644
index 00000000000..59844324ccb
--- /dev/null
+++ 
b/ansible/cloud_providers/redfish_baremetal_infrastructure_deployment.yml @@ -0,0 +1,293 @@ +--- +- name: Step 001.1 Deploy Redfish Baremetal Infrastructure + hosts: localhost + connection: local + gather_facts: false + become: false + tasks: + - name: Validate IBM Cloud server ID is provided + ansible.builtin.fail: + msg: "ibm_cloud_server_id is required. Please provide it as an extra var: -e ibm_cloud_server_id=YOUR_SERVER_ID" + when: ibm_cloud_server_id is not defined or ibm_cloud_server_id == "" + + - name: Validate IBM Cloud API key is provided + ansible.builtin.fail: + msg: "ibm_cloud_api_key is required. Please provide it as an extra var: -e ibm_cloud_api_key=YOUR_API_KEY" + when: ibm_cloud_api_key is not defined or ibm_cloud_api_key == "" + + - name: Generate secure password if not provided + ansible.builtin.set_fact: + generated_password: "{{ lookup('password', '/dev/null length=12 chars=ascii_letters,digits') }}" + + - name: Ensure generated password contains at least one number + ansible.builtin.set_fact: + generated_password: "{{ generated_password[:-1] + (range(0, 10) | random | string) }}" + when: not (generated_password | regex_search('[0-9]')) + + - name: Ensure generated password contains at least one letter + ansible.builtin.set_fact: + generated_password: "{{ generated_password[:-1] + ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'] | random }}" + when: not (generated_password | regex_search('[A-Za-z]')) + + - name: Run baremetal info role + ansible.builtin.include_role: + name: "infra-ibm-cloud-classic-bm-info" + vars: + ibm_cloud_server_id: "{{ ibm_cloud_server_id }}" + ibm_cloud_api_key: "{{ ibm_cloud_api_key }}" + save_to_file: false + display_results: true + + - name: Create SSH provision key + ansible.builtin.include_role: + name: "create_ssh_provision_key" + when: + - ssh_provision_key_name is undefined + + - name: Locate environment authorized key + ansible.builtin.include_role: + name: "locate_env_authorized_key" + + - name: Create SSH key in IBM Cloud Classic + ansible.builtin.include_role: + name: "infra-ibm-cloud-classic-ssh-key" + vars: + ibm_cloud_classic_api_key: "{{ ibm_cloud_api_key }}" + + - name: Store variables for PXE server setup on bastion + ansible.builtin.set_fact: + pxe_server_vars: + bm_server_info: "{{ bm_server_info }}" + rhel_iso_url: "{{ rhel_iso_url }}" + rhel_iso_username: "{{ rhel_iso_username | default(omit) }}" + rhel_iso_password: "{{ rhel_iso_password | default(omit) }}" + generated_password: "{{ generated_password }}" + domain_name: "{{ domain_name | default(cluster_dns_zone | default('example.com')) }}" + kickstart_ssh_key: "{{ ssh_provision_pubkey_content | default(ansible_ssh_user_public_key | default('')) }}" + pxe_server_private_interface: "{{ pxe_server_private_interface | default('eth0') }}" + kickstart_satellite_url: "{{ kickstart_satellite_url | default('') }}" + kickstart_activation_key: "{{ kickstart_activation_key | default('') }}" + kickstart_auth_token: "{{ kickstart_auth_token | default('') }}" + bm_server_private_netmask: "{{ bm_server_private_netmask | default('255.255.255.0') }}" + bm_server_public_netmask: "{{ bm_server_public_netmask | default('255.255.255.0') }}" + bm_server_public_gateway: "{{ bm_server_public_gateway | default('') }}" + bm_server_bonded_network: "{{ bm_server_bonded_network | default(false) }}" + baremetal_user_name: "{{ baremetal_user_name | default('cloud-user') }}" + when: + - setup_pxe_server | default(false) 
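+        # rhel_iso_url and bm_server_info gate the PXE hand-off below: without
+        # an ISO to serve or discovered server facts there is nothing for the
+        # bastion PXE roles to read from hostvars['localhost']['pxe_server_vars'].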
+ - rhel_iso_url is defined + - bm_server_info is defined + + - name: Set instances configuration for bastion host + ansible.builtin.set_fact: + instances: + - name: "bastion-{{ guid }}" + datacenter: "{{ bm_server_info.datacenter }}" + count: 1 + cores: 1 + memory: 2048 + rootfs_size: 100 + image: "REDHAT_9_64" + public_security_group_rules: + - name: "ssh" + description: "SSH access from anywhere" + rule_type: "ingress" + ether_type: "IPv4" + from_port: 22 + to_port: 22 + protocol: "tcp" + cidr: "0.0.0.0/0" + - name: "https" + description: "HTTPS access from anywhere" + rule_type: "ingress" + ether_type: "IPv4" + from_port: 443 + to_port: 443 + protocol: "tcp" + cidr: "0.0.0.0/0" + - name: "console-access" + description: "Console access port" + rule_type: "ingress" + ether_type: "IPv4" + from_port: 3900 + to_port: 3900 + protocol: "tcp" + cidr: "0.0.0.0/0" + - name: "all-egress-tcp" + description: "All tcp outbound traffic" + rule_type: "egress" + ether_type: "IPv4" + protocol: "tcp" + cidr: "0.0.0.0/0" + - name: "all-egress-udp" + description: "All udp outbound traffic" + rule_type: "egress" + ether_type: "IPv4" + protocol: "udp" + cidr: "0.0.0.0/0" + tags: + - "ansible_group:bastions" + - "guid:{{ guid }}" + ssh_key_ids: + - "{{ ssh_key_id }}" + user_metadata: "{{ guid }}" + notes: "Bastion host for {{ guid }}" + when: + - bm_server_info.private_vlan_id is defined + - bm_server_info.private_vlan_id != "" + - ssh_key_id is defined + + - name: Deploy IBM Cloud Classic bastion host + ansible.builtin.include_role: + name: "infra-ibm-cloud-classic-manage-vms" + vars: + ibm_cloud_api_key: "{{ ibm_cloud_api_key }}" + private_vlan_id: "{{ bm_server_info.private_vlan_id }}" + ssh_private_key_path: "{{ env_authorized_key_path }}" + domain: "{{ cluster_dns_zone | default('example.com') }}" + when: + - bm_server_info.private_vlan_id is defined + - bm_server_info.private_vlan_id != "" + + - name: Create in-memory inventory for deployed VMs + ansible.builtin.include_role: + name: "infra-ibm-cloud-classic-vm-inventory" + vars: + ssh_private_key_path: "{{ env_authorized_key_path }}" + +- name: Step 001.1.1 Install packages on deployed VMs + hosts: all:!localhost + gather_facts: false + become: true + vars: + ansible_python_interpreter: /usr/bin/python3 + ansible_ssh_private_key_file: "{{ hostvars['localhost']['env_authorized_key_path'] }}" + tasks: + - name: Install common packages on deployed VMs + ansible.builtin.dnf: + name: "{{ common_packages }}" + state: present + when: + - common_packages is defined + - common_packages | length > 0 + + - name: Install common pip packages on deployed VMs + ansible.builtin.pip: + name: "{{ common_pip_packages }}" + executable: pip3 + state: present + when: + - common_pip_packages is defined + - common_pip_packages | length > 0 + +- name: Step 001.2 Configure PXE Server on Bastion Host + hosts: bastions + gather_facts: true + become: true + tasks: + - name: Set PXE server variables + ansible.builtin.set_fact: + bm_server_info: "{{ hostvars['localhost']['pxe_server_vars']['bm_server_info'] }}" + rhel_iso_url: "{{ hostvars['localhost']['pxe_server_vars']['rhel_iso_url'] }}" + rhel_iso_username: "{{ hostvars['localhost']['pxe_server_vars']['rhel_iso_username'] | default(omit) }}" + rhel_iso_password: "{{ hostvars['localhost']['pxe_server_vars']['rhel_iso_password'] | default(omit) }}" + generated_password: "{{ hostvars['localhost']['pxe_server_vars']['generated_password'] }}" + domain_name: "{{ hostvars['localhost']['pxe_server_vars']['domain_name'] | 
default(hostvars['localhost']['cluster_dns_zone'] | default('example.com')) }}"
+        kickstart_ssh_key: "{{ hostvars['localhost']['pxe_server_vars']['kickstart_ssh_key'] | default('') }}"
+        pxe_server_private_interface: "{{ hostvars['localhost']['pxe_server_vars']['pxe_server_private_interface'] | default('eth0') }}"
+        kickstart_satellite_url: "{{ hostvars['localhost']['pxe_server_vars']['kickstart_satellite_url'] | default('') }}"
+        kickstart_activation_key: "{{ hostvars['localhost']['pxe_server_vars']['kickstart_activation_key'] | default('') }}"
+        kickstart_auth_token: "{{ hostvars['localhost']['pxe_server_vars']['kickstart_auth_token'] | default('') }}"
+        bm_server_private_netmask: "{{ hostvars['localhost']['pxe_server_vars']['bm_server_private_netmask'] | default('255.255.255.0') }}"
+        bm_server_public_netmask: "{{ hostvars['localhost']['pxe_server_vars']['bm_server_public_netmask'] | default('255.255.255.0') }}"
+        bm_server_public_gateway: "{{ hostvars['localhost']['pxe_server_vars']['bm_server_public_gateway'] | default('') }}"
+        bm_server_bonded_network: "{{ hostvars['localhost']['pxe_server_vars']['bm_server_bonded_network'] | default(false) }}"
+        baremetal_user_name: "{{ hostvars['localhost']['pxe_server_vars']['baremetal_user_name'] | default('cloud-user') }}"
+      when:
+        - hostvars['localhost']['setup_pxe_server'] | default(false)
+        - hostvars['localhost']['pxe_server_vars'] is defined
+        - hostvars['localhost']['pxe_server_vars']['bm_server_info'] is defined
+
+    - name: Setup PXE server on bastion host
+      ansible.builtin.include_role:
+        name: "infra-ibm-cloud-classic-bm-pxe-server"
+      when:
+        - hostvars['localhost']['setup_pxe_server'] | default(false)
+        - hostvars['localhost']['pxe_server_vars'] is defined
+        - hostvars['localhost']['pxe_server_vars']['bm_server_info'] is defined
+
+- name: Step 001.3 Configure Redfish User Management
+  hosts: bastions[0]
+  gather_facts: false
+  become: false
+  vars:
+    ansible_python_interpreter: /usr/bin/python3
+  tasks:
+    - name: Run Redfish user management role
+      ansible.builtin.include_role:
+        name: "infra-redfish-user-management"
+      vars:
+        # BMC connection parameters - direct to BMC
+        bmc_hostname: "{{ bm_server_info.remote_mgmt_ip }}"
+        bmc_username: "{{ bm_server_info.remote_mgmt_user }}"
+        bmc_password: "{{ bm_server_info.remote_mgmt_password }}"
+        # User management parameters (no recursion)
+        target_username: "console"
+        target_password: "{{ generated_password }}"
+        user_action: "create"
+        user_role: "PowerUser"
+        enable_user: true
+      when:
+        - bm_server_info is defined
+
+- name: Step 001.3.5 Check and optimize boot order
+  hosts: bastions[0]
+  gather_facts: false
+  become: false
+  vars:
+    ansible_python_interpreter: /usr/bin/python3
+  tasks:
+    - name: Run Redfish boot order management
+      ansible.builtin.include_role:
+        name: "infra-redfish-power-management"
+      vars:
+        # BMC connection parameters - direct to BMC
+        bmc_hostname: "{{ bm_server_info.remote_mgmt_ip }}"
+        bmc_username: "{{ bm_server_info.remote_mgmt_user }}"
+        bmc_password: "{{ bm_server_info.remote_mgmt_password }}"
+        power_action: "status"
+        check_boot_order: true
+      when:
+        - bm_server_info is defined
+
+- name: Step 001.4 Enable PXE boot and reset bare metal host
+  hosts: bastions[0]
+  gather_facts: false
+  become: false
+  vars:
+    ansible_python_interpreter: /usr/bin/python3
+  tasks:
+    - name: Run Redfish power management role
+      ansible.builtin.include_role:
+        name: "infra-redfish-power-management"
+      vars:
+        # BMC connection parameters - direct to BMC
+        bmc_hostname: "{{ 
bm_server_info.remote_mgmt_ip }}" + bmc_username: "{{ bm_server_info.remote_mgmt_user }}" + bmc_password: "{{ bm_server_info.remote_mgmt_password }}" + power_action: "status" + enable_pxe_boot_and_reset: true + check_boot_order: false # Skip boot order check since we already did it + when: + - bm_server_info is defined + +- name: Step 002.0 Save output directory archive + hosts: localhost + connection: local + gather_facts: false + become: false + tasks: + - name: Save output_dir archive + when: agnosticd_save_output_dir_archive is defined + ansible.builtin.include_role: + name: agnosticd_save_output_dir diff --git a/ansible/configs/ibm-classic-bm-host/README.adoc b/ansible/configs/ibm-classic-bm-host/README.adoc new file mode 100755 index 00000000000..0b7c4fd15ff --- /dev/null +++ b/ansible/configs/ibm-classic-bm-host/README.adoc @@ -0,0 +1,436 @@ += IBM Classic Baremetal Host Configuration + +== Overview + +The `ibm-classic-bm-host` configuration provides a complete bare metal provisioning solution that delivers **RHEL 10 pre-installed bare metal hosts** to users, along with the infrastructure to re-provision them as needed. + +=== End Goal + +This configuration delivers: + +* **Bare Metal Host** - Physical server with RHEL 10 pre-installed and ready for user workloads +* **Access to Bare Metal Host BMC** - External access to the Bare Metal host BMC web interface +* **Bastion Host VM** - IBM Cloud Classic VM that can re-initialize the bare metal host via PXE boot +* **Management Access** - IPMI and SSH available through the internal BMC controller interface +* **Re-provisioning Capability** - Users can reset and re-install the bare metal host by re-enabling DHCP and resetting the server. + +=== Core Capabilities + +* **IBM Cloud Baremetal Discovery** - Automatic discovery of server VLAN information +* **BMC User Account Management** - Create, update, delete, and check status of BMC users +* **Power Management** - Complete power control and PXE boot functionality +* **IBM Cloud Classic VM Deployment** - Automated VM deployment using discovered VLANs +* **SSH Key Management** - Automated SSH key creation and distribution +* **Integrated Inventory** - Unified inventory for baremetal and VM resources + +=== PXE Server and Bare Metal Host Architecture + +The following diagram illustrates the complete PXE server and bare metal host deployment architecture: + +[plantuml,pxe-architecture,svg] +.... +@startuml +!theme plain +skinparam backgroundColor white +skinparam shadowing false + +rectangle "Bastion Host\n(PXE Server)" as BastionHost { + component "Private Interface\neth0 (10.x.x.x)" as BASTION_PRIV + component "Public Interface\neth1 (Public IP)" as BASTION_PUB +} + +rectangle "Bare Metal Host" as BareMetalHost { + component "Private Interface\neth1 (10.x.x.x)" as BM_PRIV + component "Public Interface\neth2 (Public IP)" as BM_PUB + component "BMC Interface\n10.x.x.x" as BMC +} + +component "Private VLAN\n10.x.x.x/24" as PRIVATE_VLAN +component "Public VLAN\nInternet Access" as PUBLIC_VLAN + +' Network Connections +BASTION_PRIV --> PRIVATE_VLAN +BASTION_PUB --> PUBLIC_VLAN +BM_PRIV --> PRIVATE_VLAN +BM_PUB --> PUBLIC_VLAN +BMC --> PRIVATE_VLAN + +@enduml +.... + +==== Kickstart and HAProxy Architecture + +The following diagram shows the internal kickstart and management services within the Bastion Host: + +[plantuml,kickstart-services,svg] +.... 
+@startuml +!theme plain +skinparam backgroundColor white +skinparam shadowing false +skinparam linetype ortho + +' Bastion Host Services +component "DHCP Server\nPort 67" as DHCP +component "TFTP Server\nPort 69" as TFTP +component "HTTP Server\nPort 80" as HTTP +component "HAProxy\nPort 443/3900" as HAPROXY + +' Service Resources +component "GRUB Config\nKernel/InitRD" as GRUB +component "Kickstart Files\nRHEL Repository" as KS +component "BMC Proxy\nHTTPS/Console" as BMC_PROXY + +' External Targets +component "Bare Metal Host\nPrivate Interface" as BM_TARGET + +' External Connectivity +component "Internet\nExternal User" as INTERNET + +' Service to Resource Connections +TFTP -down-> GRUB : "Serves" +HTTP -down-> KS : "Serves" +HAPROXY -down-> BMC_PROXY : "Proxies" + +' Service to Target Connections +DHCP -right-> BM_TARGET : "DHCP Response" +TFTP -right-> BM_TARGET : "Boot Files" +HTTP -right-> BM_TARGET : "Kickstart" + +' External Connectivity +INTERNET -right-> HAPROXY : "HTTPS" + +@enduml +.... + +==== PXE Boot Process Flow + +The following diagram shows the step-by-step PXE boot process: + +[plantuml,pxe-boot-process,svg] +.... +@startuml +!theme plain +skinparam backgroundColor white +skinparam shadowing false +skinparam linetype ortho + +rectangle "PXE Boot Process" as PXEProcess { + component "1. BMC Powers On\nPXE Boot Enabled" as STEP1 + component "2. DHCP Request\nNetwork Boot" as STEP2 + component "3. DHCP Response\nIP + Boot Server" as STEP3 + component "4. TFTP Request\nBoot Files" as STEP4 + component "5. HTTP Request\nKickstart File" as STEP5 + component "6. OS Installation\nvia HTTP Repository" as STEP6 + component "7. Reboot to\nLocal Storage" as STEP7 + component "8. SSH Available\nInstallation Complete" as STEP8 +} + +' Ansible Orchestration +component "Ansible\nOrchestration" as ANSIBLE + +' Process Flow +STEP1 -right-> STEP2 +STEP2 -right-> STEP3 +STEP3 -down-> STEP4 +STEP4 -right-> STEP5 +STEP5 -right-> STEP6 +STEP6 -down-> STEP7 +STEP7 -right-> STEP8 + +' Ansible Management +ANSIBLE -down-> STEP1 : "Power Control" +STEP8 -down-> ANSIBLE : "SSH Monitoring" + +@enduml +.... + +==== Post-Installation Process + +The following diagram shows the post-installation monitoring and cleanup process: + +[plantuml,post-installation-process,svg] +.... +@startuml +!theme plain +skinparam backgroundColor white +skinparam shadowing false +skinparam linetype ortho + +rectangle "Post-Installation Process" as PostInstall { + component "Wait for SSH\nPort 22" as SSH_WAIT + component "Wait for SSH Stop\nReboot Detected" as SSH_STOP + component "Wait for SSH Start\nLocal Boot Complete" as SSH_START + component "Disable DHCP\nPrevent Re-PXE" as DHCP_DISABLE +} + +' Ansible Orchestration +component "Ansible\nOrchestration" as ANSIBLE_POST + +' Process Flow +SSH_WAIT -right-> SSH_STOP +SSH_STOP -right-> SSH_START +SSH_START -right-> DHCP_DISABLE + +' Ansible Management +ANSIBLE_POST -down-> SSH_WAIT : "SSH Monitoring" +DHCP_DISABLE -down-> ANSIBLE_POST : "Cleanup" + +@enduml +.... 
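+
+Re-provisioning reverses the final step above: re-enable the DHCP service on the bastion, then reset the bare metal host so it PXE boots and reinstalls. A minimal sketch of the manual procedure, run on the bastion:
+
+[source,bash]
+----
+# Re-enable the DHCP service that the post-install cleanup disabled
+sudo systemctl enable dhcpd
+sudo systemctl start dhcpd
+# ...then reset the bare metal host (for example from the BMC web interface)
+# to trigger a fresh PXE boot and kickstart installation
+----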
+ +==== Architecture Components + +The architecture centers around two main hosts connected via IBM Cloud Classic VLANs: + +* **Bastion Host (PXE Server)**: IBM Cloud Classic VM that provides PXE boot services and re-provisioning capability + - **Network Interfaces**: + - Private Interface (eth0): Connected to private VLAN (10.x.x.x) + - Public Interface (eth1): Connected to public VLAN for internet access + - **Core Services**: + - DHCP Server (Port 67): Assigns IP addresses and boot configuration (disabled after provisioning) + - TFTP Server (Port 69): Serves PXE boot files for installation and re-provisioning + - HTTP Server (Port 80): Serves kickstart files and repositories + - HAProxy (Port 443/3900): Provides BMC proxy and management access + - **Re-provisioning Role**: Users can re-enable DHCP and reset bare metal host to trigger fresh RHEL 10 installation + - **Service Details**: See Kickstart and HAProxy Architecture diagram below + +* **Bare Metal Host**: Physical server managed via BMC + - **Network Interfaces**: + - Private Interface (eth1): Connected to private VLAN for PXE boot and management + - Public Interface (eth2): Connected to public VLAN for internet access + - BMC Interface: Connected to same private VLAN for out-of-band management (IPMI and SSH available) + - **Boot States**: + - PXE Boot: Network boot for OS installation + - Local Boot: Boot from local hard disk after installation + - Operating System: RHEL 10 after successful installation and ready for user workloads + - **Re-provisioning**: Can be reset and re-installed by re-enabling DHCP on bastion and resetting the server + +* **Network Infrastructure**: + - **Private VLAN**: Isolated network (10.x.x.x/24) for PXE boot, management traffic, and BMC access + - **Public VLAN**: Internet-facing network for external access + +==== PXE Boot Process Flow Detail + +The PXE boot process follows these steps, with Ansible orchestrating the power management and monitoring: + +1. **BMC Power On**: Ansible triggers BMC to power on the bare metal host with PXE boot enabled +2. **DHCP Request**: Host requests IP address for network boot +3. **DHCP Response**: DHCP server responds with IP address and boot server information +4. **TFTP Boot Files**: Host downloads kernel, initrd, and GRUB configuration via TFTP +5. **HTTP Kickstart**: Host downloads kickstart file and accesses RHEL repository via HTTP +6. **OS Installation**: Automated RHEL 10 installation proceeds using kickstart configuration +7. **Reboot to Local Storage**: System reboots and boots from local hard disk +8. 
**SSH Available**: Installation complete; Ansible monitors for SSH access availability
+
+==== Post-Installation Process Detail
+
+The post-installation process includes Ansible orchestration:
+
+- **SSH Connectivity**: Ansible waits for SSH port 22 to become available
+- **Reboot Detection**: Ansible waits for SSH to stop responding (indicates reboot)
+- **Local Boot Verification**: Ansible waits for SSH to start responding (indicates successful boot from local storage)
+- **PXE Disable**: Ansible automatically disables the DHCP service to prevent accidental re-provisioning
+
+== Prerequisites
+
+* **IBM Cloud Classic BMC** - System with Redfish API access
+* **BMC Admin Credentials** - Administrative access to the BMC
+* **Network Connectivity** - HTTPS access to the BMC from the AgnosticD deployment host
+* **AgnosticD Environment** - Standard AgnosticD deployment setup
+
+== Quick Start
+
+[source,bash]
+----
+GUID=aaaa;ENV_TYPE=ibm-classic-bm-host;ansible-playbook ansible/main.yml \
+  -e @ansible/configs/${ENV_TYPE}/default_vars.yml \
+  -e env_type=${ENV_TYPE} \
+  -e ACTION=provision \
+  -e rhel_iso_url=https://example.com/images/rhel.iso \
+  -e rhel_iso_username=exampleuser \
+  -e rhel_iso_password='example password' \
+  -e ibm_cloud_server_id="1234567" \
+  -e ibm_cloud_api_key=my_ibm_cloud_api_key \
+  -e kickstart_satellite_url=https://example.satellite.com/register \
+  -e kickstart_activation_key="example-activation-key" \
+  -e kickstart_auth_token="eyJh..." \
+  -e guid=${GUID} \
+  -e output_dir=/tmp/output-${GUID}
+----
+
+=== Power Management Operations
+
+[source,bash]
+----
+# Check power status
+GUID=aaaa;ENV_TYPE=ibm-classic-bm-host;ansible-playbook ansible/main.yml \
+  -e @ansible/configs/${ENV_TYPE}/default_vars.yml \
+  -e env_type=${ENV_TYPE} \
+  -e ibm_cloud_server_id="1234567" \
+  -e ibm_cloud_api_key=my_ibm_cloud_api_key \
+  -e power_action=status \
+  -e guid=${GUID}
+
+# Power on system
+GUID=aaaa;ENV_TYPE=ibm-classic-bm-host;ansible-playbook ansible/main.yml \
+  -e @ansible/configs/${ENV_TYPE}/default_vars.yml \
+  -e env_type=${ENV_TYPE} \
+  -e ibm_cloud_server_id="1234567" \
+  -e ibm_cloud_api_key=my_ibm_cloud_api_key \
+  -e power_action=on \
+  -e guid=${GUID}
+
+# PXE boot with reset
+GUID=aaaa;ENV_TYPE=ibm-classic-bm-host;ansible-playbook ansible/main.yml \
+  -e @ansible/configs/${ENV_TYPE}/default_vars.yml \
+  -e env_type=${ENV_TYPE} \
+  -e ibm_cloud_server_id="1234567" \
+  -e ibm_cloud_api_key=my_ibm_cloud_api_key \
+  -e enable_pxe_boot_and_reset=true \
+  -e guid=${GUID}
+----
+
+== Configuration Variables
+
+=== Required Variables
+
+[cols="2,1,3"]
+|===
+| Variable | Type | Description
+
+| `ibm_cloud_api_key` | String | **Hard requirement** - IBM Cloud API key for authentication
+| `ibm_cloud_server_id` | String | **Hard requirement** - IBM Cloud baremetal server ID (region is automatically discovered from this server)
+|===
+
+=== User Management Variables
+
+[cols="2,1,1,3"]
+|===
+| Variable | Type | Default | Description
+
+| `target_username` | String | `console` | Username to create/manage
+| `target_password` | String | `""` | Password (generated if empty)
+| `user_action` | String | `create` | Action: create, update_password, delete, status
+| `user_role` | String | `PowerUser` | Role: Administrator, Operator, ReadOnly, PowerUser
+| `enable_user` | Boolean | `true` | Enable the user account
+|===
+
+=== Power Management Variables
+
+[cols="2,1,1,3"]
+|===
+| Variable | Type | Default | Description
+
+| `power_action` | String | `status` | Action: status, on, off, 
force_off, reset (automatically `force_off` when ACTION=destroy) +| `enable_pxe_boot_and_reset` | Boolean | `false` | Enable PXE boot with auto reset (automatically `true` when ACTION=provision) +| `check_boot_order` | Boolean | `true` | Check and fix boot order to prioritize hard disk first +|=== + +=== Network Interface Naming + +The configuration uses traditional network interface naming (`eth0`, `eth1`, etc.) instead of predictable naming (`ens1f0np0`, `ens2f0np0`). This is achieved through kernel parameters in the PXE boot configuration: + +* **`net.ifnames=0`** - Disables systemd's predictable network interface naming +* **`biosdevname=0`** - Disables Dell's biosdevname scheme + +[cols="2,1,3"] +|=== +| Interface | Type | Description + +| `eth0` | Private | Private network interface (first physical port) +| `eth1` | Public | Public network interface (second physical port) +| `eth2` | Private | Additional private interface (bonding mode) +| `eth3` | Public | Additional public interface (bonding mode) +|=== + +=== Environment Variables + +[cols="2,1,1,3"] +|=== +| Variable | Type | Default | Description + +| `cleanup_users` | Boolean | `true` | Clean up users during destroy +|=== + +== Password Requirements + +The imported user management role enforces comprehensive password validation: + +* **Length**: 10-32 characters +* **Character Types**: At least 2 of uppercase, lowercase, special characters +* **Required**: At least one letter and one number +* **Restrictions**: No more than 2 consecutive identical characters +* **Security**: Cannot be username or reverse of username +* **Character Set**: A-Z, a-z, 0-9, and ~`!@#$%^&*()-+={}[]|:;"'<>,?/._ + +== PowerUser Role + +The PowerUser role provides custom OEM privileges: + +* **RemoteConsoleAndVirtualMediaAccess** - Console and virtual media management +* **RemoteServerPowerRestartAccess** - Power control capabilities + +== Deployment Workflow + +1. **Pre-checks** - Validate BMC connection parameters +2. **User Management** - Create/manage target user with role assignment +3. **Boot Order Management** - Check and optimize boot order to prioritize hard disk first +4. **Power Management** - Execute power operations if specified +5. **Inventory Creation** - Generate inventory for managed servers (if configured) + +== Environment Cleanup + +[source,bash] +---- +GUID=aaaa;ENV_TYPE=ibm-classic-bm-host;ansible-playbook ansible/destroy.yml \ + -e @ansible/configs/${ENV_TYPE}/default_vars.yml \ + -e env_type=${ENV_TYPE} \ + -e ibm_cloud_server_id="1234567" \ + -e ibm_cloud_api_key=my_ibm_cloud_api_key \ + -e guid=${GUID} +---- + +== Troubleshooting + +=== Common Issues + +**BMC Connection Failed** +* Verify BMC hostname/IP and network connectivity +* Check BMC username and password +* Ensure HTTPS (port 443) is accessible + +**User Creation Failed** +* Check available user slots on BMC +* Verify password meets complexity requirements +* Ensure sufficient BMC privileges + +**Power Operation Failed** +* Check current power state with `power_action=status` +* Verify BMC supports requested power operation +* Some operations require specific current power states + +=== Debug Mode + +Add `-vvv` for verbose debugging: + +[source,bash] +---- +ansible-playbook -vvv ansible/main.yml ... 
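+
+# If BMC operations fail, a direct Redfish probe from the bastion can separate
+# connectivity problems from playbook problems (hypothetical BMC IP and
+# credentials; -k is needed because the BMC uses a self-signed certificate):
+curl -k -u console:PASSWORD https://10.x.x.x/redfish/v1/Systems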
+----
+
+== Imported Roles Reference
+
+The BMC management roles used by this configuration were imported as follows:
+
+* **Source**: https://github.com/rut31337/lenovo-xclarity-ansible
+* **Integration**: Direct import into AgnosticD role structure
+* **API**: Full Redfish API support with comprehensive error handling
+* **Features**: All original playbook functionality preserved
+
+== Support
+
+* **BMC Documentation**: IBM Cloud Classic BMC guides
+* **Redfish API**: DMTF Redfish specification
+* **AgnosticD**: Standard AgnosticD documentation and community support
\ No newline at end of file
diff --git a/ansible/configs/ibm-classic-bm-host/default_vars.yml b/ansible/configs/ibm-classic-bm-host/default_vars.yml
new file mode 100755
index 00000000000..fbe22eb8e81
--- /dev/null
+++ b/ansible/configs/ibm-classic-bm-host/default_vars.yml
@@ -0,0 +1,83 @@
+---
+# IBM Classic Baremetal Host Configuration - Default Variables
+# Integrated deployment for IBM Cloud Classic baremetal servers and VMs
+
+## Cloud Provider
+cloud_provider: redfish_baremetal
+
+## BMC Connection Settings
+# These must be provided as extra vars or in a sample_vars file
+# Variables for BMC management:
+# bmc_hostname: "192.168.1.100"
+# bmc_username: "admin"
+# bmc_password: "password123"
+
+## BMC User Management Settings
+target_username: "console"  # Default console user
+enable_user: true  # Enable user account
+
+## User Configuration
+bastion_user_name: "cloud-user"  # Username for bastion host
+baremetal_user_name: "cloud-user"  # Username for baremetal host
+
+## Environment Management
+cleanup_users: true  # Clean up users during destroy
+
+## AgnosticD Standard Variables
+output_dir: "/tmp/{{ env_type }}-{{ guid }}"  # Overridden by -e output_dir=... when provided
+infra_workloads: []  # Additional workloads to run
+post_infra_workloads: []  # Post-infrastructure workloads
+software_to_deploy: none  # No additional software deployment
+
+## DNS Configuration
+create_dns_records: true  # Enable Route53 DNS records by default
+route53_aws_access_key_id: ""  # AWS access key ID for Route53 (required if DNS enabled)
+route53_aws_secret_access_key: ""  # AWS secret access key for Route53 (required if DNS enabled)
+aws_region: "us-east-1"  # AWS region for Route53
+# route53_aws_zone_id: ""  # Route53 hosted zone ID (auto-discovered from cluster_dns_zone)
+# cluster_dns_zone: ""  # DNS domain for Route53 records (REQUIRED if DNS enabled)
+dns_ttl: 300  # TTL for DNS records in seconds
+
+## SSH Key Management
+env_authorized_key: "ssh_provision_{{ guid }}"  # SSH key filename for environment
+set_env_authorized_key: true  # Enable SSH key management
+
+## PXE Server Configuration
+setup_pxe_server: true  # Enable PXE server setup on bastion
+# Network interfaces are automatically discovered based on private/public IP addresses
+
+## Optional: Managed Servers Configuration
+# managed_servers: []
+
+## BMC Connection Options
+validate_certs: false
+force_basic_auth: true
+
+## Password Generation Settings
+user_password_length: 16
+user_password_complexity: true
+
+# Common packages to install on all deployed VMs
+common_packages:
+  - python3-pip
+  - python3-requests
+  - python3-urllib3
+
+# Common pip packages to install on all deployed VMs
+common_pip_packages:
+  - passlib
+
+## User Info and Output
+user_info_messages: []
+user_info_data: {}
+
+## Workloads
+# Workloads can be enabled to perform additional configuration
+# on the managed systems after user creation
+workloads: []
+
+## SSH Configuration
+ansible_user: root
+
+## Lab Configuration
+lab_users: []
\ No newline at 
end of file diff --git a/ansible/configs/ibm-classic-bm-host/destroy_env.yml b/ansible/configs/ibm-classic-bm-host/destroy_env.yml new file mode 100755 index 00000000000..20703b7c8ec --- /dev/null +++ b/ansible/configs/ibm-classic-bm-host/destroy_env.yml @@ -0,0 +1,2 @@ +--- +- import_playbook: "../../cloud_providers/redfish_baremetal_destroy_env.yml" \ No newline at end of file diff --git a/ansible/configs/ibm-classic-bm-host/post_infra.yml b/ansible/configs/ibm-classic-bm-host/post_infra.yml new file mode 100755 index 00000000000..7fd998d3dcc --- /dev/null +++ b/ansible/configs/ibm-classic-bm-host/post_infra.yml @@ -0,0 +1,13 @@ +--- +- name: Step 002 Post Infrastructure + hosts: localhost + connection: local + gather_facts: false + become: false + tags: + - step002 + - post_infrastructure + tasks: + - name: IBM Classic Baremetal Host Post Infrastructure Message + ansible.builtin.debug: + msg: "Step 002 Post Infrastructure - IBM Classic Baremetal Host Ready for Software Installation" \ No newline at end of file diff --git a/ansible/configs/ibm-classic-bm-host/post_software.yml b/ansible/configs/ibm-classic-bm-host/post_software.yml new file mode 100755 index 00000000000..04b2e6832e7 --- /dev/null +++ b/ansible/configs/ibm-classic-bm-host/post_software.yml @@ -0,0 +1,220 @@ +--- +- name: Step 005 Post Software + hosts: localhost + connection: local + gather_facts: false + become: false + tags: + - step005 + - post_software + tasks: + - name: Collect variables from bastion host + ansible.builtin.set_fact: + bm_server_private_netmask: "{{ hostvars[groups['bastions'][0]]['bm_server_private_netmask'] | default('255.255.255.0') }}" + when: + - groups['bastions'] is defined + - groups['bastions'] | length > 0 + + - name: Calculate CIDR prefix and bastion IP + ansible.builtin.set_fact: + # Map common netmasks to CIDR notation + cidr_prefix: "{{ netmask_to_cidr[bm_server_private_netmask | default('255.255.255.0')] | default('24') }}" + bastion_public_ip: "{{ hostvars[groups['bastions'][0]]['ansible_default_ipv4']['address'] | default(hostvars[groups['bastions'][0]]['public_ip']) }}" + vars: + netmask_to_cidr: + "255.255.255.0": "24" + "255.255.0.0": "16" + "255.0.0.0": "8" + "255.255.255.128": "25" + "255.255.255.192": "26" + when: + - bm_server_info is defined + - groups['bastions'] is defined + - groups['bastions'] | length > 0 + + - name: Calculate private subnet information + ansible.builtin.set_fact: + network_octets: "{{ bm_server_info.private_ip.split('.') }}" + private_subnet_info: "{{ bm_server_info.private_ip.split('.')[0] }}.{{ bm_server_info.private_ip.split('.')[1] }}.{{ bm_server_info.private_ip.split('.')[2] }}.0/{{ cidr_prefix }}" + when: + - bm_server_info is defined + - cidr_prefix is defined + + - name: Store deployment information for users + agnosticd_user_info: + msg: "IBM Cloud Classic Baremetal Deployment Complete" + data: + # BMC Access via Bastion HAProxy + bmc_access_url: "https://bastion-{{ guid }}.{{ cluster_dns_zone }}" + bmc_private_ip: "{{ bm_server_info.remote_mgmt_ip }}" + bmc_console_username: "console" + bmc_console_password: "{{ generated_password }}" + + # Baremetal Server Information + baremetal_hostname: "{{ bm_server_info.hostname | default(bm_server_info.server_id) }}" + baremetal_public_ip: "{{ bm_server_info.public_ip }}" + baremetal_private_ip: "{{ bm_server_info.private_ip }}" + baremetal_private_subnet: "{{ private_subnet_info }}" + baremetal_ssh_command: "ssh cloud-user@{{ bm_server_info.hostname }}" + baremetal_ssh_password: "{{ generated_password 
}}" + + # Bastion Information + bastion_hostname: "bastion-{{ guid }}.{{ cluster_dns_zone }}" + bastion_public_ip: "{{ bastion_public_ip }}" + bastion_ssh_command: "ssh cloud-user@bastion-{{ guid }}.{{ cluster_dns_zone }}" + bastion_ssh_password: "{{ generated_password }}" + + # Network Information + baremetal_datacenter: "{{ bm_server_info.datacenter }}" + when: + - bm_server_info is defined + - generated_password is defined + - bastion_public_ip is defined + + - name: Display deployment summary + ansible.builtin.debug: + msg: > + WARNING: With great power comes great responsibility! + DO NOT: + - Share these credentials with anyone + - Post these credentials to any public or private repository + - Change ANY of these provided credentials for any reason + + NOTE: Any charges related to misuse of these systems due to leaked credentials will be charged back to your cost center and reported to INFOSEC. + + === IBM Cloud Classic Baremetal Deployment Summary === + + BMC Access: + Management URL: https://bastion-{{ guid }}.{{ cluster_dns_zone }} + BMC Private IP (for IPMI/SSH): {{ bm_server_info.remote_mgmt_ip }} + Username: console + Password: {{ generated_password }} + NOTE: The BMC server uses a self-signed certificate. You may need to add an exception to your browser to access it. + + Baremetal Server: + Hostname: {{ bm_server_info.hostname | default(bm_server_info.server_id) }}.{{ guid }}.{{ cluster_dns_zone }} + Public IP: {{ bm_server_info.public_ip }} + Private IP: {{ bm_server_info.private_ip }} + Private Subnet: {{ private_subnet_info }} + SSH Access: ssh cloud-user@{{ bm_server_info.hostname | default(bm_server_info.server_id) }}.{{ guid }}.{{ cluster_dns_zone }} + SSH Password: {{ generated_password }} + + Bastion Host (PXE Server): + Hostname: bastion-{{ guid }}.{{ cluster_dns_zone }} + Public IP: {{ bastion_public_ip }} + SSH Access: ssh cloud-user@bastion-{{ guid }}.{{ cluster_dns_zone }} + SSH Password: {{ generated_password }} + + Network Details: + Datacenter: {{ bm_server_info.datacenter }} + + PXE Boot and Bare Metal Host Provisioning: + Your bare metal host has been provisioned with RHEL 10 via PXE boot from the bastion. + PXE boot has now been DISABLED to prevent accidental re-provisioning by disabling the DHCP service on the bastion. + + To re-enable PXE boot in the future, run this on the bastion and restart the bare metal host: + sudo systemctl enable dhcpd + sudo systemctl start dhcpd + when: + - bm_server_info is defined + - bastion_public_ip is defined + +- name: Disable PXE boot for provisioned bare metal host + hosts: bastions[0] + become: true + gather_facts: false + tags: + - step005 + - post_software + - disable_pxe + tasks: + - name: Display waiting message for installation + ansible.builtin.debug: + msg: "Waiting for SSH connectivity to bare metal host {{ bm_server_info.private_ip }} (up to 30 minutes)..." 
+ when: + - bm_server_info is defined + - bm_server_info.private_ip is defined + + - name: Wait for SSH connectivity to bare metal host + ansible.builtin.wait_for: + port: 22 + host: "{{ bm_server_info.private_ip }}" + timeout: 1800 # 30 minutes + delay: 10 + register: ssh_connectivity_check + when: + - bm_server_info is defined + - bm_server_info.private_ip is defined + + - name: Display SSH connectivity established + ansible.builtin.debug: + msg: "SSH connectivity established to bare metal host {{ bm_server_info.private_ip }} - proceeding to disable PXE boot" + when: + - ssh_connectivity_check is defined + - ssh_connectivity_check is succeeded + + - name: Disable PXE boot + block: + - name: Display PXE disable operation + ansible.builtin.debug: + msg: "Installation assumed to have started - disabling PXE boot by stopping DHCP service on bastion" + + - name: Stop DHCP service + ansible.builtin.systemd: + name: dhcpd + state: stopped + + - name: Disable DHCP service from starting on boot + ansible.builtin.systemd: + name: dhcpd + enabled: false + + - name: Verify DHCP service is stopped + ansible.builtin.systemd: + name: dhcpd + register: dhcpd_status + + - name: Display DHCP service status + ansible.builtin.debug: + msg: "DHCP service is {{ dhcpd_status.status.ActiveState }} and {{ 'enabled' if dhcpd_status.status.UnitFileState == 'enabled' else 'disabled' }}" + when: + - ssh_connectivity_check is defined + - ssh_connectivity_check is succeeded + + - name: Wait for bare metal host to reboot (SSH to stop responding) + ansible.builtin.wait_for: + port: 22 + host: "{{ bm_server_info.private_ip }}" + state: stopped + timeout: 600 # 10 minutes + delay: 30 # Wait 30 seconds before checking + register: ssh_stopped_check + when: + - ssh_connectivity_check is defined + - ssh_connectivity_check is succeeded + + - name: Display reboot detected + ansible.builtin.debug: + msg: "Bare metal host {{ bm_server_info.private_ip }} has rebooted - waiting for it to boot from local storage" + when: + - ssh_stopped_check is defined + - ssh_stopped_check is succeeded + + - name: Wait for bare metal host to boot from local storage (SSH to start responding) + ansible.builtin.wait_for: + port: 22 + host: "{{ bm_server_info.private_ip }}" + state: started + timeout: 1800 # 30 minutes + delay: 60 # Wait 1 minute before checking + register: ssh_started_check + when: + - ssh_stopped_check is defined + - ssh_stopped_check is succeeded + + - name: Display successful boot from local storage + ansible.builtin.debug: + msg: "Bare metal host {{ bm_server_info.private_ip }} has successfully booted from local storage - deployment complete" + when: + - ssh_started_check is defined + - ssh_started_check is succeeded \ No newline at end of file diff --git a/ansible/configs/ibm-classic-bm-host/pre_infra.yml b/ansible/configs/ibm-classic-bm-host/pre_infra.yml new file mode 100755 index 00000000000..29906328f93 --- /dev/null +++ b/ansible/configs/ibm-classic-bm-host/pre_infra.yml @@ -0,0 +1,13 @@ +--- +- name: Step 000 Pre Infrastructure + hosts: localhost + connection: local + gather_facts: false + become: false + tags: + - step000 + - pre_infrastructure + tasks: + - name: IBM Classic Baremetal Host Pre Infrastructure Message + ansible.builtin.debug: + msg: "Step 000 Pre Infrastructure - IBM Classic Baremetal Host" \ No newline at end of file diff --git a/ansible/configs/ibm-classic-bm-host/pre_software.yml b/ansible/configs/ibm-classic-bm-host/pre_software.yml new file mode 100755 index 00000000000..d04d23e27d2 --- /dev/null +++ 
b/ansible/configs/ibm-classic-bm-host/pre_software.yml @@ -0,0 +1,13 @@ +--- +- name: Step 003 Pre Software + hosts: localhost + connection: local + gather_facts: false + become: false + tags: + - step003 + - pre_software + tasks: + - name: IBM Cloud Classic Baremetal Pre Software Message + ansible.builtin.debug: + msg: "Step 003 Pre Software - No software deployment required for IBM Cloud Classic Baremetal management" \ No newline at end of file diff --git a/ansible/configs/ibm-classic-bm-host/requirements.txt b/ansible/configs/ibm-classic-bm-host/requirements.txt new file mode 100644 index 00000000000..5fd1b7fe29d --- /dev/null +++ b/ansible/configs/ibm-classic-bm-host/requirements.txt @@ -0,0 +1,10 @@ +# Python requirements for IBM Cloud Classic configuration with Route53 DNS support +# AWS/Route53 dependencies +boto3>=1.20.0 +botocore>=1.23.0 +# Core dependencies +ansible>=2.10.0 +jmespath>=0.10.0 +python-dateutil>=2.8.0 +PyYAML>=5.4.0 +requests>=2.25.0 \ No newline at end of file diff --git a/ansible/configs/ibm-classic-bm-host/requirements.yml b/ansible/configs/ibm-classic-bm-host/requirements.yml new file mode 100755 index 00000000000..b0f48285ccf --- /dev/null +++ b/ansible/configs/ibm-classic-bm-host/requirements.yml @@ -0,0 +1,8 @@ +--- +collections: + - name: ansible.posix + version: ">=1.0.0" + - name: community.general + version: ">=1.0.0" + - name: amazon.aws + version: ">=6.0.0" \ No newline at end of file diff --git a/ansible/configs/ibm-classic-bm-host/sample_vars.yml b/ansible/configs/ibm-classic-bm-host/sample_vars.yml new file mode 100755 index 00000000000..dc35be68f4c --- /dev/null +++ b/ansible/configs/ibm-classic-bm-host/sample_vars.yml @@ -0,0 +1,82 @@ +--- +# IBM Classic Baremetal Host Configuration - Sample Variables +# Copy this file and customize for your environment +# Integrated deployment for IBM Cloud Classic baremetal servers and VMs + +## Required Variables - MUST be set +cloud_provider: ibm_cloud_classic +env_type: ibm-classic-bm-host +guid: test01 + +## IBM Cloud Configuration (REQUIRED) +ibm_cloud_api_key: "{{ vault_ibm_cloud_api_key }}" # Store in vault +ibm_cloud_server_id: "1234567" # Your IBM Cloud baremetal server ID + +## VM Deployment Configuration (Optional) +# instances: +# - name: "bastion" +# datacenter: "dal13" +# count: 1 +# cores: 2 +# memory: 4096 +# rootfs_size: 25 +# image: "REDHAT_9_64" +# public_security_group_rules: +# - name: "ssh" +# description: "Allow SSH access" +# rule_type: "ingress" +# ether_type: "IPv4" +# from_port: 22 +# to_port: 22 +# protocol: "tcp" +# cidr: "0.0.0.0/0" +# - name: "tcp_out" +# description: "Allow outbound TCP traffic" +# rule_type: "egress" +# ether_type: "IPv4" +# protocol: "tcp" +# cidr: "0.0.0.0/0" +# tags: +# - "agnosticd" +# - "bastion" + +## BMC Connection (Usually Auto-discovered) +# These variables control BMC access to the baremetal server +# NOTE: These are typically gathered automatically from the infra-ibm-cloud-classic-bm-info role using ibm_cloud_server_id +# Only override these if you need to specify BMC credentials manually +bmc_hostname: "192.168.1.100" # BMC IP address or hostname (auto-discovered from server info) +bmc_username: "admin" # BMC admin username (auto-discovered from server info) +bmc_password: "YourBMCPassword" # BMC admin password (auto-discovered from server info) + +## BMC User Management Configuration +target_username: "console" # Username to create/manage (defaults to console) +target_password: "MyConsolePass123!" 
# Password for the target user (will be generated if empty) +user_action: "create" # create, update_password, delete, status +user_role: "PowerUser" # Administrator, Operator, ReadOnly, PowerUser +enable_user: true # Enable the user account + +## User Configuration +bastion_user_name: "cloud-user" # Username for bastion host (default: cloud-user) +baremetal_user_name: "cloud-user" # Username for baremetal host kickstart (default: cloud-user) + +## Environment Cleanup Settings +cleanup_users: true # Clean up users during destroy + +## Optional: Managed Servers List (for inventory creation) +# managed_servers: +# - hostname: "server1.example.com" +# ip_address: "192.168.1.101" +# group: "baremetal_servers" +# uuid: "12345678-1234-1234-1234-123456789012" +# - hostname: "server2.example.com" +# ip_address: "192.168.1.102" +# group: "baremetal_servers" +# uuid: "87654321-4321-4321-4321-210987654321" + +## Optional: Additional Configuration +# validate_certs: false # Validate SSL certificates (default: false) +# user_password_length: 16 # Generated password length (default: 16) + +## Optional: Workloads to run after user creation +# workloads: +# - ocp-workload-example \ No newline at end of file diff --git a/ansible/configs/ibm-classic-bm-host/software.yml b/ansible/configs/ibm-classic-bm-host/software.yml new file mode 100755 index 00000000000..459b718daae --- /dev/null +++ b/ansible/configs/ibm-classic-bm-host/software.yml @@ -0,0 +1,67 @@ +--- +- name: Step 004 Software + hosts: localhost + connection: local + gather_facts: false + become: false + tags: + - step004 + - software + tasks: + - name: Run additional workloads if defined + include_role: + name: "{{ workload_loop_var }}" + loop: "{{ workloads | default([]) }}" + loop_control: + loop_var: workload_loop_var + when: workloads is defined and workloads | length > 0 + +- name: Step 004.1 Configure Bastion Hosts + hosts: bastions + become: true + gather_facts: false + vars: + bastion_user_name: "{{ hostvars['localhost']['bastion_user_name'] | default('cloud-user') }}" + generated_password: "{{ hostvars['localhost']['generated_password'] | default('') }}" + tags: + - step004 + - bastion_tasks + tasks: + - name: "Create {{ bastion_user_name }} with generated password" + ansible.builtin.user: + name: "{{ bastion_user_name }}" + password: "{{ generated_password | password_hash('sha512') }}" + comment: "IBM Cloud Classic Baremetal User" + group: "users" + groups: "wheel" + shell: "/bin/bash" + state: present + when: generated_password is defined and generated_password != "" + + - name: "Enable passwordless sudo for {{ bastion_user_name }}" + ansible.builtin.copy: + content: "{{ bastion_user_name }} ALL=(ALL) NOPASSWD: ALL" + dest: "/etc/sudoers.d/{{ bastion_user_name }}" + mode: "0440" + when: generated_password is defined and generated_password != "" + + - name: Enable password authentication + ansible.builtin.lineinfile: + line: "PasswordAuthentication yes" + regexp: '^ *PasswordAuthentication' + path: "/etc/ssh/sshd_config" + when: generated_password is defined and generated_password != "" + + - name: Remove PasswordAuthentication line from 50-cloud-init.conf + ansible.builtin.lineinfile: + path: "/etc/ssh/sshd_config.d/50-cloud-init.conf" + regexp: "PasswordAuthentication" + state: absent + ignore_errors: true + when: generated_password is defined and generated_password != "" + + - name: Restart sshd service + ansible.builtin.service: + name: sshd + state: restarted + when: generated_password is defined and generated_password != "" \ No 
newline at end of file diff --git a/ansible/lifecycle.yml b/ansible/lifecycle.yml index 680cad9f4a7..768de5ad7c7 100644 --- a/ansible/lifecycle.yml +++ b/ansible/lifecycle.yml @@ -71,3 +71,6 @@ - when: cloud_provider == 'ibm_resource_group' include_tasks: lifecycle_ibm_resource_group.yml + + - when: cloud_provider == 'redfish_baremetal' + include_tasks: lifecycle_redfish_baremetal.yml diff --git a/ansible/lifecycle_redfish_baremetal.yml b/ansible/lifecycle_redfish_baremetal.yml new file mode 100755 index 00000000000..317efc96b31 --- /dev/null +++ b/ansible/lifecycle_redfish_baremetal.yml @@ -0,0 +1,17 @@ +--- +# IBM Cloud Classic Infrastructure Lifecycle Management +# +# The BM lifecycle role now manages VM dependencies internally: +# - START: VM → BM (ensures bastion running for BMC access) +# - STOP: BM → VM (stops BM via BMC, then stops bastion) +# - STATUS: VM + BM (checks both infrastructure components) +# +# No separate VM lifecycle call needed - it's handled automatically +# by the BM lifecycle role based on BMC access requirements. + +- when: >- + ACTION == 'stop' + or ACTION == 'start' + or ACTION == 'status' + include_role: + name: infra-ibm-cloud-classic-bm-lifecycle \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-bm-info/README.md b/ansible/roles-infra/infra-ibm-cloud-classic-bm-info/README.md new file mode 100644 index 00000000000..b20359b7c4c --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-bm-info/README.md @@ -0,0 +1,82 @@ +# infra-ibm-cloud-classic-bm-info + +This role handles IBM Cloud CLI setup and retrieves information about IBM Cloud Classic bare metal servers including IP addresses, VLAN information, MAC addresses, and remote management details. + +## Features + +- **IBM Cloud CLI Setup**: Automatically installs IBM Cloud CLI if not present +- **Authentication**: Logs in to IBM Cloud using the provided API key +- **Plugin Management**: Installs required SoftLayer plugins +- **Server Information**: Retrieves comprehensive bare metal server details +- **Validation**: Ensures all required information is available + +## Requirements + +- Valid IBM Cloud API key with Classic Infrastructure permissions +- Server ID for the bare metal server to query +- Internet access for CLI installation (if needed) +- sudo privileges for CLI installation (if needed) + +## Role Variables + +### Required Variables (must be passed as extra vars) +- `ibm_cloud_api_key`: IBM Cloud API key with Classic Infrastructure permissions +- `ibm_cloud_server_id`: ID of the bare metal server to query + +### Optional Variables +- `display_results`: Whether to display results in debug output (default: true) +- `save_to_file`: Whether to save results to file (default: false) +- `output_file`: Output file path if save_to_file is true (default: "bm_server_info.json") +- `output_dir`: Output directory for temporary files (default: "/tmp/{{ guid | default('agnosticd') }}") + +## Dependencies + +None + +## IBM Cloud CLI Setup Process + +The role automatically handles IBM Cloud CLI setup in the following steps: + +1. **Installation Check**: Verifies if IBM Cloud CLI is installed +2. **CLI Installation**: Downloads and installs IBM Cloud CLI if not present +3. **Authentication**: Logs in to IBM Cloud using the provided API key +4. **Plugin Installation**: Installs required SoftLayer plugins (`infrastructure-service` and `sl`) +5. 
**Validation**: Verifies that all components are working correctly + +The role will fail with detailed error messages if any step fails, providing guidance for manual troubleshooting. + +## Example Playbook + +```yaml +- name: Get bare metal server information + hosts: localhost + vars: + ibm_cloud_api_key: "{{ vault_ibm_cloud_api_key }}" + ibm_cloud_server_id: "1234567" + roles: + - role: infra-ibm-cloud-classic-bm-info +``` + +## Output + +The role creates a `bm_server_info` fact containing: +- `server_id`: The server ID +- `hostname`: Server hostname (falls back to server_id if not available) +- `datacenter`: Datacenter name +- `public_ip`: Public IP address +- `private_ip`: Private IP address +- `public_vlan_id`: Public VLAN ID +- `private_vlan_id`: Private VLAN ID +- `public_mac_address`: Public interface MAC address +- `private_mac_address`: Private interface MAC address +- `remote_mgmt_ip`: Remote management IP address +- `remote_mgmt_user`: Remote management username +- `remote_mgmt_password`: Remote management password + +## License + +GPL-3.0+ + +## Author Information + +Created for AgnosticD project \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-bm-info/defaults/main.yml b/ansible/roles-infra/infra-ibm-cloud-classic-bm-info/defaults/main.yml new file mode 100644 index 00000000000..7bd25bbd5be --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-bm-info/defaults/main.yml @@ -0,0 +1,17 @@ +--- +# defaults file for infra-ibm-cloud-classic-bm-info + +# API key for IBM Cloud authentication (should be passed as extra var) +ibm_cloud_api_key: "" + +# Server ID to query (should be passed as extra var) +ibm_cloud_server_id: "" + +# Whether to display results +display_results: true + +# Whether to save results to file +save_to_file: false + +# Output file path (if save_to_file is true) +output_file: "bm_server_info.json" diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-bm-info/handlers/main.yml b/ansible/roles-infra/infra-ibm-cloud-classic-bm-info/handlers/main.yml new file mode 100644 index 00000000000..f8cdf00e1eb --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-bm-info/handlers/main.yml @@ -0,0 +1,6 @@ +--- +# handlers file for ibm_cloud_classic_bm_info + +- name: restart service + ansible.builtin.debug: + msg: "No handlers required for this role" \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-bm-info/meta/main.yml b/ansible/roles-infra/infra-ibm-cloud-classic-bm-info/meta/main.yml new file mode 100644 index 00000000000..6f33714cfff --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-bm-info/meta/main.yml @@ -0,0 +1,24 @@ +--- +galaxy_info: + author: AgnosticD Team + description: Retrieves information about IBM Cloud Classic bare metal servers + company: Red Hat + license: GPL-3.0+ + min_ansible_version: 2.9 + platforms: + - name: EL + versions: + - 8 + - 9 + - name: Ubuntu + versions: + - 20.04 + - 22.04 + galaxy_tags: + - ibm + - cloud + - baremetal + - infrastructure + - classic + +dependencies: [] \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-bm-info/tasks/main.yml b/ansible/roles-infra/infra-ibm-cloud-classic-bm-info/tasks/main.yml new file mode 100644 index 00000000000..86386185531 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-bm-info/tasks/main.yml @@ -0,0 +1,199 @@ +--- +# tasks file for infra-ibm-cloud-classic-bm-info +- name: Validate IBM Cloud API key is provided + ansible.builtin.fail: + msg: 
"ibm_cloud_api_key is required. Please provide it as an extra var: -e ibm_cloud_api_key=YOUR_API_KEY" + when: ibm_cloud_api_key is not defined or ibm_cloud_api_key == "" + +- name: Setup IBM Cloud CLI and login + ansible.builtin.include_tasks: setup_ibm_cloud_cli.yml + +- name: Get bare metal server details + ansible.builtin.command: ibmcloud sl hardware detail {{ ibm_cloud_server_id }} --output json + register: server_details + changed_when: false + +- name: Parse server details + ansible.builtin.set_fact: + server_info: "{{ server_details.stdout | from_json }}" + +- name: Extract public IP address + ansible.builtin.set_fact: + public_ip: "{{ server_info.primaryIpAddress | default(server_info.publicIpAddress | default('')) }}" + +- name: Extract private IP address + ansible.builtin.set_fact: + private_ip: "{{ server_info.privateIpAddress | default(server_info.primaryBackendIpAddress | default('')) }}" + +- name: Get VLAN information + ansible.builtin.command: ibmcloud sl hardware detail {{ ibm_cloud_server_id }} --output json + register: vlan_info + changed_when: false + +- name: Parse VLAN details + ansible.builtin.set_fact: + vlan_data: "{{ vlan_info.stdout | from_json }}" + +- name: Extract VLAN information from networkVlans + ansible.builtin.set_fact: + # Try to identify public and private VLANs from networkVlans array + public_vlan_id: "{{ (vlan_data.networkVlans | selectattr('networkSpace', 'equalto', 'PUBLIC') | first).id | default('') }}" + public_vlan_number: "{{ (vlan_data.networkVlans | selectattr('networkSpace', 'equalto', 'PUBLIC') | first).vlanNumber | default('') }}" + private_vlan_id: "{{ (vlan_data.networkVlans | selectattr('networkSpace', 'equalto', 'PRIVATE') | first).id | default('') }}" + private_vlan_number: "{{ (vlan_data.networkVlans | selectattr('networkSpace', 'equalto', 'PRIVATE') | first).vlanNumber | default('') }}" + when: vlan_data.networkVlans is defined and vlan_data.networkVlans | length > 0 + +- name: Extract VLAN information from networkVlans (alternative method) + ansible.builtin.set_fact: + # If networkSpace field doesn't exist, try by position or other indicators + public_vlan_id: "{{ vlan_data.networkVlans[0].id | default('') }}" + public_vlan_number: "{{ vlan_data.networkVlans[0].vlanNumber | default('') }}" + private_vlan_id: "{{ vlan_data.networkVlans[1].id | default('') }}" + private_vlan_number: "{{ vlan_data.networkVlans[1].vlanNumber | default('') }}" + when: vlan_data.networkVlans is defined and vlan_data.networkVlans | length > 1 and (public_vlan_id is not defined or public_vlan_id == "") + +- name: Fallback to legacy network component extraction + ansible.builtin.set_fact: + public_vlan_id: "{{ vlan_data.primaryNetworkComponent.networkVlan.id | default(vlan_data.frontendNetworkComponents[0].networkVlan.id | default('')) }}" + public_vlan_number: "{{ vlan_data.primaryNetworkComponent.networkVlan.vlanNumber | default(vlan_data.frontendNetworkComponents[0].networkVlan.vlanNumber | default('')) }}" + private_vlan_id: "{{ vlan_data.primaryBackendNetworkComponent.networkVlan.id | default(vlan_data.backendNetworkComponents[0].networkVlan.id | default('')) }}" + private_vlan_number: "{{ vlan_data.primaryBackendNetworkComponent.networkVlan.vlanNumber | default(vlan_data.backendNetworkComponents[0].networkVlan.vlanNumber | default('')) }}" + when: (vlan_data.networkVlans is not defined or vlan_data.networkVlans | length == 0) and (vlan_data.primaryNetworkComponent is defined or (vlan_data.frontendNetworkComponents is defined and 
vlan_data.frontendNetworkComponents | length > 0)) + +- name: Set empty VLAN info if not available + ansible.builtin.set_fact: + public_vlan_id: "" + public_vlan_number: "" + private_vlan_id: "" + private_vlan_number: "" + when: (public_vlan_id is not defined or public_vlan_id == "") and (private_vlan_id is not defined or private_vlan_id == "") + +# Extract MAC addresses from network components +- name: Extract MAC addresses from networkComponents based on IP addresses + ansible.builtin.set_fact: + # Find private MAC (RFC 1918 private IP ranges: 10.x.x.x, 172.16-31.x.x, 192.168.x.x) + private_mac_address: >- + {{ + (vlan_data.networkComponents + | selectattr('primaryIpAddress', 'defined') + | selectattr('macAddress', 'defined') + | selectattr('name', 'equalto', 'eth') + | selectattr('primaryIpAddress', 'match', '^(10\.|172\.(1[6-9]|2[0-9]|3[0-1])\.|192\.168\.)') + | first).macAddress | default('') + }} + # Find public MAC (non-private IP addresses) + public_mac_address: >- + {{ + (vlan_data.networkComponents + | selectattr('primaryIpAddress', 'defined') + | selectattr('macAddress', 'defined') + | selectattr('name', 'equalto', 'eth') + | rejectattr('primaryIpAddress', 'match', '^(10\.|172\.(1[6-9]|2[0-9]|3[0-1])\.|192\.168\.)') + | first).macAddress | default('') + }} + when: vlan_data.networkComponents is defined and vlan_data.networkComponents | length > 0 + +- name: Fallback MAC extraction by port number + ansible.builtin.set_fact: + # Try port-based identification as fallback + private_mac_address: >- + {{ + (vlan_data.networkComponents + | selectattr('macAddress', 'defined') + | selectattr('name', 'equalto', 'eth') + | selectattr('port', 'equalto', 0) + | first).macAddress | default('') + }} + public_mac_address: >- + {{ + (vlan_data.networkComponents + | selectattr('macAddress', 'defined') + | selectattr('name', 'equalto', 'eth') + | selectattr('port', 'equalto', 1) + | first).macAddress | default('') + }} + when: + - vlan_data.networkComponents is defined and vlan_data.networkComponents | length > 0 + - (private_mac_address is not defined or private_mac_address == '') or (public_mac_address is not defined or public_mac_address == '') + +- name: Set empty MAC addresses if not available + ansible.builtin.set_fact: + public_mac_address: "" + private_mac_address: "" + when: (public_mac_address is not defined or public_mac_address == "") and (private_mac_address is not defined or private_mac_address == "") + +- name: Extract remote management information + ansible.builtin.set_fact: + remote_mgmt_ip: "{{ server_info.remoteManagementAccounts[0].ipAddress | default(server_info.networkManagementIpAddress | default('')) }}" + remote_mgmt_user: "{{ server_info.remoteManagementAccounts[0].username | default(server_info.remoteManagementAccounts[0].user | default('')) }}" + remote_mgmt_password: "{{ server_info.remoteManagementAccounts[0].password | default('') }}" + datacenter: "{{ server_info.datacenter.name | default(server_info.datacenter | default('')) }}" + when: server_info.remoteManagementAccounts is defined and server_info.remoteManagementAccounts | length > 0 + +- name: Set empty remote management info if not available + ansible.builtin.set_fact: + remote_mgmt_ip: "{{ server_info.networkManagementIpAddress | default('') }}" + remote_mgmt_user: "" + remote_mgmt_password: "" + datacenter: "{{ server_info.datacenter.name | default(server_info.datacenter | default('')) }}" + when: server_info.remoteManagementAccounts is not defined or server_info.remoteManagementAccounts | length == 0 + +- 
name: Display collected information + ansible.builtin.debug: + msg: + - "Server ID: {{ ibm_cloud_server_id }}" + - "Hostname: {{ server_info.hostname | default(server_info.fullyQualifiedDomainName | default(ibm_cloud_server_id)) }}" + - "Datacenter: {{ datacenter }}" + - "Public IP: {{ public_ip }}" + - "Private IP: {{ private_ip }}" + - "Public VLAN ID: {{ public_vlan_id }}" + - "Private VLAN ID: {{ private_vlan_id }}" + - "Public MAC Address: {{ public_mac_address }}" + - "Private MAC Address: {{ private_mac_address }}" + - "Remote Management IP: {{ remote_mgmt_ip }}" + - "Remote Management User: {{ remote_mgmt_user }}" + - "Remote Management Password: [REDACTED]" + when: display_results | default(true) + +- name: Create bm_server_info dictionary + ansible.builtin.set_fact: + bm_server_info: + server_id: "{{ ibm_cloud_server_id }}" + hostname: "{{ server_info.hostname | default(server_info.fullyQualifiedDomainName | default(ibm_cloud_server_id)) }}" + datacenter: "{{ datacenter }}" + public_ip: "{{ public_ip }}" + private_ip: "{{ private_ip }}" + public_vlan_id: "{{ public_vlan_id }}" + private_vlan_id: "{{ private_vlan_id }}" + public_mac_address: "{{ public_mac_address }}" + private_mac_address: "{{ private_mac_address }}" + remote_mgmt_ip: "{{ remote_mgmt_ip }}" + remote_mgmt_user: "{{ remote_mgmt_user }}" + remote_mgmt_password: "{{ remote_mgmt_password }}" + +- name: Validate critical server information + ansible.builtin.fail: + msg: "Failed to retrieve critical server information. Missing values: {{ missing_values | join(', ') }}" + vars: + missing_values: >- + {{ + ([] + + (['datacenter'] if datacenter == '' else []) + + (['public_ip'] if public_ip == '' else []) + + (['private_ip'] if private_ip == '' else []) + + (['public_vlan_id'] if public_vlan_id == '' else []) + + (['private_vlan_id'] if private_vlan_id == '' else []) + + (['public_mac_address'] if public_mac_address == '' else []) + + (['private_mac_address'] if private_mac_address == '' else []) + + (['remote_mgmt_ip'] if remote_mgmt_ip == '' else []) + + (['remote_mgmt_user'] if remote_mgmt_user == '' else []) + + (['remote_mgmt_password'] if remote_mgmt_password == '' else [])) + }} + when: + - datacenter == '' or public_ip == '' or private_ip == '' or public_vlan_id == '' or private_vlan_id == '' or public_mac_address == '' or private_mac_address == '' or remote_mgmt_ip == '' or remote_mgmt_user == '' or remote_mgmt_password == '' + +- name: Save results to file + ansible.builtin.copy: + content: "{{ bm_server_info | to_nice_json }}" + dest: "{{ output_file | default('bm_server_info.json') }}" + when: save_to_file | default(false) and output_file is defined \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-bm-info/tasks/setup_ibm_cloud_cli.yml b/ansible/roles-infra/infra-ibm-cloud-classic-bm-info/tasks/setup_ibm_cloud_cli.yml new file mode 100644 index 00000000000..32e8a970f25 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-bm-info/tasks/setup_ibm_cloud_cli.yml @@ -0,0 +1,100 @@ +--- +# tasks file for IBM Cloud CLI setup +- name: Check if IBM Cloud CLI is installed + ansible.builtin.command: ibmcloud --version + register: ibmcloud_version + failed_when: false + changed_when: false + +- name: Install IBM Cloud CLI if not present + block: + - name: Download and install IBM Cloud CLI + ansible.builtin.shell: | + curl -fsSL https://clis.cloud.ibm.com/install/linux | sh + become: true + + - name: Verify IBM Cloud CLI installation + ansible.builtin.command: ibmcloud 
--version + register: ibmcloud_verify + failed_when: ibmcloud_verify.rc != 0 + changed_when: false + + when: ibmcloud_version.rc != 0 + +- name: Login to IBM Cloud with API key + ansible.builtin.command: ibmcloud login --apikey "{{ ibm_cloud_api_key }}" + register: ibmcloud_login_result + failed_when: false + changed_when: false + no_log: true + +- name: Check if logged in to IBM Cloud + ansible.builtin.command: ibmcloud account show + register: ibmcloud_login_status + failed_when: false + changed_when: false + +- name: Fail if not logged in to IBM Cloud + ansible.builtin.fail: + msg: "Failed to authenticate with IBM Cloud. Please verify your API key is correct and has appropriate permissions." + when: ibmcloud_login_status.rc != 0 + +- name: Check if SoftLayer plugin is installed + ansible.builtin.command: ibmcloud plugin list + register: plugin_list + changed_when: false + +- name: Install SoftLayer plugin if not present + ansible.builtin.command: ibmcloud plugin install infrastructure-service -f + register: plugin_install_result + failed_when: false + when: "'infrastructure-service' not in plugin_list.stdout and 'sl' not in plugin_list.stdout" + +- name: Install SoftLayer plugin (alternative method) if not present + ansible.builtin.command: ibmcloud plugin install sl -f + register: plugin_install_result2 + failed_when: false + when: "'sl' not in plugin_list.stdout and (plugin_install_result.rc is not defined or plugin_install_result.rc != 0)" + +- name: Check plugin list after installation + ansible.builtin.command: ibmcloud plugin list + register: plugin_list_after + changed_when: false + +- name: Display plugin installation results + ansible.builtin.debug: + msg: + - "Plugin installation results:" + - "infrastructure-service install result: {{ plugin_install_result.rc | default('not run') }}" + - "sl install result: {{ plugin_install_result2.rc | default('not run') }}" + - "Plugins after installation: {{ plugin_list_after.stdout_lines | default([]) }}" + +- name: Verify SoftLayer plugin installation + ansible.builtin.command: ibmcloud sl help + register: sl_help_result + failed_when: false + changed_when: false + +- name: Fail if SoftLayer plugin not working + ansible.builtin.fail: + msg: | + SoftLayer plugin is not working properly. + + Plugin installation results: + - infrastructure-service install: {{ plugin_install_result.rc | default('not run') }} + - sl install: {{ plugin_install_result2.rc | default('not run') }} + + Plugin verification results: + - 'ibmcloud sl help' returned: {{ sl_help_result.rc }} + + Error details: + - sl help error: {{ sl_help_result.stderr | default('None') }} + + Current plugins installed: + {{ plugin_list_after.stdout | default('Unable to list plugins') }} + + Please check plugin installation manually: + 1. Run: ibmcloud plugin list + 2. Try: ibmcloud plugin install infrastructure-service --force + 3. 
Verify: ibmcloud sl help
+  when: sl_help_result.rc != 0
\ No newline at end of file
diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-bm-info/vars/main.yml b/ansible/roles-infra/infra-ibm-cloud-classic-bm-info/vars/main.yml
new file mode 100644
index 00000000000..592fc59201c
--- /dev/null
+++ b/ansible/roles-infra/infra-ibm-cloud-classic-bm-info/vars/main.yml
@@ -0,0 +1,11 @@
+---
+# vars file for ibm_cloud_classic_bm_info
+
+# Internal variables used by the role
+_ibmcloud_cli_required_version: "2.0.0"
+_supported_os:
+  - "RedHat"
+  - "CentOS"
+  - "Ubuntu"
+  - "Debian"
+  - "SLES"
\ No newline at end of file
diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-bm-lifecycle/README.md b/ansible/roles-infra/infra-ibm-cloud-classic-bm-lifecycle/README.md
new file mode 100644
index 00000000000..c406db6c661
--- /dev/null
+++ b/ansible/roles-infra/infra-ibm-cloud-classic-bm-lifecycle/README.md
@@ -0,0 +1,289 @@
+# IBM Cloud Classic Bare Metal Lifecycle Role (infra-ibm-cloud-classic-bm-lifecycle)
+
+This role provides lifecycle management for IBM Cloud Classic bare metal servers using BMC (Baseboard Management Controller) operations through a bastion host proxy.
+
+## Features
+
+- **BMC-Based Power Management**: Reliable start, stop, and status operations via the Redfish API
+- **VM Dependency Management**: Automatically manages bastion VM dependencies for BMC access
+- **HAProxy Tunnel Integration**: Uses the bastion host as a secure proxy for BMC communication
+- **Intelligent Sequencing**: Proper VM→BM start order and BM→VM stop order
+- **AgnosticD Integration**: Designed for AgnosticD infrastructure workflows
+- **Enhanced Reliability**: More dependable than IBM Cloud CLI power commands
+
+## Required Variables
+
+- `ibm_cloud_server_id`: IBM Cloud server ID for the bare metal server
+- `ibm_cloud_api_key`: IBM Cloud API key with hardware management permissions
+- `ACTION`: The action to perform (`start`, `stop`, or `status`)
+
+## Usage
+
+### Starting Servers
+
+```yaml
+- include_role:
+    name: infra-ibm-cloud-classic-bm-lifecycle
+  vars:
+    ibm_cloud_server_id: "12345678"
+    ibm_cloud_api_key: "{{ ibm_cloud_api_key }}"
+    ACTION: "start"
+```
+
+### Stopping Servers
+
+```yaml
+- include_role:
+    name: infra-ibm-cloud-classic-bm-lifecycle
+  vars:
+    ibm_cloud_server_id: "12345678"
+    ibm_cloud_api_key: "{{ ibm_cloud_api_key }}"
+    ACTION: "stop"
+```
+
+### Checking Status
+
+```yaml
+- include_role:
+    name: infra-ibm-cloud-classic-bm-lifecycle
+  vars:
+    ibm_cloud_server_id: "12345678"
+    ibm_cloud_api_key: "{{ ibm_cloud_api_key }}"
+    ACTION: "status"
+```
+
+## How It Works
+
+### Start Sequence (VM → BM)
+1. **VM Dependency**: Ensures the bastion VM is running using `infra-ibm-cloud-classic-vm-lifecycle`
+2. **SSH Connectivity**: Waits for bastion SSH accessibility for BMC proxy operations
+3. **BMC Power On**: Uses Redfish API calls through the bastion proxy to power on the bare metal server
+
+### Stop Sequence (BM → VM)
+1. **BMC Power Off**: Uses Redfish API calls through the bastion proxy to power off the bare metal server
+2. **VM Dependency**: Stops the bastion VM after BM operations complete
+
+### Status Check
+1. **VM Status**: Uses the IBM Cloud CLI to check bastion VM status and exact power state
+2. **BM Status**: Uses the IBM Cloud CLI to check bare metal server status and infer the power state from hardware status
+
+**Note**: IBM Cloud Classic bare metal servers do not expose a direct power state via the CLI API. Power status is inferred from hardware status (ACTIVE = likely powered on).
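+
+The inference above can be sketched as a pair of Ansible tasks. This is an illustration rather than part of the role: it assumes the IBM Cloud CLI is already logged in, and the `hardwareStatus.status` field name is an assumption about the CLI's JSON output, not something this role guarantees:
+
+```yaml
+# Sketch only: infer BM power state from "ibmcloud sl hardware detail" output
+- name: Get bare metal hardware detail
+  ansible.builtin.command: >-
+    ibmcloud sl hardware detail {{ ibm_cloud_server_id }} --output json
+  register: bm_detail
+  changed_when: false
+
+- name: Infer power state from hardware status
+  ansible.builtin.set_fact:
+    bm_inferred_power_state: >-
+      {{ {'ACTIVE': 'likely powered on',
+          'PROVISIONING': 'boot/provisioning in progress',
+          'MAINTENANCE': 'under maintenance'}.get(bm_status | upper, 'unknown') }}
+  vars:
+    # Assumption: the status string lives at hardwareStatus.status in the JSON
+    bm_status: "{{ (bm_detail.stdout | from_json).hardwareStatus.status | default('') }}"
+```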
+ +### Technical Implementation +- **BMC Communication**: Redfish API calls executed via bastion host proxy +- **Proxy Tunnel**: Bastion provides secure proxy access to BMC network +- **Dependency Management**: Automatic VM lifecycle management for BM operations +- **Error Handling**: Comprehensive validation and fallback procedures + +## Task Flow + +### Start Operation +- `initialize.yml`: Validates input and retrieves server information +- `discover_vm_inventory.yml`: Discovers existing IBM Cloud Classic VMs and adds bastion hosts to inventory +- `start_vm_dependency.yml`: Ensures bastion VM is running for BMC access +- `wait_for_bastion_ssh.yml`: Waits for SSH connectivity to bastion +- `start_bm_via_bmc.yml`: Powers on server via Redfish API through bastion proxy + +### Stop Operation +- `initialize.yml`: Validates input and retrieves server information +- `discover_vm_inventory.yml`: Discovers existing IBM Cloud Classic VMs and adds bastion hosts to inventory +- `start_vm_dependency.yml`: Ensures bastion VM is running for BMC access +- `wait_for_bastion_ssh.yml`: Waits for SSH connectivity to bastion +- `stop_bm_via_bmc.yml`: Powers off server via Redfish API through bastion proxy +- `stop_vm_dependency.yml`: Stops bastion VM after BM operations complete + +### Status Operation +- `initialize.yml`: Validates input and retrieves server information +- `discover_vm_inventory.yml`: Discovers existing IBM Cloud Classic VMs and adds bastion hosts to inventory +- `status_bm_via_cli.yml`: Checks both VM and bare metal server status via IBM Cloud CLI + +## Integration with AgnosticD + +This role integrates with AgnosticD lifecycle management: + +```yaml +# In lifecycle_redfish_baremetal.yml +- when: >- + ACTION == 'stop' + or ACTION == 'start' + or ACTION == 'status' + include_role: + name: infra-ibm-cloud-classic-bm-lifecycle +``` + +## Dependencies + +- `infra-ibm-cloud-classic-bm-info`: For retrieving server information including BMC credentials +- `infra-ibm-cloud-classic-vm-lifecycle`: For managing bastion VM dependencies (start/stop operations only) +- **Bastion Host**: Running VM with proxy configured for BMC access (start/stop operations only) +- **SSH Access**: Port 22 connectivity to bastion host for Redfish API calls (start/stop operations only) +- **BMC Network**: Bastion proxy must route to BMC management network (start/stop operations only) + +**Note**: Status operations use IBM Cloud CLI directly and have no bastion or VM dependencies. 
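+
+For the start and stop flows, the Redfish call that ultimately reaches the BMC can be sketched as a single `ansible.builtin.uri` task. This is a hedged illustration rather than the role's literal task file: `bastion_public_ip` is an example variable name, and `bm_server_info` is assumed to come from `infra-ibm-cloud-classic-bm-info`:
+
+```yaml
+# Sketch only: a Redfish reset sent to the bastion's HAProxy HTTPS front end,
+# which forwards it to the BMC on the management network
+- name: Power on the BM server via Redfish through the bastion proxy
+  ansible.builtin.uri:
+    url: "https://{{ bastion_public_ip }}/redfish/v1/Systems/1/Actions/ComputerSystem.Reset"
+    method: POST
+    user: "{{ bm_server_info.remote_mgmt_user }}"
+    password: "{{ bm_server_info.remote_mgmt_password }}"
+    force_basic_auth: true
+    validate_certs: false  # BMCs commonly present self-signed certificates
+    body_format: json
+    body:
+      ResetType: "On"
+    status_code: [200, 202, 204]
+    timeout: 30
+```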
+ +## Output + +When running status checks, the role provides: +- **VM (Bastion) Status**: ID, hostname, IPs, CPU/memory, power state +- **BM Server Status**: ID, hostname, datacenter, IPs, hardware specs +- Current power state and hardware status for both systems +- Operating system information +- Comprehensive infrastructure overview +- User-friendly status messages through `agnosticd_user_info` + +## Power Management Considerations + +### Power State Detection Limitations + +**IBM Cloud Classic API Limitations**: +- **VM Power State**: Exposed directly via `powerState.keyName` field (RUNNING/HALTED) +- **BM Power State**: NOT exposed via IBM Cloud CLI API - must be inferred from hardware status + +**BM Power State Inference**: +- `ACTIVE` hardware status → Likely powered on 🟢 +- `PROVISIONING` hardware status → Boot/provisioning in progress ⏳ +- `MAINTENANCE` hardware status → Under maintenance 🔧 +- Other statuses → Power state unknown ⚠️ + +**For Exact BM Power Status**: Use BMC/Redfish API operations (start/stop actions) which provide real power state through bastion proxy. + +### Known Issues with Stop Operations + +Some IBM Cloud Classic bare metal servers may not respond to remote power-off commands even when the command appears to succeed. This can happen due to: + +- **Server Configuration**: Power management disabled or restricted in server settings +- **Running Workloads**: Critical services preventing graceful shutdown +- **BMC/IPMI Issues**: Baseboard management controller not responding properly +- **Policy Restrictions**: IBM Cloud policies preventing remote power control + +### Troubleshooting BMC Operations + +When BMC operations fail, check these common issues: + +#### 1. **Bastion VM Dependency Issues** +- **Bastion not running**: Ensure bastion VM is powered on +- **SSH connectivity**: Verify SSH access to bastion host (port 22) +- **Proxy service**: Check proxy service is running on bastion for BMC routing + +#### 2. **BMC Communication Problems** +- **Network routing**: Verify bastion proxy routes to BMC management network +- **BMC credentials**: Check management username/password are correct +- **BMC responsiveness**: BMC may need time to respond to Redfish API calls +- **HTTPS connectivity**: Ensure bastion can reach BMC via HTTPS + +#### 3. **Power Operation Failures** +- **Manual verification**: Check status via Redfish API calls on bastion +- **Alternative methods**: Use server console via IBM Cloud web interface +- **Support escalation**: Submit IBM Cloud support ticket with BMC details +- **Direct server access**: SSH to server and run `sudo shutdown -h now` + +#### 4. 
**Dependency Sequence Issues**
+- **Start problems**: VM must be running before BM operations (VM→BM)
+- **Stop problems**: BM should be stopped before VM (BM→VM)
+- **Status checks**: VM status affects BMC proxy availability
+
+### Manual BMC Commands
+
+If the role fails, you can manually execute Redfish API calls on the bastion:
+
+```bash
+# Check power status
+curl -k -u <bmc_username>:<bmc_password> -X GET https://<bastion_ip>/redfish/v1/Systems/1
+
+# Power on
+curl -k -u <bmc_username>:<bmc_password> -X POST https://<bastion_ip>/redfish/v1/Systems/1/Actions/ComputerSystem.Reset \
+  -H "Content-Type: application/json" \
+  -d '{"ResetType": "On"}'
+
+# Power off (graceful shutdown)
+curl -k -u <bmc_username>:<bmc_password> -X POST https://<bastion_ip>/redfish/v1/Systems/1/Actions/ComputerSystem.Reset \
+  -H "Content-Type: application/json" \
+  -d '{"ResetType": "GracefulShutdown"}'
+
+# Force power off (if needed)
+curl -k -u <bmc_username>:<bmc_password> -X POST https://<bastion_ip>/redfish/v1/Systems/1/Actions/ComputerSystem.Reset \
+  -H "Content-Type: application/json" \
+  -d '{"ResetType": "ForceOff"}'
+```
+
+### Start Operation Considerations
+
+Power-on operations may take time to complete due to:
+
+- **Boot Process**: Servers may take 5-15 minutes to fully boot and show ACTIVE status
+- **Hardware Initialization**: BIOS/UEFI and hardware component initialization
+- **Operating System Load**: OS boot process and service startup
+- **Network Configuration**: Network interfaces and routing setup
+
+### Smart Operation Features
+
+Both start and stop operations include intelligent behavior:
+
+- **Pre-operation Status Check**: Verifies the current state before executing commands
+- **Skip Unnecessary Operations**: Won't start already ACTIVE servers or stop already INACTIVE servers
+- **Wait Periods**: Appropriate wait times for operations to take effect (15s for start, 10s for stop)
+- **Verification**: Post-operation status checks to confirm success
+- **Manual Guidance**: Provides clear instructions for manual intervention when needed
+
+### Power Commands Used
+
+- **Status Check**: `GET https://<bastion_ip>/redfish/v1/Systems/1`
+- **Start**: `POST https://<bastion_ip>/redfish/v1/Systems/1/Actions/ComputerSystem.Reset` with `{"ResetType": "On"}`
+- **Stop**: `POST https://<bastion_ip>/redfish/v1/Systems/1/Actions/ComputerSystem.Reset` with `{"ResetType": "GracefulShutdown"}`
+- **Manual Force-off**: `POST https://<bastion_ip>/redfish/v1/Systems/1/Actions/ComputerSystem.Reset` with `{"ResetType": "ForceOff"}` (user-initiated when needed)
+
+All commands are executed on the bastion host through SSH delegation for secure proxy access.
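+
+As an illustration of that delegation pattern, a status read might look like the sketch below; the `bastions` group is assumed to be populated by `discover_vm_inventory.yml`, and `bmc_username`/`bmc_password` are example variables mirroring `bm_server_info.remote_mgmt_user` and `remote_mgmt_password`:
+
+```yaml
+# Sketch only: run the status read on the bastion instead of the control node.
+# Assumes HAProxy's HTTPS front end answers on the bastion itself (port 443).
+- name: Read the current PowerState from the bastion
+  ansible.builtin.command: >-
+    curl -sk -u {{ bmc_username }}:{{ bmc_password }}
+    https://localhost/redfish/v1/Systems/1
+  delegate_to: "{{ groups['bastions'][0] }}"
+  register: redfish_system
+  changed_when: false
+
+- name: Report the Redfish PowerState field
+  ansible.builtin.debug:
+    msg: "PowerState: {{ (redfish_system.stdout | from_json).PowerState }}"
+```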
+ +## Error Handling + +The role includes comprehensive validation: +- Checks for required `ibm_cloud_server_id` +- Validates `ibm_cloud_api_key` presence +- Handles IBM Cloud CLI command failures gracefully +- Verifies power state changes after operations +- Provides detailed error messages for troubleshooting +- Offers manual alternatives when power commands don't take effect + +## Example Output + +``` +🏗️ IBM Cloud Classic Infrastructure Status: + +🖥️ Virtual Machine (Bastion): +VM ID: 151802381 +Hostname: bastion-aaaa +FQDN: bastion-aaaa.example.com +Public IP: 169.62.251.23 +Private IP: 10.73.77.235 +CPU: 1 cores +Memory: 2048 MB +Power State: HALTED +VM Status: 🔴 STOPPED + +⚙️ Bare Metal Server: +Server ID: 3389586 +Hostname: dal13-bm02 +FQDN: dal13-bm02.example.com +Datacenter: dal13 +Public IP: 169.62.251.20 +Private IP: 10.73.77.224 + +🔧 Hardware Details: +CPU: 20 cores +Memory: 192 GB +OS: Ubuntu 20.04 LTS + +⚡ Power Status: +Current State: Inferred from hardware status +Status: 🟢 LIKELY POWERED ON (Hardware Status: ACTIVE) +Method: IBM Cloud CLI (direct API access) + +📊 Hardware Status: +Overall Status: ACTIVE +System Health: ✅ Active and operational + +🔗 Infrastructure Overview: +- VM provides BMC proxy access for BM power operations +- Status operations use direct IBM Cloud CLI (no dependencies) +- Both VM and BM can be managed independently via their respective APIs +``` \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-bm-lifecycle/defaults/main.yml b/ansible/roles-infra/infra-ibm-cloud-classic-bm-lifecycle/defaults/main.yml new file mode 100644 index 00000000000..bf5f5bcea63 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-bm-lifecycle/defaults/main.yml @@ -0,0 +1,14 @@ +--- +# Default variables for IBM Cloud Classic Baremetal lifecycle management + +# IBM Cloud API key for authentication (required) +# ibm_cloud_api_key: "YOUR_IBM_CLOUD_API_KEY" + +# IBM Cloud server ID for the bare metal server (required) +# ibm_cloud_server_id: "YOUR_SERVER_ID" + +# Default action if not specified +ACTION: "status" + +# Default verbosity level +verbosity: 0 \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-bm-lifecycle/tasks/discover_vm_inventory.yml b/ansible/roles-infra/infra-ibm-cloud-classic-bm-lifecycle/tasks/discover_vm_inventory.yml new file mode 100644 index 00000000000..c9eaff5559d --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-bm-lifecycle/tasks/discover_vm_inventory.yml @@ -0,0 +1,47 @@ +--- +- name: List existing IBM Cloud Classic VMs + ansible.builtin.command: ibmcloud sl vs list --output json + register: existing_vms_result + changed_when: false + failed_when: false + +- name: Parse existing VMs list + ansible.builtin.set_fact: + existing_vms: "{{ existing_vms_result.stdout | from_json }}" + when: + - existing_vms_result.rc == 0 + - existing_vms_result.stdout != "" + +- name: Set empty VMs list if no output or command failed + ansible.builtin.set_fact: + existing_vms: [] + when: + - existing_vms_result.rc != 0 or existing_vms_result.stdout == "" + +- name: Filter VMs that might be bastion hosts + ansible.builtin.set_fact: + potential_bastions: "{{ existing_vms | selectattr('hostname', 'match', '.*bastion.*|.*bast.*') | list }}" + when: existing_vms is defined + +- name: Add discovered bastion VMs to inventory + ansible.builtin.add_host: + name: "{{ item.hostname }}" + groups: "bastions" + ansible_host: "{{ item.primaryIpAddress }}" + ansible_user: "{{ ssh_user | 
default('root') }}" + ansible_ssh_private_key_file: "{{ env_authorized_key_path }}" + vm_id: "{{ item.id }}" + vm_hostname: "{{ item.hostname }}" + vm_domain: "{{ item.domain }}" + vm_public_ip: "{{ item.primaryIpAddress }}" + vm_private_ip: "{{ item.primaryBackendIpAddress | default('') }}" + vm_status: "{{ item.status | default('unknown') }}" + vm_datacenter: "{{ item.datacenter.name | default('unknown') }}" + vm_cores: "{{ item.maxCpu | default(0) }}" + vm_memory: "{{ item.maxMemory | default(0) }}" + loop: "{{ potential_bastions }}" + loop_control: + label: "{{ item.hostname }}" + when: + - potential_bastions is defined + - potential_bastions | length > 0 \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-bm-lifecycle/tasks/initialize.yml b/ansible/roles-infra/infra-ibm-cloud-classic-bm-lifecycle/tasks/initialize.yml new file mode 100755 index 00000000000..4e93566c309 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-bm-lifecycle/tasks/initialize.yml @@ -0,0 +1,22 @@ +--- +- name: Validate IBM Cloud API key is provided + ansible.builtin.fail: + msg: "ibm_cloud_api_key is required. Please provide it as an extra var: -e ibm_cloud_api_key=YOUR_API_KEY" + when: ibm_cloud_api_key is not defined or ibm_cloud_api_key == "" + +- name: Validate IBM Cloud server ID is provided + ansible.builtin.fail: + msg: "ibm_cloud_server_id is required. Please provide it as an extra var: -e ibm_cloud_server_id=YOUR_SERVER_ID" + when: ibm_cloud_server_id is not defined or ibm_cloud_server_id == "" + +- name: Locate environment authorized key + include_role: + name: "locate_env_authorized_key" + +- name: Run baremetal info role + include_role: + name: "infra-ibm-cloud-classic-bm-info" + vars: + ibm_cloud_server_id: "{{ ibm_cloud_server_id }}" + save_to_file: false + display_results: true \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-bm-lifecycle/tasks/main.yml b/ansible/roles-infra/infra-ibm-cloud-classic-bm-lifecycle/tasks/main.yml new file mode 100755 index 00000000000..e5dedde57ed --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-bm-lifecycle/tasks/main.yml @@ -0,0 +1,97 @@ +--- +- include_tasks: initialize.yml + +- when: + - ACTION == 'start' + block: + - include_tasks: discover_vm_inventory.yml + - include_tasks: start_vm_dependency.yml + - include_tasks: wait_for_bastion_https.yml + - name: Power on BM server via Redfish Power Management (HAProxy) + include_role: + name: infra-redfish-power-management + vars: + bmc_hostname: "{{ hostvars[groups['bastions'][0]]['ansible_host'] }}" + bmc_username: "{{ bm_server_info.remote_mgmt_user }}" + bmc_password: "{{ bm_server_info.remote_mgmt_password }}" + power_action: "on" + when: + - groups['bastions'] is defined + - groups['bastions'] | length > 0 + - bm_server_info is defined + - bm_server_info.remote_mgmt_ip is defined + - bm_server_info.remote_mgmt_user is defined + - bm_server_info.remote_mgmt_password is defined + +- when: + - ACTION == 'stop' + block: + - include_tasks: discover_vm_inventory.yml + - include_tasks: start_vm_dependency.yml + - include_tasks: wait_for_bastion_https.yml + - name: Check current BM power state before stop operation + ansible.builtin.uri: + url: "https://{{ hostvars[groups['bastions'][0]]['ansible_host'] }}/redfish/v1/Systems/1" + method: GET + user: "{{ bm_server_info.remote_mgmt_user }}" + password: "{{ bm_server_info.remote_mgmt_password }}" + validate_certs: false + force_basic_auth: true + status_code: 200 + timeout: 30 + 
register: bm_pre_stop_power_status + failed_when: false + when: + - groups['bastions'] is defined + - groups['bastions'] | length > 0 + - bm_server_info is defined + - bm_server_info.remote_mgmt_ip is defined + - bm_server_info.remote_mgmt_user is defined + - bm_server_info.remote_mgmt_password is defined + - name: Power off BM server via Redfish Power Management (HAProxy) + include_role: + name: infra-redfish-power-management + vars: + bmc_hostname: "{{ hostvars[groups['bastions'][0]]['ansible_host'] }}" + bmc_username: "{{ bm_server_info.remote_mgmt_user }}" + bmc_password: "{{ bm_server_info.remote_mgmt_password }}" + power_action: "off" + when: + - groups['bastions'] is defined + - groups['bastions'] | length > 0 + - bm_server_info is defined + - bm_server_info.remote_mgmt_ip is defined + - bm_server_info.remote_mgmt_user is defined + - bm_server_info.remote_mgmt_password is defined + - bm_pre_stop_power_status is succeeded + - bm_pre_stop_power_status.json.PowerState == "On" + - name: Display stop completion if already off + agnosticd_user_info: + msg: | + 🔌 BM Server {{ bm_server_info.hostname | default(bm_server_info.server_id) }} already powered off ✅ + Bastion VM remains running for BMC access + when: + - bm_pre_stop_power_status is succeeded + - bm_pre_stop_power_status.json.PowerState == "Off" + +- when: + - ACTION == 'status' + block: + - include_tasks: discover_vm_inventory.yml + - include_tasks: start_vm_dependency.yml + - include_tasks: wait_for_bastion_https.yml + - name: Get BM power status via Redfish Power Management (HAProxy) + include_role: + name: infra-redfish-power-management + vars: + bmc_hostname: "{{ hostvars[groups['bastions'][0]]['ansible_host'] }}" + bmc_username: "{{ bm_server_info.remote_mgmt_user }}" + bmc_password: "{{ bm_server_info.remote_mgmt_password }}" + power_action: "status" + when: + - groups['bastions'] is defined + - groups['bastions'] | length > 0 + - bm_server_info is defined + - bm_server_info.remote_mgmt_ip is defined + - bm_server_info.remote_mgmt_user is defined + - bm_server_info.remote_mgmt_password is defined \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-bm-lifecycle/tasks/start_vm_dependency.yml b/ansible/roles-infra/infra-ibm-cloud-classic-bm-lifecycle/tasks/start_vm_dependency.yml new file mode 100644 index 00000000000..a668ec9cc3f --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-bm-lifecycle/tasks/start_vm_dependency.yml @@ -0,0 +1,53 @@ +--- +- name: Get bastion VM detailed status + ansible.builtin.command: ibmcloud sl vs detail {{ hostvars[groups['bastions'][0]].vm_id }} --output json + register: bastion_vm_detail_check + changed_when: false + failed_when: false + when: + - groups['bastions'] is defined + - groups['bastions'] | length > 0 + +- name: Parse bastion VM detailed status + ansible.builtin.set_fact: + bastion_vm_power_state: "{{ (bastion_vm_detail_check.stdout | from_json).powerState.keyName }}" + when: + - bastion_vm_detail_check is defined + - bastion_vm_detail_check.rc == 0 + - bastion_vm_detail_check.stdout != "" + +- name: Start bastion VM if not running + ansible.builtin.command: ibmcloud sl vs power-on {{ hostvars[groups['bastions'][0]].vm_id }} --force + register: bastion_start_result + changed_when: bastion_start_result.rc == 0 + failed_when: false + when: + - bastion_vm_power_state is defined + - bastion_vm_power_state != "RUNNING" + +- name: Wait for VM to start up + ansible.builtin.pause: + seconds: 30 + prompt: "Waiting for bastion VM to start up..." 
+ when: + - bastion_start_result is defined + - bastion_start_result.rc is defined + - bastion_start_result.rc == 0 + +- name: Check bastion VM status after start attempt + ansible.builtin.command: ibmcloud sl vs detail {{ hostvars[groups['bastions'][0]].vm_id }} --output json + register: bastion_vm_post_start_check + changed_when: false + failed_when: false + when: + - groups['bastions'] is defined + - groups['bastions'] | length > 0 + - bastion_vm_power_state is defined + +- name: Parse post-start bastion VM status + ansible.builtin.set_fact: + bastion_vm_final_state: "{{ (bastion_vm_post_start_check.stdout | from_json).powerState.keyName }}" + when: + - bastion_vm_post_start_check is defined + - bastion_vm_post_start_check.rc == 0 + - bastion_vm_post_start_check.stdout != "" \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-bm-lifecycle/tasks/wait_for_bastion_https.yml b/ansible/roles-infra/infra-ibm-cloud-classic-bm-lifecycle/tasks/wait_for_bastion_https.yml new file mode 100644 index 00000000000..c4eca968928 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-bm-lifecycle/tasks/wait_for_bastion_https.yml @@ -0,0 +1,42 @@ +--- +- name: Wait for bastion HAProxy HTTPS service to be ready + ansible.builtin.wait_for: + host: "{{ hostvars[groups['bastions'][0]]['ansible_host'] }}" + port: 443 + delay: 5 + timeout: 180 + state: started + register: https_wait_result + when: + - groups['bastions'] is defined + - groups['bastions'] | length > 0 + +- name: Test HTTPS connection to bastion HAProxy + ansible.builtin.uri: + url: "https://{{ hostvars[groups['bastions'][0]]['ansible_host'] }}/redfish/v1/" + method: GET + user: "{{ bm_server_info.remote_mgmt_user }}" + password: "{{ bm_server_info.remote_mgmt_password }}" + validate_certs: false + force_basic_auth: true + status_code: [200, 401, 403] # Allow auth errors - we just want connectivity + timeout: 30 + register: https_test_result + retries: 3 + delay: 10 + until: https_test_result.status in [200, 401, 403] + when: + - https_wait_result is succeeded + - bm_server_info is defined + - bm_server_info.remote_mgmt_user is defined + - bm_server_info.remote_mgmt_password is defined + +- name: Fail if HAProxy HTTPS is not responding + ansible.builtin.fail: + msg: | + ❌ HAProxy HTTPS connectivity failed after timeout + + Host: {{ hostvars[groups['bastions'][0]]['ansible_host'] }} + Port: 443 (HTTPS) + when: + - https_wait_result is failed or https_test_result.status not in [200, 401, 403] \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/README.md b/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/README.md new file mode 100644 index 00000000000..4ac9bfec203 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/README.md @@ -0,0 +1,360 @@ +# IBM Classic Baremetal PXE Server Infrastructure Role + +This role configures a PXE boot server on the bastion host for a single IBM Classic baremetal server, enabling network-based installation of RHEL 10. It is designed to work with AgnosticD infrastructure deployments and uses server information from the `infra-ibm-cloud-classic-bm-info` role. 
+ +## Overview + +The role sets up the following services on the bastion host: +- DHCP server for network boot coordination using the bastion's private IP +- TFTP server for boot file distribution +- HTTP server for OS installation files +- HAProxy for BMC access (optional) +- Kickstart file for automated installation of the target server + +## Requirements + +### System Requirements +- RHEL 8, 9, or 10 system to act as PXE server (bastion host) +- Server information from `infra-ibm-cloud-classic-bm-info` role (`bm_server_info`) +- Network access to download RHEL ISO +- Proper network configuration with separate private and public interfaces +- Sufficient disk space for ISO storage and boot files +- Root or sudo access for system configuration + +### Python Requirements (Ansible Control Node) +No additional Python packages required - the role uses built-in Ansible filters and standard system tools. + +### Ansible Collection Requirements +- `ansible.posix` >= 1.3.0 - Required for mount module +- `community.general` >= 4.0.0 - Required for various modules + +Install with: +```bash +ansible-galaxy collection install -r requirements.yml +``` + +## Role Variables + +### Network Configuration + +| Variable | Description | Default | +|----------|-------------|---------| +| `pxe_server_private_ip` | **Auto-discovered** from interface with private IP | Auto-discovered | +| `pxe_server_public_ip` | **Auto-discovered** from interface with public IP | Auto-discovered | +| `discovered_private_interface` | **Auto-discovered** private interface name | Auto-discovered | +| `discovered_public_interface` | **Auto-discovered** public interface name | Auto-discovered | +| `domain_name` | Domain name for DHCP clients | `example.com` | +| `secondary_dns` | Secondary DNS server | `8.8.8.8` | + +**Note**: The `pxe_server_private_ip` and `pxe_server_public_ip` are automatically discovered by examining all network interfaces on the bastion host and identifying which interfaces have private vs public IP addresses. The private IP address is used for DHCP server configuration, TFTP next-server, and HTTP repository access. Public network configuration is no longer required as virtual interfaces are not created. 
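+
+A minimal sketch of that classification logic is shown below; it uses gathered facts instead of the role's `ip -4 addr` parsing, so it assumes `gather_facts: true` on the bastion:
+
+```yaml
+# Sketch only: split the bastion's IPv4 addresses into private (RFC 1918) and public
+- name: Classify bastion addresses as private or public
+  ansible.builtin.set_fact:
+    discovered_private_ips: "{{ all_ipv4 | select('match', rfc1918) | list }}"
+    discovered_public_ips: "{{ all_ipv4 | reject('match', rfc1918) | list }}"
+  vars:
+    rfc1918: '^(10\.|172\.(1[6-9]|2[0-9]|3[01])\.|192\.168\.)'
+    all_ipv4: "{{ ansible_facts['all_ipv4_addresses'] | default([]) }}"
+```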
+ +### ISO and Installation Configuration + +| Variable | Description | Default | +|----------|-------------|---------| +| `rhel_iso_url` | URL to RHEL 10 ISO | **Required** | +| `rhel_iso_username` | Username for ISO download (optional) | undefined | +| `rhel_iso_password` | Password for ISO download (optional) | undefined | +| `iso_storage_path` | Local path for ISO storage | `/var/images` | +| `iso_mount_path` | ISO mount point | `/mnt/rhel10` | + +### Boot Configuration + +| Variable | Description | Default | +|----------|-------------|---------| +| `dhcp_filename` | EFI boot file name | `rhel10/EFI/BOOT/BOOTX64.EFI` | +| `grub_timeout` | GRUB menu timeout | `0` | +| `grub_menu_title` | GRUB menu title | `RHEL 10` | +| `grub_kernel_params` | Kernel parameters | `ip=dhcp inst.text inst.nosave=all` | + +### Kickstart Configuration + +| Variable | Description | Default | +|----------|-------------|---------| +| `kickstart_satellite_url` | Satellite server URL | Empty | +| `kickstart_activation_key` | Satellite activation key | Empty | +| `kickstart_auth_token` | Satellite auth token | Empty | +| `kickstart_user_password` | Root password (from XClarity deployment) | `{{ generated_password }}` | +| `kickstart_ssh_key` | SSH public key for root | Empty | +| `generated_password` | Auto-generated password from XClarity deployment | **Required** | + +### Server Configuration + +The role requires server information from the `infra-ibm-cloud-classic-bm-info` role via the `bm_server_info` variable: + +```yaml +# Generated by infra-ibm-cloud-classic-bm-info role +bm_server_info: + server_id: "12345678" + private_ip: "10.187.89.217" + public_ip: "169.62.186.130" + private_mac_address: "3c:fd:fe:87:f0:20" + public_mac_address: "3c:fd:fe:87:f0:21" + remote_mgmt_ip: "10.187.89.233" + remote_mgmt_user: "admin" + remote_mgmt_password: "password" + private_vlan_id: "1234" + public_vlan_id: "5678" +``` + +### Network Configuration + +You can customize the network configuration for the server using these variables: + +| Variable | Description | Default | +|----------|-------------|---------| +| `bm_server_private_netmask` | Subnet mask for private network | `255.255.255.0` | +| `bm_server_public_netmask` | Subnet mask for public network | `255.255.255.0` | +| `bm_server_public_gateway` | Public network gateway | Empty | +| `bm_server_bonded_network` | Whether to use bonded networking | `false` | + +### MAC-to-IP Mapping + +The role uses the MAC address (`private_mac_address`) from `bm_server_info` to create a DHCP static reservation that assigns the specified private IP address to the server. When the server boots via PXE, it will always receive the same IP address based on its MAC address, ensuring consistent network configuration across reboots and installations. + +## Files and Directories + +The role uses the following filesystem layout: + +``` +/var/images/ # ISO storage +/mnt/rhel10/ # ISO mount point +/var/lib/tftpboot/ # TFTP boot files +/var/www/html/ # HTTP served files +/var/www/html/ks/ # Kickstart files +/etc/dhcp/dhcpd.conf # DHCP configuration +/etc/haproxy/haproxy.cfg # HAProxy configuration +``` + +## Services + +The role manages these system services: + +- `dhcpd` - DHCP server for network boot +- `httpd` - HTTP server for file distribution +- `tftp` - TFTP server for boot files +- `haproxy` - Proxy for BMC access (optional) + +### HAProxy Configuration + +HAProxy is used to proxy HTTPS and console access to the BMC (Baseboard Management Controller). 
+The service is always installed and must start successfully for the deployment to continue. + +#### HAProxy Variables + +| Variable | Description | Default | +|----------|-------------|---------| +| `haproxy_services` | List of services to proxy | `[{name: "https", port: 443}, {name: "console", port: 3900}]` | + +#### HAProxy Configuration Conditions + +HAProxy will only be configured if: +- `haproxy_services` list is not empty +- `bm_server_info.remote_mgmt_ip` is defined and not empty + +#### HAProxy Naming Convention + +The HAProxy configuration uses the following naming pattern: +- **Frontend**: `{bastion_hostname}-{service_name}-f` (e.g., `bastion-aaaa-01-https-f`) +- **Backend**: `{bastion_hostname}-{service_name}-b` (e.g., `bastion-aaaa-01-https-b`) +- **Backend Server**: `{baremetal_hostname}` (from `bm_server_info.hostname`) + +If HAProxy is not configured due to missing BMC information, the service will still be installed but may fail to start, causing the deployment to fail. + +## Usage + +### Redfish Baremetal Integration (Recommended) + +This role is designed to work with the Redfish baremetal infrastructure deployment. To use it: + +```bash +# 1. Enable PXE server in your configuration +ansible-playbook ansible/cloud_providers/redfish_baremetal_infrastructure_deployment.yml \ + -e ibm_cloud_server_id=YOUR_SERVER_ID \ + -e setup_pxe_server=true \ + -e rhel_iso_url="https://your-iso-server.com/rhel-10.0-x86_64-dvd.iso" +``` + +The deployment automatically: +1. Runs `infra-ibm-cloud-classic-bm-info` to gather server information +2. Deploys bastion host +3. Passes `bm_server_info` and other variables to bastion +4. Runs PXE server setup on bastion using the gathered server information +5. Configures DHCP, TFTP, HTTP services with MAC-to-IP mappings + +### Standalone Usage + +```yaml +- name: Setup PXE server using IBM Cloud server info + hosts: pxe_server + gather_facts: true + become: true + tasks: + - name: Gather server information + include_role: + name: infra-ibm-cloud-classic-bm-info + vars: + ibm_cloud_server_id: "12345678" + ibm_cloud_api_key: "{{ ibm_cloud_api_key }}" + + - name: Configure PXE server + include_role: + name: infra-ibm-classic-bm-pxe-server + vars: + rhel_iso_url: "https://example.com/rhel-10.0-x86_64-dvd.iso" + rhel_iso_username: "your-username" # Optional + rhel_iso_password: "your-password" # Optional + generated_password: "your-secure-password" + pxe_server_private_interface: "ens192" + # bm_server_info is automatically available from the info role + bm_server_private_netmask: "255.255.255.192" # Custom netmask + bm_server_public_netmask: "255.255.255.240" +``` + +### With Custom ISO + +```yaml +- name: Setup PXE server with custom ISO + include_role: + name: infra-ibm-classic-bm-pxe-server + vars: + rhel_iso_url: "file:///local/path/to/rhel-10-custom.iso" + rhel_iso_username: "" + rhel_iso_password: "" + generated_password: "{{ generated_password }}" + # bm_server_info must be provided separately +``` + +### With Satellite Integration + +```yaml +- name: Setup PXE server with Satellite + include_role: + name: infra-ibm-classic-bm-pxe-server + vars: + rhel_iso_url: "https://example.com/rhel-10.0-x86_64-dvd.iso" + generated_password: "{{ generated_password }}" + kickstart_satellite_url: "https://satellite.example.com/register" + kickstart_activation_key: "rhel-10-key" + kickstart_auth_token: "{{ satellite_auth_token }}" + kickstart_ssh_key: "{{ admin_ssh_public_key }}" + # bm_server_info must be provided separately +``` + +## Integration with AgnosticD 
+
+This role is designed to integrate with AgnosticD infrastructure workflows:
+
+1. **Placement**: Located in `ansible/roles-infra/` following AgnosticD conventions
+2. **Variables**: Uses AgnosticD-compatible variable patterns
+3. **Handlers**: Includes proper service restart handlers
+4. **Error Handling**: Comprehensive validation and error messages
+5. **Idempotency**: Can be run multiple times safely
+6. **Redfish Integration**: Designed to work with `redfish_baremetal_infrastructure_deployment.yml`
+7. **Bastion Deployment**: Runs on the bastion host deployed by the Redfish infrastructure
+8. **Auto-Discovery**: Automatically uses server information from `infra-ibm-cloud-classic-bm-info`
+
+### Example Integration
+
+```yaml
+# Redfish deployment automatically integrates this role
+# The role runs on the bastion host with bm_server_info passed from localhost
+
+# In redfish_baremetal_infrastructure_deployment.yml:
+- name: Step 001.2 Configure PXE Server on Bastion Host
+  hosts: bastion
+  gather_facts: true
+  become: true
+  tasks:
+    - name: Setup PXE server on bastion host
+      include_role:
+        name: "infra-ibm-classic-bm-pxe-server"
+      vars: "{{ hostvars['localhost']['pxe_server_vars'] }}"
+      when:
+        - hostvars['localhost']['setup_pxe_server'] | default(false)
+        - hostvars['localhost']['pxe_server_vars'] is defined
+        - hostvars['localhost']['pxe_server_vars']['bm_server_info'] is defined
+```
+
+## Features
+
+- **Automated Setup**: Complete PXE server configuration
+- **Server-Specific Kickstart**: Individual kickstart file for the server
+- **Service Management**: Proper service start/stop/restart handling
+- **Backup Support**: Configuration backup before changes
+- **Validation**: Comprehensive variable validation
+- **Idempotent**: Safe to run multiple times
+- **Error Handling**: Detailed error messages and recovery
+- **Single Server Focus**: Simplified configuration for one baremetal server
+
+## Architecture
+
+The PXE server provides:
+
+1. **DHCP**: IP allocation and boot configuration with MAC-to-IP static reservations
+2. **TFTP**: EFI boot file distribution
+3. **HTTP**: OS installation files and kickstart delivery
+4. **HAProxy**: Optional BMC access routing
+
+### DHCP Configuration
+
+The role generates a DHCP configuration (`/etc/dhcp/dhcpd.conf`) that includes:
+
+- **Static Reservation**: The server gets a dedicated IP based on its MAC address
+- **Boot Configuration**: Points to the TFTP server for PXE boot files
+- **DNS Configuration**: Uses the bastion host as the primary DNS server
+
+Example generated DHCP server entry:
+```
+# 12345678 - MAC: 3c:fd:fe:87:f0:20 -> IP: 10.187.89.217
+host 12345678 {
+    hardware ethernet 3c:fd:fe:87:f0:20;
+    fixed-address 10.187.89.217;
+    option host-name "12345678";
+}
+```
+
+This ensures that when the baremetal server boots, it always receives the same IP address based on its MAC address, enabling consistent network configuration and proper kickstart file delivery.
+
+## Troubleshooting
+
+### Common Issues
+
+1. **Network Interface Not Found**: Verify interface names with `ip link show`
+2. **ISO Download Fails**: Check URL, credentials, and network connectivity
+3. **Service Start Failures**: Review journald logs: `journalctl -u <service>`
+4. 
**Boot Failures**: Verify DHCP/TFTP configuration and network routing + +### Debug Commands + +```bash +# Check DHCP leases +sudo cat /var/lib/dhcpd/dhcpd.leases + +# Verify TFTP files +sudo ls -la /var/lib/tftpboot/ + +# Check HTTP accessibility +curl http://pxe-server-ip/rhel10/ + +# Monitor services +sudo systemctl status dhcpd httpd tftp haproxy +``` + +## Security Considerations + +- **ISO Credentials**: Store `rhel_iso_username` and `rhel_iso_password` in encrypted vaults or secure variable files +- **Network Security**: Configure firewall rules for DHCP (67/68), TFTP (69), HTTP (80), and HAProxy ports +- **Access Control**: Limit network access to trusted subnets +- **Kickstart Security**: Use secure kickstart configuration with encrypted passwords +- **Local Storage**: Consider using local ISO files (`file://` URLs) instead of downloading from internet +- **System Updates**: Regularly update the PXE server system and services +- **Variable Management**: Never commit ISO credentials to version control + +## License + +This role is part of the AgnosticD project and follows the same license terms. + +## Author Information + +Created as part of the AgnosticD project for managing IBM Classic baremetal infrastructure deployments. \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/defaults/main.yml b/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/defaults/main.yml new file mode 100644 index 00000000000..3982e5d8191 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/defaults/main.yml @@ -0,0 +1,84 @@ +--- +# defaults file for infra-ibm-classic-bm-pxe-server role + +# RHEL ISO Configuration - These must be provided by the user/config +# rhel_iso_url: "" # Required - URL to RHEL 10 ISO +# rhel_iso_username: "" # Optional - Username for ISO download +# rhel_iso_password: "" # Optional - Password for ISO download +rhel_iso_filename: "rhel-10.0-x86_64-dvd.iso" # ISO filename for download and storage + +# Server Configuration +# The role requires bm_server_info from infra-ibm-cloud-classic-bm-info role +# This contains all necessary server information including MAC addresses and IP addresses + +# Network Configuration +# The PXE server private and public IPs are automatically discovered by examining +# all network interfaces and identifying which ones have private vs public IP addresses. 
+# The discovered IPs will be used for: +# - DHCP server configuration (private IP as DNS server and next-server) +# - TFTP server for PXE boot (private IP) +# - HTTP server for kickstart files and repository access (private IP) +# pxe_server_private_ip: "auto-discovered from interface with private IP" +# pxe_server_public_ip: "auto-discovered from interface with public IP" +# discovered_private_interface: "auto-discovered interface name with private IP" +# discovered_public_interface: "auto-discovered interface name with public IP" + +# Default network configuration for bm_server_info conversion +# These are used when converting bm_server_info to host_configuration format +bm_server_private_netmask: "255.255.255.0" +bm_server_public_netmask: "255.255.255.0" +bm_server_public_gateway: "" +bm_server_bonded_network: false + +# Domain and DNS Configuration +domain_name: "{{ domain_name | default('example.com') }}" +secondary_dns: "8.8.8.8" + +# DHCP Configuration +dhcp_filename: "rhel10/EFI/BOOT/BOOTX64.EFI" + +# GRUB Configuration +grub_timeout: 0 +grub_menu_title: "RHEL 10" +grub_kernel_path: "images/RHEL-10/vmlinuz" +grub_initrd_path: "images/RHEL-10/initrd.img" +grub_kernel_params: "ip=dhcp inst.text inst.nosave=all net.ifnames=0 biosdevname=0" + +# Kickstart Configuration +kickstart_satellite_url: "{{ kickstart_satellite_url | default('') }}" +kickstart_activation_key: "{{ kickstart_activation_key | default('') }}" +kickstart_auth_token: "{{ kickstart_auth_token | default('') }}" +kickstart_user_password: "{{ generated_password | default('') }}" + +# SSH Key Configuration +kickstart_ssh_key: "{{ ansible_ssh_user_public_key | default('') }}" + +# Default HAProxy Services +haproxy_services: + - name: "https" + port: 443 + - name: "console" + port: 3900 + +# File paths +iso_storage_path: "/var/images" +iso_mount_path: "/mnt/rhel10" +tftp_root: "/var/lib/tftpboot" +web_root: "/var/www/html" +kickstart_path: "/var/www/html/ks" + +# Packages to install and services to enable +required_packages: + - dhcp-server + - httpd + - tftp-server + - haproxy + +pxe_services: + - httpd + - dhcpd + - tftp + - haproxy + +# Backup configuration +backup_configs: true \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/handlers/main.yml b/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/handlers/main.yml new file mode 100644 index 00000000000..37403afece0 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/handlers/main.yml @@ -0,0 +1,26 @@ +--- +# handlers file for infra-ibm-cloud-classic-bm-pxe-server role + +- name: restart dhcpd + ansible.builtin.systemd: + name: dhcpd + state: restarted + become: true + +- name: restart httpd + ansible.builtin.systemd: + name: httpd + state: restarted + become: true + +- name: restart tftp + ansible.builtin.systemd: + name: tftp + state: restarted + become: true + +- name: restart haproxy + ansible.builtin.systemd: + name: haproxy + state: restarted + become: true \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/meta/main.yml b/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/meta/main.yml new file mode 100644 index 00000000000..769e76f060b --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/meta/main.yml @@ -0,0 +1,27 @@ +--- +galaxy_info: + author: AgnosticD Team + description: Infrastructure role for setting up PXE server for IBM Classic baremetal systems + license: BSD + min_ansible_version: 2.9 + platforms: + - name: EL + versions: + - 8 + - 9 + - 10 + galaxy_tags: + - cloud + - infrastructure + - ibm + - classic + - baremetal + - pxe + - agnosticd + +dependencies: [] + +collections: + - ansible.builtin + - ansible.posix + - community.general \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/requirements.txt b/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/requirements.txt new file mode 100644 index 00000000000..1f1b0752c8f --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/requirements.txt @@ -0,0 +1,2 @@ +# Python requirements for infra-ibm-cloud-classic-bm-pxe-server role +# No additional Python packages required - uses built-in Ansible filters and standard system tools \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/requirements.yml b/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/requirements.yml new file mode 100644 index 00000000000..c9835e08969 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/requirements.yml @@ -0,0 +1,9 @@ +--- +# Ansible collection requirements for infra-ibm-cloud-classic-bm-pxe-server role +# Install with: ansible-galaxy collection install -r requirements.yml + +collections: + - name: ansible.posix + version: ">=1.3.0" + - name: community.general + version: ">=4.0.0" \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/tasks/main.yml b/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/tasks/main.yml new file mode 100644 index 00000000000..b9014b4daf8 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/tasks/main.yml @@ -0,0 +1,263 @@ +--- +# tasks file for infra-ibm-cloud-classic-bm-pxe-server role + +- name: Validate required variables + ansible.builtin.assert: + that: + - bm_server_info is defined + - pxe_server_private_interface is defined + - rhel_iso_url is defined + - rhel_iso_url | length > 0 + - generated_password is defined + - generated_password | length > 0 + fail_msg: "Required variables are missing. Please ensure bm_server_info, private network interface, rhel_iso_url, and generated_password are defined."
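+ +# Illustrative shape of the bm_server_info fact this role consumes (hypothetical +# example values; the real structure is produced by the infra-ibm-cloud-classic-bm-info +# role and is checked by the assertions below): +# bm_server_info: +# server_id: "1234567" +# hostname: "bm-host-01" # optional; server_id is used as a fallback +# private_mac_address: "0c:42:a1:00:00:01" +# private_ip: "10.93.0.10" +# public_ip: "169.48.0.10" +# remote_mgmt_ip: "10.93.0.20" # BMC address used for Redfish operations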
+ +- name: Validate bm_server_info contains required fields + ansible.builtin.assert: + that: + - bm_server_info.server_id is defined + - bm_server_info.server_id | length > 0 + - bm_server_info.private_mac_address is defined + - bm_server_info.private_mac_address | length > 0 + - bm_server_info.private_ip is defined + - bm_server_info.private_ip | length > 0 + - bm_server_info.public_ip is defined + - bm_server_info.public_ip | length > 0 + - bm_server_info.remote_mgmt_ip is defined + - bm_server_info.remote_mgmt_ip | length > 0 + fail_msg: "bm_server_info is missing required fields: server_id, private_mac_address, private_ip, public_ip, remote_mgmt_ip" + +- name: Set server configuration variables + ansible.builtin.set_fact: + server_name: "{{ bm_server_info.hostname | default(bm_server_info.server_id) }}" + server_private_mac: "{{ bm_server_info.private_mac_address }}" + server_private_ip: "{{ bm_server_info.private_ip }}" + server_private_netmask: "{{ bm_server_private_netmask | default('255.255.255.0') }}" + server_public_ip: "{{ bm_server_info.public_ip }}" + server_public_netmask: "{{ bm_server_public_netmask | default('255.255.255.0') }}" + server_public_gateway: "{{ bm_server_public_gateway | default('') }}" + server_bmc_ip: "{{ bm_server_info.remote_mgmt_ip }}" + bastion_hostname: "{{ ansible_hostname }}" + baremetal_hostname: "{{ bm_server_info.hostname | default(bm_server_info.server_id) }}" + +- name: Discover network interfaces using system commands + ansible.builtin.shell: | + for iface in $(ip -4 addr show | grep -E "^[0-9]+: " | grep -v "lo:" | cut -d: -f2 | tr -d ' '); do + ip_addr=$(ip -4 addr show $iface | grep -oP 'inet \K[\d.]+' | head -1) + if [ -n "$ip_addr" ]; then + echo "$iface:$ip_addr" + fi + done + register: parsed_interfaces + when: pxe_server_private_ip is not defined + +- name: Create network interfaces from system command output + ansible.builtin.set_fact: + network_interfaces: "{{ network_interfaces | default([]) + [{'key': 'ansible_' + item.split(':')[0], 'value': {'ipv4': {'address': item.split(':')[1]}}}] }}" + loop: "{{ parsed_interfaces.stdout_lines }}" + when: + - pxe_server_private_ip is not defined + - parsed_interfaces is defined + - parsed_interfaces.stdout_lines | length > 0 + +- name: Identify private and public interfaces by IP address + ansible.builtin.set_fact: + discovered_private_interface: "{{ (private_interfaces | first).key | regex_replace('^ansible_(.*)$', '\\1') }}" + discovered_public_interface: "{{ (public_interfaces | first).key | regex_replace('^ansible_(.*)$', '\\1') if public_interfaces | length > 0 else '' }}" + vars: + private_interfaces: "{{ network_interfaces | selectattr('value.ipv4.address', 'match', '^(10\\.|172\\.(1[6-9]|2[0-9]|3[01])\\.|192\\.168\\.)') | list }}" + public_interfaces: "{{ network_interfaces | rejectattr('value.ipv4.address', 'match', '^(10\\.|172\\.(1[6-9]|2[0-9]|3[01])\\.|192\\.168\\.)') | list }}" + when: + - pxe_server_private_ip is not defined + - private_interfaces | length > 0 + +- name: Get bastion host private IP address for PXE server + ansible.builtin.set_fact: + pxe_server_private_ip: "{{ (network_interfaces | selectattr('key', 'equalto', 'ansible_' + discovered_private_interface) | first).value.ipv4.address }}" + pxe_server_public_ip: "{{ (network_interfaces | selectattr('key', 'equalto', 'ansible_' + discovered_public_interface) | first).value.ipv4.address if discovered_public_interface | length > 0 else '' }}" + when: + - pxe_server_private_ip is not defined + - 
discovered_private_interface is defined + +- name: Validate bastion host private IP is set + ansible.builtin.assert: + that: + - pxe_server_private_ip is defined + - pxe_server_private_ip | length > 0 + - pxe_server_private_ip | regex_search('^(?:[0-9]{1,3}\.){3}[0-9]{1,3}$') + fail_msg: "Unable to determine bastion host private IP address from discovered interface {{ discovered_private_interface | default('unknown') }}" + +- name: Install required packages + ansible.builtin.dnf: + name: "{{ required_packages }}" + state: present + become: true + +- name: Create required directories + ansible.builtin.file: + path: "{{ item }}" + state: directory + mode: '0755' + loop: + - "{{ iso_storage_path }}" + - "{{ iso_mount_path }}" + - "{{ tftp_root }}/rhel10" + - "{{ kickstart_path }}" + - "{{ tftp_root }}/images/RHEL-10" + become: true + +- name: Check if ISO already exists (faster) + ansible.builtin.command: test -f "{{ iso_storage_path }}/{{ rhel_iso_filename }}" + register: iso_exists + failed_when: false + changed_when: false + +- name: Download RHEL 10 ISO using curl (more reliable for CloudFront) + ansible.builtin.shell: | + curl -L -o "{{ iso_storage_path }}/{{ rhel_iso_filename }}" \ + -u "{{ rhel_iso_username }}:{{ rhel_iso_password }}" \ + --connect-timeout 30 \ + --max-time 3600 \ + --retry 3 \ + --retry-delay 10 \ + --silent \ + --show-error \ + "{{ rhel_iso_url }}" + when: iso_exists.rc != 0 and (rhel_iso_username is defined and rhel_iso_username | length > 0) + become: true + register: iso_download_curl + +- name: Verify ISO file was downloaded successfully + ansible.builtin.stat: + path: "{{ iso_storage_path }}/{{ rhel_iso_filename }}" + register: iso_verify + when: iso_exists.rc != 0 + +- name: Set file permissions on downloaded ISO + ansible.builtin.file: + path: "{{ iso_storage_path }}/{{ rhel_iso_filename }}" + mode: '0644' + owner: root + group: root + when: iso_exists.rc != 0 and iso_verify.stat.exists + become: true + +- name: Check if ISO is already mounted + ansible.builtin.shell: mount | grep "{{ iso_mount_path }}" + register: mount_check + failed_when: false + changed_when: false + +- name: Add ISO mount to fstab + ansible.builtin.lineinfile: + path: /etc/fstab + line: "{{ iso_storage_path }}/{{ rhel_iso_filename }} {{ iso_mount_path }} iso9660 ro,defaults 0 0" + backup: "{{ backup_configs }}" + become: true + +- name: Reload systemd daemon + ansible.builtin.systemd: + daemon_reload: true + become: true + +- name: Mount RHEL 10 ISO + ansible.posix.mount: + path: "{{ iso_mount_path }}" + src: "{{ iso_storage_path }}/{{ rhel_iso_filename }}" + fstype: iso9660 + opts: ro,defaults + state: mounted + become: true + when: mount_check.rc != 0 + +- name: Copy EFI files to TFTP directory + ansible.builtin.copy: + src: "{{ iso_mount_path }}/EFI" + dest: "{{ tftp_root }}/rhel10/" + remote_src: true + mode: preserve + become: true + +- name: Set permissions on TFTP RHEL10 directory + ansible.builtin.file: + path: "{{ tftp_root }}/rhel10/" + mode: '0755' + recurse: true + become: true + +- name: Configure DHCP server + ansible.builtin.template: + src: dhcpd.conf.j2 + dest: /etc/dhcp/dhcpd.conf + backup: "{{ backup_configs }}" + mode: '0644' + become: true + notify: restart dhcpd + +- name: Create GRUB configuration for PXE boot + ansible.builtin.template: + src: grub.cfg.j2 + dest: "{{ tftp_root }}/rhel10/EFI/BOOT/grub.cfg" + mode: '0644' + become: true + +- name: Generate salted password hash for kickstart + ansible.builtin.set_fact: + kickstart_user_password_hash: "{{ 
generated_password | password_hash('sha512') }}" + when: generated_password is defined + +- name: Create kickstart file + ansible.builtin.template: + src: host-kickstart.ks.j2 + dest: "{{ kickstart_path }}/rhel10-{{ server_private_mac }}.ks" + mode: '0644' + become: true + +- name: Configure HAProxy + ansible.builtin.template: + src: haproxy.cfg.j2 + dest: /etc/haproxy/haproxy.cfg + backup: "{{ backup_configs }}" + mode: '0644' + become: true + notify: restart haproxy + when: + - haproxy_services | length > 0 + - bm_server_info.remote_mgmt_ip is defined + - bm_server_info.remote_mgmt_ip != "" + +- name: Convert HAProxy config to Unix line endings + ansible.builtin.replace: + path: /etc/haproxy/haproxy.cfg + regexp: '\r\n' + replace: '\n' + become: true + when: + - haproxy_services | length > 0 + - bm_server_info.remote_mgmt_ip is defined + - bm_server_info.remote_mgmt_ip != "" + +- name: Copy kernel and initrd files for PXE boot + ansible.builtin.copy: + src: "{{ item.src }}" + dest: "{{ item.dest }}" + remote_src: true + loop: + - { src: "{{ iso_mount_path }}/images/pxeboot/vmlinuz", dest: "{{ tftp_root }}/images/RHEL-10/vmlinuz" } + - { src: "{{ iso_mount_path }}/images/pxeboot/initrd.img", dest: "{{ tftp_root }}/images/RHEL-10/initrd.img" } + become: true + +- name: Create symbolic link for RHEL 10 repository access via HTTP + ansible.builtin.file: + src: "{{ iso_mount_path }}" + dest: "{{ web_root }}/rhel10" + state: link + become: true + +- name: Ensure all services are enabled and started + ansible.builtin.systemd: + name: "{{ item }}" + enabled: true + state: started + loop: "{{ pxe_services }}" + become: true \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/templates/dhcpd.conf.j2 b/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/templates/dhcpd.conf.j2 new file mode 100644 index 00000000000..ee565d45e80 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/templates/dhcpd.conf.j2 @@ -0,0 +1,49 @@ +# dhcpd.conf +# Generated by Ansible - DO NOT EDIT MANUALLY + +# Global options +option domain-name "{{ domain_name }}"; +option domain-name-servers {{ pxe_server_private_ip }}, {{ secondary_dns }}; + +default-lease-time 600; +max-lease-time 7200; + +# If this DHCP server is the official DHCP server for the local +# network, the authoritative directive should be uncommented. +authoritative; + +# Use this to send dhcp log messages to a different log file (you also +# have to hack syslog.conf to complete the redirection). 
+log-facility local7; + +# Disable dynamic DNS (DDNS) updates +ddns-update-style none; + +option architecture-type code 93 = unsigned integer 16; + +# Subnet declaration - derived from server configuration +{% if server_private_netmask == '255.255.255.0' %} +{% set network = server_private_ip | regex_replace('\\.[0-9]+$', '.0') %} +{% elif server_private_netmask == '255.255.0.0' %} +{% set network = server_private_ip | regex_replace('\\.[0-9]+\\.[0-9]+$', '.0.0') %} +{% elif server_private_netmask == '255.0.0.0' %} +{% set network = server_private_ip | regex_replace('\\.[0-9]+\\.[0-9]+\\.[0-9]+$', '.0.0.0') %} +{% else %} +{% set network = server_private_ip | regex_replace('\\.[0-9]+$', '.0') %} +{% endif %} +{% set netmask = server_private_netmask %} +subnet {{ network }} netmask {{ netmask }} { + next-server {{ pxe_server_private_ip }}; + filename "{{ dhcp_filename }}"; + + # Static IP assignment for the baremetal host + # Host gets a fixed IP based on its MAC address + group { + # {{ server_name }} - MAC: {{ server_private_mac }} -> IP: {{ server_private_ip }} + host {{ server_name | replace('-', '_') | replace('.', '_') }} { + hardware ethernet {{ server_private_mac | lower }}; + fixed-address {{ server_private_ip }}; + option host-name "{{ server_name }}"; + } + } +} \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/templates/grub.cfg.j2 b/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/templates/grub.cfg.j2 new file mode 100644 index 00000000000..808c30f1dc4 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/templates/grub.cfg.j2 @@ -0,0 +1,9 @@ +# GRUB Configuration for PXE Boot +# Generated by Ansible - DO NOT EDIT MANUALLY + +set timeout={{ grub_timeout }} + +menuentry '{{ grub_menu_title }}' { + linux {{ grub_kernel_path }} {{ grub_kernel_params }} inst.sshd inst.stage2=http://{{ pxe_server_private_ip }}/rhel10 inst.repo=http://{{ pxe_server_private_ip }}/rhel10 inst.ks=http://{{ pxe_server_private_ip }}/ks/rhel10-${net_default_mac}.ks + initrd {{ grub_initrd_path }} +} \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/templates/haproxy.cfg.j2 b/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/templates/haproxy.cfg.j2 new file mode 100644 index 00000000000..e7d97687582 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/templates/haproxy.cfg.j2 @@ -0,0 +1,34 @@ +# HAProxy Configuration +# Generated by Ansible - DO NOT EDIT MANUALLY + +global + log 127.0.0.1 local2 + chroot /var/lib/haproxy + pidfile /var/run/haproxy.pid + maxconn 4000 + user haproxy + group haproxy + daemon + +defaults + mode tcp + option tcplog + log global + option dontlognull + timeout queue 1m + timeout connect 10s + timeout client 1m + timeout server 1m + timeout http-keep-alive 10s + timeout check 10s + maxconn 3000 + +# Server-specific frontend/backend pairs +{% for service in haproxy_services %} +frontend {{ bastion_hostname }}-{{ service.name }}-f + bind {{ pxe_server_public_ip }}:{{ service.port }} + mode tcp + use_backend {{ baremetal_hostname }}-{{ service.name }}-b +backend {{ baremetal_hostname }}-{{ service.name }}-b + server {{ baremetal_hostname }} {{ server_bmc_ip }}:{{ service.port }} check +{% endfor %} \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/templates/host-kickstart.ks.j2 
b/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/templates/host-kickstart.ks.j2 new file mode 100644 index 00000000000..0f53fbd1dc2 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-bm-pxe-server/templates/host-kickstart.ks.j2 @@ -0,0 +1,59 @@ +# Complete Kickstart Configuration for {{ server_name }} +# Generated by Ansible - DO NOT EDIT MANUALLY + +cmdline + +# MGMT +#network --bootproto=dhcp --dhcpclass=anaconda-Linux --device=enp0s20f0u1u6 --ipv6=auto --no-activate + +{% if bm_server_bonded_network | default(false) %} +# Bonded Network Configuration - Static for both private and public +network --bootproto=static --device=bond0 --ip={{ server_private_ip }} --netmask={{ server_private_netmask }} --ipv6=auto --activate --bondslaves="eth1,eth3" --bondopts="mode=802.3ad,miimon=100,lacp_rate=1" +network --bootproto=static --device=bond1 --gateway={{ server_public_gateway }} --ip={{ server_public_ip }} --nameserver={{ secondary_dns }} --netmask={{ server_public_netmask }} --ipv6=auto --activate --bondslaves="eth2,eth4" --bondopts="mode=802.3ad,miimon=100,lacp_rate=1" +{% else %} +# Unbonded Network Configuration - Static for both private and public +network --bootproto=static --device=eth1 --ip={{ server_private_ip }} --netmask={{ server_private_netmask }} --ipv6=auto --activate +network --bootproto=static --device=eth2 --gateway={{ server_public_gateway }} --ip={{ server_public_ip }} --nameserver={{ secondary_dns }} --netmask={{ server_public_netmask }} --ipv6=auto --activate +# Disable unused interfaces if present +network --device=eth3 --no-activate +network --device=eth4 --no-activate +{% endif %} + +network --hostname={{ server_name }}.{{ domain_name }} + +repo --name="AppStream" --baseurl=http://{{ pxe_server_private_ip }}/rhel10/AppStream +# Use network installation +url --url="http://{{ pxe_server_private_ip }}/rhel10/" + +%packages +@^minimal-environment +%end + +firstboot --disabled + +ignoredisk --only-use=sda +bootloader --append="crashkernel=2G-64G:256M,64G-:512M" --location=mbr --boot-drive=sda + +clearpart --drives=sda --all --initlabel + +part /boot/efi --fstype="efi" --ondisk=sda --size=600 --fsoptions="umask=0077,shortname=winnt" +part /boot --fstype="xfs" --ondisk=sda --size=1024 +part / --fstype="xfs" --ondisk=sda --grow --label=root + +rootpw --lock +sshkey --username root "{{ kickstart_ssh_key }}" +user --name {{ baremetal_user_name }} --groups wheel --password "{{ kickstart_user_password_hash | default(kickstart_user_password | password_hash('sha512')) }}" --iscrypted +sshkey --username {{ baremetal_user_name }} "{{ kickstart_ssh_key }}" + +%pre +# Enable and start SSH early in installation +systemctl enable sshd +systemctl start sshd +%end + +%post +set -o pipefail && curl -sS --insecure '{{ kickstart_satellite_url }}?activation_keys={{ kickstart_activation_key }}&force=true&location_id=2&operatingsystem_id=23&organization_id=1&setup_insights=false&update_packages=false' -H 'Authorization: Bearer {{ kickstart_auth_token }}' | bash +echo "%wheel ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/nopasswd +%end + +reboot \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/.yamllint b/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/.yamllint new file mode 100644 index 00000000000..19c4c6fb843 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/.yamllint @@ -0,0 +1,16 @@ +--- +extends: default +rules: + line-length: + max: 120 + level: warning + comments-indentation: disable + 
comments: disable + document-start: disable + truthy: disable + braces: + max-spaces-inside: 1 + level: error + brackets: + max-spaces-inside: 1 + level: error \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/README.md b/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/README.md new file mode 100644 index 00000000000..ade57afe68e --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/README.md @@ -0,0 +1,1033 @@ +# IBM Cloud Classic VM Infrastructure Role + +This AgnosticD infrastructure role manages multiple Virtual Machines (VMs) in IBM Cloud Classic infrastructure using Terraform templates. + +## Features + +- **Multi-VM Deployment**: Deploy multiple VMs in a single operation +- **VM Deployment**: Creates VMs using Terraform templates from `~/create-vm-ibm-cloud` +- **VM Destruction**: Cleanly destroys VMs and associated resources +- **Inventory Creation**: Generates comprehensive Ansible inventory for deployed VMs +- **Security Groups**: Configures firewall rules for VM access +- **SSH Testing**: Validates SSH connectivity to deployed VMs +- **Automatic Setup**: Installs Terraform if not present (configurable version) +- **Flexible Configuration**: Each VM can have individual specifications and settings + +## Prerequisites + +- IBM Cloud account with Classic Infrastructure access +- IBM Cloud API key +- SSH key pair for VM access +- Terraform templates in `~/create-vm-ibm-cloud` + +## Required Variables + +```yaml +ibm_cloud_api_key: "YOUR_IBM_CLOUD_API_KEY" +output_dir: "/tmp" +security_groups: + - name: "web" + rules: + - name: "http" + description: "Allow HTTP traffic" + rule_type: "ingress" + ether_type: "IPv4" + from_port: 80 + to_port: 80 + protocol: "tcp" + cidr: "0.0.0.0/0" + - name: "https" + description: "Allow HTTPS traffic" + rule_type: "ingress" + ether_type: "IPv4" + from_port: 443 + to_port: 443 + protocol: "tcp" + cidr: "0.0.0.0/0" + - name: "db" + rules: + - name: "mysql" + description: "Allow MySQL traffic from internal network" + rule_type: "ingress" + ether_type: "IPv4" + from_port: 3306 + to_port: 3306 + protocol: "tcp" + cidr: "10.0.0.0/8" + - name: "ssh" + rules: + - name: "ssh" + description: "Allow SSH access" + rule_type: "ingress" + ether_type: "IPv4" + from_port: 22 + to_port: 22 + protocol: "tcp" + cidr: "0.0.0.0/0" + - name: "egress" + rules: + - name: "tcp_out" + description: "Allow outbound TCP traffic" + rule_type: "egress" + ether_type: "IPv4" + protocol: "tcp" + cidr: "0.0.0.0/0" +instances: + - name: "web" + datacenter: "dal13" + count: 3 # Creates web-01, web-02, web-03 + security_groups: ["web", "ssh", "egress"] # Multiple security groups + - name: "db" + datacenter: "wdc07" + count: 2 # Creates db-01, db-02 + cores: 4 + memory: 8192 + security_groups: ["db", "ssh", "egress"] # Multiple security groups +``` + +## VM Count Configuration + +Each instance can specify a `count` parameter to create multiple VMs of the same type: + +```yaml +instances: + - name: "web" + datacenter: "dal13" + count: 3 # Creates web-01, web-02, web-03 + - name: "db" + datacenter: "wdc07" + count: 2 # Creates db-01, db-02 +``` + +**Key Points:** +- If `count` is not specified, defaults to 1 VM +- VM names are automatically suffixed with `-01`, `-02`, etc. 
+- Each VM in the count shares the same configuration +- `count` must be a positive integer + +## Security Group Configuration + +Security groups use a structured format with named groups and descriptive rules: + +### Security Group Structure + +```yaml +security_groups: + - name: "group_name" + rules: + - name: "rule_name" + description: "Rule description" + rule_type: "ingress" or "egress" + ether_type: "IPv4" + from_port: 80 # Optional for TCP/UDP + to_port: 80 # Optional for TCP/UDP + protocol: "tcp" # tcp, udp, icmp + cidr: "0.0.0.0/0" # CIDR block +``` + +**Field Reference:** +- `name`: Security group name (referenced by instances) +- `rules`: List of security rules for this group + - `name`: Descriptive name for the rule + - `description`: Human-readable description + - `rule_type`: "ingress" (inbound) or "egress" (outbound) + - `ether_type`: "IPv4" (IPv6 not currently supported) + - `from_port`: Starting port number (TCP/UDP only) + - `to_port`: Ending port number (TCP/UDP only) + - `protocol`: "tcp", "udp", or "icmp" + - `cidr`: CIDR notation for allowed IP ranges + +You can configure security groups in two ways: + +### 1. Global Security Group Definitions + +Define reusable security groups that can be referenced by name: + +```yaml +security_groups: + - name: "web" + rules: + - name: "http" + description: "Allow HTTP traffic" + rule_type: "ingress" + ether_type: "IPv4" + from_port: 80 + to_port: 80 + protocol: "tcp" + cidr: "0.0.0.0/0" + - name: "https" + description: "Allow HTTPS traffic" + rule_type: "ingress" + ether_type: "IPv4" + from_port: 443 + to_port: 443 + protocol: "tcp" + cidr: "0.0.0.0/0" + - name: "db" + rules: + - name: "mysql" + description: "Allow MySQL traffic from internal network" + rule_type: "ingress" + ether_type: "IPv4" + from_port: 3306 + to_port: 3306 + protocol: "tcp" + cidr: "10.0.0.0/8" + - name: "ssh" + rules: + - name: "ssh" + description: "Allow SSH access" + rule_type: "ingress" + ether_type: "IPv4" + from_port: 22 + to_port: 22 + protocol: "tcp" + cidr: "0.0.0.0/0" + - name: "egress" + rules: + - name: "tcp_out" + description: "Allow outbound TCP traffic" + rule_type: "egress" + ether_type: "IPv4" + protocol: "tcp" + cidr: "0.0.0.0/0" + - name: "udp_out" + description: "Allow outbound UDP traffic" + rule_type: "egress" + ether_type: "IPv4" + protocol: "udp" + cidr: "0.0.0.0/0" + +instances: + - name: "web" + datacenter: "dal13" + security_group: "web" # Single security group reference + - name: "db" + datacenter: "dal13" + security_groups: ["db", "ssh", "egress"] # Multiple security groups + - name: "app" + datacenter: "dal13" + security_groups: ["web", "db", "ssh", "egress"] # Combine multiple groups +``` + +### 2. 
Multiple Global Security Groups + +Combine multiple global security groups for flexible configurations: + +```yaml +security_groups: + - name: "web" + rules: + - name: "http" + description: "Allow HTTP traffic" + rule_type: "ingress" + ether_type: "IPv4" + from_port: 80 + to_port: 80 + protocol: "tcp" + cidr: "0.0.0.0/0" + - name: "db" + rules: + - name: "mysql" + description: "Allow MySQL traffic from internal network" + rule_type: "ingress" + ether_type: "IPv4" + from_port: 3306 + to_port: 3306 + protocol: "tcp" + cidr: "10.0.0.0/8" + - name: "ssh" + rules: + - name: "ssh" + description: "Allow SSH access" + rule_type: "ingress" + ether_type: "IPv4" + from_port: 22 + to_port: 22 + protocol: "tcp" + cidr: "0.0.0.0/0" + - name: "egress" + rules: + - name: "tcp_out" + description: "Allow outbound TCP traffic" + rule_type: "egress" + ether_type: "IPv4" + protocol: "tcp" + cidr: "0.0.0.0/0" + +instances: + - name: "web" + datacenter: "dal13" + security_groups: ["web", "ssh", "egress"] # Web server with SSH and egress + - name: "db" + datacenter: "dal13" + security_groups: ["db", "ssh", "egress"] # Database with SSH and egress + - name: "app" + datacenter: "dal13" + security_groups: ["web", "db", "ssh", "egress"] # App server with web + db access +``` + +### 3. Inline Security Group Rules + +Define security group rules directly in the instance: + +```yaml +instances: + - name: "web" + datacenter: "dal13" + security_group_rules: + - name: "http" + description: "Allow HTTP traffic" + rule_type: "ingress" + ether_type: "IPv4" + from_port: 80 + to_port: 80 + protocol: "tcp" + cidr: "0.0.0.0/0" + - name: "https" + description: "Allow HTTPS traffic" + rule_type: "ingress" + ether_type: "IPv4" + from_port: 443 + to_port: 443 + protocol: "tcp" + cidr: "0.0.0.0/0" + - name: "db" + datacenter: "dal13" + security_group_rules: + - name: "mysql" + description: "Allow MySQL traffic from internal network" + rule_type: "ingress" + ether_type: "IPv4" + from_port: 3306 + to_port: 3306 + protocol: "tcp" + cidr: "10.0.0.0/8" +``` + +**Key Points:** +- Use `security_group: "name"` to reference a single global security group +- Use `security_groups: ["name1", "name2"]` to reference multiple global security groups +- Use `security_group_rules: [...]` for inline rules +- If none are specified, defaults to global security group rules +- Multiple security groups are merged - rules from all groups are combined +- Global security groups are more maintainable for reused configurations +- Inline rules are better for unique, one-off security requirements +- Use `create_security_group: false` to disable security group creation for an instance + +**Priority Order:** +1. `security_groups: [...]` (multiple global groups - highest priority) +2. `security_group: "name"` (single global group) +3. `security_group_rules: [...]` (inline rules) +4. Default security group rules (fallback) + +## VLAN Configuration + +By default, IBM Cloud will automatically assign public and private VLANs if not specified. You can optionally provide specific VLAN IDs: + +```yaml +private_vlan_id: 12345 # Optional +public_vlan_id: 54321 # Optional +``` + +If you need to use existing VLANs or have specific networking requirements, provide the VLAN IDs as extra variables. + +## Output Directory Configuration + +By default, Terraform files and outputs are stored in `/tmp`. 
You can specify a custom output directory: + +```yaml +output_dir: "/path/to/custom/directory" +``` + +This affects: +- Terraform working directory location +- VM information output files +- Terraform state files + +## Usage + +### Deploy VMs + +```bash +# Deploy VMs with extra vars file +ansible-playbook playbook.yml -e ACTION=provision \ + -e @vm_vars.yml + +# Where vm_vars.yml contains: +# ibm_cloud_api_key: "YOUR_API_KEY" +# output_dir: "/tmp" +# security_groups: +# - name: "web" +# rules: +# - name: "http" +# description: "Allow HTTP traffic" +# rule_type: "ingress" +# ether_type: "IPv4" +# from_port: 80 +# to_port: 80 +# protocol: "tcp" +# cidr: "0.0.0.0/0" +# - name: "db" +# rules: +# - name: "mysql" +# description: "Allow MySQL traffic from internal network" +# rule_type: "ingress" +# ether_type: "IPv4" +# from_port: 3306 +# to_port: 3306 +# protocol: "tcp" +# cidr: "10.0.0.0/8" +# - name: "ssh" +# rules: +# - name: "ssh" +# description: "Allow SSH access" +# rule_type: "ingress" +# ether_type: "IPv4" +# from_port: 22 +# to_port: 22 +# protocol: "tcp" +# cidr: "0.0.0.0/0" +# - name: "egress" +# rules: +# - name: "tcp_out" +# description: "Allow outbound TCP traffic" +# rule_type: "egress" +# ether_type: "IPv4" +# protocol: "tcp" +# cidr: "0.0.0.0/0" +# instances: +# - name: "web" +# datacenter: "dal13" +# count: 3 +# security_groups: ["web", "ssh", "egress"] +# - name: "db" +# datacenter: "wdc07" +# count: 2 +# cores: 4 +# memory: 8192 +# security_groups: ["db", "ssh", "egress"] +``` + +### Destroy VMs + +```bash +ansible-playbook playbook.yml -e ACTION=destroy \ + -e terraform_working_dir=/tmp/terraform-ibm-vm-guid +# Or use custom output directory +ansible-playbook playbook.yml -e ACTION=destroy \ + -e output_dir=/custom/path \ + -e terraform_working_dir=/custom/path/terraform-ibm-vm-guid +``` + +## Default Configuration + +### VM Specifications +- **Operating System**: RHEL 9 (64-bit) +- **CPU Cores**: 2 +- **Memory**: 4096 MB +- **Disk**: 25 GB local disk +- **Network**: 1 Gbps, public and private +- **Billing**: Hourly + +### Security Groups +Default firewall rules (used when not specified per instance) allow: +- SSH inbound (port 22) +- All outbound traffic (TCP, UDP, ICMP) + +Each instance can override these defaults with its own security group rules. + +## Customization + +### VM Configuration + +```yaml +# VM specifications +cores: 4 +memory: 8192 +rootfs_size: 25 # Root filesystem size in GB +additional_disks: [100] # Extra 100 GB data disk +image: "UBUNTU_22_64" + +# Network settings +network_speed: 10000 # 10 Gbps +private_network_only: false +``` + +### Security Groups + +Security groups are configured per instance. See the "Security Group Configuration" section above for details. + +### SSH Configuration + +```yaml +ssh_user: "ubuntu" +ssh_private_key_path: "/path/to/private/key" +``` + +### VM Tagging + +VM tags are configured per-instance to provide better resource organization and management: + +```yaml +instances: + - name: "web-server" + datacenter: "dal13" + tags: + - "agnosticd" + - "web-server" + - "production" + - "team-alpha" + - name: "db-server" + datacenter: "dal13" + tags: + - "agnosticd" + - "database" + - "mysql" + - "production" +``` + +**Key Points:** +- Tags are specified per-instance using the `tags` field +- Each instance can have its own unique set of tags +- Tags are useful for resource organization, billing, and management +- Tags are passed directly to IBM Cloud Classic infrastructure + +### Root Filesystem Sizing and Additional Disks + +The root filesystem size for VMs can be configured using the `rootfs_size` parameter, which specifies the size in GB. Additional disks beyond the root filesystem can be specified using the `additional_disks` array.
+ +```yaml +instances: + - name: "web-server" + datacenter: "dal13" + rootfs_size: 50 # 50GB root filesystem + tags: + - "agnosticd" + - "web-server" + - name: "db-server" + datacenter: "dal13" + rootfs_size: 100 # 100GB root filesystem for database + additional_disks: [200, 500] # Additional 200GB and 500GB disks + tags: + - "agnosticd" + - "database" + - name: "storage-server" + datacenter: "dal13" + rootfs_size: 50 # 50GB root filesystem + additional_disks: [500, 1000] # 500GB + 1TB additional disks + tags: + - "agnosticd" + - "storage" +``` + +**Key Points:** +- Root filesystem size is specified per-instance using the `rootfs_size` field +- Default value is 25GB if not specified +- Additional disks are specified using the `additional_disks` array +- The `additional_disks` array contains disk sizes in GB beyond the root filesystem +- Internally converted to a single `disks` array for IBM Cloud Classic (rootfs_size as first disk + additional_disks) +- Can be set to any size (in GB) supported by IBM Cloud +- Useful for applications with different storage requirements + +### Terraform Version + +By default, the role downloads and installs Terraform 1.5.7 for Linux AMD64. You can specify a different version by setting the `terraform_download_url` variable: + +```yaml +terraform_download_url: "https://releases.hashicorp.com/terraform/1.6.0/terraform_1.6.0_linux_amd64.zip" +``` + +## Inventory Groups + +Deployed VMs are automatically added to these inventory groups: +- `all` (default group) +- `tag_<tag>` (e.g., `tag_rhel-9-64`) +- `datacenter_<datacenter>` (e.g., `datacenter_dal13`) + +## File Structure + +``` +ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/ +├── tasks/ +│ ├── main.yml # Main entry point +│ ├── pre_checks.yml # Validation and setup +│ ├── terraform_deploy.yml # VM deployment +│ ├── terraform_destroy.yml # VM destruction +│ └── create_inventory.yml # Inventory creation +├── files/ +│ ├── main.tf # Terraform resources +│ └── outputs.tf # Terraform outputs +├── templates/ +│ ├── terraform.tfvars.j2 # Terraform variables +│ └── inventory.j2 # Ansible inventory +├── defaults/ +│ └── main.yml # Default variables +└── README.md # This documentation +``` + +## Variables Reference + +### Required Variables + +- `ibm_cloud_api_key`: IBM Cloud API key +- `output_dir`: Directory for terraform files and outputs +- `domain`: VM domain name (no default - must be provided) +- `instances`: List of VM configurations (each VM must have `name` and `datacenter`) + +### Instance Configuration + +Each instance in the `instances` list supports: +- `name`: VM name prefix (required) +- `datacenter`: IBM Cloud datacenter (required) +- `count`: Number of VMs to create (optional, default: 1) +- `security_group`: Reference to single global security group by name (optional) +- `security_groups`: List of global security group names to combine (optional) +- `security_group_rules`: Security group rules for this instance (optional) +- `create_security_group`: Whether to create security group (optional, default: true) +- `tags`: List of tags to apply to this instance (optional) +- `rootfs_size`: Root filesystem size in GB (optional, default: 25) +- `additional_disks`: Additional disk sizes in GB beyond root filesystem (optional, default: []) +- All other VM configuration options (cores, memory, etc.)
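+ +For reference, a minimal sketch combining several of these per-instance fields (illustrative values only; it assumes "web", "ssh", and "egress" are defined under `security_groups`): + +```yaml +instances: + - name: "api" + datacenter: "dal13" + count: 2 # Creates api-01, api-02 + cores: 4 + memory: 8192 + rootfs_size: 50 + additional_disks: [100] + security_groups: ["web", "ssh", "egress"] + tags: + - "agnosticd" + - "api" +```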
+ +### Optional Variables +- `ACTION`: provision or destroy (default: provision) +- `region`: IBM Cloud region (default: us-south) +- `private_vlan_id`: Private VLAN ID (default: auto-assigned by IBM Cloud) +- `public_vlan_id`: Public VLAN ID (default: auto-assigned by IBM Cloud) +- `image`: Operating system (default: REDHAT_9_64) +- `cores`: CPU cores (default: 2) +- `memory`: Memory in MB (default: 4096) +- `rootfs_size`: Root filesystem size in GB (default: 25) +- `additional_disks`: Additional disk sizes in GB beyond root filesystem (default: []) +- `network_speed`: Network speed in Mbps (default: 1000) +- `hourly_billing`: Use hourly billing (default: true) +- `ssh_user`: SSH username (default: root) +- `ssh_private_key_path`: SSH private key path (default: ~/.ssh/id_rsa) +- `inventory_group`: Inventory group name (default: all) +- `wait_for_ssh`: Wait for SSH availability (default: true) +- `test_ssh_connection`: Test SSH connection (default: true) +- `terraform_download_url`: URL for Terraform download (default: https://releases.hashicorp.com/terraform/1.5.7/terraform_1.5.7_linux_amd64.zip) +- `security_groups`: Dictionary of named security group definitions that can be referenced by instances +- `create_security_group`: Global default for creating security groups (default: true) +- `security_group_rules`: Global default security group rules (used when not specified per instance) + +### DNS Configuration Variables + +- `create_dns_records`: Whether to create Route53 DNS records (default: false) +- `route53_aws_access_key_id`: AWS access key ID for Route53 (required if create_dns_records is true) +- `route53_aws_secret_access_key`: AWS secret access key for Route53 (required if create_dns_records is true) +- `aws_region`: AWS region for Route53 (default: us-east-1) +- `route53_aws_zone_id`: Route53 hosted zone ID (auto-discovered from cluster_dns_zone if not provided) +- `cluster_dns_zone`: DNS domain for Route53 records (required if create_dns_records is true) +- `dns_ttl`: TTL for DNS records in seconds (default: 300) + +**Auto-Discovery Feature**: The role automatically discovers the Route53 hosted zone ID for your `cluster_dns_zone` domain. The discovery process supports pagination and will retrieve all hosted zones in your account, making it suitable for accounts with many zones. If a hosted zone exists for your domain, `route53_aws_zone_id` will be set automatically. You only need to manually provide `route53_aws_zone_id` if auto-discovery fails or you want to use a different zone. 
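+ +As a quick reference, a minimal DNS-enabled variable set relying on zone auto-discovery might look like this (illustrative values; a full playbook example appears under Examples below): + +```yaml +create_dns_records: true +route53_aws_access_key_id: "{{ vault_aws_access_key_id }}" +route53_aws_secret_access_key: "{{ vault_aws_secret_access_key }}" +cluster_dns_zone: "example.com" # route53_aws_zone_id is auto-discovered from this zone +dns_ttl: 300 +```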
+ +### Terraform Logging Configuration Variables + +- `terraform_enable_logging`: Enable comprehensive Terraform logging (default: true) +- `terraform_log_level`: Terraform log level (default: "INFO", options: TRACE, DEBUG, INFO, WARN, ERROR) + +**Terraform Logging Features**: +- **Comprehensive Logging**: Captures all Terraform operations (init, plan, apply, destroy) with timestamps +- **Structured Logs**: Individual log files for each operation plus master log combining all operations +- **Configurable Verbosity**: Adjust log level from ERROR (minimal) to TRACE (maximum detail) +- **Centralized Storage**: All logs stored in `{{ output_dir }}/terraform-logs/` directory +- **Easy Monitoring**: Use `tail -f {{ output_dir }}/terraform-logs/terraform-master-*.log` to monitor progress + +**Log Files Created**: +- `terraform-master-TIMESTAMP.log`: Combined log with all operations +- `terraform-init-TIMESTAMP.log`: Terraform initialization +- `terraform-plan-TIMESTAMP.log`: Terraform planning +- `terraform-apply-TIMESTAMP.log`: Terraform application +- `terraform-state-TIMESTAMP.log`: State checking +- `terraform-verify-TIMESTAMP.log`: Deployment verification +- `terraform-destroy-*-TIMESTAMP.log`: Destroy operations (when applicable) + +## Outputs + +After successful deployment, the role provides: +- VM metadata in `vm_info` variable +- Ansible inventory groups +- SSH connection details +- VM information saved to `vm_info.json` +- DNS records (if DNS is enabled) + +## Error Handling + +The role includes comprehensive error handling: +- Validates required variables +- Installs Terraform if missing +- Checks Terraform state before operations +- Provides clear error messages + +## Examples + +### Basic VM Deployment (Auto-assigned VLANs) +```yaml +- name: Deploy IBM Cloud Classic VMs + include_role: + name: infra-ibm-cloud-classic-manage-vms + vars: + ACTION: provision + ibm_cloud_api_key: "{{ vault_ibm_cloud_api_key }}" + output_dir: "/tmp" + instances: + - name: "web-server" + datacenter: "dal13" + tags: + - "agnosticd" + - "web-server" +``` + +### VM Deployment with DNS Records (Auto-Discovery) +```yaml +- name: Deploy IBM Cloud Classic VMs with Route53 DNS + include_role: + name: infra-ibm-cloud-classic-manage-vms + vars: + ACTION: provision + ibm_cloud_api_key: "{{ vault_ibm_cloud_api_key }}" + output_dir: "/tmp" + create_dns_records: true + route53_aws_access_key_id: "{{ vault_aws_access_key_id }}" + route53_aws_secret_access_key: "{{ vault_aws_secret_access_key }}" + cluster_dns_zone: "example.com" # Zone ID auto-discovered + dns_ttl: 300 + instances: + - name: "web-server" + datacenter: "dal13" + tags: + - "agnosticd" + - "web-server" +``` + +### VM Deployment with Enhanced Terraform Logging +```yaml +- name: Deploy IBM Cloud Classic VMs with detailed logging + include_role: + name: infra-ibm-cloud-classic-manage-vms + vars: + ACTION: provision + ibm_cloud_api_key: "{{ vault_ibm_cloud_api_key }}" + output_dir: "/tmp" + # Enhanced logging configuration + terraform_enable_logging: true + terraform_log_level: "DEBUG" # More verbose logging + instances: + - name: "debug-server" + datacenter: "dal13" + tags: + - "agnosticd" + - "debug-server" +``` + +### VM Deployment with Minimal Logging +```yaml +- name: Deploy IBM Cloud Classic VMs with minimal logging + include_role: + name: infra-ibm-cloud-classic-manage-vms + vars: + ACTION: provision + ibm_cloud_api_key: "{{ vault_ibm_cloud_api_key }}" + output_dir: "/tmp" + # Minimal logging configuration + terraform_enable_logging: true + terraform_log_level: "ERROR" # Only log errors + instances: + - name: "prod-server" + datacenter: "dal13" + tags: + - "agnosticd" + - "production" +``` + +### Monitoring Terraform Progress +```bash +# Monitor live progress during deployment +tail -f /tmp/terraform-logs/terraform-master-*.log + +# Check specific operation logs +tail -f /tmp/terraform-logs/terraform-apply-*.log + +# View all terraform logs +ls -la /tmp/terraform-logs/ + +# Search for errors in logs +grep -i "error" /tmp/terraform-logs/terraform-master-*.log +``` + +### Multi-VM Deployment with Multiple Security Groups +```yaml +- name: Deploy multiple VMs with multiple security groups + include_role: + name: infra-ibm-cloud-classic-manage-vms + vars: + ACTION: provision + ibm_cloud_api_key: "{{ vault_ibm_cloud_api_key }}" + output_dir: "/tmp" + security_groups: + - name: "web" + rules: + - name: "http" + description: "Allow HTTP traffic" + rule_type: "ingress" + ether_type: "IPv4" + from_port: 80 + to_port: 80 + protocol: "tcp" + cidr: "0.0.0.0/0" + - name: "https" + description: "Allow HTTPS traffic" + rule_type: "ingress" + ether_type: "IPv4" + from_port: 443 + to_port: 443 + protocol: "tcp" + cidr: "0.0.0.0/0" + - name: "db" + rules: + - name: "mysql" + description: "Allow MySQL traffic from internal network" + rule_type: "ingress" + ether_type: "IPv4" + from_port: 3306 + to_port: 3306 + protocol: "tcp" + cidr: "10.0.0.0/8" + - name: "ssh" + rules: + - name: "ssh" + description: "Allow SSH access" + rule_type: "ingress" + ether_type: "IPv4" + from_port: 22 + to_port: 22 + protocol: "tcp" + cidr: "0.0.0.0/0" + - name: "egress" + rules: + - name: "tcp_out" + description: "Allow outbound TCP traffic" + rule_type: "egress" + ether_type: "IPv4" + protocol: "tcp" + cidr: "0.0.0.0/0" + instances: + - name: "web" + datacenter: "dal13" + count: 3 # Creates web-01, web-02, web-03 + security_groups: ["web", "ssh", "egress"] + tags: + - "agnosticd" + - "web-server" + - "production" + - name: "db" + datacenter: "wdc07" + count: 2 # Creates db-01, db-02 + cores: 4 + memory: 8192 + rootfs_size: 50 # 50GB root filesystem for database storage + additional_disks: [200] # Additional 200GB disk for database storage + security_groups: ["db", "ssh", "egress"] + tags: + - "agnosticd" + - "database" + - "mysql" + - name: "app" + datacenter: "dal13" + count: 1 + rootfs_size: 100 # 100GB root filesystem for application with more storage needs + additional_disks: [500] # Additional 500GB disk for application data + security_groups: ["web", "db", "ssh", "egress"] # Combines web + db access + tags: + - "agnosticd" + - "application" + - "backend" +``` + +### VM Deployment with Specific VLANs +```yaml +- name: Deploy IBM Cloud Classic VMs with specific VLANs + include_role: + name: infra-ibm-cloud-classic-manage-vms + vars: + ACTION: provision + ibm_cloud_api_key: "{{ vault_ibm_cloud_api_key }}" + output_dir: "/tmp" + private_vlan_id: 12345 + public_vlan_id: 54321 + instances: + - name: "web-server" + datacenter: "dal13" + tags: + - "agnosticd" + - "web-server" + - "vlan-specific" +``` + +### High-Performance VMs with Count +```yaml +- name: Deploy High-Performance VMs + include_role: + name: infra-ibm-cloud-classic-manage-vms + vars: + ACTION: provision + ibm_cloud_api_key: "{{ vault_ibm_cloud_api_key }}" + output_dir: "/tmp" + private_vlan_id: 12345 + public_vlan_id: 54321 + instances: + - name: "compute-node" + datacenter: "dal13" + count: 4 # Creates compute-node-01, compute-node-02, compute-node-03, compute-node-04 + cores: 8 + memory: 16384 + network_speed: 10000 + rootfs_size: 50 + additional_disks: [200] + tags: + - "agnosticd" + - "high-performance" + - "compute" +``` + +### Custom Terraform Version +```yaml +- name: Deploy VMs with custom Terraform version + include_role: + name: infra-ibm-cloud-classic-manage-vms + vars: + ACTION: provision + ibm_cloud_api_key: "{{ vault_ibm_cloud_api_key }}" + output_dir: "/tmp" + terraform_download_url: "https://releases.hashicorp.com/terraform/1.6.0/terraform_1.6.0_linux_amd64.zip" + instances: + - name: "test-vm" + datacenter: "dal13" + tags: + - "agnosticd" + - "test" +``` + +### Mixed Security Group Usage +```yaml +- name: Deploy VMs with Mixed Security Group Usage + include_role: + name: infra-ibm-cloud-classic-manage-vms + vars: + ACTION: provision + ibm_cloud_api_key: "{{ vault_ibm_cloud_api_key }}" + output_dir: "/tmp" + private_vlan_id: 12345 + public_vlan_id: 54321 + security_groups: + - name: "web" + rules: + - name: "http" + description: "Allow HTTP traffic" + rule_type: "ingress" + ether_type: "IPv4" + from_port: 80 + to_port: 80 + protocol: "tcp" + cidr: "0.0.0.0/0" + - name: "https" + description: "Allow HTTPS traffic" + rule_type: "ingress" + ether_type: "IPv4" + from_port: 443 + to_port: 443 + protocol: "tcp" + cidr: "0.0.0.0/0" + - name: "db" + rules: + - name: "mysql" + description: "Allow MySQL traffic from internal network" + rule_type: "ingress" + ether_type: "IPv4" + from_port: 3306 + to_port: 3306 + protocol: "tcp" + cidr: "10.0.0.0/8" + - name: "ssh" + rules: + - name: "ssh" + description: "Allow SSH access" + rule_type: "ingress" + ether_type: "IPv4" + from_port: 22 + to_port: 22 + protocol: "tcp" + cidr: "0.0.0.0/0" + - name: "egress" + rules: + - name: "tcp_out" + description: "Allow outbound TCP traffic" + rule_type: "egress" + ether_type: "IPv4" + protocol: "tcp" + cidr: "0.0.0.0/0" + instances: + - name: "web-server" + datacenter: "dal13" + security_groups: ["web", "ssh", "egress"] # Multiple global groups + tags: + - "agnosticd" + - "web-server" + - "mixed-sg" + - name: "db-server" + datacenter: "dal13" + security_group: "db" # Single global group + tags: + - "agnosticd" + - "database" + - "single-sg" + - name: "app-server" + datacenter: "dal13" + security_groups: ["web", "db", "ssh", "egress"] # Combined access + tags: + - "agnosticd" + - "application" + - "combined-sg" + - name: "custom-app" + datacenter: "dal13" + security_group_rules: # Inline rules for unique requirements + - name: "custom_api" + description: "Allow custom API access from specific subnet" + rule_type: "ingress" + ether_type: "IPv4" + from_port: 9000 + to_port: 9000 + protocol: "tcp" + cidr: "192.168.1.0/24" + - name: "ssh" + description: "Allow SSH access" + rule_type: "ingress" + ether_type: "IPv4" + from_port: 22 + to_port: 22 + protocol: "tcp" + cidr: "0.0.0.0/0" + tags: + - "agnosticd" + - "custom-app" + - "inline-sg" +``` + +## Troubleshooting + +### Common Issues + +1. **API Key Issues** + - Ensure API key has Classic Infrastructure permissions + - Check API key is not expired + +2. **VLAN Issues** + - Verify VLAN IDs exist in the specified datacenter + - Ensure VLANs are in the correct region + +3. **SSH Connection Issues** + - Verify SSH key is properly configured + - Check security group rules allow SSH access + - Ensure VM is fully provisioned + +4. 
**Terraform Issues** + - Check Terraform templates are present in `~/create-vm-ibm-cloud` + - Verify Terraform state is not corrupted + - Review Terraform logs for detailed errors + +### Debug Mode + +Enable debug output: +```yaml +debug: true +``` + +## License + +This role is part of the AgnosticD project and follows the same licensing terms. \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/defaults/main.yml b/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/defaults/main.yml new file mode 100644 index 00000000000..298c8164038 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/defaults/main.yml @@ -0,0 +1,130 @@ +--- +# IBM Cloud Classic VM Deployment Role - Default Variables + +# Action mode +# ACTION: "" # REQUIRED - must be provided as extra var (provision or destroy) + +# Terraform configuration +terraform_working_dir: "{{ output_dir }}/terraform-ibm-vm-{{ guid | default('default') }}" +terraform_download_url: "https://releases.hashicorp.com/terraform/1.5.7/terraform_1.5.7_linux_amd64.zip" +cleanup_terraform_files: false +save_vm_info: true + +# IBM Cloud connection settings +# ibm_cloud_api_key: "" # REQUIRED - must be provided as extra var +# region: "" # REQUIRED - must be provided as extra var (e.g., "us-south", "us-east", "eu-gb") + +# VM configuration defaults (applied to all VMs if not specified per VM) +domain: "example.com" +image: "REDHAT_9_64" +network_speed: 1000 +hourly_billing: true +private_network_only: false +cores: 2 +memory: 4096 +rootfs_size: 25 # Root filesystem size in GB +additional_disks: [] # Additional disks beyond root filesystem (empty by default) +local_disk: true +ssh_key_ids: [] +user_metadata: "" +notes: "VM created by AgnosticD" +dedicated_acct_host_only: false + +# VM instances configuration - REQUIRED +# instances: [] # List of VM configurations +# Example: +# +# Security Group Rules: +# - private_security_group_rules: creates inline rules ONLY for private interface +# - public_security_group_rules: creates inline rules ONLY for public interface +# - If no inline rules are specified, VM has no security groups +# +# Ansible Group Assignment: +# - VMs can be automatically added to custom ansible groups using tags +# - Use tag format: "ansible_group:groupname" to add VM to the "groupname" ansible group +# - Multiple ansible_group tags can be used to add VM to multiple groups +# - Group names are sanitized: invalid characters (anything except letters, numbers, underscores) are replaced with underscores +# - Examples: "ansible_group:bastions", "ansible_group:webservers", "ansible_group:databases" +# - Note: "ansible_group:my-group" becomes ansible group "my_group" +# +# instances: +# - name: "web" +# datacenter: "dal13" +# count: 3 # Creates web-01, web-02, web-03 +# cores: 2 +# memory: 4096 +# rootfs_size: 25 +# additional_disks: [100, 200] # Additional 100GB and 200GB disks +# image: "REDHAT_9_64" +# ssh_key_ids: [] +# user_metadata: "web-server" +# notes: "Web server instance" +# tags: ["web", "ansible_group:webservers"] +# private_security_group_rules: +# - name: "private_ssh" +# description: "Allow SSH from private network" +# rule_type: "ingress" +# ether_type: "IPv4" +# from_port: 22 +# to_port: 22 +# protocol: "tcp" +# cidr: "10.0.0.0/8" +# public_security_group_rules: +# - name: "public_http" +# description: "Allow HTTP from anywhere" +# rule_type: "ingress" +# ether_type: "IPv4" +# from_port: 80 +# to_port: 80 +# protocol: "tcp" +# cidr: "0.0.0.0/0" +# - name: "db" +# 
datacenter: "wdc07" +# count: 1 +# cores: 4 +# memory: 8192 +# rootfs_size: 50 +# additional_disks: [500] # Additional 500GB disk for database +# image: "REDHAT_9_64" +# ssh_key_ids: [] +# user_metadata: "database-server" +# notes: "Database server instance" +# tags: ["database", "ansible_group:databases"] +# private_security_group_rules: +# - name: "private_mysql" +# description: "Allow MySQL from private network" +# rule_type: "ingress" +# ether_type: "IPv4" +# from_port: 3306 +# to_port: 3306 +# protocol: "tcp" +# cidr: "10.0.0.0/8" + +# DNS Configuration +create_dns_records: false +route53_aws_access_key_id: "" +route53_aws_secret_access_key: "" +aws_region: "us-east-1" +# route53_aws_zone_id: "" # Auto-discovered from cluster_dns_zone, or provide manually +# cluster_dns_zone: "" # REQUIRED - must be provided as extra var when DNS is enabled +dns_ttl: 300 + +# Terraform Logging Configuration +terraform_log_level: "INFO" # Options: TRACE, DEBUG, INFO, WARN, ERROR +terraform_enable_logging: true + +# Network configuration +private_vlan_id: "" # Optional - if not provided, IBM Cloud will auto-assign +public_vlan_id: "" # Optional - if not provided, IBM Cloud will auto-assign + +# No global security groups - use only inline rules per instance + +# VM tagging - configured per instance using instance.tags +# tags: [] # No default tags - specify per instance + +# Post-installation script +post_install_script_uri: "" + +# SSH configuration +ssh_user: "root" +ssh_private_key_path: "{{ env_authorized_key_path }}" \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/files/main.tf b/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/files/main.tf new file mode 100644 index 00000000000..d8c78ecf448 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/files/main.tf @@ -0,0 +1,176 @@ +provider "ibm" { + ibmcloud_api_key = var.ibm_cloud_api_key + region = var.region +} + +# Private interface security groups +resource "ibm_security_group" "vm_private" { + count = length([for vm in local.vm_list : vm if length(local.vm_private_rules[vm.name]) > 0]) + + name = "${[for vm in local.vm_list : vm if length(local.vm_private_rules[vm.name]) > 0][count.index].name}-private-sg" + description = "Private security group for ${[for vm in local.vm_list : vm if length(local.vm_private_rules[vm.name]) > 0][count.index].name}" +} + +# Private interface security group rules +resource "ibm_security_group_rule" "vm_private_rules" { + count = sum([for vm in local.vm_list : length(local.vm_private_rules[vm.name])]) + + security_group_id = ibm_security_group.vm_private[local.private_rule_mapping[count.index].vm_index].id + direction = local.private_rule_mapping[count.index].rule.rule_type + ether_type = local.private_rule_mapping[count.index].rule.ether_type + port_range_min = local.private_rule_mapping[count.index].rule.from_port != null ? local.private_rule_mapping[count.index].rule.from_port : null + port_range_max = local.private_rule_mapping[count.index].rule.to_port != null ? 
local.private_rule_mapping[count.index].rule.to_port : null + protocol = local.private_rule_mapping[count.index].rule.protocol + remote_ip = local.private_rule_mapping[count.index].rule.cidr +} + +# Public interface security groups +resource "ibm_security_group" "vm_public" { + count = length([for vm in local.vm_list : vm if length(local.vm_public_rules[vm.name]) > 0]) + + name = "${[for vm in local.vm_list : vm if length(local.vm_public_rules[vm.name]) > 0][count.index].name}-public-sg" + description = "Public security group for ${[for vm in local.vm_list : vm if length(local.vm_public_rules[vm.name]) > 0][count.index].name}" +} + +# Public interface security group rules +resource "ibm_security_group_rule" "vm_public_rules" { + count = sum([for vm in local.vm_list : length(local.vm_public_rules[vm.name])]) + + security_group_id = ibm_security_group.vm_public[local.public_rule_mapping[count.index].vm_index].id + direction = local.public_rule_mapping[count.index].rule.rule_type + ether_type = local.public_rule_mapping[count.index].rule.ether_type + port_range_min = local.public_rule_mapping[count.index].rule.from_port != null ? local.public_rule_mapping[count.index].rule.from_port : null + port_range_max = local.public_rule_mapping[count.index].rule.to_port != null ? local.public_rule_mapping[count.index].rule.to_port : null + protocol = local.public_rule_mapping[count.index].rule.protocol + remote_ip = local.public_rule_mapping[count.index].rule.cidr +} + +# Local values for interface-specific security group logic +locals { + # Private interface rules: only private-specific rules + vm_private_rules = { + for vm in local.vm_list : vm.name => vm.private_security_group_rules != null ? vm.private_security_group_rules : [] + } + + # Public interface rules: only public-specific rules + vm_public_rules = { + for vm in local.vm_list : vm.name => vm.public_security_group_rules != null ? vm.public_security_group_rules : [] + } + + # Create mapping for private rule assignment + private_rule_mapping = flatten([ + for vm_index, vm in local.vm_list : [ + for rule_index, rule in local.vm_private_rules[vm.name] : { + vm_index = length([for v in slice(local.vm_list, 0, vm_index) : v if length(local.vm_private_rules[v.name]) > 0]) + rule = rule + } + ] if length(local.vm_private_rules[vm.name]) > 0 + ]) + + # Create mapping for public rule assignment + public_rule_mapping = flatten([ + for vm_index, vm in local.vm_list : [ + for rule_index, rule in local.vm_public_rules[vm.name] : { + vm_index = length([for v in slice(local.vm_list, 0, vm_index) : v if length(local.vm_public_rules[v.name]) > 0]) + rule = rule + } + ] if length(local.vm_public_rules[vm.name]) > 0 + ]) +} + +# Virtual Server Instances +resource "ibm_compute_vm_instance" "vm" { + count = var.total_vm_count + + hostname = local.vm_hostnames[count.index] + domain = var.domain + datacenter = local.vm_datacenters[count.index] + cores = local.vm_cores[count.index] + memory = local.vm_memory[count.index] + disks = local.vm_disks[count.index] + os_reference_code = local.vm_images[count.index] + local_disk = var.local_disk + network_speed = var.network_speed + hourly_billing = var.hourly_billing + private_network_only = var.private_network_only + ssh_key_ids = local.vm_ssh_key_ids[count.index] + user_metadata = local.vm_user_metadata[count.index] + notes = local.vm_notes[count.index] + dedicated_acct_host_only = var.dedicated_acct_host_only + tags = local.vm_tags[count.index] + + private_vlan_id = var.private_vlan_id != "" ? 
var.private_vlan_id : null + public_vlan_id = var.public_vlan_id != "" ? var.public_vlan_id : null + + post_install_script_uri = var.post_install_script_uri != "" ? var.post_install_script_uri : null + + # Security groups - apply interface-specific security groups + private_security_group_ids = length(local.vm_private_rules[local.vm_list[count.index].name]) > 0 ? [ibm_security_group.vm_private[index([for vm in local.vm_list : vm.name if length(local.vm_private_rules[vm.name]) > 0], local.vm_list[count.index].name)].id] : [] + public_security_group_ids = var.private_network_only ? [] : length(local.vm_public_rules[local.vm_list[count.index].name]) > 0 ? [ibm_security_group.vm_public[index([for vm in local.vm_list : vm.name if length(local.vm_public_rules[vm.name]) > 0], local.vm_list[count.index].name)].id] : [] +} + +# Local values for VM configuration +locals { + # Flatten instances to individual VMs + vm_list = flatten([ + for instance in var.instances : [ + for i in range(instance.count) : { + name = instance.count > 1 ? "${instance.name}-${format("%02d", i + 1)}" : instance.name + instance_name = instance.name + datacenter = instance.datacenter + cores = lookup(instance, "cores", var.cores) + memory = lookup(instance, "memory", var.memory) + rootfs_size = lookup(instance, "rootfs_size", var.rootfs_size) + additional_disks = lookup(instance, "additional_disks", var.additional_disks) + image = lookup(instance, "image", var.image) + ssh_key_ids = lookup(instance, "ssh_key_ids", var.ssh_key_ids) + user_metadata = lookup(instance, "user_metadata", var.user_metadata) + notes = lookup(instance, "notes", var.notes) + tags = lookup(instance, "tags", []) + private_security_group_rules = lookup(instance, "private_security_group_rules", []) + public_security_group_rules = lookup(instance, "public_security_group_rules", []) + } + ] + ]) + + vm_hostnames = [for vm in local.vm_list : vm.name] + vm_datacenters = [for vm in local.vm_list : vm.datacenter] + vm_cores = [for vm in local.vm_list : vm.cores] + vm_memory = [for vm in local.vm_list : vm.memory] + vm_disks = [for vm in local.vm_list : concat([vm.rootfs_size], vm.additional_disks != null ? vm.additional_disks : [])] + vm_images = [for vm in local.vm_list : vm.image] + vm_ssh_key_ids = [for vm in local.vm_list : vm.ssh_key_ids] + vm_user_metadata = [for vm in local.vm_list : vm.user_metadata] + vm_notes = [for vm in local.vm_list : vm.notes] + vm_tags = [for vm in local.vm_list : vm.tags] +} + +# AWS Provider for Route53 +provider "aws" { + access_key = var.route53_aws_access_key_id + secret_key = var.route53_aws_secret_access_key + region = var.aws_region + + # Only configure if we're creating DNS records + skip_credentials_validation = !var.create_dns_records + skip_metadata_api_check = !var.create_dns_records + skip_region_validation = !var.create_dns_records +} + +# Local values for DNS +locals { + dns_domain = var.cluster_dns_zone != "" ? var.cluster_dns_zone : var.domain +} + +# Route53 A records for public IPs +resource "aws_route53_record" "vm_public" { + count = var.create_dns_records && !var.private_network_only ? 
length(ibm_compute_vm_instance.vm) : 0 + + zone_id = var.route53_aws_zone_id + name = "${ibm_compute_vm_instance.vm[count.index].hostname}.${local.dns_domain}" + type = "A" + ttl = var.dns_ttl + records = [ibm_compute_vm_instance.vm[count.index].ipv4_address] + + depends_on = [ibm_compute_vm_instance.vm] +} \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/files/outputs.tf b/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/files/outputs.tf new file mode 100644 index 00000000000..8f6f5621e93 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/files/outputs.tf @@ -0,0 +1,153 @@ +output "vm_count" { + description = "Total number of VMs created" + value = var.total_vm_count +} + +output "vm_ids" { + description = "List of VM IDs" + value = var.total_vm_count > 0 ? [for vm in ibm_compute_vm_instance.vm : vm.id] : [] +} + +output "vm_hostnames" { + description = "List of VM hostnames" + value = var.total_vm_count > 0 ? [for vm in ibm_compute_vm_instance.vm : vm.hostname] : [] +} + +output "vm_domains" { + description = "List of VM domains" + value = var.total_vm_count > 0 ? [for vm in ibm_compute_vm_instance.vm : vm.domain] : [] +} + +output "vm_fqdns" { + description = "List of VM FQDNs" + value = var.total_vm_count > 0 ? [for vm in ibm_compute_vm_instance.vm : "${vm.hostname}.${vm.domain}"] : [] +} + +output "vm_public_ips" { + description = "List of VM public IPs" + value = var.total_vm_count > 0 ? [for vm in ibm_compute_vm_instance.vm : vm.ipv4_address] : [] +} + +output "vm_private_ips" { + description = "List of VM private IPs" + value = var.total_vm_count > 0 ? [for vm in ibm_compute_vm_instance.vm : vm.ipv4_address_private] : [] +} + +output "vm_datacenters" { + description = "List of VM datacenters" + value = var.total_vm_count > 0 ? [for vm in ibm_compute_vm_instance.vm : vm.datacenter] : [] +} + +output "vm_cores" { + description = "List of VM CPU cores" + value = var.total_vm_count > 0 ? [for vm in ibm_compute_vm_instance.vm : vm.cores] : [] +} + +output "vm_memory" { + description = "List of VM memory in MB" + value = var.total_vm_count > 0 ? [for vm in ibm_compute_vm_instance.vm : vm.memory] : [] +} + +output "vm_operating_systems" { + description = "List of VM operating systems" + value = var.total_vm_count > 0 ? local.vm_images : [] +} + +output "vm_network_speeds" { + description = "List of VM network speeds" + value = var.total_vm_count > 0 ? [for vm in ibm_compute_vm_instance.vm : vm.network_speed] : [] +} + +output "vm_statuses" { + description = "List of VM statuses" + value = var.total_vm_count > 0 ? [for vm in ibm_compute_vm_instance.vm : "active"] : [] +} + +output "ssh_connection_commands" { + description = "List of SSH connection commands" + value = var.total_vm_count > 0 ? [for vm in ibm_compute_vm_instance.vm : "ssh root@${vm.ipv4_address}"] : [] +} + +output "private_security_group_ids" { + description = "List of private security group IDs for each VM" + value = var.total_vm_count > 0 ? [for vm in local.vm_list : length(local.vm_private_rules[vm.name]) > 0 ? [ibm_security_group.vm_private[index([for v in local.vm_list : v.name if length(local.vm_private_rules[v.name]) > 0], vm.name)].id] : []] : [] +} + +output "public_security_group_ids" { + description = "List of public security group IDs for each VM" + value = var.total_vm_count > 0 ? [for vm in local.vm_list : length(local.vm_public_rules[vm.name]) > 0 ? 
[ibm_security_group.vm_public[index([for v in local.vm_list : v.name if length(local.vm_public_rules[v.name]) > 0], vm.name)].id] : []] : [] +} + +output "vm_tags" { + description = "List of VM tags" + value = var.total_vm_count > 0 ? [for vm in ibm_compute_vm_instance.vm : vm.tags] : [] +} + +output "vm_hourly_billings" { + description = "List of VM hourly billing settings" + value = var.total_vm_count > 0 ? [for vm in ibm_compute_vm_instance.vm : vm.hourly_billing] : [] +} + +output "vm_creation_dates" { + description = "List of VM creation dates" + value = var.total_vm_count > 0 ? [for vm in ibm_compute_vm_instance.vm : timestamp()] : [] +} + +output "private_vlan_ids" { + description = "List of private VLAN IDs" + value = var.total_vm_count > 0 ? [for vm in ibm_compute_vm_instance.vm : vm.private_vlan_id] : [] +} + +output "public_vlan_ids" { + description = "List of public VLAN IDs" + value = var.total_vm_count > 0 ? [for vm in ibm_compute_vm_instance.vm : vm.public_vlan_id] : [] +} + +output "vm_private_security_group_ids" { + description = "Private security group IDs for each VM" + value = { for sg in ibm_security_group.vm_private : sg.name => sg.id } +} + +output "vm_public_security_group_ids" { + description = "Public security group IDs for each VM" + value = { for sg in ibm_security_group.vm_public : sg.name => sg.id } +} + +output "deployment_summary" { + description = "Deployment summary" + value = { + total_vms = var.total_vm_count + datacenters = var.total_vm_count > 0 ? distinct([for vm in ibm_compute_vm_instance.vm : vm.datacenter]) : [] + total_cores = var.total_vm_count > 0 ? sum([for vm in ibm_compute_vm_instance.vm : vm.cores]) : 0 + total_memory_mb = var.total_vm_count > 0 ? sum([for vm in ibm_compute_vm_instance.vm : vm.memory]) : 0 + images_used = var.total_vm_count > 0 ? distinct(local.vm_images) : [] + private_security_groups = length(ibm_security_group.vm_private) + public_security_groups = length(ibm_security_group.vm_public) + } +} + +# DNS outputs +output "dns_records_created" { + description = "Whether DNS records were created" + value = var.create_dns_records +} + +output "public_dns_names" { + description = "Public DNS names created" + value = var.create_dns_records && var.total_vm_count > 0 ? [for record in aws_route53_record.vm_public : record.fqdn] : [] +} + +output "route53_zone_id" { + description = "Route53 zone ID used" + value = var.create_dns_records ? var.route53_aws_zone_id : "" +} + +output "cluster_dns_zone" { + description = "DNS domain used for records" + value = var.create_dns_records ? local.dns_domain : "" +} + +output "dns_connection_commands" { + description = "SSH connection commands using DNS names" + value = var.create_dns_records && var.total_vm_count > 0 ? 
[for record in aws_route53_record.vm_public : "ssh root@${record.fqdn}"] : [] +} \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/files/variables.tf b/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/files/variables.tf new file mode 100644 index 00000000000..c27f91d8041 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/files/variables.tf @@ -0,0 +1,210 @@ +variable "ibm_cloud_api_key" { + description = "IBM Cloud API key" + type = string + sensitive = true +} + +variable "region" { + description = "IBM Cloud region" + type = string + default = "us-south" +} + +variable "guid" { + description = "Unique identifier for the deployment" + type = string +} + +variable "domain" { + description = "Domain name for IBM Cloud Classic VMs" + type = string + default = "example.com" +} + +variable "cluster_dns_zone" { + description = "DNS domain for Route53 records" + type = string + default = "" +} + +variable "image" { + description = "Default image for VMs" + type = string + default = "REDHAT_9_64" +} + +variable "network_speed" { + description = "Network speed in Mbps" + type = number + default = 1000 +} + +variable "hourly_billing" { + description = "Use hourly billing" + type = bool + default = true +} + +variable "private_network_only" { + description = "Use private network only" + type = bool + default = false +} + +variable "cores" { + description = "Default number of CPU cores" + type = number + default = 2 +} + +variable "memory" { + description = "Default memory in MB" + type = number + default = 4096 +} + +variable "rootfs_size" { + description = "Default root filesystem size in GB" + type = number + default = 25 +} + +variable "additional_disks" { + description = "Default additional disks" + type = list(number) + default = [] +} + +variable "local_disk" { + description = "Use local disk" + type = bool + default = true +} + +variable "ssh_key_ids" { + description = "Default SSH key IDs" + type = list(string) + default = [] +} + +variable "user_metadata" { + description = "Default user metadata" + type = string + default = "" +} + +variable "notes" { + description = "Default notes for VMs" + type = string + default = "VM created by AgnosticD" +} + +variable "dedicated_acct_host_only" { + description = "Use dedicated account host only" + type = bool + default = false +} + +variable "private_vlan_id" { + description = "Private VLAN ID" + type = string + default = "" +} + +variable "public_vlan_id" { + description = "Public VLAN ID" + type = string + default = "" +} + +variable "post_install_script_uri" { + description = "Post-installation script URI" + type = string + default = "" +} + + + +variable "instances" { + description = "VM instances configuration" + type = list(object({ + name = string + datacenter = string + count = number + cores = optional(number) + memory = optional(number) + rootfs_size = optional(number) + additional_disks = optional(list(number)) + image = optional(string) + ssh_key_ids = optional(list(string)) + user_metadata = optional(string) + notes = optional(string) + tags = optional(list(string)) + private_security_group_rules = optional(list(object({ + name = string + description = string + rule_type = string + ether_type = string + from_port = optional(number) + to_port = optional(number) + protocol = string + cidr = string + })), []) + public_security_group_rules = optional(list(object({ + name = string + description = string + rule_type = string + ether_type = string + from_port = 
optional(number) + to_port = optional(number) + protocol = string + cidr = string + })), []) + })) +} + +variable "total_vm_count" { + description = "Total number of VMs to create" + type = number +} + +# Route53 DNS Configuration +variable "create_dns_records" { + description = "Whether to create Route53 DNS records" + type = bool + default = false +} + +variable "route53_aws_access_key_id" { + description = "AWS access key ID for Route53" + type = string + default = "" + sensitive = true +} + +variable "route53_aws_secret_access_key" { + description = "AWS secret access key for Route53" + type = string + default = "" + sensitive = true +} + +variable "aws_region" { + description = "AWS region for Route53" + type = string + default = "us-east-1" +} + +variable "route53_aws_zone_id" { + description = "Route53 hosted zone ID" + type = string + default = "" +} + +variable "dns_ttl" { + description = "TTL for DNS records" + type = number + default = 300 +} \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/files/versions.tf b/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/files/versions.tf new file mode 100644 index 00000000000..755b7a5300c --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/files/versions.tf @@ -0,0 +1,14 @@ +terraform { + # variables.tf uses optional() attribute defaults, which need Terraform 1.3+ + required_version = ">= 1.3" + required_providers { + ibm = { + source = "IBM-Cloud/ibm" + version = "~> 1.60" + } + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + } +} \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/meta/main.yml b/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/meta/main.yml new file mode 100644 index 00000000000..72418fd3425 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/meta/main.yml @@ -0,0 +1,21 @@ +galaxy_info: + author: AgnosticD + description: Manage VMs on IBM Cloud Classic using Terraform + company: Red Hat + license: MIT + min_ansible_version: 2.9 + platforms: + - name: EL + versions: + - 8 + - 9 + - name: Fedora + versions: + - all + galaxy_tags: + - cloud + - ibm + - terraform + - virtualization + +dependencies: [] \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/tasks/main.yml b/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/tasks/main.yml new file mode 100644 index 00000000000..13e1a3aaeda --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/tasks/main.yml @@ -0,0 +1,11 @@ +--- +- name: Include pre-checks + import_tasks: pre_checks.yml + +- name: Include Terraform deployment + import_tasks: terraform_deploy.yml + when: ACTION == 'provision' + +- name: Include Terraform destroy + import_tasks: terraform_destroy.yml + when: ACTION == 'destroy' \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/tasks/pre_checks.yml b/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/tasks/pre_checks.yml new file mode 100644 index 00000000000..41fee225226 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/tasks/pre_checks.yml @@ -0,0 +1,189 @@ +--- +- name: Validate IBM Cloud API key is provided + ansible.builtin.fail: + msg: "ibm_cloud_api_key is required. Please provide it as an extra var: -e ibm_cloud_api_key=YOUR_API_KEY" + when: ibm_cloud_api_key is not defined or ibm_cloud_api_key == "" + +- name: Validate VM configuration + ansible.builtin.fail: + msg: "instances list is required with at least one VM. 
Please provide it as an extra var." + when: + - ACTION == 'provision' + - instances is not defined or instances|length == 0 + +- name: Validate individual VM names + ansible.builtin.fail: + msg: "VM {{ item.0.name | default('at index ' + item.1|string) }}: name is required" + when: + - ACTION == 'provision' + - item.0.name is not defined or item.0.name == "" + loop: "{{ instances | zip(range(instances|length)) | list }}" + loop_control: + label: "{{ item.0.name | default('VM ' + item.1|string) }}" + +- name: Validate individual VM datacenters + ansible.builtin.fail: + msg: "VM {{ item.0.name | default('at index ' + item.1|string) }}: datacenter is required" + when: + - ACTION == 'provision' + - item.0.datacenter is not defined or item.0.datacenter == "" + loop: "{{ instances | zip(range(instances|length)) | list }}" + loop_control: + label: "{{ item.0.name | default('VM ' + item.1|string) }}" + +- name: Validate individual VM count values + ansible.builtin.fail: + msg: "VM {{ item.0.name | default('at index ' + item.1|string) }}: count must be a positive integer (got: {{ item.0.count }})" + when: + - ACTION == 'provision' + - item.0.count is defined and (item.0.count is not number or item.0.count <= 0 or item.0.count != (item.0.count | int)) + loop: "{{ instances | zip(range(instances|length)) | list }}" + loop_control: + label: "{{ item.0.name | default('VM ' + item.1|string) }}" + +- name: Validate output_dir is provided + ansible.builtin.fail: + msg: "output_dir is required. Please provide it as an extra var: -e output_dir=YOUR_OUTPUT_DIR" + when: output_dir is not defined or output_dir == "" + +- name: Validate cluster_dns_zone is provided when DNS is enabled + ansible.builtin.fail: + msg: "cluster_dns_zone is required when create_dns_records is true. Please provide it as an extra var: -e cluster_dns_zone=YOUR_DNS_ZONE" + when: + - create_dns_records | default(false) | bool + - cluster_dns_zone is not defined or cluster_dns_zone == "" + +- name: Validate Route53 AWS credentials when DNS is enabled + ansible.builtin.fail: + msg: "Route53 AWS credentials are required when create_dns_records is true. 
Please provide: -e route53_aws_access_key_id=YOUR_KEY -e route53_aws_secret_access_key=YOUR_SECRET" + when: + - create_dns_records | default(false) | bool + - (route53_aws_access_key_id is not defined or route53_aws_access_key_id == "" or + route53_aws_secret_access_key is not defined or route53_aws_secret_access_key == "") + +- name: Initialize Route53 zone collection + ansible.builtin.set_fact: + # This must be the same variable the pagination tasks append to; route53_next_marker + # is deliberately left undefined so the first page request omits the marker. + route53_all_zones: [] + when: + - create_dns_records | default(false) | bool + - cluster_dns_zone is defined and cluster_dns_zone != "" + +- name: Discover Route53 hosted zones with pagination + ansible.builtin.include_tasks: route53_pagination.yml + when: + - create_dns_records | default(false) | bool + - cluster_dns_zone is defined and cluster_dns_zone != "" + +- name: Find matching hosted zone for cluster_dns_zone + ansible.builtin.set_fact: + discovered_zone: "{{ route53_all_zones | selectattr('name', 'equalto', cluster_dns_zone + '.') | first | default(omit) }}" + when: + - create_dns_records | default(false) | bool + - route53_all_zones is defined + - route53_all_zones | length > 0 + +- name: Set route53_aws_zone_id when zone found + ansible.builtin.set_fact: + route53_aws_zone_id: "{{ discovered_zone.id.split('/') | last }}" + when: + - create_dns_records | default(false) | bool + - discovered_zone is defined + - discovered_zone != omit + - discovered_zone.id is defined + +- name: Display discovered Route53 zone information + ansible.builtin.debug: + msg: + - "✅ Route53 zone discovered automatically:" + - "Domain: {{ cluster_dns_zone }}" + - "Zone ID: {{ route53_aws_zone_id }}" + - "Zone Name: {{ discovered_zone.name }}" + when: + - create_dns_records | default(false) | bool + - discovered_zone is defined + - discovered_zone != omit + +- name: Validate Route53 zone was found or provided + ansible.builtin.fail: + msg: | + Route53 hosted zone not found for domain '{{ cluster_dns_zone }}'. + + Please either: + 1. Create a hosted zone for '{{ cluster_dns_zone }}' in AWS Route53, or + 2. 
Manually provide the zone ID: -e route53_aws_zone_id=YOUR_ZONE_ID + + Available zones in your account ({{ route53_all_zones | length }} total): + {% if route53_all_zones is defined and route53_all_zones | length > 0 %} + {% for zone in route53_all_zones %} + - {{ zone.name }} ({{ zone.id.split('/') | last }}) + {% endfor %} + {% else %} + (Unable to list zones - check AWS credentials) + {% endif %} + when: + - create_dns_records | default(false) | bool + - route53_aws_zone_id is not defined or route53_aws_zone_id == "" + +- name: Check if Terraform is installed + ansible.builtin.command: terraform version + register: terraform_version + failed_when: false + changed_when: false + +- name: Install Terraform if not present + block: + - name: Download Terraform + ansible.builtin.get_url: + url: "{{ terraform_download_url }}" + dest: "{{ output_dir }}/terraform.zip" + mode: '0644' + + - name: Unzip Terraform + ansible.builtin.unarchive: + src: "{{ output_dir }}/terraform.zip" + dest: "/usr/local/bin/" + remote_src: yes + become: true + + - name: Make Terraform executable + ansible.builtin.file: + path: "/usr/local/bin/terraform" + mode: '0755' + become: true + + - name: Verify Terraform installation + ansible.builtin.command: terraform version + register: terraform_verify + failed_when: terraform_verify.rc != 0 + changed_when: false + + - name: Clean up Terraform zip file + ansible.builtin.file: + path: "{{ output_dir }}/terraform.zip" + state: absent + + when: terraform_version.rc != 0 + +- name: Create terraform working directory + ansible.builtin.file: + path: "{{ terraform_working_dir }}" + state: directory + mode: '0755' + +- name: Display pre-check summary + ansible.builtin.debug: + msg: + - "Pre-checks completed successfully" + - "Action: {{ ACTION }}" + - "IBM Cloud API Key: [REDACTED]" + - "Output Directory: {{ output_dir }}" + - "Terraform working directory: {{ terraform_working_dir }}" + - "{% if create_dns_records | default(false) | bool %}DNS Configuration:{% endif %}" + - "{% if create_dns_records | default(false) | bool %} - DNS Records: ENABLED{% endif %}" + - "{% if create_dns_records | default(false) | bool %} - Cluster DNS Zone: {{ cluster_dns_zone }}{% endif %}" + - "{% if create_dns_records | default(false) | bool %} - Route53 Zone ID: {{ route53_aws_zone_id }}{% endif %}" + - "{% if create_dns_records | default(false) | bool %} - AWS Region: {{ aws_region | default('us-east-1') }}{% endif %}" + - "{% if ACTION == 'provision' and instances is defined %}VM Configuration ({{ instances|length }} instance types, {{ instances|map(attribute='count', default=1)|sum }} total VMs):{% endif %}" + - "{% if ACTION == 'provision' and instances is defined %}{% for instance in instances %} - {{ instance.name }} ({{ instance.datacenter }}) - Count: {{ instance.count | default(1) }}{% endfor %}{% endif %}" \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/tasks/route53_pagination.yml b/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/tasks/route53_pagination.yml new file mode 100644 index 00000000000..707c8f45121 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/tasks/route53_pagination.yml @@ -0,0 +1,19 @@ +--- +# Route53 pagination handler for zones discovery +- name: Discover Route53 hosted zones (with pagination) + amazon.aws.route53_info: + aws_access_key: "{{ route53_aws_access_key_id }}" + aws_secret_key: "{{ route53_aws_secret_access_key }}" + query: hosted_zone + next_marker: "{{ route53_next_marker | 
default(omit) }}" + register: route53_zones_page + +- name: Add zones from current page + ansible.builtin.set_fact: + route53_all_zones: "{{ route53_all_zones | default([]) + route53_zones_page.hosted_zones }}" + +- name: Continue pagination if more zones exist + ansible.builtin.include_tasks: route53_pagination.yml + when: route53_zones_page.next_marker is defined + vars: + route53_next_marker: "{{ route53_zones_page.next_marker }}" \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/tasks/terraform_deploy.yml b/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/tasks/terraform_deploy.yml new file mode 100644 index 00000000000..41bc3ec9ad4 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/tasks/terraform_deploy.yml @@ -0,0 +1,148 @@ +--- +- name: Copy Terraform templates to working directory + ansible.builtin.copy: + src: "{{ role_path }}/files/" + dest: "{{ terraform_working_dir }}/" + mode: '0644' + +- name: Create terraform.tfvars file + ansible.builtin.template: + src: terraform.tfvars.j2 + dest: "{{ terraform_working_dir }}/terraform.tfvars" + mode: '0644' + +- name: Create Terraform log directory + ansible.builtin.file: + path: "{{ output_dir }}/terraform-logs" + state: directory + mode: '0755' + +- name: Set terraform logging timestamp + ansible.builtin.shell: + cmd: date +%Y%m%dT%H%M%S + register: timestamp_result + changed_when: false + +- name: Set terraform logging timestamp fact + ansible.builtin.set_fact: + terraform_log_timestamp: "{{ timestamp_result.stdout }}" + +- name: Initialize Terraform (with logging) + ansible.builtin.shell: + cmd: | + export TF_LOG={{ terraform_log_level }} + export TF_LOG_PATH="{{ output_dir }}/terraform-logs/terraform-init-{{ terraform_log_timestamp }}.log" + echo "=== Terraform Init Started at $(date) ===" >> {{ output_dir }}/terraform-logs/terraform-init-{{ terraform_log_timestamp }}.log + terraform init 2>&1 | tee -a {{ output_dir }}/terraform-logs/terraform-init-{{ terraform_log_timestamp }}.log + echo "=== Terraform Init Completed at $(date) ===" >> {{ output_dir }}/terraform-logs/terraform-init-{{ terraform_log_timestamp }}.log + chdir: "{{ terraform_working_dir }}" + register: terraform_init + changed_when: terraform_init.rc == 0 + when: terraform_enable_logging | default(true) + +- name: Plan Terraform deployment (with logging) + ansible.builtin.shell: + cmd: | + export TF_LOG={{ terraform_log_level }} + export TF_LOG_PATH="{{ output_dir }}/terraform-logs/terraform-plan-{{ terraform_log_timestamp }}.log" + echo "=== Terraform Plan Started at $(date) ===" >> {{ output_dir }}/terraform-logs/terraform-plan-{{ terraform_log_timestamp }}.log + terraform plan -out=tfplan 2>&1 | tee -a {{ output_dir }}/terraform-logs/terraform-plan-{{ terraform_log_timestamp }}.log + echo "=== Terraform Plan Completed at $(date) ===" >> {{ output_dir }}/terraform-logs/terraform-plan-{{ terraform_log_timestamp }}.log + chdir: "{{ terraform_working_dir }}" + register: terraform_plan + changed_when: terraform_plan.rc == 0 + when: terraform_enable_logging | default(true) + +- name: Apply Terraform deployment (with logging) + ansible.builtin.shell: + cmd: | + export TF_LOG={{ terraform_log_level }} + export TF_LOG_PATH="{{ output_dir }}/terraform-logs/terraform-apply-{{ terraform_log_timestamp }}.log" + echo "=== Terraform Apply Started at $(date) ===" >> {{ output_dir }}/terraform-logs/terraform-apply-{{ terraform_log_timestamp }}.log + terraform apply -auto-approve tfplan 2>&1 | tee -a {{ 
output_dir }}/terraform-logs/terraform-apply-{{ terraform_log_timestamp }}.log + echo "=== Terraform Apply Completed at $(date) ===" >> {{ output_dir }}/terraform-logs/terraform-apply-{{ terraform_log_timestamp }}.log + chdir: "{{ terraform_working_dir }}" + register: terraform_apply + changed_when: terraform_apply.rc == 0 + retries: 15 + delay: 60 + until: terraform_apply.rc == 0 + failed_when: false + when: terraform_enable_logging | default(true) + +- name: Handle terraform apply failure + ansible.builtin.fail: + msg: + - "Terraform apply failed after {{ terraform_apply.attempts | default(1) }} attempts" + - "Error output: {{ terraform_apply.stderr | default('No error output') }}" + - "Standard output: {{ terraform_apply.stdout | default('No output') }}" + - "Check {{ output_dir }}/terraform-logs/terraform-apply-{{ terraform_log_timestamp }}.log for detailed logs" + when: terraform_apply.rc != 0 + +- name: Check Terraform state after apply (with logging) + ansible.builtin.shell: + cmd: | + export TF_LOG={{ terraform_log_level }} + export TF_LOG_PATH="{{ output_dir }}/terraform-logs/terraform-state-{{ terraform_log_timestamp }}.log" + echo "=== Terraform State Check Started at $(date) ===" >> {{ output_dir }}/terraform-logs/terraform-state-{{ terraform_log_timestamp }}.log + terraform show -json 2>&1 | tee -a {{ output_dir }}/terraform-logs/terraform-state-{{ terraform_log_timestamp }}.log + echo "=== Terraform State Check Completed at $(date) ===" >> {{ output_dir }}/terraform-logs/terraform-state-{{ terraform_log_timestamp }}.log + chdir: "{{ terraform_working_dir }}" + register: terraform_state + changed_when: false + when: terraform_enable_logging | default(true) + +- name: Verify deployment status (with logging) + ansible.builtin.shell: + cmd: | + export TF_LOG={{ terraform_log_level }} + export TF_LOG_PATH="{{ output_dir }}/terraform-logs/terraform-verify-{{ terraform_log_timestamp }}.log" + echo "=== Terraform Verify Started at $(date) ===" >> {{ output_dir }}/terraform-logs/terraform-verify-{{ terraform_log_timestamp }}.log + terraform plan -detailed-exitcode 2>&1 | tee -a {{ output_dir }}/terraform-logs/terraform-verify-{{ terraform_log_timestamp }}.log + echo "=== Terraform Verify Completed at $(date) ===" >> {{ output_dir }}/terraform-logs/terraform-verify-{{ terraform_log_timestamp }}.log + chdir: "{{ terraform_working_dir }}" + register: terraform_verify + changed_when: false + failed_when: terraform_verify.rc not in [0, 2] + retries: 5 + delay: 10 + until: terraform_verify.rc in [0, 2] + when: terraform_enable_logging | default(true) + +- name: Create master terraform log file + ansible.builtin.shell: + cmd: | + echo "=== Terraform Deployment Summary ===" > {{ output_dir }}/terraform-logs/terraform-master-{{ terraform_log_timestamp }}.log + echo "Date: $(date)" >> {{ output_dir }}/terraform-logs/terraform-master-{{ terraform_log_timestamp }}.log + echo "Working Directory: {{ terraform_working_dir }}" >> {{ output_dir }}/terraform-logs/terraform-master-{{ terraform_log_timestamp }}.log + echo "GUID: {{ guid }}" >> {{ output_dir }}/terraform-logs/terraform-master-{{ terraform_log_timestamp }}.log + echo "Action: {{ ACTION }}" >> {{ output_dir }}/terraform-logs/terraform-master-{{ terraform_log_timestamp }}.log + echo "" >> {{ output_dir }}/terraform-logs/terraform-master-{{ terraform_log_timestamp }}.log + echo "=== Init Log ===" >> {{ output_dir }}/terraform-logs/terraform-master-{{ terraform_log_timestamp }}.log + cat {{ output_dir }}/terraform-logs/terraform-init-{{ 
terraform_log_timestamp }}.log >> {{ output_dir }}/terraform-logs/terraform-master-{{ terraform_log_timestamp }}.log + echo "" >> {{ output_dir }}/terraform-logs/terraform-master-{{ terraform_log_timestamp }}.log + echo "=== Plan Log ===" >> {{ output_dir }}/terraform-logs/terraform-master-{{ terraform_log_timestamp }}.log + cat {{ output_dir }}/terraform-logs/terraform-plan-{{ terraform_log_timestamp }}.log >> {{ output_dir }}/terraform-logs/terraform-master-{{ terraform_log_timestamp }}.log + echo "" >> {{ output_dir }}/terraform-logs/terraform-master-{{ terraform_log_timestamp }}.log + echo "=== Apply Log ===" >> {{ output_dir }}/terraform-logs/terraform-master-{{ terraform_log_timestamp }}.log + cat {{ output_dir }}/terraform-logs/terraform-apply-{{ terraform_log_timestamp }}.log >> {{ output_dir }}/terraform-logs/terraform-master-{{ terraform_log_timestamp }}.log + echo "" >> {{ output_dir }}/terraform-logs/terraform-master-{{ terraform_log_timestamp }}.log + echo "=== State Check Log ===" >> {{ output_dir }}/terraform-logs/terraform-master-{{ terraform_log_timestamp }}.log + cat {{ output_dir }}/terraform-logs/terraform-state-{{ terraform_log_timestamp }}.log >> {{ output_dir }}/terraform-logs/terraform-master-{{ terraform_log_timestamp }}.log + echo "" >> {{ output_dir }}/terraform-logs/terraform-master-{{ terraform_log_timestamp }}.log + echo "=== Verify Log ===" >> {{ output_dir }}/terraform-logs/terraform-master-{{ terraform_log_timestamp }}.log + cat {{ output_dir }}/terraform-logs/terraform-verify-{{ terraform_log_timestamp }}.log >> {{ output_dir }}/terraform-logs/terraform-master-{{ terraform_log_timestamp }}.log + register: master_log_creation + changed_when: master_log_creation.rc == 0 + when: terraform_enable_logging | default(true) + +- name: Display deployment status + ansible.builtin.debug: + msg: + - "Terraform apply completed successfully" + - "Deployment status: {{ 'No changes needed' if terraform_verify.rc == 0 else 'Changes applied' }}" + - "Apply duration: {{ terraform_apply.delta if terraform_apply.delta is defined else 'N/A' }}" + - "📋 Terraform logs available at:" + - " - Master log: {{ output_dir }}/terraform-logs/terraform-master-{{ terraform_log_timestamp }}.log" + - " - Individual logs: {{ output_dir }}/terraform-logs/terraform-*-{{ terraform_log_timestamp }}.log" + - "🔍 To view logs: tail -f {{ output_dir }}/terraform-logs/terraform-master-{{ terraform_log_timestamp }}.log" \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/tasks/terraform_destroy.yml b/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/tasks/terraform_destroy.yml new file mode 100644 index 00000000000..b1945c94414 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/tasks/terraform_destroy.yml @@ -0,0 +1,129 @@ +--- +- name: Check if Terraform working directory exists + ansible.builtin.stat: + path: "{{ terraform_working_dir }}" + register: terraform_dir_stat + +- name: Fail if Terraform working directory doesn't exist + ansible.builtin.fail: + msg: "Terraform working directory {{ terraform_working_dir }} does not exist. Nothing to destroy." + when: not terraform_dir_stat.stat.exists + +- name: Check if Terraform state file exists + ansible.builtin.stat: + path: "{{ terraform_working_dir }}/terraform.tfstate" + register: terraform_state_stat + +- name: Fail if Terraform state file doesn't exist + ansible.builtin.fail: + msg: "Terraform state file does not exist in {{ terraform_working_dir }}. Nothing to destroy." 
+ when: not terraform_state_stat.stat.exists + +- name: Create Terraform log directory + ansible.builtin.file: + path: "{{ output_dir }}/terraform-logs" + state: directory + mode: '0755' + +- name: Set terraform logging timestamp + ansible.builtin.shell: + cmd: date +%Y%m%dT%H%M%S + register: timestamp_result + changed_when: false + +- name: Set terraform logging timestamp fact + ansible.builtin.set_fact: + terraform_log_timestamp: "{{ timestamp_result.stdout }}" + +- name: Get current Terraform state (with logging) + ansible.builtin.shell: + cmd: | + export TF_LOG={{ terraform_log_level }} + export TF_LOG_PATH="{{ output_dir }}/terraform-logs/terraform-show-{{ terraform_log_timestamp }}.log" + echo "=== Terraform Show Started at $(date) ===" >> {{ output_dir }}/terraform-logs/terraform-show-{{ terraform_log_timestamp }}.log + terraform show -json 2>&1 | tee -a {{ output_dir }}/terraform-logs/terraform-show-{{ terraform_log_timestamp }}.log + echo "=== Terraform Show Completed at $(date) ===" >> {{ output_dir }}/terraform-logs/terraform-show-{{ terraform_log_timestamp }}.log + chdir: "{{ terraform_working_dir }}" + register: terraform_show + changed_when: false + failed_when: false + when: terraform_enable_logging | default(true) + +- name: Parse current state + ansible.builtin.set_fact: + current_state: "{{ terraform_show.stdout | from_json }}" + when: terraform_show.rc == 0 + +# Note: the state JSON has a top-level "values" key; bracket access is required because +# dot notation on 'values' resolves to the dict method instead of the key. +- name: Display resources to be destroyed + ansible.builtin.debug: + msg: + - "Resources to be destroyed:" + - "{{ current_state['values'].root_module.resources | map(attribute='address') | list }}" + when: + - terraform_show.rc == 0 + - current_state['values'] is defined + - current_state['values'].root_module is defined + +- name: Plan Terraform destroy (with logging) + ansible.builtin.shell: + cmd: | + export TF_LOG={{ terraform_log_level }} + export TF_LOG_PATH="{{ output_dir }}/terraform-logs/terraform-destroy-plan-{{ terraform_log_timestamp }}.log" + echo "=== Terraform Destroy Plan Started at $(date) ===" >> {{ output_dir }}/terraform-logs/terraform-destroy-plan-{{ terraform_log_timestamp }}.log + terraform plan -destroy -out=destroy_plan 2>&1 | tee -a {{ output_dir }}/terraform-logs/terraform-destroy-plan-{{ terraform_log_timestamp }}.log + echo "=== Terraform Destroy Plan Completed at $(date) ===" >> {{ output_dir }}/terraform-logs/terraform-destroy-plan-{{ terraform_log_timestamp }}.log + chdir: "{{ terraform_working_dir }}" + register: terraform_destroy_plan + changed_when: terraform_destroy_plan.rc == 0 + when: terraform_enable_logging | default(true) + +- name: Apply Terraform destroy (with logging) + ansible.builtin.shell: + cmd: | + export TF_LOG={{ terraform_log_level }} + export TF_LOG_PATH="{{ output_dir }}/terraform-logs/terraform-destroy-apply-{{ terraform_log_timestamp }}.log" + echo "=== Terraform Destroy Apply Started at $(date) ===" >> {{ output_dir }}/terraform-logs/terraform-destroy-apply-{{ terraform_log_timestamp }}.log + terraform apply -auto-approve destroy_plan 2>&1 | tee -a {{ output_dir }}/terraform-logs/terraform-destroy-apply-{{ terraform_log_timestamp }}.log + echo "=== Terraform Destroy Apply Completed at $(date) ===" >> {{ output_dir }}/terraform-logs/terraform-destroy-apply-{{ terraform_log_timestamp }}.log + chdir: "{{ terraform_working_dir }}" + register: terraform_destroy_apply + changed_when: terraform_destroy_apply.rc == 0 + when: terraform_enable_logging | default(true) + +- name: Create master terraform destroy log file + ansible.builtin.shell: + cmd: | + echo "=== Terraform 
Destroy Summary ===" > {{ output_dir }}/terraform-logs/terraform-destroy-master-{{ terraform_log_timestamp }}.log + echo "Date: $(date)" >> {{ output_dir }}/terraform-logs/terraform-destroy-master-{{ terraform_log_timestamp }}.log + echo "Working Directory: {{ terraform_working_dir }}" >> {{ output_dir }}/terraform-logs/terraform-destroy-master-{{ terraform_log_timestamp }}.log + echo "GUID: {{ guid }}" >> {{ output_dir }}/terraform-logs/terraform-destroy-master-{{ terraform_log_timestamp }}.log + echo "Action: {{ ACTION }}" >> {{ output_dir }}/terraform-logs/terraform-destroy-master-{{ terraform_log_timestamp }}.log + echo "" >> {{ output_dir }}/terraform-logs/terraform-destroy-master-{{ terraform_log_timestamp }}.log + echo "=== State Check Log ===" >> {{ output_dir }}/terraform-logs/terraform-destroy-master-{{ terraform_log_timestamp }}.log + cat {{ output_dir }}/terraform-logs/terraform-show-{{ terraform_log_timestamp }}.log >> {{ output_dir }}/terraform-logs/terraform-destroy-master-{{ terraform_log_timestamp }}.log + echo "" >> {{ output_dir }}/terraform-logs/terraform-destroy-master-{{ terraform_log_timestamp }}.log + echo "=== Destroy Plan Log ===" >> {{ output_dir }}/terraform-logs/terraform-destroy-master-{{ terraform_log_timestamp }}.log + cat {{ output_dir }}/terraform-logs/terraform-destroy-plan-{{ terraform_log_timestamp }}.log >> {{ output_dir }}/terraform-logs/terraform-destroy-master-{{ terraform_log_timestamp }}.log + echo "" >> {{ output_dir }}/terraform-logs/terraform-destroy-master-{{ terraform_log_timestamp }}.log + echo "=== Destroy Apply Log ===" >> {{ output_dir }}/terraform-logs/terraform-destroy-master-{{ terraform_log_timestamp }}.log + cat {{ output_dir }}/terraform-logs/terraform-destroy-apply-{{ terraform_log_timestamp }}.log >> {{ output_dir }}/terraform-logs/terraform-destroy-master-{{ terraform_log_timestamp }}.log + register: master_destroy_log_creation + changed_when: master_destroy_log_creation.rc == 0 + when: terraform_enable_logging | default(true) + +- name: Clean up Terraform files + ansible.builtin.file: + path: "{{ terraform_working_dir }}" + state: absent + when: cleanup_terraform_files | default(true) + +- name: Display destroy completion + ansible.builtin.debug: + msg: + - "VM(s) destruction completed successfully" + - "All resources have been removed from IBM Cloud" + - "Terraform working directory: {{ 'Cleaned up' if (cleanup_terraform_files | default(true)) else 'Preserved at ' + terraform_working_dir }}" + - "📋 Terraform destroy logs available at:" + - " - Master log: {{ output_dir }}/terraform-logs/terraform-destroy-master-{{ terraform_log_timestamp }}.log" + - " - Individual logs: {{ output_dir }}/terraform-logs/terraform-*-{{ terraform_log_timestamp }}.log" + - "🔍 To view logs: tail -f {{ output_dir }}/terraform-logs/terraform-destroy-master-{{ terraform_log_timestamp }}.log" \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/templates/inventory.j2 b/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/templates/inventory.j2 new file mode 100644 index 00000000000..a7a8271d23e --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/templates/inventory.j2 @@ -0,0 +1,102 @@ +# Ansible Inventory - IBM Cloud Classic VMs +# Generated by AgnosticD infra-ibm-cloud-classic-vm role +# Total VMs: {{ vm_info | length }} +# GUID: {{ guid }} + +[{{ inventory_group | default('all') }}] +{% for vm in vm_info %} +{{ vm.fqdn }} ansible_host={{ vm.public_ip }} ansible_user={{ ssh_user | 
default('root') }} ansible_ssh_private_key_file={{ ssh_private_key_path }} +{% endfor %} + +{% set ansible_groups = {} %} +{% for vm in vm_info %} +{% for tag in vm.tags %} +{% if tag.startswith('ansible_group:') %} +{% set group_name = tag.split(':', 1)[1] | regex_replace('[^a-zA-Z0-9_]', '_') %} +{% if group_name not in ansible_groups %} +{% set _ = ansible_groups.update({group_name: []}) %} +{% endif %} +{% set _ = ansible_groups[group_name].append(vm) %} +{% endif %} +{% endfor %} +{% endfor %} + +{% for group_name, group_vms in ansible_groups.items() %} +[{{ group_name }}] +{% for vm in group_vms %} +{{ vm.fqdn }} +{% endfor %} + +{% endfor %} + +{% set operating_systems = [] %} +{% for vm in vm_info %} +{% set os_tag = vm.operating_system | lower | replace('_', '-') %} +{% if os_tag not in operating_systems %} +{% set _ = operating_systems.append(os_tag) %} +{% endif %} +{% endfor %} + +{% for os in operating_systems %} +[tag_{{ os }}] +{% for vm in vm_info %} +{% if vm.operating_system | lower | replace('_', '-') == os %} +{{ vm.fqdn }} +{% endif %} +{% endfor %} + +{% endfor %} + +{% set datacenters = [] %} +{% for vm in vm_info %} +{% if vm.datacenter not in datacenters %} +{% set _ = datacenters.append(vm.datacenter) %} +{% endif %} +{% endfor %} + +{% for dc in datacenters %} +[datacenter_{{ dc }}] +{% for vm in vm_info %} +{% if vm.datacenter == dc %} +{{ vm.fqdn }} +{% endif %} +{% endfor %} + +{% endfor %} + +{% for vm in vm_info %} +[{{ vm.hostname }}] +{{ vm.fqdn }} + +{% endfor %} + +[{{ inventory_group | default('all') }}:vars] +# Common variables +ssh_user={{ ssh_user | default('root') }} +ssh_private_key_path={{ ssh_private_key_path }} + +# Individual VM metadata +{% for vm in vm_info %} +[{{ vm.hostname }}:vars] +vm_id={{ vm.id }} +vm_hostname={{ vm.hostname }} +vm_domain={{ vm.domain }} +vm_fqdn={{ vm.fqdn }} +vm_public_ip={{ vm.public_ip }} +vm_private_ip={{ vm.private_ip }} +vm_datacenter={{ vm.datacenter }} +vm_cores={{ vm.cores }} +vm_memory={{ vm.memory }} +vm_operating_system={{ vm.operating_system }} +vm_network_speed={{ vm.network_speed }} +vm_status={{ vm.status }} +vm_private_security_group_id={{ vm.private_security_group_id | default([]) | to_json }} +vm_public_security_group_id={{ vm.public_security_group_id | default([]) | to_json }} +vm_tags={{ vm.tags | to_json }} +vm_hourly_billing={{ vm.hourly_billing }} +vm_creation_date={{ vm.creation_date }} +vm_private_vlan_id={{ vm.private_vlan_id }} +vm_public_vlan_id={{ vm.public_vlan_id }} +ssh_connection_command={{ vm.ssh_connection }} + +{% endfor %} \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/templates/terraform.tfvars.j2 b/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/templates/terraform.tfvars.j2 new file mode 100644 index 00000000000..e0f795489b5 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-manage-vms/templates/terraform.tfvars.j2 @@ -0,0 +1,103 @@ +# IBM Cloud Classic VM Terraform Variables +# Generated by AgnosticD infra-ibm-cloud-classic-manage-vms role + +# Required variables +ibm_cloud_api_key = "{{ ibm_cloud_api_key }}" +guid = "{{ guid }}" + +# Optional variables with defaults +region = "{{ region | default('us-south') }}" +domain = "{{ domain | default('example.com') }}" +cluster_dns_zone = "{{ cluster_dns_zone | default('') }}" +image = "{{ image | default('REDHAT_9_64') }}" +network_speed = {{ network_speed | default(1000) }} +hourly_billing = {{ hourly_billing | default(true) | lower }} +private_network_only = {{ 
private_network_only | default(false) | lower }} +cores = {{ cores | default(2) }} +memory = {{ memory | default(4096) }} +rootfs_size = {{ rootfs_size | default(25) }} +additional_disks = {{ additional_disks | default([]) | to_json }} +local_disk = {{ local_disk | default(true) | lower }} +ssh_key_ids = {{ ssh_key_ids | default([]) | to_json }} +user_metadata = "{{ user_metadata | default('') }}" +notes = "{{ notes | default('VM created by AgnosticD') }}" +dedicated_acct_host_only = {{ dedicated_acct_host_only | default(false) | lower }} + +private_vlan_id = "{{ private_vlan_id | default('') }}" + +public_vlan_id = "{{ public_vlan_id | default('') }}" + +post_install_script_uri = "{{ post_install_script_uri | default('') }}" + +# No global security groups or default security groups - use only inline rules per VM + +# VM instances configuration - this is the key for multiple VMs +instances = [ +{% for instance in instances %} + { + name = "{{ instance.name }}" + datacenter = "{{ instance.datacenter }}" + count = {{ instance.count | default(1) }} + cores = {{ instance.cores | default(cores | default(2)) }} + memory = {{ instance.memory | default(memory | default(4096)) }} + rootfs_size = {{ instance.rootfs_size | default(rootfs_size | default(25)) }} + additional_disks = {{ instance.additional_disks | default([]) | to_json }} + image = "{{ instance.image | default(image | default('REDHAT_9_64')) }}" + ssh_key_ids = {{ instance.ssh_key_ids | default(ssh_key_ids | default([])) | to_json }} + user_metadata = "{{ instance.user_metadata | default(user_metadata | default('')) }}" + notes = "{{ instance.notes | default(notes | default('VM created by AgnosticD')) }}" + tags = {{ instance.tags | default([]) | to_json }} +{% if instance.private_security_group_rules is defined %} + private_security_group_rules = [ +{% for rule in instance.private_security_group_rules %} + { + name = "{{ rule.name }}" + description = "{{ rule.description }}" + rule_type = "{{ rule.rule_type }}" + ether_type = "{{ rule.ether_type }}" +{% if rule.from_port is defined %} + from_port = {{ rule.from_port }} +{% endif %} +{% if rule.to_port is defined %} + to_port = {{ rule.to_port }} +{% endif %} + protocol = "{{ rule.protocol }}" + cidr = "{{ rule.cidr }}" + }{{ ',' if not loop.last }} +{% endfor %} + ] +{% endif %} +{% if instance.public_security_group_rules is defined %} + public_security_group_rules = [ +{% for rule in instance.public_security_group_rules %} + { + name = "{{ rule.name }}" + description = "{{ rule.description }}" + rule_type = "{{ rule.rule_type }}" + ether_type = "{{ rule.ether_type }}" +{% if rule.from_port is defined %} + from_port = {{ rule.from_port }} +{% endif %} +{% if rule.to_port is defined %} + to_port = {{ rule.to_port }} +{% endif %} + protocol = "{{ rule.protocol }}" + cidr = "{{ rule.cidr }}" + }{{ ',' if not loop.last }} +{% endfor %} + ] +{% endif %} + }{{ ',' if not loop.last }} +{% endfor %} +] + +# Calculate total VM count +total_vm_count = {{ instances | map(attribute='count', default=1) | sum }} + +# DNS Configuration +create_dns_records = {{ create_dns_records | default(false) | lower }} +route53_aws_access_key_id = "{{ route53_aws_access_key_id | default('') }}" +route53_aws_secret_access_key = "{{ route53_aws_secret_access_key | default('') }}" +aws_region = "{{ aws_region | default('us-east-1') }}" +route53_aws_zone_id = "{{ route53_aws_zone_id | default('') }}" +dns_ttl = {{ dns_ttl | default(300) }} \ No newline at end of file diff --git 
a/ansible/roles-infra/infra-ibm-cloud-classic-ssh-key/README.md b/ansible/roles-infra/infra-ibm-cloud-classic-ssh-key/README.md new file mode 100644 index 00000000000..4fde53fdedd --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-ssh-key/README.md @@ -0,0 +1,124 @@ +# IBM Cloud Classic SSH Key Infrastructure Role + +This role manages SSH keys in IBM Cloud Classic infrastructure. It can create, update, and destroy SSH keys using the IBM Cloud CLI. + +## Requirements + +- IBM Cloud account with Classic Infrastructure access +- Valid IBM Cloud API key with Classic Infrastructure permissions +- SSH key pair already generated locally (using `create_ssh_provision_key` role or manually) +- IBM Cloud CLI will be automatically installed if not present + +## Authentication + +This role uses the IBM Cloud API key for authentication. The API key must have Classic Infrastructure permissions. + +## Role Variables + +### Required Variables + +| Variable | Description | Default | +|----------|-------------|---------| +| `ibm_cloud_classic_api_key` | IBM Cloud API key with Classic Infrastructure permissions | None (required) | +| `guid` | Unique identifier for the deployment | None (required) | +| `env_authorized_key_path` | Path to the private SSH key file | None (required) | + +### Optional Variables + +| Variable | Description | Default | +|----------|-------------|---------| +| `ssh_key_name` | Name of the SSH key in IBM Cloud Classic | `{{ guid }}-ssh-key` | +| `ssh_key_label` | Label for the SSH key | `AgnosticD SSH Key for {{ guid }}` | +| `ssh_provision_key_path` | Path to the private SSH key file | `{{ env_authorized_key_path }}` | +| `ssh_provision_pubkey_path` | Path to the public SSH key file | `{{ env_authorized_key_path_pub }}` | +| `wait_for_completion` | Whether to wait for operations to complete | `true` | +| `ssh_key_tags` | Tags to apply to the SSH key | See defaults/main.yml | +| `ibm_cloud_cli_install_url` | URL for IBM Cloud CLI installation script | `https://clis.cloud.ibm.com/install/linux` | + +## Dependencies + +- `create_ssh_provision_key` role should be run first to generate SSH keys +- `locate_env_authorized_key` role should be run to set up key paths + +## Example Usage + +### In a playbook: + +```yaml +- name: Create SSH key in IBM Cloud Classic + include_role: + name: infra-ibm-cloud-classic-ssh-key + vars: + ACTION: provision + ibm_cloud_classic_api_key: "{{ ibm_cloud_classic_api_key }}" +``` + +### In infrastructure deployment: + +```yaml +- name: Create SSH key in IBM Cloud Classic + include_role: + name: "infra-ibm-cloud-classic-ssh-key" + vars: + ACTION: provision + ibm_cloud_classic_api_key: "{{ ibm_cloud_classic_api_key }}" + when: + - instances is defined + - instances | length > 0 +``` + +## Destroying SSH Keys + +To destroy the SSH key: + +```yaml +- name: Destroy SSH key in IBM Cloud Classic + include_role: + name: infra-ibm-cloud-classic-ssh-key + vars: + ACTION: destroy + ibm_cloud_classic_api_key: "{{ ibm_cloud_classic_api_key }}" +``` + +## Return Values + +The role sets the following facts that can be used by other roles: + +- `ssh_key_id`: The ID of the created/found SSH key +- `env_authorized_key_id`: Same as ssh_key_id (for compatibility) + +## How it works + +1. **CLI Installation**: Automatically installs IBM Cloud CLI if not present +2. **Authentication**: Logs in using the provided API key +3. **Key Detection**: Checks if SSH key already exists by name +4. **Key Creation**: Creates new SSH key if it doesn't exist +5. 
**Information Storage**: Saves key information to output directory +6. **Fact Setting**: Sets ssh_key_id fact for use by other roles + +## Integration with AgnosticD + +This role is designed to work with AgnosticD's infrastructure deployment patterns: + +- Follows the `ACTION` variable convention (provision/destroy) +- Uses standard AgnosticD variables (`guid`, `env_authorized_key_path`, etc.) +- Integrates with the `locate_env_authorized_key` role +- Works with the existing infrastructure deployment workflows + +## Error Handling + +The role includes strict error handling with no fallback modes: + +- **SoftLayer Plugin Required**: The role will fail if the SoftLayer plugin is not installed or available. This ensures SSH key operations can be performed reliably. +- **Variable Validation**: Validates required variables are present before proceeding +- **SSH Key File Validation**: Checks SSH key file existence and readability +- **IBM Cloud Authentication**: Verifies IBM Cloud CLI installation and API key authentication +- **Detailed Error Messages**: Provides comprehensive troubleshooting information on failures +- **Existing Key Detection**: Handles cases where SSH keys already exist or don't exist during create/destroy operations + +## Security Considerations + +- API key is marked with `no_log: true` to prevent logging +- SSH key information is saved with restricted permissions +- Role validates authentication before proceeding +- Cleanup removes temporary files on destruction \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-ssh-key/defaults/main.yml b/ansible/roles-infra/infra-ibm-cloud-classic-ssh-key/defaults/main.yml new file mode 100644 index 00000000000..567f6058c60 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-ssh-key/defaults/main.yml @@ -0,0 +1,22 @@ +--- +# IBM Cloud Classic SSH Key Infrastructure Role - Default Variables + +# SSH key configuration +ssh_key_name: "{{ guid }}-ssh-key" +ssh_key_label: "AgnosticD SSH Key for {{ guid }}" + +# IBM Cloud Classic API configuration +# ibm_cloud_classic_api_key: "" # Set via environment variable or extra vars + +# SSH key file paths (consistent with AgnosticD patterns) +ssh_provision_key_path: "{{ env_authorized_key_path }}" +ssh_provision_pubkey_path: "{{ env_authorized_key_path_pub }}" + +# Whether to wait for operations to complete +wait_for_completion: true + +# Tags for the SSH key +ssh_key_tags: + - "agnosticd" + - "guid:{{ guid }}" + - "env_type:{{ env_type | default('unknown') }}" \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-ssh-key/meta/main.yml b/ansible/roles-infra/infra-ibm-cloud-classic-ssh-key/meta/main.yml new file mode 100644 index 00000000000..002b290b77f --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-ssh-key/meta/main.yml @@ -0,0 +1,30 @@ +--- +galaxy_info: + author: AgnosticD Team + description: Infrastructure role for managing SSH keys in IBM Cloud Classic + license: BSD + min_ansible_version: 2.9 + platforms: + - name: EL + versions: + - 7 + - 8 + - 9 + - name: Ubuntu + versions: + - 18.04 + - 20.04 + - 22.04 + galaxy_tags: + - cloud + - infrastructure + - ibm + - ssh + - classic + - agnosticd + +dependencies: [] + +collections: + - ansible.builtin + - ansible.posix \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-ssh-key/tasks/create.yml b/ansible/roles-infra/infra-ibm-cloud-classic-ssh-key/tasks/create.yml new file mode 100644 index 00000000000..af781b68ca5 --- 
/dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-ssh-key/tasks/create.yml @@ -0,0 +1,93 @@ +--- +# IBM Cloud Classic SSH Key Infrastructure Role - Create Tasks + +- name: Verify IBM Cloud CLI is available and logged in + ansible.builtin.command: ibmcloud account show + register: ibmcloud_login_status + failed_when: false + changed_when: false + +- name: Fail if not logged in to IBM Cloud + ansible.builtin.fail: + msg: "Not logged in to IBM Cloud. Please ensure login is performed before calling this role." + when: ibmcloud_login_status.rc != 0 + +- name: Check available security commands + ansible.builtin.command: ibmcloud sl security help + register: sl_security_help + failed_when: false + changed_when: false + +- name: Check if SSH provision key exists locally + stat: + path: "{{ ssh_provision_key_path }}" + register: stat_ssh_provision_key + +- name: Fail if SSH provision key doesn't exist + fail: + msg: "SSH provision key not found at {{ ssh_provision_key_path }}." + when: not stat_ssh_provision_key.stat.exists + +- name: List SSH keys in IBM Cloud Classic + ansible.builtin.command: ibmcloud sl security sshkey-list --output json + register: existing_ssh_keys_result + changed_when: false + failed_when: false + +- name: Test if ibmcloud cli ran correctly + ansible.builtin.set_fact: + softlayer_available: "{{ existing_ssh_keys_result.rc == 0 }}" + +- name: Fail if unable to list SSH keys + ansible.builtin.fail: + msg: | + Unable to list SSH keys with ibmcloud cli. + when: not (softlayer_available | default(false)) + +- name: Parse existing SSH keys + ansible.builtin.set_fact: + existing_ssh_keys: "{{ existing_ssh_keys_result.stdout | from_json }}" + when: existing_ssh_keys_result.stdout != "" + +- name: Set empty SSH keys list if no output + ansible.builtin.set_fact: + existing_ssh_keys: [] + when: existing_ssh_keys_result.stdout == "" + +- name: Check if SSH key with same name already exists + ansible.builtin.set_fact: + existing_ssh_key: "{{ existing_ssh_keys | selectattr('label', 'equalto', ssh_key_name) | first | default({}) }}" + +- name: Create SSH key in IBM Cloud Classic + ansible.builtin.command: >- + ibmcloud sl security sshkey-add + "{{ ssh_key_name }}" + --key "{{ ssh_provision_pubkey_content }}" + --output json + register: ssh_key_create_result + failed_when: false + when: existing_ssh_key == {} + +- name: Fail if unable to create SSH key + ansible.builtin.fail: + msg: | + Unable to create SSH key using 'sshkey-add' command. 
+ when: existing_ssh_key == {} and ssh_key_create_result.rc != 0 + +- name: Parse created SSH key information + ansible.builtin.set_fact: + created_ssh_key: "{{ ssh_key_create_result.stdout | from_json }}" + when: existing_ssh_key == {} and ssh_key_create_result is defined and ssh_key_create_result.rc == 0 + +- name: Set SSH key ID from existing or created key + ansible.builtin.set_fact: + ssh_key_id: "{{ existing_ssh_key.ID | default(created_ssh_key.id) }}" + +- name: Get SSH key details from list output + ansible.builtin.set_fact: + ssh_key_details: "{{ existing_ssh_keys | selectattr('ID', 'equalto', ssh_key_id) | first | default({}) }}" + +- name: Set SSH key details from created key if not found in list + ansible.builtin.set_fact: + ssh_key_details: "{{ created_ssh_key | default({}) }}" + when: ssh_key_details == {} and created_ssh_key is defined \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-ssh-key/tasks/destroy.yml b/ansible/roles-infra/infra-ibm-cloud-classic-ssh-key/tasks/destroy.yml new file mode 100644 index 00000000000..468ed84f401 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-ssh-key/tasks/destroy.yml @@ -0,0 +1,105 @@ +--- +# IBM Cloud Classic SSH Key Infrastructure Role - Destroy Tasks + +- name: Check if IBM Cloud CLI is installed + ansible.builtin.command: ibmcloud --version + register: ibmcloud_version + failed_when: false + changed_when: false + +- name: Install IBM Cloud CLI if not present + block: + - name: Download and install IBM Cloud CLI + ansible.builtin.shell: | + curl -fsSL {{ ibm_cloud_cli_install_url }} | sh + become: true + + - name: Verify IBM Cloud CLI installation + ansible.builtin.command: ibmcloud --version + register: ibmcloud_verify + failed_when: ibmcloud_verify.rc != 0 + changed_when: false + + when: ibmcloud_version.rc != 0 + +- name: Login to IBM Cloud with API key + ansible.builtin.command: ibmcloud login --apikey {{ ibm_cloud_classic_api_key }} + register: ibmcloud_login_result + failed_when: false + changed_when: false + no_log: true + +- name: Check if logged in to IBM Cloud + ansible.builtin.command: ibmcloud account show + register: ibmcloud_login_status + failed_when: false + changed_when: false + +- name: Fail if not logged in to IBM Cloud + ansible.builtin.fail: + msg: "Not logged in to IBM Cloud. Please check your API key." + when: ibmcloud_login_status.rc != 0 + +- name: List SSH keys in IBM Cloud Classic + ansible.builtin.command: ibmcloud sl security sshkey-list --output json + register: existing_ssh_keys_result + changed_when: false + failed_when: false + +- name: Check if ibm cloud cli ran correctly + ansible.builtin.set_fact: + softlayer_available: "{{ existing_ssh_keys_result.rc == 0 }}" + +- name: Fail if unable to list SSH keys + ansible.builtin.fail: + msg: | + Unable to list SSH keys with ibmcloud cli. 
+ when: not (softlayer_available | default(false)) + +- name: Parse existing SSH keys + ansible.builtin.set_fact: + existing_ssh_keys: "{{ existing_ssh_keys_result.stdout | from_json }}" + when: existing_ssh_keys_result.stdout != "" + +- name: Set empty SSH keys list if no output + ansible.builtin.set_fact: + existing_ssh_keys: [] + when: existing_ssh_keys_result.stdout == "" + +- name: Find SSH key to delete by name + ansible.builtin.set_fact: + ssh_key_to_delete: "{{ existing_ssh_keys | selectattr('label', 'equalto', ssh_key_name) | first | default({}) }}" + +- name: Display SSH key to be deleted + ansible.builtin.debug: + msg: + - "Found SSH key to delete" + - "ID: {{ ssh_key_to_delete.ID }}" + - "Label: {{ ssh_key_to_delete.label }}" + - "Fingerprint: {{ ssh_key_to_delete.fingerprint | default('N/A') }}" + when: ssh_key_to_delete != {} + +- name: Delete SSH key from IBM Cloud Classic + ansible.builtin.command: ibmcloud sl security sshkey-remove {{ ssh_key_to_delete.ID }} --force + register: ssh_key_delete_result + failed_when: false + when: ssh_key_to_delete != {} + +- name: Fail if unable to delete SSH key + ansible.builtin.fail: + msg: | + Unable to delete SSH key using 'sshkey-remove' command. + when: ssh_key_to_delete != {} and ssh_key_delete_result.rc != 0 + +- name: Display deletion result + ansible.builtin.debug: + msg: "SSH key '{{ ssh_key_name }}' (ID: {{ ssh_key_to_delete.ID }}) deleted successfully" + when: ssh_key_to_delete != {} and ssh_key_delete_result.rc == 0 + +- name: Warn if SSH key not found + ansible.builtin.debug: + msg: + - "Warning: SSH key '{{ ssh_key_name }}' not found in IBM Cloud Classic." + - "It may have already been deleted or never existed." + - "Searched by name: {{ ssh_key_name }}" + when: ssh_key_to_delete == {} \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-ssh-key/tasks/main.yml b/ansible/roles-infra/infra-ibm-cloud-classic-ssh-key/tasks/main.yml new file mode 100644 index 00000000000..d8242afedcf --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-ssh-key/tasks/main.yml @@ -0,0 +1,30 @@ +--- +# IBM Cloud Classic SSH Key Infrastructure Role - Main Tasks + +- name: Validate required variables for provision + assert: + that: + - guid is defined + - env_authorized_key_path is defined + fail_msg: "Required variables are missing: guid, env_authorized_key_path" + when: ACTION == 'provision' + +- name: Validate required variables for destroy + assert: + that: + - guid is defined + fail_msg: "Required variable is missing: guid" + when: ACTION == 'destroy' + +- name: Set SSH key name based on guid + set_fact: + ssh_key_name: "{{ guid }}-ssh-key" + when: ssh_key_name is not defined or ssh_key_name == '' + +- name: Include create tasks + include_tasks: create.yml + when: ACTION == 'provision' + +- name: Include destroy tasks + include_tasks: destroy.yml + when: ACTION == 'destroy' \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-vm-inventory/README.md b/ansible/roles-infra/infra-ibm-cloud-classic-vm-inventory/README.md new file mode 100644 index 00000000000..54859459358 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-vm-inventory/README.md @@ -0,0 +1,149 @@ +# IBM Cloud Classic VM Inventory Role + +This role extracts VM information from Terraform outputs and manages in-memory inventory for IBM Cloud Classic VMs. It processes Terraform state, creates VM metadata, and adds VMs to Ansible's in-memory inventory without creating physical inventory files. 
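+
+At its core, the role wraps `ansible.builtin.add_host`. A simplified sketch of the pattern (here `vm` stands in for one entry of the role's `vm_info` list; the real task in `tasks/main.yml` attaches the full metadata and computed groups):
+
+```yaml
+- name: Add a deployed VM to the in-memory inventory (simplified sketch)
+  ansible.builtin.add_host:
+    name: "{{ vm.fqdn }}"
+    groups:
+      - all
+      - "datacenter_{{ vm.datacenter }}"
+    ansible_host: "{{ vm.public_ip }}"
+    ansible_user: root
+    vm_id: "{{ vm.id }}"  # extra add_host arguments become host variables
+```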
+ +## Purpose + +- **Terraform integration**: Extracts VM information from Terraform outputs after deployment +- **Separated from deployment**: Extracted from `infra-ibm-cloud-classic-deploy-vms` for cleaner separation of concerns +- **In-memory only**: Adds hosts to Ansible's in-memory inventory instead of creating files +- **Group management**: Automatically assigns VMs to inventory groups based on tags and metadata +- **SSH testing**: Verifies SSH connectivity to deployed VMs + +## Features + +### Automatic Group Assignment + +VMs are automatically added to multiple inventory groups: +- **Main group**: `all` (configurable via `inventory_group`) +- **OS-based**: `tag_rhel_9`, `tag_ubuntu_22`, etc. +- **Datacenter-based**: `datacenter_dal13`, `datacenter_wdc07`, etc. +- **Custom groups**: Via `ansible_group:groupname` tags + +### VM Metadata + +Each host gets comprehensive VM metadata as host variables: +- `vm_id`, `vm_hostname`, `vm_fqdn` +- `vm_public_ip`, `vm_private_ip` +- `vm_cores`, `vm_memory`, `vm_datacenter` +- `vm_operating_system`, `vm_status` +- `vm_private_security_group_id`, `vm_public_security_group_id` +- `vm_tags`, `vm_creation_date` +- `vm_private_vlan_id`, `vm_public_vlan_id` + +### SSH Configuration + +Automatically configures SSH access: +- Sets `ansible_host` to public IP +- Sets `ansible_user` and `ansible_ssh_private_key_file` +- Tests SSH connectivity and reports results + +## Usage + +### Basic Usage + +```yaml +- name: Create VM inventory + include_role: + name: "infra-ibm-cloud-classic-vm-inventory" +``` + +### With Custom SSH Configuration + +```yaml +- name: Create VM inventory + include_role: + name: "infra-ibm-cloud-classic-vm-inventory" + vars: + ssh_user: "cloud-user" + ssh_private_key_path: "/path/to/private/key" + inventory_group: "vms" +``` + +### Disable SSH Testing + +```yaml +- name: Create VM inventory + include_role: + name: "infra-ibm-cloud-classic-vm-inventory" + vars: + wait_for_ssh: false + test_ssh_connection: false +``` + +## Required Variables + +None - the role automatically extracts VM information from Terraform outputs in the configured working directory. 
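+
+For reference, the role expects `terraform output -json` to expose list-valued outputs roughly shaped like the following (illustrative values, rendered as YAML for readability; `tasks/main.yml` shows the full set of outputs that are read):
+
+```yaml
+vm_count: { value: 2 }
+vm_ids: { value: ["12345678", "12345679"] }
+vm_hostnames: { value: ["web-01", "db-01"] }
+vm_fqdns: { value: ["web-01.example.com", "db-01.example.com"] }
+vm_public_ips: { value: ["169.62.0.10", "169.62.0.11"] }
+```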
+ +## Optional Variables + +| Variable | Type | Default | Description | +|----------|------|---------|-------------| +| `terraform_working_dir` | String | `{{ output_dir }}/terraform-ibm-vm-{{ guid }}` | Terraform working directory | +| `save_vm_info` | Boolean | `true` | Save VM info to JSON file | +| `ssh_user` | String | `root` | SSH username for VMs | +| `ssh_private_key_path` | String | `{{ env_authorized_key_path }}` | Path to SSH private key | +| `inventory_group` | String | `all` | Main inventory group name | +| `wait_for_ssh` | Boolean | `true` | Wait for SSH to be available | +| `test_ssh_connection` | Boolean | `true` | Test SSH connections | +| `ssh_wait_delay` | Integer | `10` | Seconds to wait before SSH checks | +| `ssh_wait_timeout` | Integer | `300` | Total SSH availability timeout | +| `ssh_connect_timeout` | Integer | `5` | Individual connection timeout | +| `ssh_retry_interval` | Integer | `2` | Seconds between connection retries | + +## VM Info Format + +The role automatically creates the `vm_info` variable by extracting data from Terraform outputs: + +```yaml +vm_info: + - id: "12345678" + hostname: "web-01" + domain: "example.com" + fqdn: "web-01.example.com" + public_ip: "169.62.x.x" + private_ip: "10.x.x.x" + datacenter: "dal13" + cores: 2 + memory: 4096 + operating_system: "REDHAT_9_64" + network_speed: 1000 + status: "RUNNING" + ssh_connection: "ssh -i /path/to/key root@169.62.x.x" + private_security_group_id: ["sg-123"] + public_security_group_id: ["sg-456"] + tags: ["agnosticd", "ansible_group:webservers"] + hourly_billing: true + creation_date: "2024-01-01T00:00:00Z" + private_vlan_id: "vlan-123" + public_vlan_id: "vlan-456" +``` + +## Integration + +This role is automatically called by `infra-ibm-cloud-classic-deploy-vms` when `ACTION == 'provision'`. It can also be used independently for any IBM Cloud Classic VMs. 
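+
+For standalone use, a minimal play might look like this sketch (assuming `output_dir` and `guid` are already set, as elsewhere in AgnosticD):
+
+```yaml
+- name: Build inventory from an existing Terraform deployment
+  hosts: localhost
+  connection: local
+  gather_facts: false
+  tasks:
+    - name: Create VM inventory
+      ansible.builtin.include_role:
+        name: "infra-ibm-cloud-classic-vm-inventory"
+      vars:
+        terraform_working_dir: "{{ output_dir }}/terraform-ibm-vm-{{ guid }}"
+```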
+ +## Example Output + +``` +TASK [infra-ibm-cloud-classic-vm-inventory : Display inventory information] +ok: [localhost] => { + "msg": [ + "In-memory inventory created successfully", + "Total VMs added: 3", + "Main inventory group: all", + "Custom ansible groups detected: 2", + " webservers: web-01, web-02", + " databases: db-01", + "Access all VMs via: ansible all -m ping", + "Access webservers group via: ansible webservers -m ping", + "Access databases group via: ansible databases -m ping" + ] +} +``` + +## Dependencies + +- IBM Cloud Classic VMs must be deployed via Terraform +- Terraform working directory must exist with valid outputs +- SSH keys must be properly configured for deployed VMs \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-vm-inventory/defaults/main.yml b/ansible/roles-infra/infra-ibm-cloud-classic-vm-inventory/defaults/main.yml new file mode 100644 index 00000000000..ad5581c79ba --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-vm-inventory/defaults/main.yml @@ -0,0 +1,23 @@ +--- +# IBM Cloud Classic VM Inventory Role - Default Variables + +# Terraform configuration +terraform_working_dir: "{{ output_dir }}/terraform-ibm-vm-{{ guid | default('default') }}" +save_vm_info: true + +# SSH configuration +ssh_user: "root" +ssh_private_key_path: "{{ env_authorized_key_path }}" + +# SSH wait timing configuration +ssh_wait_delay: 10 # Seconds to wait before starting SSH checks +ssh_wait_timeout: 300 # Total timeout for SSH to become available +ssh_connect_timeout: 5 # Timeout for individual connection attempts +ssh_retry_interval: 2 # Seconds to wait between connection retries + +# Inventory configuration +inventory_group: "all" + +# Connection testing +wait_for_ssh: true +test_ssh_connection: true \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-vm-inventory/tasks/main.yml b/ansible/roles-infra/infra-ibm-cloud-classic-vm-inventory/tasks/main.yml new file mode 100644 index 00000000000..c436bc8f286 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-vm-inventory/tasks/main.yml @@ -0,0 +1,187 @@ +--- +- name: Get Terraform outputs + ansible.builtin.command: + cmd: terraform output -json + chdir: "{{ terraform_working_dir }}" + register: terraform_outputs + changed_when: false + +- name: Parse Terraform outputs + ansible.builtin.set_fact: + vm_outputs: "{{ terraform_outputs.stdout | from_json }}" + +- name: Initialize VM info list + ansible.builtin.set_fact: + vm_info: [] + +- name: Get VM count from outputs + ansible.builtin.set_fact: + vm_count: "{{ vm_outputs.vm_count.value | default(0) | int }}" + when: vm_outputs.vm_count is defined + +- name: Set VM count to 0 if not defined + ansible.builtin.set_fact: + vm_count: 0 + when: vm_outputs.vm_count is not defined + +- name: Store VM information (multiple VMs) + ansible.builtin.set_fact: + vm_info: "{{ vm_info | default([]) + [vm_item] }}" + vars: + vm_item: + id: "{{ vm_outputs.vm_ids.value[item] }}" + hostname: "{{ vm_outputs.vm_hostnames.value[item] }}" + domain: "{{ vm_outputs.vm_domains.value[item] }}" + fqdn: "{{ vm_outputs.vm_fqdns.value[item] }}" + public_ip: "{{ vm_outputs.vm_public_ips.value[item] }}" + private_ip: "{{ vm_outputs.vm_private_ips.value[item] }}" + datacenter: "{{ vm_outputs.vm_datacenters.value[item] }}" + cores: "{{ vm_outputs.vm_cores.value[item] }}" + memory: "{{ vm_outputs.vm_memory.value[item] }}" + operating_system: "{{ vm_outputs.vm_operating_systems.value[item] }}" + network_speed: "{{ 
vm_outputs.vm_network_speeds.value[item] }}"
+      status: "{{ vm_outputs.vm_statuses.value[item] }}"
+      ssh_connection: "{{ vm_outputs.ssh_connection_commands.value[item] }}"
+      private_security_group_id: "{{ vm_outputs.private_security_group_ids.value[item] }}"
+      public_security_group_id: "{{ vm_outputs.public_security_group_ids.value[item] }}"
+      tags: "{{ vm_outputs.vm_tags.value[item] }}"
+      hourly_billing: "{{ vm_outputs.vm_hourly_billings.value[item] }}"
+      creation_date: "{{ vm_outputs.vm_creation_dates.value[item] }}"
+      private_vlan_id: "{{ vm_outputs.private_vlan_ids.value[item] }}"
+      public_vlan_id: "{{ vm_outputs.public_vlan_ids.value[item] }}"
+  loop: "{{ range(vm_count | int) | list }}"
+  when: vm_count | int > 0
+
+- name: Display VM deployment results
+  ansible.builtin.debug:
+    msg:
+      - "VM deployment completed successfully"
+      - "Total VMs deployed: {{ vm_info | length }}"
+      - "{% if vm_info | length > 0 %}VMs:{% endif %}"
+      - "{% for vm in vm_info %} - {{ vm.hostname }}.{{ vm.domain }} ({{ vm.fqdn }}) - {{ vm.public_ip }} - {{ vm.status }}{% endfor %}"
+      - "{% if vm_info | length == 0 %}No VMs were created - check your instances configuration{% endif %}"
+
+- name: Display individual VM details
+  ansible.builtin.debug:
+    msg:
+      - "VM {{ item.hostname }}.{{ item.domain }} Details:"
+      - " ID: {{ item.id }}"
+      - " FQDN: {{ item.fqdn }}"
+      - " Public IP: {{ item.public_ip }}"
+      - " Private IP: {{ item.private_ip }}"
+      - " SSH Command: {{ item.ssh_connection }}"
+      - " Status: {{ item.status }}"
+      - " Datacenter: {{ item.datacenter }}"
+      - " Specs: {{ item.cores }} cores, {{ item.memory }}MB RAM"
+  loop: "{{ vm_info }}"
+  loop_control:
+    label: "{{ item.hostname }}"
+  when: vm_info | length > 0
+
+- name: Save VM information to file
+  ansible.builtin.copy:
+    content: "{{ vm_info | to_nice_json }}"
+    dest: "{{ terraform_working_dir }}/vm_info.json"
+    mode: '0644'
+  when: save_vm_info | default(true)
+
+- name: Add VMs to in-memory inventory
+  ansible.builtin.add_host:
+    name: "{{ item.fqdn }}"
+    groups: "{{ vm_groups }}"
+    ansible_host: "{{ item.public_ip }}"
+    ansible_user: "{{ ssh_user | default('root') }}"
+    ansible_ssh_private_key_file: "{{ ssh_private_key_path }}"
+    # VM metadata (extra add_host arguments become host variables)
+    vm_id: "{{ item.id }}"
+    vm_hostname: "{{ item.hostname }}"
+    vm_domain: "{{ item.domain }}"
+    vm_fqdn: "{{ item.fqdn }}"
+    vm_public_ip: "{{ item.public_ip }}"
+    vm_private_ip: "{{ item.private_ip }}"
+    vm_datacenter: "{{ item.datacenter }}"
+    vm_cores: "{{ item.cores }}"
+    vm_memory: "{{ item.memory }}"
+    vm_operating_system: "{{ item.operating_system }}"
+    vm_network_speed: "{{ item.network_speed }}"
+    vm_status: "{{ item.status }}"
+    vm_private_security_group_id: "{{ item.private_security_group_id | default([]) }}"
+    vm_public_security_group_id: "{{ item.public_security_group_id | default([]) }}"
+    vm_tags: "{{ item.tags }}"
+    vm_hourly_billing: "{{ item.hourly_billing }}"
+    vm_creation_date: "{{ item.creation_date }}"
+    vm_private_vlan_id: "{{ item.private_vlan_id }}"
+    vm_public_vlan_id: "{{ item.public_vlan_id }}"
+  vars:
+    vm_groups: "{{ base_groups + ansible_groups }}"
+    base_groups:
+      - "{{ inventory_group | default('all') }}"
+      - "tag_{{ item.operating_system | lower | regex_replace('[^a-zA-Z0-9_]', '_') }}"
+      - "datacenter_{{ item.datacenter }}"
+    ansible_groups: "{{ item.tags | select('match', '^ansible_group:.*') | map('regex_replace', '^ansible_group:', '') | map('regex_replace', '[^a-zA-Z0-9_]', '_') | list }}"
+  loop: "{{ vm_info }}"
+  loop_control:
+    label: "{{ item.hostname }}"
+
+-
name: Wait for SSH to be available on all VMs + ansible.builtin.wait_for: + host: "{{ item.public_ip }}" + port: 22 + delay: "{{ ssh_wait_delay | default(10) }}" + timeout: "{{ ssh_wait_timeout | default(300) }}" + connect_timeout: "{{ ssh_connect_timeout | default(5) }}" + sleep: "{{ ssh_retry_interval | default(2) }}" + state: started + loop: "{{ vm_info }}" + loop_control: + label: "{{ item.hostname }}" + when: wait_for_ssh | default(true) + +- name: Test SSH connection to all VMs + ansible.builtin.command: + cmd: "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i {{ ssh_private_key_path }} {{ ssh_user | default('root') }}@{{ item.public_ip }} 'echo SSH connection successful'" + register: ssh_tests + failed_when: false + changed_when: false + loop: "{{ vm_info }}" + loop_control: + label: "{{ item.hostname }}" + when: test_ssh_connection | default(true) + +- name: Display SSH test results + ansible.builtin.debug: + msg: + - "SSH Test for {{ item.item.hostname }}: {{ 'PASSED' if item.rc == 0 else 'FAILED' }}" + - "{{ item.stdout if item.rc == 0 else item.stderr }}" + loop: "{{ ssh_tests.results }}" + loop_control: + label: "{{ item.item.hostname }}" + when: test_ssh_connection | default(true) and ssh_tests is defined + +- name: Initialize display ansible groups + ansible.builtin.set_fact: + display_ansible_groups: {} + +- name: Build ansible groups for display + ansible.builtin.set_fact: + display_ansible_groups: "{{ display_ansible_groups | default({}) | combine({group_name: (display_ansible_groups[group_name] | default([])) + [item]}) }}" + vars: + group_name: "{{ ansible_group_tag.split(':', 1)[1] | regex_replace('[^a-zA-Z0-9_]', '_') }}" + ansible_group_tag: "{{ item.tags | select('match', '^ansible_group:.*') | first }}" + when: item.tags | select('match', '^ansible_group:.*') | list | length > 0 + loop: "{{ vm_info }}" + loop_control: + label: "{{ item.hostname }}" + +- name: Display inventory information + ansible.builtin.debug: + msg: + - "In-memory inventory created successfully" + - "Total VMs added: {{ vm_info | length }}" + - "Main inventory group: {{ inventory_group | default('all') }}" + - "Custom ansible groups detected: {{ display_ansible_groups.keys() | list | length | default(0) }}" + - "{% if display_ansible_groups is defined %}{% for group_name, group_vms in display_ansible_groups.items() %} {{ group_name }}: {{ group_vms | map(attribute='hostname') | join(', ') }}{% endfor %}{% endif %}" + - "Access all VMs via: ansible {{ inventory_group | default('all') }} -m ping" + - "{% if display_ansible_groups is defined %}{% for group_name in display_ansible_groups.keys() %}Access {{ group_name }} group via: ansible {{ group_name }} -m ping{% endfor %}{% endif %}" + - "Individual VM access:" + - "{% for vm in vm_info %} {{ vm.hostname }}: ssh -i {{ ssh_private_key_path }} {{ ssh_user | default('root') }}@{{ vm.public_ip }}{% endfor %}" \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-vm-lifecycle/README.md b/ansible/roles-infra/infra-ibm-cloud-classic-vm-lifecycle/README.md new file mode 100644 index 00000000000..732e070258f --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-vm-lifecycle/README.md @@ -0,0 +1,204 @@ +# IBM Cloud Classic VM Lifecycle Management Role + +This AgnosticD infrastructure role manages the lifecycle of IBM Cloud Classic Virtual Machines (VMs) by reading VM information from Terraform state and using IBM Cloud CLI for power management operations. 
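+
+Each power operation is a thin wrapper around the IBM Cloud CLI. The following sketch is simplified from `tasks/start.yml` (`vm_id` stands in for one entry of the `vm_ids` Terraform output):
+
+```yaml
+- name: Start a VM via the IBM Cloud CLI (simplified sketch)
+  ansible.builtin.command:
+    cmd: "ibmcloud sl vs power-on {{ vm_id }} --force"
+  environment:
+    IBMCLOUD_API_KEY: "{{ ibm_cloud_api_key }}"
+  failed_when: false  # non-zero exits such as "already running" are evaluated afterwards and treated as success
+```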
+ +## Features + +- **VM Lifecycle Management**: Start, stop, and check status of IBM Cloud Classic VMs +- **Terraform State Integration**: Automatically discovers VMs from existing Terraform deployments +- **Action-based Operations**: Supports ACTION variable for different lifecycle operations +- **IBM Cloud CLI Integration**: Uses IBM Cloud CLI for direct VM power management +- **Intelligent Error Handling**: Detects "AlreadyHalted" conditions and treats them as success +- **Pre-operation Status Checking**: Checks VM state before operations to provide context +- **User Information**: Provides detailed status information via agnosticd_user_info + +## Prerequisites + +- IBM Cloud account with Classic Infrastructure access +- IBM Cloud API key with appropriate permissions +- IBM Cloud CLI installed and available in PATH +- Existing VM deployment with Terraform state in output directory +- VMs created with `infra-ibm-cloud-classic-manage-vms` role + +## Required Variables + +```yaml +ibm_cloud_api_key: "YOUR_IBM_CLOUD_API_KEY" +output_dir: "/path/to/output/directory" +ACTION: "start|stop|status" +``` + +## Usage + +### Start VMs +```yaml +- name: Start IBM Cloud Classic VMs + include_role: + name: infra-ibm-cloud-classic-vm-lifecycle + vars: + ibm_cloud_api_key: "{{ ibm_cloud_api_key }}" + output_dir: "{{ output_dir }}" + ACTION: "start" +``` + +### Stop VMs +```yaml +- name: Stop IBM Cloud Classic VMs + include_role: + name: infra-ibm-cloud-classic-vm-lifecycle + vars: + ibm_cloud_api_key: "{{ ibm_cloud_api_key }}" + output_dir: "{{ output_dir }}" + ACTION: "stop" +``` + +### Check VM Status +```yaml +- name: Check IBM Cloud Classic VM Status + include_role: + name: infra-ibm-cloud-classic-vm-lifecycle + vars: + ibm_cloud_api_key: "{{ ibm_cloud_api_key }}" + output_dir: "{{ output_dir }}" + ACTION: "status" +``` + +## Command Line Usage + +```bash +# Start VMs +ansible-playbook lifecycle.yml -e ACTION=start -e ibm_cloud_api_key=YOUR_API_KEY -e output_dir=/path/to/output + +# Stop VMs +ansible-playbook lifecycle.yml -e ACTION=stop -e ibm_cloud_api_key=YOUR_API_KEY -e output_dir=/path/to/output + +# Check status +ansible-playbook lifecycle.yml -e ACTION=status -e ibm_cloud_api_key=YOUR_API_KEY -e output_dir=/path/to/output +``` + +## Optional Variables + +```yaml +# Verbosity level (default: 0) +verbosity: 1 + +# Default action if not specified +ACTION: "status" +``` + +## How It Works + +1. **Initialize**: Validates output directory and Terraform working directory +2. **Read State**: Extracts VM information from Terraform outputs +3. **Action**: Performs the specified action using IBM Cloud CLI: + - **Start**: Checks current state, then uses `ibmcloud sl vs power-on` for each VM + - Handles "Already running" errors as success conditions + - Provides detailed before/after state information + - **Stop**: Checks current state, then uses `ibmcloud sl vs power-off` for each VM + - Handles "AlreadyHalted" errors as success conditions + - Provides detailed before/after state information + - **Status**: Uses `ibmcloud sl vs detail` to get current state +4. 
**Report**: Provides detailed status information and completion messages + +## Terraform Integration + +The role reads VM information from: +- Terraform working directory: `{{ output_dir }}/terraform-ibm-vm-{{ guid }}` +- Terraform state file in the working directory +- Terraform outputs containing VM details: + - `vm_ids`: VM instance IDs for CLI operations + - `vm_hostnames`: VM hostnames for reporting + - `vm_public_ips` and `vm_private_ips`: IP addresses + - `vm_datacenters`: Datacenter locations + - `deployment_summary`: Overall deployment information + +## Dependencies + +This role has no external role dependencies and works directly with: +- Terraform state files +- IBM Cloud CLI +- IBM Cloud Classic Infrastructure API + +## Output + +### Status Action +The status action provides detailed information about: +- VM instance IDs and hostnames +- IP addresses (public and private) +- Datacenter locations +- Current power states +- Deployment summary + +### Start/Stop Actions +The start and stop actions provide: +- Per-VM operation results with initial state information +- Success/failure status for each VM (including "AlreadyHalted" and "Already running" as success) +- Detailed error analysis and recommended actions +- Total operation summary with final state confirmation + +## Error Handling + +The role includes validation for: +- Required API key presence +- Output directory existence +- Terraform working directory presence +- Terraform state file existence +- VM existence in Terraform state + +## Security + +- API keys are handled securely through environment variables +- No sensitive information is logged in debug output +- Uses IBM Cloud CLI authentication mechanisms + +## Troubleshooting + +### Common Issues + +1. **No VMs Found** + - Ensure `output_dir` points to a directory with Terraform state + - Verify VMs were deployed using the `infra-ibm-cloud-classic-manage-vms` role + - Check that Terraform deployment completed successfully + +2. **API Authentication Errors** + - Verify `ibm_cloud_api_key` has appropriate permissions + - Check API key is not expired + - Ensure IBM Cloud CLI is properly installed + +3. **Terraform State Not Found** + - Confirm the Terraform working directory `{{ output_dir }}/terraform-ibm-vm-{{ guid }}` exists + - Verify `terraform.tfstate` file exists in the Terraform working directory + - Check that the deployment was not cleaned up + +4. **VM Operation Failures** + - Verify IBM Cloud CLI is installed and accessible + - Check that VMs still exist in IBM Cloud + - Confirm API key has VM management permissions + +5. **"AlreadyHalted" Errors on Stop Operations** + - These are **expected and indicate success** - the VM was already stopped + - The role automatically detects this condition and treats it as successful + - Example error: `SoftLayer_Exception_Virtual_Guest_AlreadyHalted: Failed to halt guest as it has already been halted` + - **No action required** - this means the target state was already achieved + +6. 
**"Already Running" Errors on Start Operations** + - These are **expected and indicate success** - the VM was already running + - The role automatically detects this condition and treats it as successful + - Example error: `SoftLayer_Exception_Virtual_Guest_AlreadyRunning: Failed to start guest as it is already running` + - **No action required** - this means the target state was already achieved + +## Integration + +This role is designed to work with: +- AgnosticD lifecycle management system +- IBM Cloud Classic infrastructure +- Terraform-based VM deployments +- Existing output directory structure + +## Version Compatibility + +- Ansible 2.9+ +- IBM Cloud Classic Infrastructure +- IBM Cloud CLI +- AgnosticD framework +- Terraform (any version that creates compatible state files) \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-vm-lifecycle/defaults/main.yml b/ansible/roles-infra/infra-ibm-cloud-classic-vm-lifecycle/defaults/main.yml new file mode 100644 index 00000000000..01398f70f8f --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-vm-lifecycle/defaults/main.yml @@ -0,0 +1,26 @@ +--- +# Default variables for IBM Cloud Classic VM lifecycle management + +# IBM Cloud API key for authentication (required) +# ibm_cloud_api_key: "YOUR_IBM_CLOUD_API_KEY" + +# Output directory containing Terraform state and outputs (required) +# output_dir: "/path/to/output/directory" + +# Default action if not specified +ACTION: "status" + +# Default verbosity level +verbosity: 0 + +# Terraform working directory (consistent with deployment pattern) +terraform_working_dir: "{{ output_dir }}/terraform-ibm-vm-{{ guid | default('default') }}" + +# VM information (extracted from Terraform outputs) +vm_count: 0 +vm_ids: [] +vm_hostnames: [] +vm_public_ips: [] +vm_private_ips: [] +vm_datacenters: [] +deployment_summary: {} \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-vm-lifecycle/meta/main.yml b/ansible/roles-infra/infra-ibm-cloud-classic-vm-lifecycle/meta/main.yml new file mode 100644 index 00000000000..55de5328dba --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-vm-lifecycle/meta/main.yml @@ -0,0 +1,22 @@ +galaxy_info: + author: AgnosticD + description: Manage IBM Cloud Classic VM lifecycle (start, stop, status) using Terraform state + company: Red Hat + license: MIT + min_ansible_version: 2.9 + platforms: + - name: EL + versions: + - 8 + - 9 + - name: Fedora + versions: + - all + galaxy_tags: + - cloud + - ibm + - lifecycle + - virtualization + - terraform + +dependencies: [] \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-vm-lifecycle/tasks/initialize.yml b/ansible/roles-infra/infra-ibm-cloud-classic-vm-lifecycle/tasks/initialize.yml new file mode 100644 index 00000000000..9c261941c5e --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-vm-lifecycle/tasks/initialize.yml @@ -0,0 +1,74 @@ +--- +- name: Initialize IBM Cloud Classic VM lifecycle management + ansible.builtin.debug: + msg: "Initializing IBM Cloud Classic VM lifecycle management for ACTION: {{ ACTION }}" + +- name: Validate IBM Cloud API key is provided + ansible.builtin.fail: + msg: "ibm_cloud_api_key is required. 
Please provide it as an extra var: -e ibm_cloud_api_key=YOUR_API_KEY" + when: ibm_cloud_api_key is not defined or ibm_cloud_api_key == "" + +- name: Check if output directory exists + ansible.builtin.stat: + path: "{{ output_dir }}" + register: output_dir_stat + when: output_dir is defined + +- name: Validate output directory is provided and exists + ansible.builtin.fail: + msg: "output_dir is required and must exist. Current value: {{ output_dir | default('not defined') }}" + when: output_dir is not defined or not output_dir_stat.stat.exists + +- name: Validate Terraform directory exists + ansible.builtin.stat: + path: "{{ terraform_working_dir }}" + register: terraform_dir_stat + +- name: Fail if Terraform directory does not exist + ansible.builtin.fail: + msg: "Terraform working directory {{ terraform_working_dir }} does not exist. VM deployment may not have completed successfully." + when: not terraform_dir_stat.stat.exists + +- name: Check if Terraform state exists + ansible.builtin.stat: + path: "{{ terraform_working_dir }}/terraform.tfstate" + register: terraform_state_stat + +- name: Validate Terraform state exists + ansible.builtin.fail: + msg: "No Terraform state found in {{ terraform_working_dir }}. VM deployment may not have completed successfully." + when: not terraform_state_stat.stat.exists + +- name: Get Terraform outputs + ansible.builtin.command: + cmd: terraform output -json + chdir: "{{ terraform_working_dir }}" + register: terraform_outputs_raw + changed_when: false + +- name: Parse Terraform outputs + ansible.builtin.set_fact: + terraform_outputs: "{{ terraform_outputs_raw.stdout | from_json }}" + +- name: Extract VM information from Terraform outputs + ansible.builtin.set_fact: + vm_count: "{{ terraform_outputs.vm_count.value | default(0) }}" + vm_ids: "{{ terraform_outputs.vm_ids.value | default([]) }}" + vm_hostnames: "{{ terraform_outputs.vm_hostnames.value | default([]) }}" + vm_public_ips: "{{ terraform_outputs.vm_public_ips.value | default([]) }}" + vm_private_ips: "{{ terraform_outputs.vm_private_ips.value | default([]) }}" + vm_datacenters: "{{ terraform_outputs.vm_datacenters.value | default([]) }}" + deployment_summary: "{{ terraform_outputs.deployment_summary.value | default({}) }}" + +- name: Display VM information + ansible.builtin.debug: + msg: + - "Found {{ vm_count }} VMs in Terraform state" + - "VM hostnames: {{ vm_hostnames }}" + - "VM datacenters: {{ vm_datacenters | unique }}" + verbosity: 1 + +- name: Validate VMs exist for lifecycle operations + ansible.builtin.fail: + msg: "No VMs found in Terraform state. Cannot perform lifecycle operations." 
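+  # vm_count is parsed from the Terraform outputs above; zero means the state holds no VM resources.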
+ when: vm_count | int == 0 \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-vm-lifecycle/tasks/main.yml b/ansible/roles-infra/infra-ibm-cloud-classic-vm-lifecycle/tasks/main.yml new file mode 100644 index 00000000000..e9c264801cd --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-vm-lifecycle/tasks/main.yml @@ -0,0 +1,14 @@ +--- +- import_tasks: initialize.yml + +- when: + - ACTION == 'stop' + import_tasks: stop.yml + +- when: + - ACTION == 'start' + import_tasks: start.yml + +- when: + - ACTION == 'status' + import_tasks: status.yml \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-vm-lifecycle/tasks/start.yml b/ansible/roles-infra/infra-ibm-cloud-classic-vm-lifecycle/tasks/start.yml new file mode 100644 index 00000000000..25806172f95 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-vm-lifecycle/tasks/start.yml @@ -0,0 +1,91 @@ +--- +- name: Check VM status before starting + ansible.builtin.command: + cmd: ibmcloud sl vs detail {{ item }} --output json + loop: "{{ vm_ids }}" + register: vm_status_check + changed_when: false + failed_when: false + environment: + IBMCLOUD_API_KEY: "{{ ibm_cloud_api_key }}" + +- name: Parse VM status information + ansible.builtin.set_fact: + vm_status_info: "{{ vm_status_check.results | map(attribute='stdout') | map('from_json') | list }}" + when: vm_status_check.results is defined + +- name: Display VM status before start operation + ansible.builtin.debug: + msg: + - "VM {{ vm_hostnames[ansible_loop.index0] }} ({{ item }}) current status:" + - " Power State: {{ vm_status_info[ansible_loop.index0].powerState.name if vm_status_info[ansible_loop.index0].powerState is defined else 'Unknown' }}" + - " Action: {{ 'Skip (already running)' if (vm_status_info[ansible_loop.index0].powerState.name | default('') | upper) in ['RUNNING', 'ACTIVE'] else 'Will attempt to start' }}" + loop: "{{ vm_ids }}" + loop_control: + extended: true + when: vm_status_info is defined + +- name: Start IBM Cloud Classic VMs + ansible.builtin.command: + cmd: ibmcloud sl vs power-on {{ item }} --force + loop: "{{ vm_ids }}" + register: vm_start_results + changed_when: vm_start_results.rc == 0 + failed_when: false + environment: + IBMCLOUD_API_KEY: "{{ ibm_cloud_api_key }}" + loop_control: + extended: true + +- name: Display detailed start results for each VM + ansible.builtin.debug: + msg: + - "VM {{ vm_hostnames[ansible_loop.index0] }} ({{ item.item }}) start result:" + - " Initial State: {{ vm_status_info[ansible_loop.index0].powerState.name if vm_status_info is defined and vm_status_info[ansible_loop.index0].powerState is defined else 'Unknown' }}" + - " Action Taken: Start command attempted" + - " Result: {{ 'Success' if item.rc == 0 else ('Already running' if ('AlreadyRunning' in (item.stderr | default('')) or 'already running' in (item.stderr | default('')) or 'Already running' in (item.stderr | default(''))) else 'Failed: ' + (item.stderr | default('Unknown error'))) }}" + - " Return Code: {{ item.rc }}" + loop: "{{ vm_start_results.results }}" + loop_control: + extended: true + when: vm_start_results.results is defined + +- name: Display start action completion + agnosticd_user_info: + msg: | + IBM Cloud Classic VMs Start Operation Summary: + + Total VMs: {{ vm_count }} + {% if vm_start_results.results is defined %} + {% for result in vm_start_results.results %} + {% set vm_name = vm_hostnames[loop.index0] %} + {% set vm_id = result.item %} + {% set initial_state = 
vm_status_info[loop.index0].powerState.name if vm_status_info is defined and vm_status_info[loop.index0].powerState is defined else 'Unknown' %} + + VM {{ vm_name }} ({{ vm_id }}): + {% if result.rc == 0 %} + - Initial State: {{ initial_state }} + - Action: Start command sent successfully + - Result: ✅ VM started + {% elif 'AlreadyRunning' in (result.stderr | default('')) or 'already running' in (result.stderr | default('')) or 'Already running' in (result.stderr | default('')) %} + - Initial State: {{ initial_state }} + - Action: Start command attempted + - Result: ✅ VM already running (target state achieved) + {% else %} + - Initial State: {{ initial_state }} + - Action: Start command failed + - Result: ❌ Failed: {{ result.stderr | default('Unknown error') }} + {% endif %} + {% endfor %} + {% endif %} + + ✅ Operation completed. All VMs are now in running state. + + Note: "Already running" errors are expected and indicate success - the VM + was already in the desired running state. + when: vm_count | int > 0 + +- name: Display no VMs message + ansible.builtin.debug: + msg: "No IBM Cloud Classic VMs found to start" + when: vm_count | int == 0 \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-vm-lifecycle/tasks/status.yml b/ansible/roles-infra/infra-ibm-cloud-classic-vm-lifecycle/tasks/status.yml new file mode 100644 index 00000000000..46c5c28506c --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-vm-lifecycle/tasks/status.yml @@ -0,0 +1,57 @@ +--- +- name: Get IBM Cloud Classic VM status + ansible.builtin.command: + cmd: ibmcloud sl vs detail {{ item }} --output json + loop: "{{ vm_ids }}" + register: vm_status_results + changed_when: false + failed_when: false + environment: + IBMCLOUD_API_KEY: "{{ ibm_cloud_api_key }}" + +- name: Parse VM status information + ansible.builtin.set_fact: + vm_status_info: "{{ vm_status_info | default([]) + [item.stdout | from_json] }}" + loop: "{{ vm_status_results.results }}" + when: item.rc == 0 + +- name: Report VM status information + agnosticd_user_info: + msg: | + IBM Cloud Classic VM Status: + + Total VMs: {{ vm_count }} + Datacenters: {{ vm_datacenters | unique | join(', ') }} + + {% for i in range(vm_count | int) %} + VM {{ loop.index }}: + - Hostname: {{ vm_hostnames[i] }} + - ID: {{ vm_ids[i] }} + - Public IP: {{ vm_public_ips[i] | default('N/A') }} + - Private IP: {{ vm_private_ips[i] | default('N/A') }} + - Datacenter: {{ vm_datacenters[i] }} + - Status: {{ vm_status_info[i].powerState.name | default('Unknown') if vm_status_info is defined and vm_status_info[i] is defined and vm_status_info[i].powerState is defined else 'Unknown' }} + {% endfor %} + +- name: Display detailed VM status + ansible.builtin.debug: + msg: + - "IBM Cloud Classic VM Status Check:" + - "Total VMs found: {{ vm_count }}" + - "VM hostnames: {{ vm_hostnames }}" + - "VM datacenters: {{ vm_datacenters | unique }}" + - "Deployment summary: {{ deployment_summary }}" + when: vm_count | int > 0 + +- name: Display VM status details + ansible.builtin.debug: + msg: "VM {{ vm_hostnames[ansible_loop.index0] }} ({{ item }}): {{ vm_status_info[ansible_loop.index0].powerState.name | default('Unknown') if vm_status_info is defined and vm_status_info[ansible_loop.index0] is defined and vm_status_info[ansible_loop.index0].powerState is defined else 'Status check failed' }}" + loop: "{{ vm_ids }}" + loop_control: + extended: true + when: vm_count | int > 0 + +- name: Display no VMs message + ansible.builtin.debug: + msg: "No IBM Cloud Classic 
VMs found to check status" + when: vm_count | int == 0 \ No newline at end of file diff --git a/ansible/roles-infra/infra-ibm-cloud-classic-vm-lifecycle/tasks/stop.yml b/ansible/roles-infra/infra-ibm-cloud-classic-vm-lifecycle/tasks/stop.yml new file mode 100644 index 00000000000..2edbcbc9036 --- /dev/null +++ b/ansible/roles-infra/infra-ibm-cloud-classic-vm-lifecycle/tasks/stop.yml @@ -0,0 +1,91 @@ +--- +- name: Check VM status before stopping + ansible.builtin.command: + cmd: ibmcloud sl vs detail {{ item }} --output json + loop: "{{ vm_ids }}" + register: vm_status_check + changed_when: false + failed_when: false + environment: + IBMCLOUD_API_KEY: "{{ ibm_cloud_api_key }}" + +- name: Parse VM status information + ansible.builtin.set_fact: + vm_status_info: "{{ vm_status_check.results | map(attribute='stdout') | map('from_json') | list }}" + when: vm_status_check.results is defined + +- name: Display VM status before stop operation + ansible.builtin.debug: + msg: + - "VM {{ vm_hostnames[ansible_loop.index0] }} ({{ item }}) current status:" + - " Power State: {{ vm_status_info[ansible_loop.index0].powerState.name if vm_status_info[ansible_loop.index0].powerState is defined else 'Unknown' }}" + - " Action: {{ 'Skip (already stopped)' if (vm_status_info[ansible_loop.index0].powerState.name | default('') | upper) in ['HALTED', 'STOPPED'] else 'Will attempt to stop' }}" + loop: "{{ vm_ids }}" + loop_control: + extended: true + when: vm_status_info is defined + +- name: Stop IBM Cloud Classic VMs (all VMs) + ansible.builtin.command: + cmd: ibmcloud sl vs power-off {{ item }} --force + loop: "{{ vm_ids }}" + register: vm_stop_results + changed_when: vm_stop_results.rc == 0 + failed_when: false + environment: + IBMCLOUD_API_KEY: "{{ ibm_cloud_api_key }}" + loop_control: + extended: true + +- name: Display detailed stop results for each VM + ansible.builtin.debug: + msg: + - "VM {{ vm_hostnames[ansible_loop.index0] }} ({{ item.item }}) stop result:" + - " Initial State: {{ vm_status_info[ansible_loop.index0].powerState.name if vm_status_info is defined and vm_status_info[ansible_loop.index0].powerState is defined else 'Unknown' }}" + - " Action Taken: Stop command attempted" + - " Result: {{ 'Success' if item.rc == 0 else ('Already stopped' if 'AlreadyHalted' in (item.stderr | default('')) else 'Failed: ' + (item.stderr | default('Unknown error'))) }}" + - " Return Code: {{ item.rc }}" + loop: "{{ vm_stop_results.results }}" + loop_control: + extended: true + when: vm_stop_results.results is defined + +- name: Display stop action completion + agnosticd_user_info: + msg: | + IBM Cloud Classic VMs Stop Operation Summary: + + Total VMs: {{ vm_count }} + {% if vm_stop_results.results is defined %} + {% for result in vm_stop_results.results %} + {% set vm_name = vm_hostnames[loop.index0] %} + {% set vm_id = result.item %} + {% set initial_state = vm_status_info[loop.index0].powerState.name if vm_status_info is defined and vm_status_info[loop.index0].powerState is defined else 'Unknown' %} + + VM {{ vm_name }} ({{ vm_id }}): + {% if result.rc == 0 %} + - Initial State: {{ initial_state }} + - Action: Stop command sent successfully + - Result: ✅ VM stopped + {% elif 'AlreadyHalted' in (result.stderr | default('')) %} + - Initial State: {{ initial_state }} + - Action: Stop command attempted + - Result: ✅ VM already stopped (target state achieved) + {% else %} + - Initial State: {{ initial_state }} + - Action: Stop command failed + - Result: ❌ Failed: {{ result.stderr | default('Unknown error') }} + {% 
endif %} + {% endfor %} + {% endif %} + + ✅ Operation completed. All VMs are now in stopped state. + + Note: "AlreadyHalted" errors are expected and indicate success - the VM + was already in the desired stopped state. + when: vm_count | int > 0 + +- name: Display no VMs message + ansible.builtin.debug: + msg: "No IBM Cloud Classic VMs found to stop" + when: vm_count | int == 0 \ No newline at end of file diff --git a/ansible/roles-infra/infra-redfish-power-management/README.md b/ansible/roles-infra/infra-redfish-power-management/README.md new file mode 100644 index 00000000000..1e575d6ade6 --- /dev/null +++ b/ansible/roles-infra/infra-redfish-power-management/README.md @@ -0,0 +1,237 @@ +# Redfish Power Management Role (infra-redfish-power-management) + +This role provides power management capabilities for servers using the Redfish API standard. It supports power operations, PXE boot configuration, and server monitoring through BMC connections. + +## Features + +- **Power Management**: Start, stop, reset, and force power off servers +- **Power Status Monitoring**: Check current power state and monitor state changes +- **PXE Boot Support**: Configure PXE boot settings and initiate network boot +- **Boot Order Management**: Check and optimize boot order to prioritize hard disk first +- **Redfish API Integration**: Uses standard Redfish API for broad server compatibility +- **Connection Validation**: Tests BMC connectivity before operations +- **Graceful Handling**: Intelligent handling of already-powered servers + +## Supported Power Actions + +- `status`: Check current power state +- `on`: Power on the server +- `off`: Gracefully power off the server +- `force_off`: Force immediate power off +- `reset`: Reset/restart the server + +## Required Variables + +- `bmc_hostname`: BMC IP address or hostname +- `bmc_username`: BMC username for authentication +- `bmc_password`: BMC password for authentication +- `power_action`: Power action to perform (status, on, off, force_off, reset) + +## Optional Variables + +- `enable_pxe_boot_and_reset`: Enable PXE boot and reset server (default: false) +- `check_boot_order`: Check and fix boot order to prioritize hard disk first (default: true) +- `set_boot_order_on_provision`: Set boot order during provision operations (default: true) +- `validate_certs`: Validate SSL certificates (default: false) +- `force_basic_auth`: Use basic authentication (default: true) +- `connection_timeout`: Connection timeout in seconds (default: 30) +- `power_monitor_retries`: Number of retries for power monitoring (default: 30) +- `power_monitor_delay`: Delay between power state checks in seconds (default: 5) + +## Usage + +### Basic Power Operations + +```yaml +- include_role: + name: infra-redfish-power-management + vars: + bmc_hostname: "192.168.1.100" + bmc_username: "admin" + bmc_password: "password123" + power_action: "on" +``` + +### Check Power Status + +```yaml +- include_role: + name: infra-redfish-power-management + vars: + bmc_hostname: "192.168.1.100" + bmc_username: "admin" + bmc_password: "password123" + power_action: "status" +``` + +### PXE Boot Configuration + +```yaml +- include_role: + name: infra-redfish-power-management + vars: + bmc_hostname: "192.168.1.100" + bmc_username: "admin" + bmc_password: "password123" + enable_pxe_boot_and_reset: true +``` + +### Boot Order Management + +```yaml +- include_role: + name: infra-redfish-power-management + vars: + bmc_hostname: "192.168.1.100" + bmc_username: "admin" + bmc_password: "password123" + power_action: 
"status" + check_boot_order: true +``` + +### Skip Boot Order Check + +```yaml +- include_role: + name: infra-redfish-power-management + vars: + bmc_hostname: "192.168.1.100" + bmc_username: "admin" + bmc_password: "password123" + power_action: "on" + check_boot_order: false +``` + +### Force Power Off + +```yaml +- include_role: + name: infra-redfish-power-management + vars: + bmc_hostname: "192.168.1.100" + bmc_username: "admin" + bmc_password: "password123" + power_action: "force_off" +``` + +## Task Flow + +1. **Validation**: Validates required parameters and power action +2. **Connectivity**: Tests BMC connectivity using Redfish API +3. **Boot Order**: Checks and optimizes boot order to prioritize hard disk first +4. **Power Status**: Retrieves current power state +5. **Graceful Exits**: Checks for conditions requiring early exit +6. **Power Actions**: Executes requested power operations +7. **PXE Boot**: Configures PXE boot settings if enabled +8. **Monitoring**: Monitors power state changes +9. **Final Status**: Reports final power state + +## Integration with AgnosticD + +This role integrates with AgnosticD infrastructure workflows: + +```yaml +# Used by infra-ibm-cloud-classic-bm-resources for lifecycle management +- include_role: + name: infra-redfish-power-management + vars: + bmc_hostname: "{{ bmc_hostname }}" + bmc_username: "{{ bmc_username }}" + bmc_password: "{{ bmc_password }}" + power_action: "{{ power_action }}" +``` + +## Return Values + +The role sets the following facts: + +- `power_status`: Current power state information +- `power_action_result`: Result of the power action performed +- `connectivity_status`: BMC connectivity test results + +## Error Handling + +The role includes comprehensive error handling: + +- **Parameter Validation**: Checks for required BMC connection parameters +- **Power Action Validation**: Validates power action is supported +- **Connectivity Testing**: Verifies BMC is reachable before operations +- **Graceful Exits**: Handles already-powered servers appropriately +- **Timeout Handling**: Configurable timeouts for operations + +## Compatibility + +This role works with servers that support the Redfish API standard, including: + +- Lenovo ThinkSystem servers +- Dell PowerEdge servers +- HP/HPE ProLiant servers +- IBM System servers +- Other Redfish-compliant servers + +## Examples + +### Complete Power-On Sequence + +```yaml +- name: Power on server with monitoring + include_role: + name: infra-redfish-power-management + vars: + bmc_hostname: "192.168.1.100" + bmc_username: "admin" + bmc_password: "password123" + power_action: "on" + power_monitor_retries: 60 + power_monitor_delay: 10 +``` + +### PXE Boot for OS Installation + +```yaml +- name: Configure PXE boot and reset + include_role: + name: infra-redfish-power-management + vars: + bmc_hostname: "192.168.1.100" + bmc_username: "admin" + bmc_password: "password123" + enable_pxe_boot_and_reset: true + power_monitor_retries: 30 +``` + +### Boot Order Management for Production Systems + +```yaml +- name: Ensure hard disk boots first for production system + include_role: + name: infra-redfish-power-management + vars: + bmc_hostname: "192.168.1.100" + bmc_username: "admin" + bmc_password: "password123" + power_action: "status" + check_boot_order: true +``` + +## Security Considerations + +- Store BMC credentials securely using Ansible Vault +- Use SSL/TLS when possible (set `validate_certs: true`) +- Limit network access to BMC interfaces +- Use strong authentication credentials + +## 
Dependencies + +- Ansible `uri` module for HTTP/HTTPS requests +- Network connectivity to BMC interface +- Redfish API support on target servers + +## Troubleshooting + +Common issues and solutions: + +1. **Connection Timeout**: Increase `connection_timeout` value +2. **SSL Certificate Errors**: Set `validate_certs: false` for self-signed certificates +3. **Authentication Failures**: Verify BMC credentials and user permissions +4. **Power State Changes**: Adjust `power_monitor_retries` and `power_monitor_delay` \ No newline at end of file diff --git a/ansible/roles-infra/infra-redfish-power-management/defaults/main.yml b/ansible/roles-infra/infra-redfish-power-management/defaults/main.yml new file mode 100755 index 00000000000..70947d2dceb --- /dev/null +++ b/ansible/roles-infra/infra-redfish-power-management/defaults/main.yml @@ -0,0 +1,21 @@ +--- +# Redfish Power Management Role - Defaults + +## BMC Connection Variables (Required) +# These must be provided when calling the role +# bmc_hostname: "192.168.1.100" +# bmc_username: "admin" +# bmc_password: "password123" + +## Power Management Variables +power_action: "status" # status, on, off, force_off, reset +enable_pxe_boot_and_reset: false # Enable PXE boot with auto reset + +## Redfish API Settings +validate_certs: false # Skip certificate validation for self-signed certs +force_basic_auth: true # Use basic authentication +connection_timeout: 30 # Connection timeout in seconds + +## Power State Monitoring +power_monitor_retries: 30 # Number of retries for power state monitoring +power_monitor_delay: 5 # Delay between power state checks in seconds diff --git a/ansible/roles-infra/infra-redfish-power-management/meta/main.yml b/ansible/roles-infra/infra-redfish-power-management/meta/main.yml new file mode 100644 index 00000000000..282aa2c7a5b --- /dev/null +++ b/ansible/roles-infra/infra-redfish-power-management/meta/main.yml @@ -0,0 +1,25 @@ +--- +galaxy_info: + author: AgnosticD Team + description: Manages power operations on Redfish-compliant BMCs + company: Red Hat + license: GPL-3.0+ + min_ansible_version: 2.9 + platforms: + - name: EL + versions: + - 8 + - 9 + - name: Ubuntu + versions: + - 20.04 + - 22.04 + galaxy_tags: + - redfish + - bmc + - power + - management + - baremetal + - infrastructure + +dependencies: [] \ No newline at end of file diff --git a/ansible/roles-infra/infra-redfish-power-management/tasks/connectivity.yml b/ansible/roles-infra/infra-redfish-power-management/tasks/connectivity.yml new file mode 100755 index 00000000000..3a1fb2a8cc1 --- /dev/null +++ b/ansible/roles-infra/infra-redfish-power-management/tasks/connectivity.yml @@ -0,0 +1,21 @@ +--- +# BMC connectivity tasks for power management + +- name: Check BMC connectivity + ansible.builtin.uri: + url: "https://{{ bmc_hostname }}/redfish/v1/" + method: GET + user: "{{ bmc_username }}" + password: "{{ bmc_password }}" + validate_certs: "{{ validate_certs }}" + force_basic_auth: "{{ force_basic_auth }}" + status_code: 200 + timeout: "{{ connection_timeout }}" + register: bmc_connectivity + tags: [always, connectivity] + +- name: Display BMC connection status + ansible.builtin.debug: + msg: "Successfully connected to BMC at {{ bmc_hostname }}" + when: bmc_connectivity.status == 200 + tags: [always, connectivity] \ No newline at end of file diff --git a/ansible/roles-infra/infra-redfish-power-management/tasks/graceful_exits.yml b/ansible/roles-infra/infra-redfish-power-management/tasks/graceful_exits.yml new file mode 100755 index 00000000000..dc26c7bccf1 --- 
/dev/null +++ b/ansible/roles-infra/infra-redfish-power-management/tasks/graceful_exits.yml @@ -0,0 +1,24 @@ +--- +# Graceful exit conditions + +- name: Exit gracefully if system is already in desired power state + block: + - name: Display graceful exit message + ansible.builtin.debug: + msg: "System is already in the desired state '{{ power_status.json.PowerState }}' for action '{{ power_action }}'. Exiting gracefully." + - name: End playbook execution + ansible.builtin.meta: end_play + when: > + (power_action == 'on' and power_status.json.PowerState == 'On') or + (power_action in ['off', 'force_off'] and power_status.json.PowerState == 'Off') + tags: [always, power] + +- name: Exit gracefully if reset requested on powered off system + block: + - name: Display reset on off system message + ansible.builtin.debug: + msg: "Cannot reset system that is currently '{{ power_status.json.PowerState }}'. System must be powered on before it can be reset. Use power_action 'on' to power on the system first." + - name: End playbook execution + ansible.builtin.meta: end_play + when: power_action == 'reset' and power_status.json.PowerState != 'On' + tags: [always, power] \ No newline at end of file diff --git a/ansible/roles-infra/infra-redfish-power-management/tasks/main.yml b/ansible/roles-infra/infra-redfish-power-management/tasks/main.yml new file mode 100755 index 00000000000..2e001f19487 --- /dev/null +++ b/ansible/roles-infra/infra-redfish-power-management/tasks/main.yml @@ -0,0 +1,57 @@ +--- +- name: Validate required BMC connection parameters + ansible.builtin.fail: + msg: "Required parameter '{{ item }}' is missing" + when: vars[item] is not defined or vars[item] == "" + loop: + - bmc_hostname + - bmc_username + - bmc_password + tags: [always, validation] + +- name: Validate power action parameter + ansible.builtin.fail: + msg: "Invalid power_action. Must be one of: status, on, off, force_off, reset" + when: power_action not in ['status', 'on', 'off', 'force_off', 'reset'] + tags: [always, validation] + +- name: Exit with error if both enable_pxe_boot_and_reset and power_action are set + block: + - name: Display configuration error message + ansible.builtin.debug: + msg: "ERROR: Cannot specify both enable_pxe_boot_and_reset and power_action. When enable_pxe_boot_and_reset is enabled, the system will automatically reset after PXE configuration." 
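+    # meta: end_play stops the whole play here; no further role tasks run after this misconfiguration is reported.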
+ - name: End playbook execution with error + ansible.builtin.meta: end_play + when: (enable_pxe_boot_and_reset | bool) and power_action is defined and power_action != 'status' + tags: [always, validation] + +- name: Test BMC connectivity + include_tasks: connectivity.yml + tags: [always, connectivity] + +- name: Get current power state + include_tasks: power_status.yml + tags: [always, power] + +- name: Check for graceful exit conditions + include_tasks: graceful_exits.yml + tags: [always, power] + +- name: Execute power actions + include_tasks: power_actions.yml + when: power_action in ['on', 'off', 'force_off', 'reset'] and not (enable_pxe_boot_and_reset | bool) + tags: [power] + +- name: Handle PXE boot configuration + include_tasks: pxe_boot.yml + when: enable_pxe_boot_and_reset | bool + tags: [pxe, boot] + +- name: Handle power actions that require monitoring + include_tasks: power_monitoring.yml + when: power_action in ['on', 'off', 'force_off', 'reset'] or (enable_pxe_boot_and_reset | bool) + tags: [power, monitoring] + +- name: Get final power state + include_tasks: power_status.yml + tags: [always, power] diff --git a/ansible/roles-infra/infra-redfish-power-management/tasks/power_actions.yml b/ansible/roles-infra/infra-redfish-power-management/tasks/power_actions.yml new file mode 100755 index 00000000000..1e8db68965e --- /dev/null +++ b/ansible/roles-infra/infra-redfish-power-management/tasks/power_actions.yml @@ -0,0 +1,78 @@ +--- +# Power action execution tasks + +- name: Power on the system + ansible.builtin.uri: + url: "https://{{ bmc_hostname }}/redfish/v1/Systems/1/Actions/ComputerSystem.Reset" + method: POST + user: "{{ bmc_username }}" + password: "{{ bmc_password }}" + validate_certs: "{{ validate_certs }}" + force_basic_auth: "{{ force_basic_auth }}" + body_format: json + body: + ResetType: "On" + status_code: [200, 202, 204] + timeout: "{{ connection_timeout }}" + when: power_action == "on" and power_status.json.PowerState != "On" + register: power_on_result + tags: [power, power_on] + +- name: Power off the system (graceful) + ansible.builtin.uri: + url: "https://{{ bmc_hostname }}/redfish/v1/Systems/1/Actions/ComputerSystem.Reset" + method: POST + user: "{{ bmc_username }}" + password: "{{ bmc_password }}" + validate_certs: "{{ validate_certs }}" + force_basic_auth: "{{ force_basic_auth }}" + body_format: json + body: + ResetType: "GracefulShutdown" + status_code: [200, 202, 204] + timeout: "{{ connection_timeout }}" + when: power_action == "off" and power_status.json.PowerState == "On" + register: power_off_result + tags: [power, power_off] + +- name: Force power off the system + ansible.builtin.uri: + url: "https://{{ bmc_hostname }}/redfish/v1/Systems/1/Actions/ComputerSystem.Reset" + method: POST + user: "{{ bmc_username }}" + password: "{{ bmc_password }}" + validate_certs: "{{ validate_certs }}" + force_basic_auth: "{{ force_basic_auth }}" + body_format: json + body: + ResetType: "ForceOff" + status_code: [200, 202, 204] + timeout: "{{ connection_timeout }}" + when: power_action == "force_off" and power_status.json.PowerState == "On" + register: force_power_off_result + tags: [power, force_power_off] + +- name: Reset the system + ansible.builtin.uri: + url: "https://{{ bmc_hostname }}/redfish/v1/Systems/1/Actions/ComputerSystem.Reset" + method: POST + user: "{{ bmc_username }}" + password: "{{ bmc_password }}" + validate_certs: "{{ validate_certs }}" + force_basic_auth: "{{ force_basic_auth }}" + body_format: json + body: + ResetType: "ForceRestart" + 
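+    # 200/202/204 are all valid: BMCs may run the reset synchronously, accept it asynchronously, or return no content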
status_code: [200, 202, 204] + timeout: "{{ connection_timeout }}" + when: power_action == "reset" + register: reset_result + tags: [power, reset] + +- name: Display power action result + ansible.builtin.debug: + msg: "Power action '{{ power_action }}' completed successfully" + when: power_action in ['on', 'off', 'force_off', 'reset'] and + (power_on_result is succeeded or power_off_result is succeeded or + force_power_off_result is succeeded or reset_result is succeeded) + tags: [power] \ No newline at end of file diff --git a/ansible/roles-infra/infra-redfish-power-management/tasks/power_monitoring.yml b/ansible/roles-infra/infra-redfish-power-management/tasks/power_monitoring.yml new file mode 100755 index 00000000000..8a8bec05181 --- /dev/null +++ b/ansible/roles-infra/infra-redfish-power-management/tasks/power_monitoring.yml @@ -0,0 +1,26 @@ +--- +# Power state monitoring tasks + +- name: Monitor power state until desired state is reached + ansible.builtin.uri: + url: "https://{{ bmc_hostname }}/redfish/v1/Systems/1" + method: GET + user: "{{ bmc_username }}" + password: "{{ bmc_password }}" + validate_certs: "{{ validate_certs }}" + force_basic_auth: "{{ force_basic_auth }}" + status_code: 200 + timeout: "{{ connection_timeout }}" + register: final_power_status + until: > + (power_action == 'on' and final_power_status.json.PowerState == 'On') or + (power_action in ['off', 'force_off'] and final_power_status.json.PowerState == 'Off') or + (power_action == 'reset' and final_power_status.json.PowerState == 'On') or + ((enable_pxe_boot_and_reset | bool) and + ((pxe_reset_result is defined and pxe_reset_result is succeeded) or (pxe_power_on_result is defined and pxe_power_on_result is succeeded)) and + final_power_status.json.PowerState == 'On') or + (power_action == 'status' or power_action is not defined) + retries: "{{ power_monitor_retries }}" + delay: "{{ power_monitor_delay }}" + when: power_action in ['on', 'off', 'force_off', 'reset'] or ((enable_pxe_boot_and_reset | bool) and ((pxe_reset_result is defined and pxe_reset_result is succeeded) or (pxe_power_on_result is defined and pxe_power_on_result is succeeded))) + tags: [always, power, monitoring] \ No newline at end of file diff --git a/ansible/roles-infra/infra-redfish-power-management/tasks/power_status.yml b/ansible/roles-infra/infra-redfish-power-management/tasks/power_status.yml new file mode 100755 index 00000000000..1dd3139a74a --- /dev/null +++ b/ansible/roles-infra/infra-redfish-power-management/tasks/power_status.yml @@ -0,0 +1,20 @@ +--- +# Power status tasks + +- name: Get current power state + ansible.builtin.uri: + url: "https://{{ bmc_hostname }}/redfish/v1/Systems/1" + method: GET + user: "{{ bmc_username }}" + password: "{{ bmc_password }}" + validate_certs: "{{ validate_certs }}" + force_basic_auth: "{{ force_basic_auth }}" + status_code: 200 + timeout: "{{ connection_timeout }}" + register: power_status + tags: [always, power] + +- name: Display current power state + ansible.builtin.debug: + msg: "Current power state: {{ power_status.json.PowerState }}" + tags: [always, power] \ No newline at end of file diff --git a/ansible/roles-infra/infra-redfish-power-management/tasks/pxe_boot.yml b/ansible/roles-infra/infra-redfish-power-management/tasks/pxe_boot.yml new file mode 100755 index 00000000000..23bc691b85f --- /dev/null +++ b/ansible/roles-infra/infra-redfish-power-management/tasks/pxe_boot.yml @@ -0,0 +1,123 @@ +--- +# PXE boot configuration tasks + +- name: Get current boot settings + ansible.builtin.uri: 
+ url: "https://{{ bmc_hostname }}/redfish/v1/Systems/1" + method: GET + user: "{{ bmc_username }}" + password: "{{ bmc_password }}" + validate_certs: "{{ validate_certs }}" + force_basic_auth: "{{ force_basic_auth }}" + status_code: 200 + timeout: "{{ connection_timeout }}" + register: current_boot_settings + tags: [pxe, boot] + +- name: Display current boot settings + ansible.builtin.debug: + msg: + - "Boot source override: {{ current_boot_settings.json.Boot.BootSourceOverrideTarget | default('None') }}" + - "Boot source override enabled: {{ current_boot_settings.json.Boot.BootSourceOverrideEnabled | default('Disabled') }}" + tags: [pxe, boot] + +- name: Enable one-time PXE boot + ansible.builtin.uri: + url: "https://{{ bmc_hostname }}/redfish/v1/Systems/1" + method: PATCH + user: "{{ bmc_username }}" + password: "{{ bmc_password }}" + validate_certs: "{{ validate_certs }}" + force_basic_auth: "{{ force_basic_auth }}" + body_format: json + body: + Boot: + BootSourceOverrideTarget: "Pxe" + BootSourceOverrideEnabled: "Once" + status_code: [200, 202, 204] + timeout: "{{ connection_timeout }}" + register: pxe_boot_result + tags: [pxe, boot] + +- name: Verify PXE boot configuration + ansible.builtin.uri: + url: "https://{{ bmc_hostname }}/redfish/v1/Systems/1" + method: GET + user: "{{ bmc_username }}" + password: "{{ bmc_password }}" + validate_certs: "{{ validate_certs }}" + force_basic_auth: "{{ force_basic_auth }}" + status_code: 200 + timeout: "{{ connection_timeout }}" + register: verify_boot_settings + when: pxe_boot_result is succeeded + tags: [pxe, boot] + +- name: Display updated boot settings + ansible.builtin.debug: + msg: + - "PXE boot successfully configured for next boot" + - "Boot source override: {{ verify_boot_settings.json.Boot.BootSourceOverrideTarget }}" + - "Boot source override enabled: {{ verify_boot_settings.json.Boot.BootSourceOverrideEnabled }}" + when: verify_boot_settings is defined + tags: [pxe, boot] + +- name: Power on system to boot from PXE (system is currently off) + ansible.builtin.uri: + url: "https://{{ bmc_hostname }}/redfish/v1/Systems/1/Actions/ComputerSystem.Reset" + method: POST + user: "{{ bmc_username }}" + password: "{{ bmc_password }}" + validate_certs: "{{ validate_certs }}" + force_basic_auth: "{{ force_basic_auth }}" + body_format: json + body: + ResetType: "On" + status_code: [200, 202, 204] + timeout: "{{ connection_timeout }}" + when: pxe_boot_result is succeeded and power_status.json.PowerState != "On" + register: pxe_power_on_result + tags: [pxe, boot, power_on] + +- name: Reset system to boot from PXE (system is currently on) + ansible.builtin.uri: + url: "https://{{ bmc_hostname }}/redfish/v1/Systems/1/Actions/ComputerSystem.Reset" + method: POST + user: "{{ bmc_username }}" + password: "{{ bmc_password }}" + validate_certs: "{{ validate_certs }}" + force_basic_auth: "{{ force_basic_auth }}" + body_format: json + body: + ResetType: "ForceRestart" + status_code: [200, 202, 204] + timeout: "{{ connection_timeout }}" + when: pxe_boot_result is succeeded and power_status.json.PowerState == "On" + register: pxe_reset_result + tags: [pxe, boot, reset] + +- name: Display PXE power on notification + ansible.builtin.debug: + msg: "System powered on to boot from PXE network (system was previously off)" + when: pxe_power_on_result is defined and pxe_power_on_result is succeeded and power_status.json.PowerState != "On" + tags: [pxe, boot, power_on] + +- name: Display PXE reset notification + ansible.builtin.debug: + msg: "System reset initiated to 
boot from PXE network (system was previously on)" + when: pxe_reset_result is defined and pxe_reset_result is succeeded and power_status.json.PowerState == "On" + tags: [pxe, boot, reset] + +- name: Update power state after PXE power operations + ansible.builtin.uri: + url: "https://{{ bmc_hostname }}/redfish/v1/Systems/1" + method: GET + user: "{{ bmc_username }}" + password: "{{ bmc_password }}" + validate_certs: "{{ validate_certs }}" + force_basic_auth: "{{ force_basic_auth }}" + status_code: 200 + timeout: "{{ connection_timeout }}" + register: updated_power_status_after_pxe + when: (enable_pxe_boot_and_reset | bool) and ((pxe_power_on_result is defined and pxe_power_on_result is succeeded) or (pxe_reset_result is defined and pxe_reset_result is succeeded)) + tags: [always, power, pxe] \ No newline at end of file diff --git a/ansible/roles-infra/infra-redfish-user-management/README.md b/ansible/roles-infra/infra-redfish-user-management/README.md new file mode 100644 index 00000000000..b604672da80 --- /dev/null +++ b/ansible/roles-infra/infra-redfish-user-management/README.md @@ -0,0 +1,304 @@ +# Redfish User Management Role (infra-redfish-user-management) + +This role provides comprehensive user management capabilities for servers using the Redfish API standard. It supports creating, updating, and deleting user accounts on BMCs, as well as checking their status, with full password validation and role assignment. + +## Features + +- **User Lifecycle Management**: Create, update, delete, and check status of user accounts +- **Password Validation**: Comprehensive password requirements enforcement +- **Role Assignment**: Support for Administrator, Operator, ReadOnly, and PowerUser roles +- **PowerUser Role**: Custom OEM role with specific privileges for console and power management +- **Flexible Create Behavior**: Option to force password update when user already exists during create action +- **Redfish API Integration**: Uses standard Redfish API for broad server compatibility +- **Connection Validation**: Tests BMC connectivity before operations +- **Account Service Discovery**: Automatic discovery of account service capabilities + +## Supported User Actions + +- `create`: Create a new user account (or update existing user if `force_password_update_on_existing` is true) +- `update_password`: Update an existing user's password +- `delete`: Delete a user account +- `status`: Check user account status and information + +## Supported User Roles + +- `Administrator`: Full administrative privileges +- `Operator`: Operational privileges without configuration changes +- `ReadOnly`: Read-only access to system information +- `PowerUser`: Custom OEM role with console and power management access + +## Required Variables + +- `bmc_hostname`: BMC IP address or hostname +- `bmc_username`: BMC username for authentication +- `bmc_password`: BMC password for authentication + +## Optional Variables + +- `user_action`: Action to perform: create, update_password, delete, status (default: "status") +- `target_username`: Username to manage (default: "console") +- `target_password`: Password for create/update operations (auto-generated if not provided) +- `user_role`: User role to assign (default: "ReadOnly") +- `enable_user`: Enable the user account (default: true) +- `force_password_update_on_existing`: Force password update if user exists during create action (default: true) +- `validate_certs`: Validate SSL certificates (default: false) +- `force_basic_auth`: Use basic authentication (default: true) +- `connection_timeout`: Connection timeout in 
seconds (default: 30) + +## Password Requirements + +The role enforces comprehensive password validation: + +- **Length**: 10-32 characters +- **Character Types**: At least 2 of uppercase, lowercase, special characters +- **Required**: At least one letter and one number +- **Restrictions**: No more than 2 consecutive identical characters +- **Security**: Cannot be username or reverse of username +- **Character Set**: A-Z, a-z, 0-9, and ~`!@#$%^&*()-+={}[]|:;"'<>,?/._ + +## Usage + +### Create a New User + +```yaml +- include_role: + name: infra-redfish-user-management + vars: + bmc_hostname: "192.168.1.100" + bmc_username: "admin" + bmc_password: "admin123" + target_username: "console" + target_password: "SecurePass123!" + user_action: "create" + user_role: "PowerUser" + enable_user: true +``` + +### Update User Password + +```yaml +- include_role: + name: infra-redfish-user-management + vars: + bmc_hostname: "192.168.1.100" + bmc_username: "admin" + bmc_password: "admin123" + target_username: "console" + target_password: "NewSecurePass123!" + user_action: "update_password" +``` + +### Check User Status + +```yaml +- include_role: + name: infra-redfish-user-management + vars: + bmc_hostname: "192.168.1.100" + bmc_username: "admin" + bmc_password: "admin123" + target_username: "console" + user_action: "status" +``` + +### Delete User Account + +```yaml +- include_role: + name: infra-redfish-user-management + vars: + bmc_hostname: "192.168.1.100" + bmc_username: "admin" + bmc_password: "admin123" + target_username: "console" + user_action: "delete" +``` + +### Auto-Generated Password + +```yaml +- include_role: + name: infra-redfish-user-management + vars: + bmc_hostname: "192.168.1.100" + bmc_username: "admin" + bmc_password: "admin123" + target_username: "console" + user_action: "create" + user_role: "PowerUser" + # target_password will be auto-generated +``` + +### Force Password Update on Existing User + +```yaml +- include_role: + name: infra-redfish-user-management + vars: + bmc_hostname: "192.168.1.100" + bmc_username: "admin" + bmc_password: "admin123" + target_username: "console" + target_password: "NewSecurePass123!" + user_action: "create" + user_role: "PowerUser" + force_password_update_on_existing: true # Force update if user exists +``` + +## Task Flow + +1. **Validation**: Validates user action, role, and password requirements +2. **Password Validation**: Enforces comprehensive password requirements +3. **Connectivity**: Tests BMC connectivity using Redfish API +4. **Account Service**: Discovers account service capabilities +5. **User Operations**: Performs the requested user management operation +6. **PowerUser Role**: Assigns custom OEM role if PowerUser is selected +7. **Summary**: Displays operation completion summary + +## PowerUser Role Details + +The PowerUser role provides custom OEM privileges: + +- **RemoteConsoleAndVirtualMediaAccess**: Console and virtual media management +- **RemoteServerPowerRestartAccess**: Power control capabilities + +This role is ideal for automated deployment scenarios where console access and power management are required. 
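Under the hood, `PowerUser` is not a standard Redfish role: the role grants the OEM privileges by patching the custom role slot that matches the target account, then pointing the account at that role. A simplified sketch of the two Redfish calls involved is shown below; the slot number (`4`) and the literal collection paths are illustrative assumptions, since the real tasks in `tasks/poweruser_role.yml` derive them from the account's `@odata.id` and the discovered account service.

```yaml
# Illustrative sketch only: the role's actual tasks add registration,
# verification, and conditional logic around these two calls.
- name: Grant OEM privileges to the account's custom role (slot 4 assumed)
  ansible.builtin.uri:
    url: "https://{{ bmc_hostname }}/redfish/v1/AccountService/Roles/CustomRole4"
    method: PATCH
    user: "{{ bmc_username }}"
    password: "{{ bmc_password }}"
    validate_certs: false
    force_basic_auth: true
    body_format: json
    body:
      OemPrivileges:
        - "RemoteConsoleAndVirtualMediaAccess"
        - "RemoteServerPowerRestartAccess"
    status_code: [200, 202, 204]

- name: Point the account at the custom role (slot 4 assumed)
  ansible.builtin.uri:
    url: "https://{{ bmc_hostname }}/redfish/v1/AccountService/Accounts/4"
    method: PATCH
    user: "{{ bmc_username }}"
    password: "{{ bmc_password }}"
    validate_certs: false
    force_basic_auth: true
    body_format: json
    body:
      RoleId: "CustomRole4"
    status_code: [200, 202, 204]
```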
+ +## Integration with AgnosticD + +This role integrates with AgnosticD infrastructure workflows: + +```yaml +# Used in XClarity baremetal infrastructure deployment +- include_role: + name: infra-redfish-user-management + vars: + bmc_hostname: "{{ bm_server_info.remote_mgmt_ip }}" + bmc_username: "{{ bm_server_info.remote_mgmt_user }}" + bmc_password: "{{ bm_server_info.remote_mgmt_password }}" + target_username: "console" + target_password: "{{ generated_password }}" + user_action: "create" + user_role: "PowerUser" +``` + +## Return Values + +The role registers the following variables for later use: + +- `target_user_exists` / `target_user_info`: Whether the target user was found, plus its endpoint, ID, and current role +- `create_user_result`, `update_password_result`, `delete_user_result`, `force_update_result`: Raw results of the user management operations +- `poweruser_role_update` / `user_role_assignment`: Results of the PowerUser custom role configuration calls +- `verify_poweruser_assignment`: Read-back verification of the PowerUser assignment + +## Error Handling + +The role includes comprehensive error handling: + +- **Parameter Validation**: Checks for required parameters and valid values +- **Password Validation**: Enforces password complexity requirements +- **Connectivity Testing**: Verifies BMC is reachable before operations +- **Account Service Discovery**: Handles different BMC capabilities +- **Operation Validation**: Validates operations completed successfully + +## Compatibility + +This role works with servers that support the Redfish API standard, including: + +- Lenovo ThinkSystem servers +- Dell PowerEdge servers +- HP/HPE ProLiant servers +- IBM System servers +- Other Redfish-compliant servers + +## Examples + +### Complete User Lifecycle + +```yaml +# Create user +- include_role: + name: infra-redfish-user-management + vars: + bmc_hostname: "192.168.1.100" + bmc_username: "admin" + bmc_password: "admin123" + target_username: "deployuser" + target_password: "Deploy123!" + user_action: "create" + user_role: "Administrator" + +# Update password +- include_role: + name: infra-redfish-user-management + vars: + bmc_hostname: "192.168.1.100" + bmc_username: "admin" + bmc_password: "admin123" + target_username: "deployuser" + target_password: "NewDeploy123!" + user_action: "update_password" + +# Check status +- include_role: + name: infra-redfish-user-management + vars: + bmc_hostname: "192.168.1.100" + bmc_username: "admin" + bmc_password: "admin123" + target_username: "deployuser" + user_action: "status" + +# Delete user +- include_role: + name: infra-redfish-user-management + vars: + bmc_hostname: "192.168.1.100" + bmc_username: "admin" + bmc_password: "admin123" + target_username: "deployuser" + user_action: "delete" +``` + +### PowerUser for Deployment + +```yaml +- include_role: + name: infra-redfish-user-management + vars: + bmc_hostname: "192.168.1.100" + bmc_username: "admin" + bmc_password: "admin123" + target_username: "console" + target_password: "{{ generated_password }}" + user_action: "create" + user_role: "PowerUser" + enable_user: true +``` + +## Security Considerations + +- Store BMC credentials securely using Ansible Vault +- Use strong passwords that meet complexity requirements +- Use SSL/TLS when possible (set `validate_certs: true`) +- Limit network access to BMC interfaces +- Regularly rotate user passwords +- Delete temporary accounts after use + +## Dependencies + +- Ansible `uri` module for HTTP/HTTPS requests +- Network connectivity to BMC interface +- Redfish API support on target servers +- BMC administrative privileges for user management + +## Troubleshooting + +Common issues and solutions: + +1. 
**Password Validation Failures**: Ensure passwords meet complexity requirements +2. **Connection Timeout**: Increase `connection_timeout` value +3. **SSL Certificate Errors**: Set `validate_certs: false` for self-signed certificates +4. **User Creation Failures**: Check available user slots and existing users +5. **PowerUser Role Issues**: Verify BMC supports OEM role extensions +6. **Permission Errors**: Ensure BMC admin account has user management privileges \ No newline at end of file diff --git a/ansible/roles-infra/infra-redfish-user-management/defaults/main.yml b/ansible/roles-infra/infra-redfish-user-management/defaults/main.yml new file mode 100755 index 00000000000..5faa9321224 --- /dev/null +++ b/ansible/roles-infra/infra-redfish-user-management/defaults/main.yml @@ -0,0 +1,21 @@ +--- +# Redfish User Management Role - Defaults + +## BMC Connection Variables (Required) +# These must be provided when calling the role +# bmc_hostname: "192.168.1.100" +# bmc_username: "admin" +# bmc_password: "password123" + +## User Management Variables +target_username: "console" # Target username to manage +target_password: "{{ lookup('password', '/dev/null length=12 chars=ascii_letters,digits') }}" +user_action: "status" # create, update_password, delete, status +user_role: "ReadOnly" # Administrator, Operator, ReadOnly, PowerUser +enable_user: true # Enable user account +force_password_update_on_existing: true # Force password update if user exists during create action + +## Redfish API Settings +validate_certs: false # Skip certificate validation for self-signed certs +force_basic_auth: true # Use basic authentication +connection_timeout: 30 # Connection timeout in seconds diff --git a/ansible/roles-infra/infra-redfish-user-management/meta/main.yml b/ansible/roles-infra/infra-redfish-user-management/meta/main.yml new file mode 100644 index 00000000000..8014976efaf --- /dev/null +++ b/ansible/roles-infra/infra-redfish-user-management/meta/main.yml @@ -0,0 +1,25 @@ +--- +galaxy_info: + author: AgnosticD Team + description: Manages user accounts on Redfish-compliant BMCs + company: Red Hat + license: GPL-3.0+ + min_ansible_version: 2.9 + platforms: + - name: EL + versions: + - 8 + - 9 + - name: Ubuntu + versions: + - 20.04 + - 22.04 + galaxy_tags: + - redfish + - bmc + - user + - management + - baremetal + - infrastructure + +dependencies: [] \ No newline at end of file diff --git a/ansible/roles-infra/infra-redfish-user-management/tasks/account_service.yml b/ansible/roles-infra/infra-redfish-user-management/tasks/account_service.yml new file mode 100755 index 00000000000..caff72e9025 --- /dev/null +++ b/ansible/roles-infra/infra-redfish-user-management/tasks/account_service.yml @@ -0,0 +1,56 @@ +--- +- name: Get current account service information + ansible.builtin.uri: + url: "https://{{ bmc_hostname }}/redfish/v1/AccountService" + method: GET + user: "{{ bmc_username }}" + password: "{{ bmc_password }}" + validate_certs: "{{ validate_certs }}" + force_basic_auth: "{{ force_basic_auth }}" + status_code: 200 + timeout: "{{ connection_timeout }}" + register: account_service_info + tags: [account_service] + +- name: Display account service information + ansible.builtin.debug: + msg: + - "Account Service Information:" + - "Service Enabled: {{ account_service_info.json.ServiceEnabled | default('Unknown') }}" + - "Max Password Length: {{ account_service_info.json.MaxPasswordLength | default('Unknown') }}" + - "Min Password Length: {{ account_service_info.json.MinPasswordLength | default('Unknown') }}" 
+ - "Accounts Collection: {{ account_service_info.json.Accounts['@odata.id'] | default('Unknown') }}" + - "Roles Collection: {{ account_service_info.json.Roles['@odata.id'] | default('Unknown') }}" + when: account_service_info.status == 200 and account_service_info.json is defined + tags: [account_service, debug] + +- name: Get current user accounts + ansible.builtin.uri: + url: "https://{{ bmc_hostname }}{{ account_service_info.json.Accounts['@odata.id'] }}" + method: GET + user: "{{ bmc_username }}" + password: "{{ bmc_password }}" + validate_certs: "{{ validate_certs }}" + force_basic_auth: "{{ force_basic_auth }}" + status_code: 200 + timeout: "{{ connection_timeout }}" + register: current_accounts + when: account_service_info.status == 200 and account_service_info.json is defined and account_service_info.json.Accounts is defined + tags: [account_service, users] + +- name: Initialize current_accounts if not defined + ansible.builtin.set_fact: + current_accounts: + status: 404 + json: + Members: [] + when: current_accounts is not defined + +- name: Display current user accounts summary + ansible.builtin.debug: + msg: + - "Current User Accounts Summary:" + - "Total accounts: {{ current_accounts.json.Members | length }}" + - "Account endpoints: Available ({{ current_accounts.json.Members | length }} total)" + when: current_accounts.status == 200 and current_accounts.json is defined and current_accounts.json.Members is defined + tags: [account_service, users, debug] diff --git a/ansible/roles-infra/infra-redfish-user-management/tasks/connectivity.yml b/ansible/roles-infra/infra-redfish-user-management/tasks/connectivity.yml new file mode 100755 index 00000000000..3bc8cf51d2d --- /dev/null +++ b/ansible/roles-infra/infra-redfish-user-management/tasks/connectivity.yml @@ -0,0 +1,33 @@ +--- +- name: Check BMC connectivity + ansible.builtin.uri: + url: "https://{{ bmc_hostname }}/redfish/v1/" + method: GET + user: "{{ bmc_username }}" + password: "{{ bmc_password }}" + validate_certs: "{{ validate_certs }}" + force_basic_auth: "{{ force_basic_auth }}" + status_code: 200 + timeout: "{{ connection_timeout }}" + register: bmc_connectivity + tags: [connectivity] + +- name: Display BMC connection status + ansible.builtin.debug: + msg: "Successfully connected to BMC at {{ bmc_hostname }}" + when: bmc_connectivity.status == 200 + tags: [connectivity] + +- name: Display Redfish service information + ansible.builtin.debug: + msg: + - "Redfish Service Information:" + - "Service Name: {{ bmc_connectivity.json.Name | default('Not specified') }}" + - "Service ID: {{ bmc_connectivity.json.Id | default('Not specified') }}" + - "Redfish Version: {{ bmc_connectivity.json.RedfishVersion | default('Not specified') }}" + - "Service Version: {{ bmc_connectivity.json.ServiceVersion | default('Not specified') }}" + - "UUID: {{ bmc_connectivity.json.UUID | default('Not specified') }}" + - "Product: {{ bmc_connectivity.json.Product | default('Not specified') }}" + - "Vendor: {{ bmc_connectivity.json.Vendor | default('Not specified') }}" + when: bmc_connectivity.status == 200 and bmc_connectivity.json is defined + tags: [connectivity, version] diff --git a/ansible/roles-infra/infra-redfish-user-management/tasks/main.yml b/ansible/roles-infra/infra-redfish-user-management/tasks/main.yml new file mode 100755 index 00000000000..4d467ab2855 --- /dev/null +++ b/ansible/roles-infra/infra-redfish-user-management/tasks/main.yml @@ -0,0 +1,57 @@ +--- +- name: Validate user action parameter + ansible.builtin.fail: + msg: 
"Invalid user_action. Must be one of: create, update_password, delete, status" + when: user_action not in ['create', 'update_password', 'delete', 'status'] + tags: [always, validation] + +- name: Validate user role parameter + ansible.builtin.fail: + msg: "Invalid user_role. Must be one of: Administrator, Operator, ReadOnly, PowerUser" + when: user_role not in ['Administrator', 'Operator', 'ReadOnly', 'PowerUser'] + tags: [always, validation] + +- name: Validate target password for create/update actions + ansible.builtin.fail: + msg: "target_password is required and cannot be empty when user_action is '{{ user_action }}'" + when: + - user_action in ['create', 'update_password'] + - target_password is not defined or target_password == "" + tags: [always, validation] + +- name: Validate password requirements + include_tasks: validate_password.yml + when: + - target_password is defined and target_password != "" + - user_action in ['create', 'update_password'] + tags: [always, validation] + +- name: Test BMC connectivity + include_tasks: connectivity.yml + tags: [always, connectivity] + +- name: Get account service information + include_tasks: account_service.yml + tags: [always, account_service] + +- name: Manage user account + include_tasks: user_operations.yml + when: user_action in ['create', 'update_password', 'delete', 'status'] + tags: [users] + +- name: Handle PowerUser role assignment + include_tasks: poweruser_role.yml + when: + - user_action in ['create', 'update_password'] + - user_role == "PowerUser" + tags: [role_management, poweruser] + +- name: Display operation summary + ansible.builtin.debug: + msg: + - "Redfish User Management Operation Completed" + - "Action: {{ user_action }}" + - "Target User: {{ target_username }}" + - "User Role: {{ user_role }}" + - "BMC Host: {{ bmc_hostname }}" + tags: [always, summary] diff --git a/ansible/roles-infra/infra-redfish-user-management/tasks/poweruser_role.yml b/ansible/roles-infra/infra-redfish-user-management/tasks/poweruser_role.yml new file mode 100755 index 00000000000..86ce75c59a3 --- /dev/null +++ b/ansible/roles-infra/infra-redfish-user-management/tasks/poweruser_role.yml @@ -0,0 +1,122 @@ +--- +- name: Get user details after creation/update to find role ID + ansible.builtin.uri: + url: "https://{{ bmc_hostname }}{{ target_user_info.endpoint if target_user_exists else available_user_slots[0].json['@odata.id'] }}" + method: GET + user: "{{ bmc_username }}" + password: "{{ bmc_password }}" + validate_certs: "{{ validate_certs }}" + force_basic_auth: "{{ force_basic_auth }}" + status_code: 200 + timeout: "{{ connection_timeout }}" + register: user_details_after_action + when: + - (create_user_result is defined and create_user_result is succeeded) or (update_password_result is defined and update_password_result is succeeded) + tags: [role_management, users] + +- name: Extract account slot number for custom role + ansible.builtin.set_fact: + account_slot_number: "{{ (target_user_info.endpoint if target_user_exists else available_user_slots[0].json['@odata.id']) | regex_search('/(\\d+)$') | regex_replace('/', '') }}" + when: + - user_details_after_action is defined + - user_details_after_action is succeeded + tags: [role_management, users] + +- name: Set custom role name based on account slot + ansible.builtin.set_fact: + custom_role_name: "CustomRole{{ account_slot_number }}" + custom_role_endpoint: "{{ account_service_info.json.Roles['@odata.id'] }}/CustomRole{{ account_slot_number }}" + when: + - account_slot_number is defined + - 
account_slot_number != "" + tags: [role_management, users] + +- name: Extract user role ID + ansible.builtin.set_fact: + user_role_id: "{{ user_details_after_action.json.RoleId | default('') }}" + user_role_endpoint: "{{ user_details_after_action.json.Links.Role['@odata.id'] | default('') }}" + when: + - user_details_after_action is defined + - user_details_after_action is succeeded + tags: [role_management, users] + +- name: Update CustomRole with PowerUser privileges + ansible.builtin.uri: + url: "https://{{ bmc_hostname }}{{ custom_role_endpoint }}" + method: PATCH + user: "{{ bmc_username }}" + password: "{{ bmc_password }}" + validate_certs: "{{ validate_certs }}" + force_basic_auth: "{{ force_basic_auth }}" + body_format: json + body: + OemPrivileges: + - "RemoteConsoleAndVirtualMediaAccess" + - "RemoteServerPowerRestartAccess" + status_code: [200, 202, 204] + timeout: "{{ connection_timeout }}" + when: + - custom_role_endpoint is defined + - custom_role_endpoint != "" + register: poweruser_role_update + tags: [role_management, poweruser] + +- name: Assign user to CustomRole after PowerUser update + ansible.builtin.uri: + url: "https://{{ bmc_hostname }}{{ target_user_info.endpoint if target_user_exists else available_user_slots[0].json['@odata.id'] }}" + method: PATCH + user: "{{ bmc_username }}" + password: "{{ bmc_password }}" + validate_certs: "{{ validate_certs }}" + force_basic_auth: "{{ force_basic_auth }}" + body_format: json + body: + RoleId: "{{ custom_role_name }}" + status_code: [200, 202, 204] + timeout: "{{ connection_timeout }}" + when: + - poweruser_role_update is defined + - poweruser_role_update is succeeded + - custom_role_name is defined + register: user_role_assignment + tags: [role_management, poweruser] + +- name: Display PowerUser role update result + ansible.builtin.debug: + msg: + - "{{ custom_role_name | default('CustomRole') }} updated successfully with PowerUser privileges" + - "Role privileges: RemoteConsoleAndVirtualMediaAccess, RemoteServerPowerRestartAccess" + - "User assigned to {{ custom_role_name | default('CustomRole') }}: {{ 'Success' if (user_role_assignment is defined and user_role_assignment is succeeded) else 'Pending' }}" + when: + - poweruser_role_update is defined + - poweruser_role_update is succeeded + tags: [role_management, poweruser] + +- name: Verify PowerUser role assignment + ansible.builtin.uri: + url: "https://{{ bmc_hostname }}{{ target_user_info.endpoint if target_user_exists else available_user_slots[0].json['@odata.id'] }}" + method: GET + user: "{{ bmc_username }}" + password: "{{ bmc_password }}" + validate_certs: "{{ validate_certs }}" + force_basic_auth: "{{ force_basic_auth }}" + status_code: 200 + timeout: "{{ connection_timeout }}" + register: verify_poweruser_assignment + when: + - user_role_assignment is defined + - user_role_assignment is succeeded + tags: [role_management, poweruser, verification] + +- name: Display PowerUser verification results + ansible.builtin.debug: + msg: + - "PowerUser Role Verification:" + - "Username: {{ verify_poweruser_assignment.json.UserName }}" + - "Role ID: {{ verify_poweruser_assignment.json.RoleId }}" + - "Enabled: {{ verify_poweruser_assignment.json.Enabled }}" + - "PowerUser Setup: Complete" + when: + - verify_poweruser_assignment is defined + - verify_poweruser_assignment is succeeded + tags: [role_management, poweruser, verification] diff --git a/ansible/roles-infra/infra-redfish-user-management/tasks/user_operations.yml 
b/ansible/roles-infra/infra-redfish-user-management/tasks/user_operations.yml new file mode 100755 index 00000000000..19e7f88becc --- /dev/null +++ b/ansible/roles-infra/infra-redfish-user-management/tasks/user_operations.yml @@ -0,0 +1,162 @@ +--- +- name: Get detailed information for each user account + ansible.builtin.uri: + url: "https://{{ bmc_hostname }}{{ item['@odata.id'] }}" + method: GET + user: "{{ bmc_username }}" + password: "{{ bmc_password }}" + validate_certs: "{{ validate_certs }}" + force_basic_auth: "{{ force_basic_auth }}" + status_code: 200 + timeout: "{{ connection_timeout }}" + register: user_details + loop: "{{ current_accounts.json.Members }}" + when: + - current_accounts.json is defined + - current_accounts.json.Members is defined + - item['@odata.id'] is defined + tags: [users, status] + +- name: Initialize user_details when no accounts were queried + ansible.builtin.set_fact: + user_details: + results: [] + when: user_details.results is not defined + +- name: Check if target user already exists + ansible.builtin.set_fact: + target_user_exists: false + target_user_info: {} + available_user_slots: [] + +- name: Find target user and available slots + ansible.builtin.set_fact: + target_user_exists: "{{ target_user_exists or (item.json.UserName | default('')) == target_username }}" + target_user_info: "{{ target_user_info | combine({'endpoint': item.json['@odata.id'], 'id': item.json.Id, 'role': item.json.RoleId | default('')}) if (item.json.UserName | default('')) == target_username else target_user_info }}" + available_user_slots: "{{ available_user_slots + [{'json': item.json}] if (item.json.UserName | default('')) == '' and item.json['@odata.id'] is defined else available_user_slots }}" + loop: "{{ user_details.results }}" + loop_control: + label: "User ID {{ item.json.Id | default('Unknown') }}: {{ item.json.UserName | default('(empty)') }}" + when: user_details.results | length > 0 + tags: [users, status] + +- name: Display user status information + ansible.builtin.debug: + msg: + - "Target User Status:" + - "Username: {{ target_username }}" + - "User Exists: {{ target_user_exists }}" + - "User Info: {{ target_user_info if target_user_exists else 'User not found' }}" + - "Available Slots: {{ available_user_slots | length }}" + when: target_user_exists or user_action == 'status' + tags: [users, status] + +- name: Display user exists message + ansible.builtin.debug: + msg: "User '{{ target_username }}' already exists. Current role: {{ target_user_info.role }}. {{ 'Forcing password update.' if force_password_update_on_existing | bool else 'Skipping creation.' 
}}" + when: + - target_user_exists + - user_action == 'create' + tags: [users, create] + +- name: Force password update for existing user during create action + ansible.builtin.uri: + url: "https://{{ bmc_hostname }}{{ target_user_info.endpoint }}" + method: PATCH + user: "{{ bmc_username }}" + password: "{{ bmc_password }}" + validate_certs: "{{ validate_certs }}" + force_basic_auth: "{{ force_basic_auth }}" + body_format: json + body: + Password: "{{ target_password }}" + RoleId: "{{ user_role if user_role != 'PowerUser' else 'ReadOnly' }}" + Enabled: "{{ enable_user | bool }}" + status_code: [200, 202, 204] + timeout: "{{ connection_timeout }}" + when: + - target_user_exists + - user_action == 'create' + - force_password_update_on_existing | bool + register: force_update_result + tags: [users, create, force_update] + +- name: Display user not found message + ansible.builtin.debug: + msg: "User '{{ target_username }}' does not exist. Cannot perform action '{{ user_action }}'." + when: not target_user_exists and user_action in ['update_password', 'delete'] + tags: [users, update_password, delete] + +- name: Display no slots message + ansible.builtin.debug: + msg: "No available user slots found. Cannot create new user '{{ target_username }}'." + when: not target_user_exists and user_action == 'create' and available_user_slots | length == 0 + tags: [users, create] + +- name: Create new user account + ansible.builtin.uri: + url: "https://{{ bmc_hostname }}{{ available_user_slots[0].json['@odata.id'] }}" + method: PATCH + user: "{{ bmc_username }}" + password: "{{ bmc_password }}" + validate_certs: "{{ validate_certs }}" + force_basic_auth: "{{ force_basic_auth }}" + body_format: json + body: + UserName: "{{ target_username }}" + Password: "{{ target_password }}" + RoleId: "{{ user_role if user_role != 'PowerUser' else 'ReadOnly' }}" + Enabled: "{{ enable_user | bool }}" + status_code: [200, 202, 204] + timeout: "{{ connection_timeout }}" + when: + - not target_user_exists + - user_action == 'create' + - available_user_slots | length > 0 + register: create_user_result + tags: [users, create] + +- name: Update user password + ansible.builtin.uri: + url: "https://{{ bmc_hostname }}{{ target_user_info.endpoint }}" + method: PATCH + user: "{{ bmc_username }}" + password: "{{ bmc_password }}" + validate_certs: "{{ validate_certs }}" + force_basic_auth: "{{ force_basic_auth }}" + body_format: json + body: + Password: "{{ target_password }}" + status_code: [200, 202, 204] + timeout: "{{ connection_timeout }}" + when: target_user_exists and user_action == 'update_password' + register: update_password_result + tags: [users, update_password] + +- name: Delete user account + ansible.builtin.uri: + url: "https://{{ bmc_hostname }}{{ target_user_info.endpoint }}" + method: PATCH + user: "{{ bmc_username }}" + password: "{{ bmc_password }}" + validate_certs: "{{ validate_certs }}" + force_basic_auth: "{{ force_basic_auth }}" + body_format: json + body: + UserName: "" + status_code: [200, 202, 204] + timeout: "{{ connection_timeout }}" + when: target_user_exists and user_action == 'delete' + register: delete_user_result + tags: [users, delete] + +- name: Display operation results + ansible.builtin.debug: + msg: + - "User Operation Results:" + - "Action: {{ user_action }}" + - "User: {{ target_username }}" + - "Success: {{ 'Yes' if (create_user_result is succeeded or update_password_result is succeeded or delete_user_result is succeeded or force_update_result is succeeded) else 'N/A' }}" + - "{{ 'Password 
forcefully updated for existing user' if force_update_result is succeeded else '' }}" + when: user_action in ['create', 'update_password', 'delete'] + tags: [users, results] diff --git a/ansible/roles-infra/infra-redfish-user-management/tasks/validate_password.yml b/ansible/roles-infra/infra-redfish-user-management/tasks/validate_password.yml new file mode 100755 index 00000000000..cb6e07d3c7a --- /dev/null +++ b/ansible/roles-infra/infra-redfish-user-management/tasks/validate_password.yml @@ -0,0 +1,75 @@ +--- +- name: Display password validation status + ansible.builtin.debug: + msg: "Validating target_password ({{ target_password | length }} characters); value withheld to avoid logging secrets" + +- name: Validate password length (10-32 characters) + ansible.builtin.fail: + msg: "Password must be between 10 and 32 characters long (current length: {{ target_password | length }})" + when: + - target_password | length < 10 or target_password | length > 32 + +- name: Validate password contains allowed characters only + ansible.builtin.fail: + msg: "Password contains invalid characters. Only A-Z, a-z, 0-9, and ~`!@#$%^&*()-+={}[]|:;\"'<>,?/._ are allowed" + when: + - not (target_password | regex_search('^[A-Za-z0-9~`!@#$%^&*()\\-+={}\\[\\]|:;"\'<>,?/._]+$')) + +- name: Validate password contains at least one letter + ansible.builtin.fail: + msg: "Password must contain at least one letter (A-Z or a-z)" + when: + - not (target_password | regex_search('[A-Za-z]')) + +- name: Validate password contains at least one number + ansible.builtin.fail: + msg: "Password must contain at least one number (0-9)" + when: + - not (target_password | regex_search('[0-9]')) + +- name: Check password complexity requirements + ansible.builtin.set_fact: + has_uppercase: "{{ (target_password | regex_search('[A-Z]')) is not none }}" + has_lowercase: "{{ (target_password | regex_search('[a-z]')) is not none }}" + has_special: "{{ (target_password | regex_search('[~`!@#$%^&*()\\-+={}\\[\\]|:;<>,?/._]')) is not none }}" + +- name: Debug password complexity check + ansible.builtin.debug: + msg: + - "Password complexity analysis:" + - "Has uppercase: {{ has_uppercase }}" + - "Has lowercase: {{ has_lowercase }}" + - "Has special: {{ has_special }}" + tags: [debug] + +- name: Calculate complexity score + ansible.builtin.set_fact: + complexity_count: "{{ (has_uppercase | bool | int) + (has_lowercase | bool | int) + (has_special | bool | int) }}" + +- name: Validate password has at least 2 character types + ansible.builtin.fail: + msg: "Password must contain at least 2 of: uppercase letter, lowercase letter, or special character. 
Found {{ complexity_count }}/3 types" + when: + - complexity_count | int < 2 + +- name: Validate password has no more than 2 consecutive identical characters + ansible.builtin.fail: + msg: "Password cannot have more than 2 consecutive identical characters" + when: + - target_password | regex_search('(.)\\1{2}') + +- name: Validate password is not same as username + ansible.builtin.fail: + msg: "Password cannot be the same as username (case-insensitive)" + when: + - target_password | lower == target_username | lower + +- name: Create reversed username for validation + ansible.builtin.set_fact: + reversed_username: "{{ target_username[::-1] }}" + +- name: Validate password is not reverse of username + ansible.builtin.fail: + msg: "Password cannot be the reverse of username (case-insensitive)" + when: + - target_password | lower == reversed_username | lower diff --git a/ansible/setup_runtime.yml b/ansible/setup_runtime.yml index 744843ff13e..0650cc38821 100644 --- a/ansible/setup_runtime.yml +++ b/ansible/setup_runtime.yml @@ -26,6 +26,7 @@ - vmware_ibm - multi - shared_openshift + - redfish_baremetal assert: that: cloud_provider in agnosticd_cloud_providers msg: "Cloud provider {{ cloud_provider }} is not supported."
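With `redfish_baremetal` registered as a supported cloud provider, deploys and destroys can be driven through AgnosticD's usual entry point. A hypothetical minimal var file is sketched below; only `cloud_provider`, `ibm_cloud_server_id`, and `ibm_cloud_api_key` are validated by the playbooks above, while the remaining names follow common AgnosticD conventions and the exact set depends on the config in use.

```yaml
# sample-vars.yml (illustrative only)
cloud_provider: redfish_baremetal
ibm_cloud_server_id: "1234567"   # required; validated by the deploy and destroy playbooks
ibm_cloud_api_key: "REPLACE_ME"  # required; validated by the deploy playbook
guid: demo1
output_dir: /tmp/output-demo1
```

Such a file would typically be passed as extra vars, for example `ansible-playbook main.yml -e @sample-vars.yml` from the `ansible/` directory.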