Skip to content

Commit

Permalink
[vm-repair] Adding repair-and-restore one command flow for fstab scri…
Browse files Browse the repository at this point in the history
…pts (#6244)
  • Loading branch information
haagha committed Jun 27, 2023
1 parent 5c05bd2 commit 3bb5485
Show file tree
Hide file tree
Showing 9 changed files with 207 additions and 17 deletions.
8 changes: 8 additions & 0 deletions src/vm-repair/HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
Release History
===============

0.5.4
++++++
Added the repair-and-restore command, which provides a one-command flow for running the vm-repair fstab scripts.

0.5.3
++++++
Removing check for EncryptionSettingsCollection.enabled is string 'false'.
Expand All @@ -18,6 +22,10 @@ Updated exsiting privateIpAddress field to privateIPAddress and privateIpAllocat
++++++
Support for hosting repair vm in existing resource group and fixing existing resource group logic

0.5.0
++++++
Support for hosting repair vm in existing resource group and fixing existing resource group logic

0.4.10
++++++
Support for hosting repair vm in existing resource group and fixing existing resource group logic
Expand Down
9 changes: 9 additions & 0 deletions src/vm-repair/azext_vm_repair/_help.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,12 @@
text: >
az vm repair reset-nic -g MyResourceGroup -n MyVM --yes --verbose
"""

# Help entry for the one-command flow: the summary spells out what the command
# actually does so users can tell it apart from `create`, `run` and `restore`.
helps['vm repair repair-and-restore'] = """
    type: command
    short-summary: Create a repair VM, run the fstab repair script on a copy of the OS disk, and swap the repaired disk back into the source VM in a single command.
    examples:
        - name: Repair and restore a VM.
          text: >
            az vm repair repair-and-restore --name MyVM --resource-group MyResourceGroup --verbose
"""
8 changes: 8 additions & 0 deletions src/vm-repair/azext_vm_repair/_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,11 @@ def load_arguments(self, _):
with self.argument_context('vm repair reset-nic') as c:
c.argument('subscriptionid', help='Subscription id to default subscription using `az account set -s NAME_OR_ID`.')
c.argument('yes', help='Do not prompt for confirmation to start VM if it is not running.')

with self.argument_context('vm repair repair-and-restore') as c:
    # Every optional argument of the command is registered exactly once
    # ('copy_disk_name' used to be registered twice).
    c.argument('repair_username', help='Admin username for repair VM.')
    c.argument('repair_password', help='Admin password for the repair VM.')
    c.argument('repair_vm_name', help='Name of repair VM.')
    c.argument('copy_disk_name', help='Name of OS disk copy.')
    c.argument('repair_group_name', help='Name for new or existing resource group that will contain repair VM.')
54 changes: 54 additions & 0 deletions src/vm-repair/azext_vm_repair/_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,3 +376,57 @@ def validate_vm_username(username, is_linux):

if username.lower() in disallowed_user_names:
raise CLIError("This username '{}' meets the general requirements, but is specifically disallowed. Please try a different value.".format(username))


def validate_repair_and_restore(cmd, namespace):
    """Validate and default the arguments of ``vm repair repair-and-restore``.

    Checks that the source VM exists, fills in default names for the repair VM,
    the OS disk copy and the repair resource group when they were not supplied,
    rejects dual-pass encrypted disks, and validates any credentials the caller
    provided.

    :param cmd: CLI command context, forwarded to the VM lookup helper.
    :param namespace: parsed argument namespace; it is updated in place.
    :raises CLIError: if the repair resource group equals the source VM's
        resource group, or the OS disk is encrypted with the dual pass method.
    """
    check_extension_version(EXTENSION_NAME)

    logger.info('Validating repair and restore parameters...')

    # Lazy %-formatting: no string concatenation that could fail on a missing value
    logger.info('%s %s', namespace.vm_name, namespace.resource_group_name)

    # Check if VM exists and is not classic VM (fetched once and reused below)
    source_vm = _validate_and_get_vm(cmd, namespace.resource_group_name, namespace.vm_name)
    is_linux = _is_linux_os(source_vm)

    # Default the repair VM name only when the caller did not supply one
    if not namespace.repair_vm_name:
        namespace.repair_vm_name = ('repair-' + namespace.vm_name)[:14] + '_'
    logger.info('Repair VM name: %s', namespace.repair_vm_name)

    # Check copy disk name
    timestamp = datetime.utcnow().strftime('%Y%m%d%H%M%S')
    if namespace.copy_disk_name:
        _validate_disk_name(namespace.copy_disk_name)
    else:
        namespace.copy_disk_name = namespace.vm_name + '-DiskCopy-' + timestamp
    logger.info('Copy disk name: %s', namespace.copy_disk_name)

    # Check repair resource group name
    if namespace.repair_group_name:
        if namespace.repair_group_name == namespace.resource_group_name:
            raise CLIError('The repair resource group name cannot be the same as the source VM resource group.')
        _validate_resource_group_name(namespace.repair_group_name)
    else:
        namespace.repair_group_name = 'repair-' + namespace.vm_name + '-' + timestamp
    logger.info('Repair resource group name: %s', namespace.repair_group_name)

    # Check encrypted disk
    encryption_type, _, _, _ = _fetch_encryption_settings(source_vm)
    # Currently only supporting single pass
    if encryption_type in (Encryption.SINGLE_WITH_KEK, Encryption.SINGLE_WITHOUT_KEK):
        # This command registers no unlock_encrypted_vm argument, so the
        # attribute may be absent from the namespace; treat that as "not set".
        if not getattr(namespace, 'unlock_encrypted_vm', False):
            _prompt_encrypted_vm(namespace)
    elif encryption_type is Encryption.DUAL:
        logger.warning('The source VM\'s OS disk is encrypted using dual pass method.')
        raise CLIError('The current command does not support VMs which were encrypted using dual pass.')
    else:
        logger.debug('The source VM\'s OS disk is not encrypted')

    # Both credentials are optional for this command, so only validate the
    # ones that were actually supplied.
    if namespace.repair_username is not None:
        validate_vm_username(namespace.repair_username, is_linux)
    if namespace.repair_password is not None:
        validate_vm_password(namespace.repair_password, is_linux)

    # The one-command flow never attaches a public IP to the repair VM
    namespace.associate_public_ip = False
1 change: 1 addition & 0 deletions src/vm-repair/azext_vm_repair/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ def load_command_table(self, _):
g.custom_command('run', 'run', validator=validate_run)
g.custom_command('list-scripts', 'list_scripts')
g.custom_command('reset-nic', 'reset_nic', is_preview=True, validator=validate_reset_nic)
g.custom_command('repair-and-restore', 'repair_and_restore', is_preview=True)
102 changes: 92 additions & 10 deletions src/vm-repair/azext_vm_repair/custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from azure.cli.command_modules.vm.custom import get_vm, _is_linux_os
from azure.cli.command_modules.storage.storage_url_helpers import StorageResourceIdentifier
from msrestazure.tools import parse_resource_id
from .exceptions import SkuDoesNotSupportHyperV
from .exceptions import AzCommandError, SkuNotAvailableError, UnmanagedDiskCopyError, WindowsOsNotAvailableError, RunScriptNotFoundForIdError, SkuDoesNotSupportHyperV, ScriptReturnsError, SupportingResourceNotFoundError, CommandCanceledByUserError

from .command_helper_class import command_helper
from .repair_utils import (
Expand Down Expand Up @@ -45,11 +45,15 @@
_check_n_start_vm,
_check_existing_rg
)
from .exceptions import AzCommandError, SkuNotAvailableError, UnmanagedDiskCopyError, WindowsOsNotAvailableError, RunScriptNotFoundForIdError, SkuDoesNotSupportHyperV, ScriptReturnsError, SupportingResourceNotFoundError, CommandCanceledByUserError
from .exceptions import AzCommandError, RunScriptNotFoundForIdError, SupportingResourceNotFoundError, CommandCanceledByUserError
logger = get_logger(__name__)


def create(cmd, vm_name, resource_group_name, repair_password=None, repair_username=None, repair_vm_name=None, copy_disk_name=None, repair_group_name=None, unlock_encrypted_vm=False, enable_nested=False, associate_public_ip=False, distro='ubuntu', yes=False):

# Log the call parameters for troubleshooting. The repair password is masked so
# that the credential never ends up in debug logs or support bundles.
logger.debug('vm repair create command parameters: vm_name: %s, resource_group_name: %s, repair_password: %s, repair_username: %s, repair_vm_name: %s, copy_disk_name: %s, repair_group_name: %s, unlock_encrypted_vm: %s, enable_nested: %s, associate_public_ip: %s, distro: %s, yes: %s', vm_name, resource_group_name, '********' if repair_password else None, repair_username, repair_vm_name, copy_disk_name, repair_group_name, unlock_encrypted_vm, enable_nested, associate_public_ip, distro, yes)

# Init command helper object
command = command_helper(logger, cmd, 'vm repair create')
# Main command calling block
Expand Down Expand Up @@ -101,7 +105,8 @@ def create(cmd, vm_name, resource_group_name, repair_password=None, repair_usern
create_repair_vm_command += ' --zone {zone}'.format(zone=zone)

# Create new resource group
if not _check_existing_rg(repair_group_name):
existing_rg = _check_existing_rg(repair_group_name)
if not existing_rg:
create_resource_group_command = 'az group create -l {loc} -n {group_name}' \
.format(loc=source_vm.location, group_name=repair_group_name)
logger.info('Creating resource group for repair VM and its resources...')
Expand Down Expand Up @@ -272,7 +277,7 @@ def create(cmd, vm_name, resource_group_name, repair_password=None, repair_usern
if not command.is_status_success():
command.set_status_error()
return_dict = command.init_return_dict()
if _check_existing_rg(repair_group_name):
if existing_rg:
_clean_up_resources(repair_group_name, confirm=True)
else:
_clean_up_resources(repair_group_name, confirm=False)
Expand Down Expand Up @@ -305,9 +310,11 @@ def restore(cmd, vm_name, resource_group_name, disk_name=None, repair_vm_id=None
# Fetch source and repair VM data
source_vm = get_vm(cmd, resource_group_name, vm_name)
is_managed = _uses_managed_disk(source_vm)
repair_vm_id = parse_resource_id(repair_vm_id)
repair_vm_name = repair_vm_id['name']
repair_resource_group = repair_vm_id['resource_group']
if repair_vm_id:
logger.info('Repair VM ID: %s', repair_vm_id)
repair_vm_id = parse_resource_id(repair_vm_id)
repair_vm_name = repair_vm_id['name']
repair_resource_group = repair_vm_id['resource_group']
source_disk = None

# MANAGED DISK
Expand Down Expand Up @@ -379,6 +386,10 @@ def restore(cmd, vm_name, resource_group_name, disk_name=None, repair_vm_id=None

def run(cmd, vm_name, resource_group_name, run_id=None, repair_vm_id=None, custom_script_file=None, parameters=None, run_on_repair=False, preview=None):

# log method parameters
logger.debug('vm repair run parameters: vm_name: %s, resource_group_name: %s, run_id: %s, repair_vm_id: %s, custom_script_file: %s, parameters: %s, run_on_repair: %s, preview: %s',
vm_name, resource_group_name, run_id, repair_vm_id, custom_script_file, parameters, run_on_repair, preview)

# Init command helper object
command = command_helper(logger, cmd, 'vm repair run')
LINUX_RUN_SCRIPT_NAME = 'linux-run-driver.sh'
Expand All @@ -397,9 +408,13 @@ def run(cmd, vm_name, resource_group_name, run_id=None, repair_vm_id=None, custo
script_name = WINDOWS_RUN_SCRIPT_NAME

# If run_on_repair is False, then repair_vm is the source_vm (scripts run directly on source vm)
repair_vm_id = parse_resource_id(repair_vm_id)
repair_vm_name = repair_vm_id['name']
repair_resource_group = repair_vm_id['resource_group']
if run_on_repair:
repair_vm_id = parse_resource_id(repair_vm_id)
repair_vm_name = repair_vm_id['name']
repair_resource_group = repair_vm_id['resource_group']
else:
repair_vm_name = vm_name
repair_resource_group = resource_group_name

run_command_params = []
additional_scripts = []
Expand Down Expand Up @@ -650,3 +665,70 @@ def reset_nic(cmd, vm_name, resource_group_name, yes=False):
return_dict = command.init_return_dict()

return return_dict


def repair_and_restore(cmd, vm_name, resource_group_name, repair_password=None, repair_username=None, repair_vm_name=None, copy_disk_name=None, repair_group_name=None):
    """Run the create -> fstab script -> restore flow for a VM in one command.

    A repair VM is created with a copy of the source VM's OS disk, the
    'linux-alar2' run script is executed with the 'fstab' parameter on the
    repair VM, and the repaired disk is then swapped back into the source VM.

    :param cmd: CLI command context.
    :param vm_name: name of the source VM.
    :param resource_group_name: resource group of the source VM.
    :param repair_password: admin password for the repair VM; a random one is
        generated when omitted.
    :param repair_username: admin username for the repair VM; a random one is
        generated when omitted.
    :param repair_vm_name: name of the repair VM; derived from vm_name when omitted.
    :param copy_disk_name: name of the OS disk copy; derived from vm_name and a
        UTC timestamp when omitted.
    :param repair_group_name: resource group for the repair VM; derived from
        vm_name and a UTC timestamp when omitted.
    :return: result dict. On failure it is the command helper's error dict; on
        completion it is the dict returned by restore() extended with
        'repair_vm_name', 'copied_disk_name' and 'repair_resource_group'.
    """
    from datetime import datetime
    import secrets
    import string
    import traceback

    # Init command helper object
    command = command_helper(logger, cmd, 'vm repair repair-and-restore')

    def _fail(message, stack_trace=None):
        """Record the failure on the command helper and build the error result."""
        command.set_status_error()
        if stack_trace:
            command.error_stack_trace = stack_trace
        command.error_message = message
        command.message = message
        return command.init_return_dict()

    # Generate throw-away credentials only when the caller did not supply any
    if not repair_password:
        password_length = 30
        character_classes = (string.ascii_lowercase, string.ascii_uppercase, string.digits)
        # One character from every class guarantees the password always mixes
        # lower case, upper case and digits; purely random picks occasionally
        # miss a class and would then fail Azure's complexity check.
        password_chars = [secrets.choice(chars) for chars in character_classes]
        all_characters = ''.join(character_classes)
        password_chars.extend(secrets.choice(all_characters) for _ in range(password_length - len(password_chars)))
        secrets.SystemRandom().shuffle(password_chars)
        repair_password = ''.join(password_chars)

    if not repair_username:
        username_length = 20
        username_characters = string.ascii_lowercase + string.digits
        # Start with a letter: account names beginning with a digit are
        # rejected by common Linux user-management tools.
        repair_username = secrets.choice(string.ascii_lowercase) + \
            ''.join(secrets.choice(username_characters) for _ in range(username_length - 1))

    # Fall back to generated resource names only for values that were not supplied
    timestamp = datetime.utcnow().strftime('%Y%m%d%H%M%S')
    repair_vm_name = repair_vm_name or (('repair-' + vm_name)[:14] + '_')
    copy_disk_name = copy_disk_name or (vm_name + '-DiskCopy-' + timestamp)
    repair_group_name = repair_group_name or ('repair-' + vm_name + '-' + timestamp)

    # Must be checked before create() runs, so cleanup knows whether the
    # resource group pre-existed and therefore needs a confirmation prompt.
    existing_rg = bool(_check_existing_rg(repair_group_name))

    create_out = create(cmd, vm_name, resource_group_name, repair_password, repair_username, repair_vm_name=repair_vm_name, copy_disk_name=copy_disk_name, repair_group_name=repair_group_name, associate_public_ip=False, yes=True)

    # log create_out
    logger.info('create_out: %s', create_out)

    # A failed create does not report the created resources; stop here instead
    # of failing with a KeyError on the lookups below.
    required_keys = ('repair_vm_name', 'copied_disk_name', 'repair_resource_group')
    if not create_out or any(key not in create_out for key in required_keys):
        return _fail('Command failed when creating the repair VM.')

    repair_vm_name = create_out['repair_vm_name']
    copy_disk_name = create_out['copied_disk_name']
    repair_group_name = create_out['repair_resource_group']

    logger.info('Running fstab run command')

    try:
        run_out = run(cmd, repair_vm_name, repair_group_name, run_id='linux-alar2', parameters=["fstab"])
    except Exception:  # pylint: disable=broad-except
        error_result = _fail('Command failed when running fstab script.', traceback.format_exc())
        _clean_up_resources(repair_group_name, confirm=existing_rg)
        return error_result

    # log run_out
    logger.info('run_out: %s', run_out)

    # Only swap the disk back when the script is known not to have failed; a
    # result without 'script_status' is treated as a failure as well.
    if not run_out or run_out.get('script_status', 'ERROR') == 'ERROR':
        logger.error('fstab script returned an error.')
        error_result = _fail('fstab script returned an error.')
        _clean_up_resources(repair_group_name, confirm=existing_rg)
        return error_result

    logger.info('Running restore command')
    show_vm_id = 'az vm show -g {g} -n {n} --query id -o tsv' \
        .format(g=repair_group_name, n=repair_vm_name)

    try:
        # tsv output ends with a newline that must not become part of the id
        repair_vm_id = _call_az_command(show_vm_id).strip()
    except AzCommandError:
        error_result = _fail('Command failed when fetching the repair VM id.', traceback.format_exc())
        _clean_up_resources(repair_group_name, confirm=existing_rg)
        return error_result

    restore_out = restore(cmd, vm_name, resource_group_name, copy_disk_name, repair_vm_id, yes=True)

    # The restore step decides the overall status; add the resource names so
    # callers can tell which disk was swapped in.
    return_dict = dict(restore_out or {})
    return_dict.setdefault('repair_vm_name', repair_vm_name)
    return_dict.setdefault('copied_disk_name', copy_disk_name)
    return_dict.setdefault('repair_resource_group', repair_group_name)
    return return_dict
15 changes: 9 additions & 6 deletions src/vm-repair/azext_vm_repair/repair_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ def _check_existing_rg(rg_name):
logger.error(azCommandError)
raise Exception('Unexpected error occured while fetching existing resource groups.')

logger.info('Resource group exists is \'%s\'', group_exists)
logger.info('Pre-existing repair resource group with the same name is \'%s\'', group_exists)
return group_exists


Expand Down Expand Up @@ -491,22 +491,17 @@ def _fetch_compatible_windows_os_urn(source_vm):

def _suse_image_selector(distro):
    """Return the first URN of the reverse-sorted gen1 SUSE images for an offer.

    :param distro: value used as the ``--offer`` of the ``az vm image list`` query.
    :return: the first URN in the reverse-sorted result list.
    :raises SuseNotAvailableError: if the query returns no URNs.
    """
    fetch_urn_command = 'az vm image list --publisher SUSE --offer {offer} --sku gen1 --verbose --all --query "[].urn | reverse(sort(@))" -o json'.format(offer=distro)
    logger.info('Fetching compatible SUSE OS images from gallery...')
    # The command prints a JSON array of URN strings, parsed here into a list
    urns = loads(_call_az_command(fetch_urn_command))

    # Raise exception when not finding SUSE image
    if not urns:
        raise SuseNotAvailableError()

    logger.debug('Fetched urns: \n%s', urns)
    # Returning the first URN as it is the latest image with no special use like HPC or SAP
    logger.debug('Return the first URN : %s', urns[0])
    return urns[0]


def _suse_image_selector_gen2(distro):
fetch_urn_command = 'az vm image list --publisher SUSE --offer {offer} --sku gen2 --verbose --all --query "[].urn | reverse(sort(@))" -o json'.format(offer=distro)
logger.info('Fetching compatible SUSE OS images from gallery...')
urns = loads(_call_az_command(fetch_urn_command))

# Raise exception when not finding SUSE image
Expand Down Expand Up @@ -711,6 +706,14 @@ def _unlock_encrypted_vm_run(repair_vm_name, repair_group_name, is_linux):


def _create_repair_vm(copy_disk_id, create_repair_vm_command, repair_password, repair_username, fix_uuid=False):

# Log the call parameters for troubleshooting. The repair password is masked so
# that the credential is never written to info-level logs.
# NOTE(review): create_repair_vm_command is built by the caller and may itself
# embed credentials - confirm before keeping it at info level.
logger.info('Creating repair VM with command: %s', create_repair_vm_command)
logger.info('copy_disk_id: %s', copy_disk_id)
logger.info('repair_password: %s', '********' if repair_password else None)
logger.info('repair_username: %s', repair_username)
logger.info('fix_uuid: %s', fix_uuid)

if not fix_uuid:
create_repair_vm_command += ' --attach-data-disks {id}'.format(id=copy_disk_id)
logger.info('Validating VM template before continuing...')
Expand Down
25 changes: 25 additions & 0 deletions src/vm-repair/azext_vm_repair/tests/latest/test_repair_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -717,3 +717,28 @@ def test_vmrepair_ResetNicWindowsVM(self, resource_group):
vm_instance_view = self.cmd('vm get-instance-view -g {rg} -n {vm} -o json').get_output_in_json()
vm_power_state = vm_instance_view['instanceView']['statuses'][1]['code']
assert vm_power_state == 'PowerState/running'


@pytest.mark.repairandrestore
class RepairAndRestoreLinuxVM(LiveScenarioTest):
    """Live scenario test for `vm repair repair-and-restore` on a Linux VM."""

    @ResourceGroupPreparer(location='westus2')
    def test_vmrepair_RepairAndRestoreLinuxVM(self, resource_group):
        self.kwargs.update({
            'vm': 'vm1'
        })

        # Create test VM. The command runs the Linux fstab script, so the
        # source VM has to be a Linux image rather than a Windows one.
        self.cmd('vm create -g {rg} -n {vm} --admin-username azureadmin --image Ubuntu2204 --admin-password !Passw0rd2018')
        vms = self.cmd('vm list -g {rg} -o json').get_output_in_json()
        # Something wrong with vm create command if it fails here
        assert len(vms) == 1

        # Test Repair and restore; parse the JSON output so the result can be
        # indexed like a dict.
        result = self.cmd('vm repair repair-and-restore -g {rg} -n {vm} -o json').get_output_in_json()
        assert result['status'] == STATUS_SUCCESS, result['error_message']

        # Check swapped OS disk
        vms = self.cmd('vm list -g {rg} -o json').get_output_in_json()
        source_vm = vms[0]
        assert source_vm['storageProfile']['osDisk']['name'] == result['copied_disk_name']
2 changes: 1 addition & 1 deletion src/vm-repair/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from codecs import open
from setuptools import setup, find_packages

VERSION = "0.5.3"
VERSION = "0.5.4"

CLASSIFIERS = [
'Development Status :: 4 - Beta',
Expand Down

0 comments on commit 3bb5485

Please sign in to comment.