Skip to content

Commit

Permalink
AKS: Add validation for PremiumV2 disks and fixes in error messages (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
mukhoakash authored Jul 24, 2024
1 parent 3ca3d51 commit 0f1ef37
Show file tree
Hide file tree
Showing 8 changed files with 84 additions and 27 deletions.
5 changes: 5 additions & 0 deletions src/aks-preview/HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,13 @@ To release a new version, please select a new version number (usually plus 1 to

Pending
+++++++

7.0.0b2
++++++++
* Update the minimum required cli core version to `2.61.0`.
* Add option `--enable-imds-restriction --disable-imds-restriction` to `az aks create` and `az aks update`.
* Introduce valdations to `az aks create` and `az aks update` while using PremiumV2 disk during enabling Azure Container Storage.
* Delete the Azure Container Storage installation after failure to prevent retries.

7.0.0b1
++++++++
Expand Down
2 changes: 1 addition & 1 deletion src/aks-preview/azext_aks_preview/_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -1323,7 +1323,7 @@ def load_arguments(self, _):
c.argument(
"disable_azure_container_storage",
arg_type=get_enum_type(disable_storage_pool_types),
help="disable azure container storage or any one of the storagepool types",
help="disable azure container storage or any one of the storage pool types",
)
c.argument(
"storage_pool_name",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def validate_storagepool_creation(
if not role_assignment_success:
raise UnknownError(
f"Cannot set --enable-azure-container-storage to {CONST_STORAGE_POOL_TYPE_ELASTIC_SAN}. "
"Unable to add Role Assignments needed for Elastic SAN storagepools to be functional. "
"Unable to add Role Assignments needed for Elastic SAN storage pools to be functional. "
"Please check with your admin for permissions."
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
CONST_STORAGE_POOL_OPTION_SSD,
CONST_STORAGE_POOL_SKU_PREMIUM_LRS,
CONST_STORAGE_POOL_SKU_PREMIUM_ZRS,
CONST_STORAGE_POOL_SKU_PREMIUMV2_LRS,
CONST_STORAGE_POOL_TYPE_AZURE_DISK,
CONST_STORAGE_POOL_TYPE_ELASTIC_SAN,
CONST_STORAGE_POOL_TYPE_EPHEMERAL_DISK,
Expand Down Expand Up @@ -137,7 +138,7 @@ def validate_disable_azure_container_storage_params( # pylint: disable=too-many
if is_storagepool_type_not_active:
raise ArgumentUsageError(
'Invalid --disable-azure-container-storage value. '
'Azure Container Storage is not enabled for storagepool '
'Azure Container Storage is not enabled for storage pool '
f'type {storage_pool_type} in the cluster.'
)

Expand Down Expand Up @@ -166,8 +167,8 @@ def validate_disable_azure_container_storage_params( # pylint: disable=too-many

if number_of_storagepool_types_active == number_of_storagepool_types_to_be_disabled:
raise ArgumentUsageError(
f'Since {storage_pool_type} is the only storagepool type enabled for Azure Container Storage, '
'disabling the storagepool type will lead to disabling Azure Container Storage from the cluster. '
f'Since {storage_pool_type} is the only storage pool type enabled for Azure Container Storage, '
'disabling the storage pool type will lead to disabling Azure Container Storage from the cluster. '
f'To disable Azure Container Storage, set --disable-azure-container-storage to {CONST_ACSTOR_ALL}.'
)

Expand Down Expand Up @@ -362,6 +363,7 @@ def validate_enable_azure_container_storage_params( # pylint: disable=too-many-
agentpool_details,
storage_pool_type,
storage_pool_option,
storage_pool_sku,
is_extension_installed
)

Expand All @@ -372,7 +374,7 @@ def validate_enable_azure_container_storage_params( # pylint: disable=too-many-
storage_pool_type == CONST_STORAGE_POOL_TYPE_ELASTIC_SAN):
raise ArgumentUsageError(
'Invalid --enable-azure-container-storage value. '
'Azure Container Storage is already enabled for storagepool type '
'Azure Container Storage is already enabled for storage pool type '
f'{storage_pool_type} in the cluster.'
)

Expand All @@ -388,7 +390,7 @@ def validate_enable_azure_container_storage_params( # pylint: disable=too-many-
is_ephemeralDisk_localssd_enabled else CONST_STORAGE_POOL_OPTION_NVME
raise ArgumentUsageError(
'Invalid --enable-azure-container-storage value. '
'Azure Container Storage is already enabled for storagepool type '
'Azure Container Storage is already enabled for storage pool type '
f'{CONST_STORAGE_POOL_TYPE_EPHEMERAL_DISK} and option {ephemeral_disk_type_installed} '
'in the cluster.'
)
Expand Down Expand Up @@ -427,11 +429,12 @@ def _validate_storage_pool_size(storage_pool_size, storage_pool_type):
)


def _validate_nodepools( # pylint: disable=too-many-branches
def _validate_nodepools( # pylint: disable=too-many-branches,too-many-locals
nodepool_list,
agentpool_details,
storage_pool_type,
storage_pool_option,
storage_pool_sku,
is_extension_installed,
):
nodepool_arr = []
Expand All @@ -444,7 +447,7 @@ def _validate_nodepools( # pylint: disable=too-many-branches
if nodepool_list is not None:
raise ArgumentUsageError(
'Cannot set --azure-container-storage-nodepools while using '
'--enable-azure-container-storage to enable a type of storagepool '
'--enable-azure-container-storage to enable a type of storage pool '
'in a cluster where Azure Container Storage is already installed.'
)

Expand All @@ -458,12 +461,12 @@ def _validate_nodepools( # pylint: disable=too-many-branches

if len(nodepool_arr) == 0:
raise ArgumentUsageError(
f'Cannot enable Azure Container Storage storagepool of type {storage_pool_type} '
f'Cannot enable Azure Container Storage storage pool of type {storage_pool_type} '
'since none of the nodepools in the cluster are labelled for Azure Container Storage.'
)

insufficient_core_error = (
f'Cannot enable Azure Container Storage storagepool type: {storage_pool_type} '
f'Cannot enable Azure Container Storage storage pool type: {storage_pool_type} '
'on a node pool consisting of nodes with cores less than 4. '
'Node pool: {0} with node size: {1} has nodes with {2} cores. '
f'Remove the label {CONST_ACSTOR_IO_ENGINE_LABEL_KEY}={CONST_ACSTOR_IO_ENGINE_LABEL_VAL} '
Expand All @@ -487,6 +490,7 @@ def _validate_nodepools( # pylint: disable=too-many-branches

nvme_nodepool_found = False
available_node_count = 0
multi_zoned_cluster = False
for nodepool in nodepool_arr:
for agentpool in agentpool_details:
pool_name = agentpool.get("name")
Expand Down Expand Up @@ -524,6 +528,10 @@ def _validate_nodepools( # pylint: disable=too-many-branches
if node_count is not None:
available_node_count = available_node_count + node_count

zoned_nodepool = agentpool.get("zoned")
if zoned_nodepool:
multi_zoned_cluster = True

if available_node_count < 3:
raise UnknownError(
'Insufficient nodes present. Azure Container Storage requires atleast 3 nodes to be enabled.'
Expand All @@ -537,6 +545,15 @@ def _validate_nodepools( # pylint: disable=too-many-branches
'as none of the node pools can support ephemeral NVMe disk.'
)

if storage_pool_type == CONST_STORAGE_POOL_TYPE_AZURE_DISK and \
storage_pool_sku == CONST_STORAGE_POOL_SKU_PREMIUMV2_LRS and \
not multi_zoned_cluster:
raise ArgumentUsageError(
f'Cannot set --storage-pool-sku as {CONST_STORAGE_POOL_SKU_PREMIUMV2_LRS} '
'as none of the node pools are zoned. Please add a zoned node pool and '
'try again.'
)


# _validate_nodepool_names validates that the nodepool_list is a comma separated
# string consisting of valid nodepool names i.e. a lower alphanumeric
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ def perform_enable_azure_container_storage( # pylint: disable=too-many-statemen
]

update_after_exception = False
delete_extension = False
try:
if is_extension_installed:
result = k8s_extension_custom_mod.update_k8s_extension(
Expand Down Expand Up @@ -260,6 +261,7 @@ def perform_enable_azure_container_storage( # pylint: disable=too-many-statemen
"Please run `az aks update` along with `--enable-azure-container-storage` "
"to enable Azure Container Storage."
)
delete_extension = True
else:
if is_extension_installed:
update_after_exception = True
Expand Down Expand Up @@ -310,8 +312,20 @@ def perform_enable_azure_container_storage( # pylint: disable=too-many-statemen
)
else:
logger.error("AKS update to enable Azure Container Storage failed.\nError: %s", ex)
delete_extension = True

if is_storagepool_create_op_required or update_after_exception:
if delete_extension:
k8s_extension_custom_mod.delete_k8s_extension(
cmd,
client,
resource_group,
cluster_name,
CONST_EXT_INSTALLATION_NAME,
"managedClusters",
yes=True,
no_wait=True,
)
elif is_storagepool_create_op_required or update_after_exception:
k8s_extension_custom_mod.update_k8s_extension(
cmd,
client,
Expand Down
16 changes: 9 additions & 7 deletions src/aks-preview/azext_aks_preview/managed_cluster_decorator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3410,6 +3410,7 @@ def set_up_azure_container_storage(self, mc: ManagedCluster) -> ManagedCluster:
pool_details["os_type"] = agentpool.os_type
pool_details["mode"] = agentpool.mode
pool_details["node_taints"] = agentpool.node_taints
pool_details["zoned"] = agentpool.availability_zones is not None
agentpool_details.append(pool_details)
# Marking the only agentpool name as the valid nodepool for
# installing Azure Container Storage during `az aks create`
Expand Down Expand Up @@ -4202,6 +4203,7 @@ def update_azure_container_storage(self, mc: ManagedCluster) -> ManagedCluster:
pool_details["os_type"] = agentpool.os_type
pool_details["mode"] = agentpool.mode
pool_details["node_taints"] = agentpool.node_taints
pool_details["zoned"] = agentpool.availability_zones is not None
if agentpool.node_labels is not None:
node_labels = agentpool.node_labels
if node_labels is not None and \
Expand Down Expand Up @@ -4330,22 +4332,22 @@ def update_azure_container_storage(self, mc: ManagedCluster) -> ManagedCluster:
pre_disable_validate = False

msg = (
"Disabling Azure Container Storage will forcefully delete all the storagepools in the cluster "
"and affect the applications using these storagepools. Forceful deletion of storagepools can "
"Disabling Azure Container Storage will forcefully delete all the storage pools in the cluster "
"and affect the applications using these storage pools. Forceful deletion of storage pools can "
"also lead to leaking of storage resources which are being consumed. Do you want to validate "
"whether any of the storagepools are being used before disabling Azure Container Storage?"
"whether any of the storage pools are being used before disabling Azure Container Storage?"
)

from azext_aks_preview.azurecontainerstorage._consts import (
CONST_ACSTOR_ALL,
)
if disable_pool_type != CONST_ACSTOR_ALL:
msg = (
f"Disabling Azure Container Storage for storagepool type {disable_pool_type} "
"will forcefully delete all the storagepools of the same type and affect the "
"applications using these storagepools. Forceful deletion of storagepools can "
f"Disabling Azure Container Storage for storage pool type {disable_pool_type} "
"will forcefully delete all the storage pools of the same type and affect the "
"applications using these storage pools. Forceful deletion of storage pools can "
"also lead to leaking of storage resources which are being consumed. Do you want to "
f"validate whether any of the storagepools of type {disable_pool_type} are being used "
f"validate whether any of the storage pools of type {disable_pool_type} are being used "
"before disabling Azure Container Storage?"
)
if self.context.get_yes() or prompt_y_n(msg, default="y"):
Expand Down
33 changes: 26 additions & 7 deletions src/aks-preview/azext_aks_preview/tests/latest/test_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -828,7 +828,7 @@ def test_disable_type_when_not_enabled(self):
is_azureDisk_enabled = False
err = (
"Invalid --disable-azure-container-storage value. "
"Azure Container Storage is not enabled for storagepool "
"Azure Container Storage is not enabled for storage pool "
"type {0} in the cluster.".format(pool_type)
)
with self.assertRaises(ArgumentUsageError) as cm:
Expand All @@ -840,8 +840,8 @@ def test_disable_type_when_not_enabled(self):
def test_disable_only_storage_pool_installed(self):
pool_type = acstor_consts.CONST_STORAGE_POOL_TYPE_AZURE_DISK
err = (
"Since azureDisk is the only storagepool type enabled for Azure Container Storage, "
"disabling the storagepool type will lead to disabling Azure Container Storage from the cluster. "
"Since azureDisk is the only storage pool type enabled for Azure Container Storage, "
"disabling the storage pool type will lead to disabling Azure Container Storage from the cluster. "
"To disable Azure Container Storage, set --disable-azure-container-storage to all."
)
with self.assertRaises(ArgumentUsageError) as cm:
Expand All @@ -854,8 +854,8 @@ def test_disable_only_storagepool_type_enabled(self):
pool_type = acstor_consts.CONST_STORAGE_POOL_TYPE_AZURE_DISK
is_azureDisk_enabled = True
err = (
"Since azureDisk is the only storagepool type enabled for Azure Container Storage, "
"disabling the storagepool type will lead to disabling Azure Container Storage from the cluster. "
"Since azureDisk is the only storage pool type enabled for Azure Container Storage, "
"disabling the storage pool type will lead to disabling Azure Container Storage from the cluster. "
"To disable Azure Container Storage, set --disable-azure-container-storage to all."
)
with self.assertRaises(ArgumentUsageError) as cm:
Expand Down Expand Up @@ -918,6 +918,25 @@ def test_enable_with_sku_and_elastic_san_pool(self):
)
self.assertEqual(str(cm.exception), err)

def test_enable_with_premiumv2_sku_and_azure_disk(self):
storage_pool_name = "valid-name"
storage_pool_sku = acstor_consts.CONST_STORAGE_POOL_SKU_PREMIUMV2_LRS
storage_pool_type = acstor_consts.CONST_STORAGE_POOL_TYPE_AZURE_DISK
nodepool_list = "pool1"
agentpools = [{"name": "pool1", "vm_size": "Standard_L8s_v3", "count": 3, "zoned": False}]
err = (
"Cannot set --storage-pool-sku as {0} "
"as none of the node pools are zoned. "
"Please add a zoned node pool and try again.".format(
storage_pool_sku
)
)
with self.assertRaises(ArgumentUsageError) as cm:
acstor_validator.validate_enable_azure_container_storage_params(
storage_pool_type, storage_pool_name, storage_pool_sku, None, None, nodepool_list, agentpools, False, False, False, False, False, None, None, acstor_consts.CONST_DISK_TYPE_EPHEMERAL_VOLUME_ONLY, acstor_consts.CONST_EPHEMERAL_NVME_PERF_TIER_STANDARD
)
self.assertEqual(str(cm.exception), err)

def test_enable_with_option_and_non_ephemeral_disk_pool(self):
storage_pool_name = "valid-name"
storage_pool_option = acstor_consts.CONST_STORAGE_POOL_OPTION_NVME
Expand Down Expand Up @@ -1199,7 +1218,7 @@ def test_extension_installed_nodepool_list_defined(self):
nodepool_list = "nodepool1,nodepool2"
err = (
"Cannot set --azure-container-storage-nodepools while using "
"--enable-azure-container-storage to enable a type of storagepool "
"--enable-azure-container-storage to enable a type of storage pool "
"in a cluster where Azure Container Storage is already installed."
)
with self.assertRaises(ArgumentUsageError) as cm:
Expand All @@ -1216,7 +1235,7 @@ def test_extension_installed_storagepool_type_installed(self):
agentpools = [{"name": "nodepool1", "node_labels": {"acstor.azure.com/io-engine": "acstor"}, "count": 3}, {"name": "nodepool2"}]
err = (
"Invalid --enable-azure-container-storage value. "
"Azure Container Storage is already enabled for storagepool type "
"Azure Container Storage is already enabled for storage pool type "
"{0} in the cluster.".format(storage_pool_type)
)
with self.assertRaises(ArgumentUsageError) as cm:
Expand Down
2 changes: 1 addition & 1 deletion src/aks-preview/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from setuptools import setup, find_packages

VERSION = "7.0.0b1"
VERSION = "7.0.0b2"

CLASSIFIERS = [
"Development Status :: 4 - Beta",
Expand Down

0 comments on commit 0f1ef37

Please sign in to comment.