diff --git a/hack/hive-config/hive-additional-install-log-regexes.yaml b/hack/hive-config/hive-additional-install-log-regexes.yaml index ad5adc77410..5439c990e9b 100644 --- a/hack/hive-config/hive-additional-install-log-regexes.yaml +++ b/hack/hive-config/hive-additional-install-log-regexes.yaml @@ -12,6 +12,11 @@ data: name: AzureInvalidTemplateDeployment searchRegexStrings: - '"code":\w?"InvalidTemplateDeployment"' + - installFailingMessage: Deployment failed. Please see details for more information. + installFailingReason: AzureZonalAllocationFailed + name: AzureZonalAllocationFailed + searchRegexStrings: + - '"code\W*":\W*"ZonalAllocationFailed\W*"' kind: ConfigMap metadata: creationTimestamp: null diff --git a/pkg/hive/failure/handler.go b/pkg/hive/failure/handler.go index 7d4a78b393a..0488a7e93dd 100644 --- a/pkg/hive/failure/handler.go +++ b/pkg/hive/failure/handler.go @@ -50,6 +50,16 @@ func HandleProvisionFailed(ctx context.Context, cd *hivev1.ClusterDeployment, co AzureInvalidTemplateDeployment.Message, *armError, ) + case AzureZonalAllocationFailed.Reason: + armError, err := parseDeploymentFailedJson(*installLog) + if err != nil { + return err + } + + return wrapArmError( + AzureZonalAllocationFailed.Message, + *armError, + ) default: return genericErr } diff --git a/pkg/hive/failure/reasons.go b/pkg/hive/failure/reasons.go index 414e0ed03e8..449903b83fe 100644 --- a/pkg/hive/failure/reasons.go +++ b/pkg/hive/failure/reasons.go @@ -17,6 +17,7 @@ var Reasons = []InstallFailingReason{ // priority over later ones. AzureRequestDisallowedByPolicy, AzureInvalidTemplateDeployment, + AzureZonalAllocationFailed, } var AzureRequestDisallowedByPolicy = InstallFailingReason{ @@ -36,3 +37,12 @@ var AzureInvalidTemplateDeployment = InstallFailingReason{ regexp.MustCompile(`"code":\w?"InvalidTemplateDeployment"`), }, } + +var AzureZonalAllocationFailed = InstallFailingReason{ + Name: "AzureZonalAllocationFailed", + Reason: "AzureZonalAllocationFailed", + Message: "Deployment failed. Please see details for more information.", + SearchRegexes: []*regexp.Regexp{ + regexp.MustCompile(`"code\W*":\W*"ZonalAllocationFailed\W*"`), + }, +} diff --git a/pkg/hive/failure/reasons_test.go b/pkg/hive/failure/reasons_test.go index 19c4fd09c1d..1573b02baa4 100644 --- a/pkg/hive/failure/reasons_test.go +++ b/pkg/hive/failure/reasons_test.go @@ -63,6 +63,69 @@ level=error msg=step [AuthorizationRetryingAction github.com/openshift/ARO-Insta level=error msg=400: DeploymentFailed: : Deployment failed. Details: : : {"code":"InvalidTemplateDeployment","message":"The template deployment failed with multiple errors. Please see details for more information.","details":[{"additionalInfo":[],"code":"RequestDisallowedByPolicy","message":"Resource 'test-bootstrap' was disallowed by policy. Policy identifiers: ''.","target":"test-bootstrap"}]}`, want: AzureRequestDisallowedByPolicy, }, + { + name: "ZonalAllocationFailed-1", + installLog: `Message: level=info msg=creating InstanceMetadata from Azure Instance Metadata Service (AIMS) level=info msg=InstanceMetadata: running on AzurePublicCloud level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func1] level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func2] level=info msg=resolving graph level=info + msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func3] level=info msg=checking if graph exists level=info msg=save graph Generates the Ignition Config asset level=info msg=creating InstanceMetadata from Azure Instance Metadata Service (AIMS) level=info msg=InstanceMetadata: running on AzurePublicCloud level=info + msg=running step [AuthorizationRetryingAction github.com/openshift/ARO-Installer/pkg/installer.(*manager).deployResourceTemplate-fm] level=info msg=load persisted graph level=info msg=deploying resources template level=error msg=step [AuthorizationRetryingAction github.com/openshift/ARO-Installer/pkg/installer.(*manager).deployResourceTemplate-fm] + encountered error: 400: DeploymentFailed: : Deployment failed. Details: : : {"code":"DeploymentFailed","message":"At least one resource deployment operation failed. Please list deployment operations for details. Please see https://aka.ms/arm-deployment-operations for usage details.","target":null,"details": + [{"code":"Conflict","message":"{\r\n \"status\": \"Failed\",\r\n \"error\": {\r\n \"code\": \"ResourceDeploymentFailure\",\r\n \"message\": \"The resource write operation failed to complete successfully, because it reached terminal provisioning state 'Failed'.\",\r\n \"details\": + [\r\n {\r\n \"code\": \"ZonalAllocationFailed\",\r\n \"message\": \"Allocation failed. We do not have sufficient capacity for the requested VM size in this zone. Read more about improving likelihood of allocation success at http://aka.ms/allocation-guidance\"\r\n }\r\n ]\r\n }\r\n}"}],"innererror":null,"additionalInfo":null} + level=error msg=400: DeploymentFailed: : Deployment failed. Details: : : {"code":"DeploymentFailed","message":"At least one resource deployment operation failed. Please list deployment operations for details. Please see https://aka.ms/arm-deployment-operations for usage details.","target":null,"details": + [{"code":"Conflict","message":"{\r\n \"status\": \"Failed\",\r\n \"error\": {\r\n \"code\": \"ResourceDeploymentFailure\",\r\n \"message\": \"The resource write operation failed to complete successfully, because it reached terminal provisioning state 'Failed'.\",\r\n \"details\": + [\r\n {\r\n \"code\": \"ZonalAllocationFailed\",\r\n \"message\": \"Allocation failed. We do not have sufficient capacity for the requested VM size in this zone. Read more about improving likelihood of allocation success at http://aka.ms/allocation-guidance\"\r\n }\r\n ]\r\n }\r\n}"}],"innererror":null,"additionalInfo":null}`, + want: AzureZonalAllocationFailed, + }, + { + name: "ZonalAllocationFailed-2", + installLog: `Message: level=info msg=creating InstanceMetadata from Azure Instance Metadata Service (AIMS) + level=info msg=InstanceMetadata: running on AzurePublicCloud + level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func1] + level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func2] + level=info msg=resolving graph + level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func3] + level=info msg=checking if graph exists level=info msg=save graph Generates the Ignition Config asset + level=info msg=creating InstanceMetadata from Azure Instance Metadata Service (AIMS) + level=info msg=InstanceMetadata: running on AzurePublicCloud + level=info msg=running step [AuthorizationRetryingAction github.com/openshift/ARO-Installer/pkg/installer.(*manager).deployResourceTemplate-fm] level=info msg=load persisted graph + level=info msg=deploying resources template + level=error msg=step [AuthorizationRetryingAction github.com/openshift/ARO-Installer/pkg/installer.(*manager).deployResourceTemplate-fm] encountered error: 400: DeploymentFailed: : Deployment failed. + Details: : :{"code":"DeploymentFailed","message":"At least one resource deployment operation failed. Please list deployment operations for details. Please see https://aka.ms/arm-deployment-operations for usage details.","target":null,"details": + [{"code":"Conflict","message":"{ \"status\": \"Failed\",\"error\":{\"code\":\"ResourceDeploymentFailure\", \"message\":\"The resource write operation failed to complete successfully, because it reached terminal provisioning state 'Failed'.\",\"details\": + [{\"code\":\"ZonalAllocationFailed\", \"message\": \"Allocation failed. We do not have sufficient capacity for the requested VM size in this zone. Read more about improving likelihood of allocation success at http://aka.ms/allocation-guidance\"}]}}"}],"innererror":null,"additionalInfo":null} + + level=error msg=400: DeploymentFailed: : Deployment failed. + Details: : :{"code":"DeploymentFailed","message":"At least one resource deployment operation failed. Please list deployment operations for details. + Please see https://aka.ms/arm-deployment-operations for usage details.","target":null,"details":[{"code":"Conflict","message":"{\"status\":\"Failed\",\"error\": + {\"code\":\"ResourceDeploymentFailure\",\"message\":\"The resource write operation failed to complete successfully, because it reached terminal provisioning state 'Failed'.\",\"details\": + [{\"code\":\"ZonalAllocationFailed\", \"message\": \"Allocation failed. We do not have sufficient capacity for the requested VM size in this zone. Read more about improving likelihood of allocation success at http://aka.ms/allocation-guidance\"}]}}"}],"innererror":null,"additionalInfo":null}`, + want: AzureZonalAllocationFailed, + }, + { + name: "ZonalAllocationFailed-3", + installLog: `Message: level=info msg=creating InstanceMetadata from Azure Instance Metadata Service (AIMS) + level=info msg=InstanceMetadata: running on AzurePublicCloud + level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func1] + level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func2] + level=info msg=resolving graph + level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func3] + level=info msg=checking if graph exists level=info msg=save graph Generates the Ignition Config asset + level=info msg=creating InstanceMetadata from Azure Instance Metadata Service (AIMS) + level=info msg=InstanceMetadata: running on AzurePublicCloud + level=info msg=running step [AuthorizationRetryingAction github.com/openshift/ARO-Installer/pkg/installer.(*manager).deployResourceTemplate-fm] level=info msg=load persisted graph + level=info msg=deploying resources template + level=error msg=step [AuthorizationRetryingAction github.com/openshift/ARO-Installer/pkg/installer.(*manager).deployResourceTemplate-fm] encountered error: 400: DeploymentFailed: : Deployment failed. + Details: : :{"code":"DeploymentFailed","message":"At least one resource deployment operation failed. Please list deployment operations for details. Please see https://aka.ms/arm-deployment-operations for usage details.","target":null,"details": + [{"code":"Conflict","message":"{ "status": "Failed","error":{"code":"ResourceDeploymentFailure", "message":"The resource write operation failed to complete successfully, because it reached terminal provisioning state 'Failed'.","details": + [{"code":"ZonalAllocationFailed", "message": "Allocation failed. We do not have sufficient capacity for the requested VM size in this zone. Read more about improving likelihood of allocation success at http://aka.ms/allocation-guidance"}]}}"}],"innererror":null,"additionalInfo":null} + + level=error msg=400: DeploymentFailed: : Deployment failed. + Details: : :{"code":"DeploymentFailed","message":"At least one resource deployment operation failed. Please list deployment operations for details. + Please see https://aka.ms/arm-deployment-operations for usage details.","target":null,"details": + [{"code":"Conflict","message":"{"status":"Failed","error":{"code":"ResourceDeploymentFailure","message":"The resource write operation failed to complete successfully, because it reached terminal provisioning state 'Failed'.","details": + [{"code":"ZonalAllocationFailed", "message": "Allocation failed. We do not have sufficient capacity for the requested VM size in this zone. Read more about improving likelihood of allocation success at http://aka.ms/allocation-guidance"}]}}"}],"innererror":null,"additionalInfo":null}`, + want: AzureZonalAllocationFailed, + }, } { t.Run(tt.name, func(t *testing.T) { // This test uses a "mock" version of Hive's real implementation for matching install logs against regex patterns.