diff --git a/hack/hive-config/hive-additional-install-log-regexes.yaml b/hack/hive-config/hive-additional-install-log-regexes.yaml index ad5adc77410..2dcd3f3868a 100644 --- a/hack/hive-config/hive-additional-install-log-regexes.yaml +++ b/hack/hive-config/hive-additional-install-log-regexes.yaml @@ -12,6 +12,12 @@ data: name: AzureInvalidTemplateDeployment searchRegexStrings: - '"code":\w?"InvalidTemplateDeployment"' + - installFailingMessage: Allocation failed. We do not have sufficient capacity for + the requested VM size in this zone. + installFailingReason: AzureZonalAllocationFailed + name: AzureZonalAllocationFailed + searchRegexStrings: + - '"code":\w?"DeploymentFailed".*"code":\w?"ZonalAllocationFailed"' kind: ConfigMap metadata: creationTimestamp: null diff --git a/pkg/api/error.go b/pkg/api/error.go index 5e124f765fd..a27cfc5718e 100644 --- a/pkg/api/error.go +++ b/pkg/api/error.go @@ -103,6 +103,7 @@ const ( CloudErrorCodeScopeLocked = "ScopeLocked" CloudErrorCodeRequestDisallowedByPolicy = "RequestDisallowedByPolicy" CloudErrorCodeInvalidNetworkAddress = "InvalidNetworkAddress" + CloudErrorCodeZonalAllocationFailed = "ZonalAllocationFailed" ) // NewCloudError returns a new CloudError diff --git a/pkg/hive/failure/handler.go b/pkg/hive/failure/handler.go index 7d4a78b393a..0488a7e93dd 100644 --- a/pkg/hive/failure/handler.go +++ b/pkg/hive/failure/handler.go @@ -50,6 +50,16 @@ func HandleProvisionFailed(ctx context.Context, cd *hivev1.ClusterDeployment, co AzureInvalidTemplateDeployment.Message, *armError, ) + case AzureZonalAllocationFailed.Reason: + armError, err := parseDeploymentFailedJson(*installLog) + if err != nil { + return err + } + + return wrapArmError( + AzureZonalAllocationFailed.Message, + *armError, + ) default: return genericErr } diff --git a/pkg/hive/failure/reasons.go b/pkg/hive/failure/reasons.go index 414e0ed03e8..6666f9a4bbf 100644 --- a/pkg/hive/failure/reasons.go +++ b/pkg/hive/failure/reasons.go @@ -17,6 +17,7 @@ var Reasons = []InstallFailingReason{ // priority over later ones. AzureRequestDisallowedByPolicy, AzureInvalidTemplateDeployment, + AzureZonalAllocationFailed, } var AzureRequestDisallowedByPolicy = InstallFailingReason{ @@ -36,3 +37,12 @@ var AzureInvalidTemplateDeployment = InstallFailingReason{ regexp.MustCompile(`"code":\w?"InvalidTemplateDeployment"`), }, } + +var AzureZonalAllocationFailed = InstallFailingReason{ + Name: "AzureZonalAllocationFailed", + Reason: "AzureZonalAllocationFailed", + Message: "Allocation failed. We do not have sufficient capacity for the requested VM size in this zone.", + SearchRegexes: []*regexp.Regexp{ + regexp.MustCompile(`"code":\w?"DeploymentFailed".*"code":\w?"ZonalAllocationFailed"`), + }, +} diff --git a/pkg/hive/failure/reasons_test.go b/pkg/hive/failure/reasons_test.go index 19c4fd09c1d..ce0750de309 100644 --- a/pkg/hive/failure/reasons_test.go +++ b/pkg/hive/failure/reasons_test.go @@ -63,6 +63,30 @@ level=error msg=step [AuthorizationRetryingAction github.com/openshift/ARO-Insta level=error msg=400: DeploymentFailed: : Deployment failed. Details: : : {"code":"InvalidTemplateDeployment","message":"The template deployment failed with multiple errors. Please see details for more information.","details":[{"additionalInfo":[],"code":"RequestDisallowedByPolicy","message":"Resource 'test-bootstrap' was disallowed by policy. Policy identifiers: ''.","target":"test-bootstrap"}]}`, want: AzureRequestDisallowedByPolicy, }, + { + name: "ResourceDeploymentFailure - ZonalAllocationFailed", + installLog: ` +level=info msg=running in local development mode +level=info msg=creating development InstanceMetadata +level=info msg=InstanceMetadata: running on AzurePublicCloud +level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func1] +level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func2] +level=info msg=resolving graph +level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func3] +level=info msg=checking if graph exists +level=info msg=save graph +Generates the Ignition Config asset + +level=info msg=running in local development mode +level=info msg=creating development InstanceMetadata +level=info msg=InstanceMetadata: running on AzurePublicCloud +level=info msg=running step [AuthorizationRetryingAction github.com/openshift/ARO-Installer/pkg/installer.(*manager).deployResourceTemplate-fm] +level=info msg=load persisted graph +level=info msg=deploying resources template +level=error msg=step [AuthorizationRetryingAction github.com/openshift/ARO-Installer/pkg/installer.(*manager).deployResourceTemplate-fm] encountered error: 400: DeploymentFailed: : Deployment failed. Details: : : {"code":"DeploymentFailed","message":"At least one resource deployment operation failed. Please list deployment operations for details. Please see https://aka.ms/arm-deployment-operations for usage details.","details":[{"additionalInfo":[],"code":"ZonalAllocationFailed","message":"Allocation failed. We do not have sufficient capacity for the requested VM size in this zone.","target":"null"}]} +level=error msg=400: DeploymentFailed: : Deployment failed. Details: : : {"code":"DeploymentFailed","message":"At least one resource deployment operation failed. Please list deployment operations for details. Please see https://aka.ms/arm-deployment-operations for usage details.","details":[{"additionalInfo":[],"code":"ZonalAllocationFailed","message":"Allocation failed. We do not have sufficient capacity for the requested VM size in this zone.","target":"null"}]}`, + want: AzureZonalAllocationFailed, + }, } { t.Run(tt.name, func(t *testing.T) { // This test uses a "mock" version of Hive's real implementation for matching install logs against regex patterns.