Skip to content

Commit

Permalink
CIF - ServerError: ZonalAllocationFailed
Browse files Browse the repository at this point in the history
  • Loading branch information
rhamitarora committed Jul 16, 2024
1 parent 1b6e0e7 commit 49af5c6
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 0 deletions.
5 changes: 5 additions & 0 deletions hack/hive-config/hive-additional-install-log-regexes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ data:
name: AzureInvalidTemplateDeployment
searchRegexStrings:
- '"code":\w?"InvalidTemplateDeployment"'
- installFailingMessage: Deployment failed. Please see details for more information.
installFailingReason: AzureZonalAllocationFailed
name: AzureZonalAllocationFailed
searchRegexStrings:
- '"code\W*":\W*"ZonalAllocationFailed\W*"'
kind: ConfigMap
metadata:
creationTimestamp: null
Expand Down
10 changes: 10 additions & 0 deletions pkg/hive/failure/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,16 @@ func HandleProvisionFailed(ctx context.Context, cd *hivev1.ClusterDeployment, co
AzureInvalidTemplateDeployment.Message,
*armError,
)
case AzureZonalAllocationFailed.Reason:
armError, err := parseDeploymentFailedJson(*installLog)
if err != nil {
return err
}

return wrapArmError(
AzureZonalAllocationFailed.Message,
*armError,
)
default:
return genericErr
}
Expand Down
10 changes: 10 additions & 0 deletions pkg/hive/failure/reasons.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ var Reasons = []InstallFailingReason{
// priority over later ones.
AzureRequestDisallowedByPolicy,
AzureInvalidTemplateDeployment,
AzureZonalAllocationFailed,
}

var AzureRequestDisallowedByPolicy = InstallFailingReason{
Expand All @@ -36,3 +37,12 @@ var AzureInvalidTemplateDeployment = InstallFailingReason{
regexp.MustCompile(`"code":\w?"InvalidTemplateDeployment"`),
},
}

// AzureZonalAllocationFailed is the install-failing reason selected when the
// install log contains an error entry whose "code" is "ZonalAllocationFailed".
// Name and Reason carry the same identifier; Message is the fixed text
// associated with this failure.
var AzureZonalAllocationFailed = InstallFailingReason{
Name: "AzureZonalAllocationFailed",
Reason: "AzureZonalAllocationFailed",
Message: "Deployment failed. Please see details for more information.",
SearchRegexes: []*regexp.Regexp{
// Each \W* accepts any run of non-word characters (for example
// backslashes from escaped quotes, or whitespace), so the pattern matches
// both the plain form `"code":"ZonalAllocationFailed"` and the nested,
// escaped form `\"code\": \"ZonalAllocationFailed\"`.
regexp.MustCompile(`"code\W*":\W*"ZonalAllocationFailed\W*"`),
},
}
63 changes: 63 additions & 0 deletions pkg/hive/failure/reasons_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,69 @@ level=error msg=step [AuthorizationRetryingAction github.com/openshift/ARO-Insta
level=error msg=400: DeploymentFailed: : Deployment failed. Details: : : {"code":"InvalidTemplateDeployment","message":"The template deployment failed with multiple errors. Please see details for more information.","details":[{"additionalInfo":[],"code":"RequestDisallowedByPolicy","message":"Resource 'test-bootstrap' was disallowed by policy. Policy identifiers: ''.","target":"test-bootstrap"}]}`,
want: AzureRequestDisallowedByPolicy,
},
{
name: "ZonalAllocationFailed-1",
installLog: `Message: level=info msg=creating InstanceMetadata from Azure Instance Metadata Service (AIMS) level=info msg=InstanceMetadata: running on AzurePublicCloud level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func1] level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func2] level=info msg=resolving graph level=info
msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func3] level=info msg=checking if graph exists level=info msg=save graph Generates the Ignition Config asset level=info msg=creating InstanceMetadata from Azure Instance Metadata Service (AIMS) level=info msg=InstanceMetadata: running on AzurePublicCloud level=info
msg=running step [AuthorizationRetryingAction github.com/openshift/ARO-Installer/pkg/installer.(*manager).deployResourceTemplate-fm] level=info msg=load persisted graph level=info msg=deploying resources template level=error msg=step [AuthorizationRetryingAction github.com/openshift/ARO-Installer/pkg/installer.(*manager).deployResourceTemplate-fm]
encountered error: 400: DeploymentFailed: : Deployment failed. Details: : : {"code":"DeploymentFailed","message":"At least one resource deployment operation failed. Please list deployment operations for details. Please see https://aka.ms/arm-deployment-operations for usage details.","target":null,"details":
[{"code":"Conflict","message":"{\r\n \"status\": \"Failed\",\r\n \"error\": {\r\n \"code\": \"ResourceDeploymentFailure\",\r\n \"message\": \"The resource write operation failed to complete successfully, because it reached terminal provisioning state 'Failed'.\",\r\n \"details\":
[\r\n {\r\n \"code\": \"ZonalAllocationFailed\",\r\n \"message\": \"Allocation failed. We do not have sufficient capacity for the requested VM size in this zone. Read more about improving likelihood of allocation success at http://aka.ms/allocation-guidance\"\r\n }\r\n ]\r\n }\r\n}"}],"innererror":null,"additionalInfo":null}
level=error msg=400: DeploymentFailed: : Deployment failed. Details: : : {"code":"DeploymentFailed","message":"At least one resource deployment operation failed. Please list deployment operations for details. Please see https://aka.ms/arm-deployment-operations for usage details.","target":null,"details":
[{"code":"Conflict","message":"{\r\n \"status\": \"Failed\",\r\n \"error\": {\r\n \"code\": \"ResourceDeploymentFailure\",\r\n \"message\": \"The resource write operation failed to complete successfully, because it reached terminal provisioning state 'Failed'.\",\r\n \"details\":
[\r\n {\r\n \"code\": \"ZonalAllocationFailed\",\r\n \"message\": \"Allocation failed. We do not have sufficient capacity for the requested VM size in this zone. Read more about improving likelihood of allocation success at http://aka.ms/allocation-guidance\"\r\n }\r\n ]\r\n }\r\n}"}],"innererror":null,"additionalInfo":null}`,
want: AzureZonalAllocationFailed,
},
{
name: "ZonalAllocationFailed-2",
installLog: `Message: level=info msg=creating InstanceMetadata from Azure Instance Metadata Service (AIMS)
level=info msg=InstanceMetadata: running on AzurePublicCloud
level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func1]
level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func2]
level=info msg=resolving graph
level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func3]
level=info msg=checking if graph exists level=info msg=save graph Generates the Ignition Config asset
level=info msg=creating InstanceMetadata from Azure Instance Metadata Service (AIMS)
level=info msg=InstanceMetadata: running on AzurePublicCloud
level=info msg=running step [AuthorizationRetryingAction github.com/openshift/ARO-Installer/pkg/installer.(*manager).deployResourceTemplate-fm] level=info msg=load persisted graph
level=info msg=deploying resources template
level=error msg=step [AuthorizationRetryingAction github.com/openshift/ARO-Installer/pkg/installer.(*manager).deployResourceTemplate-fm] encountered error: 400: DeploymentFailed: : Deployment failed.
Details: : :{"code":"DeploymentFailed","message":"At least one resource deployment operation failed. Please list deployment operations for details. Please see https://aka.ms/arm-deployment-operations for usage details.","target":null,"details":
[{"code":"Conflict","message":"{ \"status\": \"Failed\",\"error\":{\"code\":\"ResourceDeploymentFailure\", \"message\":\"The resource write operation failed to complete successfully, because it reached terminal provisioning state 'Failed'.\",\"details\":
[{\"code\":\"ZonalAllocationFailed\", \"message\": \"Allocation failed. We do not have sufficient capacity for the requested VM size in this zone. Read more about improving likelihood of allocation success at http://aka.ms/allocation-guidance\"}]}}"}],"innererror":null,"additionalInfo":null}
level=error msg=400: DeploymentFailed: : Deployment failed.
Details: : :{"code":"DeploymentFailed","message":"At least one resource deployment operation failed. Please list deployment operations for details.
Please see https://aka.ms/arm-deployment-operations for usage details.","target":null,"details":[{"code":"Conflict","message":"{\"status\":\"Failed\",\"error\":
{\"code\":\"ResourceDeploymentFailure\",\"message\":\"The resource write operation failed to complete successfully, because it reached terminal provisioning state 'Failed'.\",\"details\":
[{\"code\":\"ZonalAllocationFailed\", \"message\": \"Allocation failed. We do not have sufficient capacity for the requested VM size in this zone. Read more about improving likelihood of allocation success at http://aka.ms/allocation-guidance\"}]}}"}],"innererror":null,"additionalInfo":null}`,
want: AzureZonalAllocationFailed,
},
{
name: "ZonalAllocationFailed-3",
installLog: `Message: level=info msg=creating InstanceMetadata from Azure Instance Metadata Service (AIMS)
level=info msg=InstanceMetadata: running on AzurePublicCloud
level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func1]
level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func2]
level=info msg=resolving graph
level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func3]
level=info msg=checking if graph exists level=info msg=save graph Generates the Ignition Config asset
level=info msg=creating InstanceMetadata from Azure Instance Metadata Service (AIMS)
level=info msg=InstanceMetadata: running on AzurePublicCloud
level=info msg=running step [AuthorizationRetryingAction github.com/openshift/ARO-Installer/pkg/installer.(*manager).deployResourceTemplate-fm] level=info msg=load persisted graph
level=info msg=deploying resources template
level=error msg=step [AuthorizationRetryingAction github.com/openshift/ARO-Installer/pkg/installer.(*manager).deployResourceTemplate-fm] encountered error: 400: DeploymentFailed: : Deployment failed.
Details: : :{"code":"DeploymentFailed","message":"At least one resource deployment operation failed. Please list deployment operations for details. Please see https://aka.ms/arm-deployment-operations for usage details.","target":null,"details":
[{"code":"Conflict","message":"{ "status": "Failed","error":{"code":"ResourceDeploymentFailure", "message":"The resource write operation failed to complete successfully, because it reached terminal provisioning state 'Failed'.","details":
[{"code":"ZonalAllocationFailed", "message": "Allocation failed. We do not have sufficient capacity for the requested VM size in this zone. Read more about improving likelihood of allocation success at http://aka.ms/allocation-guidance"}]}}"}],"innererror":null,"additionalInfo":null}
level=error msg=400: DeploymentFailed: : Deployment failed.
Details: : :{"code":"DeploymentFailed","message":"At least one resource deployment operation failed. Please list deployment operations for details.
Please see https://aka.ms/arm-deployment-operations for usage details.","target":null,"details":
[{"code":"Conflict","message":"{"status":"Failed","error":{"code":"ResourceDeploymentFailure","message":"The resource write operation failed to complete successfully, because it reached terminal provisioning state 'Failed'.","details":
[{"code":"ZonalAllocationFailed", "message": "Allocation failed. We do not have sufficient capacity for the requested VM size in this zone. Read more about improving likelihood of allocation success at http://aka.ms/allocation-guidance"}]}}"}],"innererror":null,"additionalInfo":null}`,
want: AzureZonalAllocationFailed,
},
} {
t.Run(tt.name, func(t *testing.T) {
// This test uses a "mock" version of Hive's real implementation for matching install logs against regex patterns.
Expand Down

0 comments on commit 49af5c6

Please sign in to comment.