Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CIF - ServerError: ZonalAllocationFailed #3691

Merged
merged 1 commit into from
Jul 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions hack/hive-config/hive-additional-install-log-regexes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ data:
name: AzureInvalidTemplateDeployment
searchRegexStrings:
- '"code":\w?"InvalidTemplateDeployment"'
- installFailingMessage: Deployment failed. Please see details for more information.
installFailingReason: AzureZonalAllocationFailed
name: AzureZonalAllocationFailed
searchRegexStrings:
- '"code\W*":\W*"ZonalAllocationFailed\W*"'
kind: ConfigMap
metadata:
creationTimestamp: null
Expand Down
10 changes: 10 additions & 0 deletions pkg/hive/failure/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,16 @@ func HandleProvisionFailed(ctx context.Context, cd *hivev1.ClusterDeployment, co
AzureInvalidTemplateDeployment.Message,
*armError,
)
case AzureZonalAllocationFailed.Reason:
armError, err := parseDeploymentFailedJson(*installLog)
if err != nil {
return err
}

return wrapArmError(
AzureZonalAllocationFailed.Message,
*armError,
)
default:
return genericErr
}
Expand Down
10 changes: 10 additions & 0 deletions pkg/hive/failure/reasons.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ var Reasons = []InstallFailingReason{
// priority over later ones.
AzureRequestDisallowedByPolicy,
AzureInvalidTemplateDeployment,
AzureZonalAllocationFailed,
}

var AzureRequestDisallowedByPolicy = InstallFailingReason{
Expand All @@ -36,3 +37,12 @@ var AzureInvalidTemplateDeployment = InstallFailingReason{
regexp.MustCompile(`"code":\w?"InvalidTemplateDeployment"`),
},
}

var AzureZonalAllocationFailed = InstallFailingReason{
Name: "AzureZonalAllocationFailed",
Reason: "AzureZonalAllocationFailed",
Message: "Deployment failed. Please see details for more information.",
SearchRegexes: []*regexp.Regexp{
regexp.MustCompile(`"code\W*":\W*"ZonalAllocationFailed\W*"`),
},
}
63 changes: 63 additions & 0 deletions pkg/hive/failure/reasons_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,69 @@ level=error msg=step [AuthorizationRetryingAction github.com/openshift/ARO-Insta
level=error msg=400: DeploymentFailed: : Deployment failed. Details: : : {"code":"InvalidTemplateDeployment","message":"The template deployment failed with multiple errors. Please see details for more information.","details":[{"additionalInfo":[],"code":"RequestDisallowedByPolicy","message":"Resource 'test-bootstrap' was disallowed by policy. Policy identifiers: ''.","target":"test-bootstrap"}]}`,
want: AzureRequestDisallowedByPolicy,
},
{
name: "ZonalAllocationFailed-1",
installLog: `Message: level=info msg=creating InstanceMetadata from Azure Instance Metadata Service (AIMS) level=info msg=InstanceMetadata: running on AzurePublicCloud level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func1] level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func2] level=info msg=resolving graph level=info
msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func3] level=info msg=checking if graph exists level=info msg=save graph Generates the Ignition Config asset level=info msg=creating InstanceMetadata from Azure Instance Metadata Service (AIMS) level=info msg=InstanceMetadata: running on AzurePublicCloud level=info
msg=running step [AuthorizationRetryingAction github.com/openshift/ARO-Installer/pkg/installer.(*manager).deployResourceTemplate-fm] level=info msg=load persisted graph level=info msg=deploying resources template level=error msg=step [AuthorizationRetryingAction github.com/openshift/ARO-Installer/pkg/installer.(*manager).deployResourceTemplate-fm]
encountered error: 400: DeploymentFailed: : Deployment failed. Details: : : {"code":"DeploymentFailed","message":"At least one resource deployment operation failed. Please list deployment operations for details. Please see https://aka.ms/arm-deployment-operations for usage details.","target":null,"details":
[{"code":"Conflict","message":"{\r\n \"status\": \"Failed\",\r\n \"error\": {\r\n \"code\": \"ResourceDeploymentFailure\",\r\n \"message\": \"The resource write operation failed to complete successfully, because it reached terminal provisioning state 'Failed'.\",\r\n \"details\":
[\r\n {\r\n \"code\": \"ZonalAllocationFailed\",\r\n \"message\": \"Allocation failed. We do not have sufficient capacity for the requested VM size in this zone. Read more about improving likelihood of allocation success at http://aka.ms/allocation-guidance\"\r\n }\r\n ]\r\n }\r\n}"}],"innererror":null,"additionalInfo":null}
level=error msg=400: DeploymentFailed: : Deployment failed. Details: : : {"code":"DeploymentFailed","message":"At least one resource deployment operation failed. Please list deployment operations for details. Please see https://aka.ms/arm-deployment-operations for usage details.","target":null,"details":
[{"code":"Conflict","message":"{\r\n \"status\": \"Failed\",\r\n \"error\": {\r\n \"code\": \"ResourceDeploymentFailure\",\r\n \"message\": \"The resource write operation failed to complete successfully, because it reached terminal provisioning state 'Failed'.\",\r\n \"details\":
[\r\n {\r\n \"code\": \"ZonalAllocationFailed\",\r\n \"message\": \"Allocation failed. We do not have sufficient capacity for the requested VM size in this zone. Read more about improving likelihood of allocation success at http://aka.ms/allocation-guidance\"\r\n }\r\n ]\r\n }\r\n}"}],"innererror":null,"additionalInfo":null}`,
want: AzureZonalAllocationFailed,
},
{
name: "ZonalAllocationFailed-2",
installLog: `Message: level=info msg=creating InstanceMetadata from Azure Instance Metadata Service (AIMS)
level=info msg=InstanceMetadata: running on AzurePublicCloud
level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func1]
level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func2]
level=info msg=resolving graph
level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func3]
level=info msg=checking if graph exists level=info msg=save graph Generates the Ignition Config asset
level=info msg=creating InstanceMetadata from Azure Instance Metadata Service (AIMS)
level=info msg=InstanceMetadata: running on AzurePublicCloud
level=info msg=running step [AuthorizationRetryingAction github.com/openshift/ARO-Installer/pkg/installer.(*manager).deployResourceTemplate-fm] level=info msg=load persisted graph
level=info msg=deploying resources template
level=error msg=step [AuthorizationRetryingAction github.com/openshift/ARO-Installer/pkg/installer.(*manager).deployResourceTemplate-fm] encountered error: 400: DeploymentFailed: : Deployment failed.
Details: : :{"code":"DeploymentFailed","message":"At least one resource deployment operation failed. Please list deployment operations for details. Please see https://aka.ms/arm-deployment-operations for usage details.","target":null,"details":
[{"code":"Conflict","message":"{ \"status\": \"Failed\",\"error\":{\"code\":\"ResourceDeploymentFailure\", \"message\":\"The resource write operation failed to complete successfully, because it reached terminal provisioning state 'Failed'.\",\"details\":
[{\"code\":\"ZonalAllocationFailed\", \"message\": \"Allocation failed. We do not have sufficient capacity for the requested VM size in this zone. Read more about improving likelihood of allocation success at http://aka.ms/allocation-guidance\"}]}}"}],"innererror":null,"additionalInfo":null}

level=error msg=400: DeploymentFailed: : Deployment failed.
Details: : :{"code":"DeploymentFailed","message":"At least one resource deployment operation failed. Please list deployment operations for details.
Please see https://aka.ms/arm-deployment-operations for usage details.","target":null,"details":[{"code":"Conflict","message":"{\"status\":\"Failed\",\"error\":
{\"code\":\"ResourceDeploymentFailure\",\"message\":\"The resource write operation failed to complete successfully, because it reached terminal provisioning state 'Failed'.\",\"details\":
[{\"code\":\"ZonalAllocationFailed\", \"message\": \"Allocation failed. We do not have sufficient capacity for the requested VM size in this zone. Read more about improving likelihood of allocation success at http://aka.ms/allocation-guidance\"}]}}"}],"innererror":null,"additionalInfo":null}`,
want: AzureZonalAllocationFailed,
},
{
name: "ZonalAllocationFailed-3",
installLog: `Message: level=info msg=creating InstanceMetadata from Azure Instance Metadata Service (AIMS)
level=info msg=InstanceMetadata: running on AzurePublicCloud
level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func1]
level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func2]
level=info msg=resolving graph
level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func3]
level=info msg=checking if graph exists level=info msg=save graph Generates the Ignition Config asset
level=info msg=creating InstanceMetadata from Azure Instance Metadata Service (AIMS)
level=info msg=InstanceMetadata: running on AzurePublicCloud
level=info msg=running step [AuthorizationRetryingAction github.com/openshift/ARO-Installer/pkg/installer.(*manager).deployResourceTemplate-fm] level=info msg=load persisted graph
level=info msg=deploying resources template
level=error msg=step [AuthorizationRetryingAction github.com/openshift/ARO-Installer/pkg/installer.(*manager).deployResourceTemplate-fm] encountered error: 400: DeploymentFailed: : Deployment failed.
Details: : :{"code":"DeploymentFailed","message":"At least one resource deployment operation failed. Please list deployment operations for details. Please see https://aka.ms/arm-deployment-operations for usage details.","target":null,"details":
[{"code":"Conflict","message":"{ "status": "Failed","error":{"code":"ResourceDeploymentFailure", "message":"The resource write operation failed to complete successfully, because it reached terminal provisioning state 'Failed'.","details":
[{"code":"ZonalAllocationFailed", "message": "Allocation failed. We do not have sufficient capacity for the requested VM size in this zone. Read more about improving likelihood of allocation success at http://aka.ms/allocation-guidance"}]}}"}],"innererror":null,"additionalInfo":null}

level=error msg=400: DeploymentFailed: : Deployment failed.
Details: : :{"code":"DeploymentFailed","message":"At least one resource deployment operation failed. Please list deployment operations for details.
Please see https://aka.ms/arm-deployment-operations for usage details.","target":null,"details":
[{"code":"Conflict","message":"{"status":"Failed","error":{"code":"ResourceDeploymentFailure","message":"The resource write operation failed to complete successfully, because it reached terminal provisioning state 'Failed'.","details":
[{"code":"ZonalAllocationFailed", "message": "Allocation failed. We do not have sufficient capacity for the requested VM size in this zone. Read more about improving likelihood of allocation success at http://aka.ms/allocation-guidance"}]}}"}],"innererror":null,"additionalInfo":null}`,
want: AzureZonalAllocationFailed,
},
} {
t.Run(tt.name, func(t *testing.T) {
// This test uses a "mock" version of Hive's real implementation for matching install logs against regex patterns.
Expand Down
Loading