Skip to content

Commit

Permalink
CIF: OSProvisioningTimedOut
Browse files Browse the repository at this point in the history
  • Loading branch information
rhamitarora committed Jul 13, 2024
1 parent e711e61 commit 42df7bf
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 0 deletions.
5 changes: 5 additions & 0 deletions hack/hive-config/hive-additional-install-log-regexes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ data:
name: AzureInvalidTemplateDeployment
searchRegexStrings:
- '"code":\w?"InvalidTemplateDeployment"'
- installFailingMessage: OS Provisioning for VM, didn't finished in the allotted time. Please check provisioning state later.
installFailingReason: AzureOSProvisioningTimedOut
name: AzureOSProvisioningTimedOut
searchRegexStrings:
- '"code\\":\W\\"OSProvisioningTimedOut\\"'
kind: ConfigMap
metadata:
creationTimestamp: null
Expand Down
1 change: 1 addition & 0 deletions pkg/api/error.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ const (
CloudErrorCodeInvalidNetworkAddress = "InvalidNetworkAddress"
CloudErrorCodeThrottlingLimitExceeded = "ThrottlingLimitExceeded"
CloudErrorCodeInvalidCIDRRange = "InvalidCIDRRange"
CloudErrorCodeOSProvisioningTimedOut = "OSProvisioningTimedOut"
)

// NewCloudError returns a new CloudError
Expand Down
10 changes: 10 additions & 0 deletions pkg/hive/failure/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,16 @@ func HandleProvisionFailed(ctx context.Context, cd *hivev1.ClusterDeployment, co
AzureInvalidTemplateDeployment.Message,
*armError,
)
case AzureOSProvisioningTimedOut.Reason:
armError, err := parseDeploymentFailedJson(*installLog)
if err != nil {
return err
}

return wrapArmError(
AzureOSProvisioningTimedOut.Message,
*armError,
)
default:
return genericErr
}
Expand Down
10 changes: 10 additions & 0 deletions pkg/hive/failure/reasons.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ var Reasons = []InstallFailingReason{
// priority over later ones.
AzureRequestDisallowedByPolicy,
AzureInvalidTemplateDeployment,
AzureOSProvisioningTimedOut,
}

var AzureRequestDisallowedByPolicy = InstallFailingReason{
Expand All @@ -36,3 +37,12 @@ var AzureInvalidTemplateDeployment = InstallFailingReason{
regexp.MustCompile(`"code":\w?"InvalidTemplateDeployment"`),
},
}

// AzureOSProvisioningTimedOut is the install-failure reason for install logs
// that report the Azure "OSProvisioningTimedOut" error code.
//
// NOTE(review): Message reads "didn't finished" (presumably meant "did not
// finish"). The same Name/Reason/Message/regex values also appear in
// hack/hive-config/hive-additional-install-log-regexes.yaml, so any wording
// or pattern change likely has to be applied in both places - confirm how
// that YAML is produced before editing.
var AzureOSProvisioningTimedOut = InstallFailingReason{
Name: "AzureOSProvisioningTimedOut",
Reason: "AzureOSProvisioningTimedOut",
Message: "OS Provisioning for VM, didn't finished in the allotted time. Please check provisioning state later.",
SearchRegexes: []*regexp.Regexp{
// In this raw string each `\\` matches one literal backslash, so the
// pattern targets the escaped-JSON form found in nested ARM error
// messages: "code\":<exactly one non-word character>\"OSProvisioningTimedOut\".
// The single \W means a log with zero or several separator characters
// between the colon and the value would not match.
regexp.MustCompile(`"code\\":\W\\"OSProvisioningTimedOut\\"`),
},
}
5 changes: 5 additions & 0 deletions pkg/hive/failure/reasons_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ level=error msg=step [AuthorizationRetryingAction github.com/openshift/ARO-Insta
level=error msg=400: DeploymentFailed: : Deployment failed. Details: : : {"code":"InvalidTemplateDeployment","message":"The template deployment failed with multiple errors. Please see details for more information.","details":[{"additionalInfo":[],"code":"RequestDisallowedByPolicy","message":"Resource 'test-bootstrap' was disallowed by policy. Policy identifiers: ''.","target":"test-bootstrap"}]}`,
want: AzureRequestDisallowedByPolicy,
},
{
name: "OSProvisioningTimedOut",
installLog: `Message: level=info msg=creating InstanceMetadata from Azure Instance Metadata Service (AIMS) level=info msg=InstanceMetadata: running on AzurePublicCloud level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func1] level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func2] level=info msg=resolving graph level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func3] level=info msg=checking if graph exists level=info msg=save graph Generates the Ignition Config asset level=info msg=creating InstanceMetadata from Azure Instance Metadata Service (AIMS) level=info msg=InstanceMetadata: running on AzurePublicCloud level=info msg=running step [AuthorizationRetryingAction github.com/openshift/ARO-Installer/pkg/installer.(*manager).deployResourceTemplate-fm] level=info msg=load persisted graph level=info msg=deploying resources template level=error msg=step [AuthorizationRetryingAction github.com/openshift/ARO-Installer/pkg/installer.(*manager).deployResourceTemplate-fm] encountered error: 400: DeploymentFailed: : Deployment failed. Details: : : {"code":"DeploymentFailed","message":"At least one resource deployment operation failed. Please list deployment operations for details. Please see https://aka.ms/arm-deployment-operations for usage details.","target":null,"details":[{"code":"Conflict","message":"{\r\n \"status\": \"Failed\",\r\n \"error\": {\r\n \"code\": \"ResourceDeploymentFailure\",\r\n \"message\": \"The resource write operation failed to complete successfully, because it reached terminal provisioning state 'Failed'.\",\r\n \"details\": [\r\n {\r\n \"code\": \"OSProvisioningTimedOut\",\r\n \"message\": \"OS Provisioning for VM 'aro-test-j57nv-master-2' did not finish in the allotted time. The VM may still finish provisioning successfully. Please check provisioning state later. 
For details on how to check current provisioning state of Windows VMs, refer to https://aka.ms/WindowsVMLifecycle and Linux VMs, refer to https://aka.ms/LinuxVMLifecycle.\"\r\n }\r\n ]\r\n }\r\n}"}],"innererror":null,"additionalInfo":null} level=error msg=400: DeploymentFailed: : Deployment failed. Details: : : {"code":"DeploymentFailed","message":"At least one resource deployment operation failed. Please list deployment operations for details. Please see https://aka.ms/arm-deployment-operations for usage details.","target":null,"details":[{"code":"Conflict","message":"{\r\n \"status\": \"Failed\",\r\n \"error\": {\r\n \"code\": \"ResourceDeploymentFailure\",\r\n \"message\": \"The resource write operation failed to complete successfully, because it reached terminal provisioning state 'Failed'.\",\r\n \"details\": [\r\n {\r\n \"code\": \"OSProvisioningTimedOut\",\r\n \"message\": \"OS Provisioning for VM 'aro-test-j57nv-master-2' did not finish in the allotted time. The VM may still finish provisioning successfully. Please check provisioning state later. For details on how to check current provisioning state of Windows VMs, refer to https://aka.ms/WindowsVMLifecycle and Linux VMs, refer to https://aka.ms/LinuxVMLifecycle.\"\r\n }\r\n ]\r\n }\r\n}"}],"innererror":null,"additionalInfo":null}`,
want: AzureOSProvisioningTimedOut,
},
} {
t.Run(tt.name, func(t *testing.T) {
// This test uses a "mock" version of Hive's real implementation for matching install logs against regex patterns.
Expand Down

0 comments on commit 42df7bf

Please sign in to comment.