diff --git a/hack/hive-config/hive-additional-install-log-regexes.yaml b/hack/hive-config/hive-additional-install-log-regexes.yaml index ad5adc77410..31fd20511fa 100644 --- a/hack/hive-config/hive-additional-install-log-regexes.yaml +++ b/hack/hive-config/hive-additional-install-log-regexes.yaml @@ -12,6 +12,12 @@ data: name: AzureInvalidTemplateDeployment searchRegexStrings: - '"code":\w?"InvalidTemplateDeployment"' + - installFailingMessage: OS Provisioning for VM did not finish in the allotted time. + Please see details for more information. + installFailingReason: AzureOSProvisioningTimedOut + name: AzureOSProvisioningTimedOut + searchRegexStrings: + - '"code\W*":\W*"OSProvisioningTimedOut\W*"' kind: ConfigMap metadata: creationTimestamp: null diff --git a/pkg/hive/failure/handler.go b/pkg/hive/failure/handler.go index 7d4a78b393a..101deaf0bbf 100644 --- a/pkg/hive/failure/handler.go +++ b/pkg/hive/failure/handler.go @@ -50,6 +50,16 @@ func HandleProvisionFailed(ctx context.Context, cd *hivev1.ClusterDeployment, co AzureInvalidTemplateDeployment.Message, *armError, ) + case AzureOSProvisioningTimedOut.Reason: + armError, err := parseDeploymentFailedJson(*installLog) + if err != nil { + return err + } + + return wrapArmError( + AzureOSProvisioningTimedOut.Message, + *armError, + ) default: return genericErr } diff --git a/pkg/hive/failure/reasons.go b/pkg/hive/failure/reasons.go index 414e0ed03e8..99a9c74f06d 100644 --- a/pkg/hive/failure/reasons.go +++ b/pkg/hive/failure/reasons.go @@ -17,6 +17,7 @@ var Reasons = []InstallFailingReason{ // priority over later ones. 
AzureRequestDisallowedByPolicy, AzureInvalidTemplateDeployment, + AzureOSProvisioningTimedOut, } var AzureRequestDisallowedByPolicy = InstallFailingReason{ @@ -36,3 +37,14 @@ var AzureInvalidTemplateDeployment = InstallFailingReason{ regexp.MustCompile(`"code":\w?"InvalidTemplateDeployment"`), }, } + +// AzureOSProvisioningTimedOut matches install logs that contain an "OSProvisioningTimedOut" error code. +// The \W* runs let the regex match both plain and backslash-escaped (nested) JSON. +var AzureOSProvisioningTimedOut = InstallFailingReason{ + Name: "AzureOSProvisioningTimedOut", + Reason: "AzureOSProvisioningTimedOut", + Message: "OS Provisioning for VM did not finish in the allotted time. Please see details for more information.", + SearchRegexes: []*regexp.Regexp{ + regexp.MustCompile(`"code\W*":\W*"OSProvisioningTimedOut\W*"`), + }, +} diff --git a/pkg/hive/failure/reasons_test.go b/pkg/hive/failure/reasons_test.go index 19c4fd09c1d..fa2afa21eae 100644 --- a/pkg/hive/failure/reasons_test.go +++ b/pkg/hive/failure/reasons_test.go @@ -63,6 +63,68 @@ level=error msg=step [AuthorizationRetryingAction github.com/openshift/ARO-Insta level=error msg=400: DeploymentFailed: : Deployment failed. Details: : : {"code":"InvalidTemplateDeployment","message":"The template deployment failed with multiple errors. Please see details for more information.","details":[{"additionalInfo":[],"code":"RequestDisallowedByPolicy","message":"Resource 'test-bootstrap' was disallowed by policy. 
Policy identifiers: ''.","target":"test-bootstrap"}]}`, want: AzureRequestDisallowedByPolicy, }, + { + name: "OSProvisioningTimedOut-1", + installLog: `Message: level=info msg=creating InstanceMetadata from Azure Instance Metadata Service (AIMS) level=info msg=InstanceMetadata: running on AzurePublicCloud level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func1] level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func2] + level=info msg=resolving graph level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func3] level=info msg=checking if graph exists level=info msg=save graph Generates the Ignition Config asset level=info msg=creating InstanceMetadata from Azure Instance Metadata Service (AIMS) + level=info msg=InstanceMetadata: running on AzurePublicCloud level=info msg=running step [AuthorizationRetryingAction github.com/openshift/ARO-Installer/pkg/installer.(*manager).deployResourceTemplate-fm] level=info msg=load persisted graph level=info msg=deploying resources template level=error msg=step [AuthorizationRetryingAction github.com/openshift/ARO-Installer/pkg/installer.(*manager).deployResourceTemplate-fm] + encountered error: 400: DeploymentFailed: : Deployment failed. Details: : : {"code":"DeploymentFailed","message":"At least one resource deployment operation failed. Please list deployment operations for details. 
Please see https://aka.ms/arm-deployment-operations for usage details.","target":null, + "details":[{"code":"Conflict","message":"{\r\n \"status\": \"Failed\",\r\n \"error\": {\r\n \"code\": \"ResourceDeploymentFailure\",\r\n \"message\": \"The resource write operation failed to complete successfully, because it reached terminal provisioning state 'Failed'.\",\r\n \"details\": [\r\n {\r\n \"code\": \"OSProvisioningTimedOut\",\r\n \"message\": \"OS Provisioning for VM 'aro-test-j57nv-master-2' did not finish in the allotted time. + The VM may still finish provisioning successfully. Please check provisioning state later. For details on how to check current provisioning state of Windows VMs, refer to https://aka.ms/WindowsVMLifecycle and Linux VMs, refer to https://aka.ms/LinuxVMLifecycle.\"\r\n }\r\n ]\r\n }\r\n}"}],"innererror":null,"additionalInfo":null} level=error msg=400: DeploymentFailed: : Deployment failed. Details: : : {"code":"DeploymentFailed","message":"At least one resource deployment operation failed. + Please list deployment operations for details. Please see https://aka.ms/arm-deployment-operations for usage details.","target":null,"details":[{"code":"Conflict","message":"{\r\n \"status\": \"Failed\",\r\n \"error\": {\r\n \"code\": \"ResourceDeploymentFailure\",\r\n \"message\": \"The resource write operation failed to complete successfully, because it reached terminal provisioning state 'Failed'.\",\r\n \"details\": + [\r\n {\r\n \"code\": \"OSProvisioningTimedOut\",\r\n \"message\": \"OS Provisioning for VM 'aro-test-j57nv-master-2' did not finish in the allotted time. The VM may still finish provisioning successfully. Please check provisioning state later. 
For details on how to check current provisioning state of Windows VMs, refer to https://aka.ms/WindowsVMLifecycle and Linux VMs, refer to https://aka.ms/LinuxVMLifecycle.\"\r\n }\r\n ]\r\n }\r\n}"}],"innererror":null,"additionalInfo":null}`, + want: AzureOSProvisioningTimedOut, + }, + { + name: "OSProvisioningTimedOut-2", + installLog: `Message: level=info msg=creating InstanceMetadata from Azure Instance Metadata Service (AIMS) + level=info msg=InstanceMetadata: running on AzurePublicCloud + level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func1] + level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func2] + level=info msg=resolving graph + level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func3] + level=info msg=checking if graph exists + level=info msg=save graph Generates the Ignition Config asset + level=info msg=creating InstanceMetadata from Azure Instance Metadata Service (AIMS) + level=info msg=InstanceMetadata: running on AzurePublicCloud + level=info msg=running step [AuthorizationRetryingAction github.com/openshift/ARO-Installer/pkg/installer.(*manager).deployResourceTemplate-fm] level=info msg=load persisted graph + level=info msg=deploying resources template + level=error msg=step [AuthorizationRetryingAction github.com/openshift/ARO-Installer/pkg/installer.(*manager).deployResourceTemplate-fm] encountered error: 400: + DeploymentFailed: : Deployment failed. Details: : : {"code":"DeploymentFailed","message":"At least one resource deployment operation failed. Please list deployment operations for details. 
Please see https://aka.ms/arm-deployment-operations for usage details.","target":null,"details": + [{"code":"Conflict","message":"{\r\n \"status\": \"Failed\",\r\n \"error\": {\r\n \"code\": \"ResourceDeploymentFailure\",\r\n \"message\": \"The resource write operation failed to complete successfully, because it reached terminal provisioning state 'Failed'.\",\r\n \"details\": + [\r\n {\r\n \"code\": \"OSProvisioningTimedOut\",\r\n \"message\": \"OS Provisioning for VM 'aro-test-j57nv-master-2' did not finish in the allotted time. The VM may still finish provisioning successfully. + Please check provisioning state later. For details on how to check current provisioning state of Windows VMs, refer to https://aka.ms/WindowsVMLifecycle and Linux VMs, refer to https://aka.ms/LinuxVMLifecycle.\"\r\n }\r\n ]\r\n }\r\n}"}],"innererror":null,"additionalInfo":null} + level=error msg=400: DeploymentFailed: : Deployment failed. Details: : : {"code":"DeploymentFailed","message":"At least one resource deployment operation failed. Please list deployment operations for details. Please see https://aka.ms/arm-deployment-operations for usage details.","target":null,"details": + [{"code":"Conflict","message":"{\r\n \"status\": \"Failed\",\r\n \"error\": {\r\n \"code\": \"ResourceDeploymentFailure\",\r\n \"message\": \"The resource write operation failed to complete successfully, because it reached terminal provisioning state 'Failed'.\",\r\n \"details\": + [\r\n {\r\n \"code\": \"OSProvisioningTimedOut\",\r\n \"message\": \"OS Provisioning for VM 'aro-test-j57nv-master-2' did not finish in the allotted time. The VM may still finish provisioning successfully. Please check provisioning state later. 
+ For details on how to check current provisioning state of Windows VMs, refer to https://aka.ms/WindowsVMLifecycle and Linux VMs, refer to https://aka.ms/LinuxVMLifecycle.\"\r\n }\r\n ]\r\n }\r\n}"}],"innererror":null,"additionalInfo":null}`, + want: AzureOSProvisioningTimedOut, + }, + { + name: "OSProvisioningTimedOut-3", + installLog: `Message: level=info msg=creating InstanceMetadata from Azure Instance Metadata Service (AIMS) + level=info msg=InstanceMetadata: running on AzurePublicCloud + level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func1] + level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func2] + level=info msg=resolving graph + level=info msg=running step [Action github.com/openshift/ARO-Installer/pkg/installer.(*manager).Manifests.func3] + level=info msg=checking if graph exists + level=info msg=save graph Generates the Ignition Config asset + level=info msg=creating InstanceMetadata from Azure Instance Metadata Service (AIMS) + level=info msg=InstanceMetadata: running on AzurePublicCloud + level=info msg=running step [AuthorizationRetryingAction github.com/openshift/ARO-Installer/pkg/installer.(*manager).deployResourceTemplate-fm] level=info msg=load persisted graph + level=info msg=deploying resources template + level=error msg=step [AuthorizationRetryingAction github.com/openshift/ARO-Installer/pkg/installer.(*manager).deployResourceTemplate-fm] encountered error: 400: + DeploymentFailed: : Deployment failed. Details: : : {"code":"DeploymentFailed","message":"At least one resource deployment operation failed. Please list deployment operations for details. 
Please see https://aka.ms/arm-deployment-operations for usage details.","target":null,"details": + [{"code":"Conflict","message":"{"status":"Failed","error": {"code":"ResourceDeploymentFailure","message":"The resource write operation failed to complete successfully, because it reached terminal provisioning state 'Failed'.","details": + [ {"code":"OSProvisioningTimedOut","message":"OS Provisioning for VM 'aro-test-j57nv-master-2' did not finish in the allotted time. The VM may still finish provisioning successfully. + Please check provisioning state later. For details on how to check current provisioning state of Windows VMs, refer to https://aka.ms/WindowsVMLifecycle and Linux VMs, refer to https://aka.ms/LinuxVMLifecycle."}]}}"}],"innererror":null,"additionalInfo":null} + level=error msg=400: DeploymentFailed: : Deployment failed. Details: : : {"code":"DeploymentFailed","message":"At least one resource deployment operation failed. Please list deployment operations for details. Please see https://aka.ms/arm-deployment-operations for usage details.","target":null,"details": + [{"code":"Conflict","message":"{"status":"Failed","error": {"code":"ResourceDeploymentFailure","message":"The resource write operation failed to complete successfully, because it reached terminal provisioning state 'Failed'.","details": + [{"code":"OSProvisioningTimedOut","message":"OS Provisioning for VM 'aro-test-j57nv-master-2' did not finish in the allotted time. The VM may still finish provisioning successfully. Please check provisioning state later. + For details on how to check current provisioning state of Windows VMs, refer to https://aka.ms/WindowsVMLifecycle and Linux VMs, refer to https://aka.ms/LinuxVMLifecycle."}]}}"}],"innererror":null,"additionalInfo":null}`, + want: AzureOSProvisioningTimedOut, + }, } { t.Run(tt.name, func(t *testing.T) { // This test uses a "mock" version of Hive's real implementation for matching install logs against regex patterns.