Skip to content

Commit

Permalink
Add initial spike implementation for Hive RequestDisallowedByPolicy e…
Browse files Browse the repository at this point in the history
…rror handling
  • Loading branch information
tsatam committed Jul 6, 2023
1 parent 5f9df8a commit 80ed4a4
Show file tree
Hide file tree
Showing 3 changed files with 242 additions and 12 deletions.
9 changes: 9 additions & 0 deletions pkg/hive/const.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package hive

// Copyright (c) Microsoft Corporation.
// Licensed under the Apache License 2.0.

// Reason strings that this package compares against the Reason field of a
// ClusterDeployment's ProvisionFailed condition.
// NOTE(review): presumably these mirror the reasons Hive itself sets on the
// condition — confirm against the Hive install-log regex configuration.
const (
// ProvisionFailedReasonInvalidTemplateDeployment is the Reason for which the
// install log is inspected to recover per-resource deployment error details.
ProvisionFailedReasonInvalidTemplateDeployment = "AzureInvalidTemplateDeployment"
// ProvisionFailedReasonUnknownError is a Reason that receives no special
// handling and is reported as a generic deployment failure.
ProvisionFailedReasonUnknownError = "UnknownError"
)
91 changes: 84 additions & 7 deletions pkg/hive/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,14 @@ package hive

import (
"context"
"encoding/json"
"fmt"
"net/http"
"regexp"
"sort"
"strings"

mgmtfeatures "github.com/Azure/azure-sdk-for-go/services/resources/mgmt/2019-07-01/features"
hivev1 "github.com/openshift/hive/apis/hive/v1"
"github.com/sirupsen/logrus"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -195,14 +201,9 @@ func (hr *clusterManager) IsClusterInstallationComplete(ctx context.Context, doc
return true, nil
}

checkFailureConditions := map[hivev1.ClusterDeploymentConditionType]corev1.ConditionStatus{
hivev1.ProvisionFailedCondition: corev1.ConditionTrue,
}

for _, cond := range cd.Status.Conditions {
conditionStatus, found := checkFailureConditions[cond.Type]
if found && conditionStatus == cond.Status {
return false, fmt.Errorf("clusterdeployment has failed: %s == %s", cond.Type, cond.Status)
if cond.Type == hivev1.ProvisionFailedCondition {
return false, hr.handleProvisionFailed(ctx, cd, cond)
}
}

Expand Down Expand Up @@ -237,3 +238,79 @@ func (hr *clusterManager) ResetCorrelationData(ctx context.Context, doc *api.Ope
return hr.hiveClientset.Update(ctx, cd)
})
}

// installLogJSONPayload matches the widest {...} span on a single log line.
// It is compiled once at package scope instead of on every failed provision.
var installLogJSONPayload = regexp.MustCompile(`(\{.*\})`)

// provisionFailedGenericError builds the error reported when a provisioning
// failure cannot be mapped to anything more specific.
func provisionFailedGenericError() error {
	return &api.CloudError{
		StatusCode: http.StatusInternalServerError,
		CloudErrorBody: &api.CloudErrorBody{
			Code:    api.CloudErrorCodeInternalServerError,
			Message: "Deployment failed.",
		},
	}
}

// handleProvisionFailed translates a ProvisionFailed condition on cd into the
// error to report for the cluster installation.
//
// It returns nil when cond.Status is not True. Otherwise it always returns a
// non-nil error: a detailed *api.CloudError for reasons with a dedicated
// handler, or the generic internal-server-error *api.CloudError for any other
// reason.
func (hr *clusterManager) handleProvisionFailed(ctx context.Context, cd *hivev1.ClusterDeployment, cond hivev1.ClusterDeploymentCondition) error {
	if cond.Status != corev1.ConditionTrue {
		return nil
	}

	switch cond.Reason {
	case ProvisionFailedReasonInvalidTemplateDeployment:
		return hr.handleInvalidTemplateDeployment(ctx, cd)
	default:
		return provisionFailedGenericError()
	}
}

// handleInvalidTemplateDeployment recovers the deployment error details from
// the last line of the latest provision's install log and returns them as a
// 400 DeploymentFailed *api.CloudError.
//
// Errors from listing provisions or from decoding the JSON payload are
// returned to the caller. When the details cannot be located at all (no
// provision, no install log, no JSON on the last line, or no "details" array)
// it logs a warning and falls back to the generic error rather than panicking.
//
// TODO: extract the install log JSON parsing into a reusable component.
func (hr *clusterManager) handleInvalidTemplateDeployment(ctx context.Context, cd *hivev1.ClusterDeployment) error {
	latestProvision, err := hr.latestProvisionForDeployment(ctx, cd)
	if err != nil {
		return err
	}
	// latestProvisionForDeployment returns (nil, nil) when no provision exists,
	// and InstallLog is a pointer that may be unset.
	if latestProvision == nil || latestProvision.Spec.InstallLog == nil {
		hr.log.Warn("no install log available for failed clusterdeployment")
		return provisionFailedGenericError()
	}

	// Only the final non-blank line of the log is inspected.
	installLog := strings.TrimSpace(*latestProvision.Spec.InstallLog)
	lastLine := installLog[strings.LastIndex(installLog, "\n")+1:]

	match := installLogJSONPayload.FindStringSubmatch(lastLine)
	if match == nil {
		hr.log.Warn("no JSON error payload found on last line of install log")
		return provisionFailedGenericError()
	}

	response := &mgmtfeatures.ErrorResponse{}
	if err := json.Unmarshal([]byte(match[1]), response); err != nil {
		return fmt.Errorf("parsing deployment error from install log: %w", err)
	}
	if response.Details == nil {
		hr.log.Warn("deployment error in install log carries no details")
		return provisionFailedGenericError()
	}

	// The SDK models every field as an optional pointer (e.g. "target": null),
	// so dereference defensively.
	valueOrEmpty := func(s *string) string {
		if s == nil {
			return ""
		}
		return *s
	}

	source := *response.Details
	details := make([]api.CloudErrorBody, len(source))
	for i := range source {
		details[i] = api.CloudErrorBody{
			Code:    valueOrEmpty(source[i].Code),
			Message: valueOrEmpty(source[i].Message),
			Target:  valueOrEmpty(source[i].Target),
		}
	}

	return &api.CloudError{
		StatusCode: http.StatusBadRequest,
		CloudErrorBody: &api.CloudErrorBody{
			Code:    api.CloudErrorCodeDeploymentFailed,
			Message: "The deployment failed. Please see details for more information.",
			Details: details,
		},
	}
}

// latestProvisionForDeployment looks up the ClusterProvisions in cd's namespace
// that are labelled with cd's name and returns the one with the highest
// Spec.Attempt.
//
// It returns (nil, nil) when no matching provision exists. A failure to list
// is logged as a warning and returned.
func (hr *clusterManager) latestProvisionForDeployment(ctx context.Context, cd *hivev1.ClusterDeployment) (*hivev1.ClusterProvision, error) {
	var found hivev1.ClusterProvisionList

	byNamespace := client.InNamespace(cd.Namespace)
	byDeployment := client.MatchingLabels{"hive.openshift.io/cluster-deployment-name": cd.Name}

	err := hr.hiveClientset.List(ctx, &found, byNamespace, byDeployment)
	if err != nil {
		hr.log.WithError(err).Warn("could not list provisions for clusterdeployment")
		return nil, err
	}

	count := len(found.Items)
	if count == 0 {
		return nil, nil
	}

	// Sort pointers into the list rather than the (large) items themselves.
	candidates := make([]*hivev1.ClusterProvision, 0, count)
	for idx := range found.Items {
		candidates = append(candidates, &found.Items[idx])
	}

	// Descending by attempt number, so the newest attempt ends up first.
	sort.Slice(candidates, func(a, b int) bool {
		return candidates[a].Spec.Attempt > candidates[b].Spec.Attempt
	})

	return candidates[0], nil
}
154 changes: 149 additions & 5 deletions pkg/hive/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package hive

import (
"context"
"net/http"
"reflect"
"testing"

Expand Down Expand Up @@ -181,9 +182,10 @@ func TestIsClusterInstallationComplete(t *testing.T) {

for _, tt := range []struct {
name string
cd kruntime.Object
cd *hivev1.ClusterDeployment
cp *hivev1.ClusterProvision
wantResult bool
wantErr string
wantErr error
}{
{
name: "is installed",
Expand Down Expand Up @@ -228,7 +230,7 @@ func TestIsClusterInstallationComplete(t *testing.T) {
wantResult: false,
},
{
name: "has failed provisioning",
name: "has failed provisioning - no Reason",
cd: &hivev1.ClusterDeployment{
ObjectMeta: metav1.ObjectMeta{
Name: ClusterDeploymentName,
Expand All @@ -243,22 +245,35 @@ func TestIsClusterInstallationComplete(t *testing.T) {
},
},
},
wantErr: "clusterdeployment has failed: ProvisionFailed == True",
wantErr: &api.CloudError{
StatusCode: http.StatusInternalServerError,
CloudErrorBody: &api.CloudErrorBody{
Code: api.CloudErrorCodeInternalServerError,
Message: "Deployment failed.",
},
},
wantResult: false,
},
// TODO: move test cases for handleProvisionFailed here
} {
t.Run(tt.name, func(t *testing.T) {
fakeClientBuilder := fake.NewClientBuilder()
if tt.cd != nil {
fakeClientBuilder = fakeClientBuilder.WithRuntimeObjects(tt.cd)
}
if tt.cp != nil {
fakeClientBuilder = fakeClientBuilder.WithRuntimeObjects(tt.cp)
}
c := clusterManager{
hiveClientset: fakeClientBuilder.Build(),
log: logrus.NewEntry(logrus.StandardLogger()),
}

result, err := c.IsClusterInstallationComplete(context.Background(), doc)
utilerror.AssertErrorMessage(t, err, tt.wantErr)

if diff := cmp.Diff(tt.wantErr, err); diff != "" {
t.Error(diff)
}

if tt.wantResult != result {
t.Error(result)
Expand Down Expand Up @@ -440,3 +455,132 @@ func TestGetClusterDeployment(t *testing.T) {
})
}
}

// TestHandleProvisionFailed checks the error returned by handleProvisionFailed
// for a ProvisionFailed condition with Status True across several Reason
// values, comparing the result against wantErr with cmp.Diff.
func TestHandleProvisionFailed(t *testing.T) {
fakeNamespace := "aro-00000000-0000-0000-0000-00000000000"
// genericErr is the expected result for every case in this table whose
// Reason is not ProvisionFailedReasonInvalidTemplateDeployment.
genericErr := &api.CloudError{
StatusCode: http.StatusInternalServerError,
CloudErrorBody: &api.CloudErrorBody{
Code: api.CloudErrorCodeInternalServerError,
Message: "Deployment failed.",
},
}

for _, tt := range []struct {
name string
// reason is set as the condition's Reason.
reason string
// installLog becomes the Spec.InstallLog of the ClusterProvision fixture.
installLog string
wantErr error
}{
{
name: "No Reason provided returns generic error",
reason: "",
wantErr: genericErr,
},
{
name: "Known Reason not relevant to ARO returns generic error",
reason: "AWSInsufficientCapacity",
wantErr: genericErr,
},
{
name: "Reason: UnknownError returns generic error",
reason: ProvisionFailedReasonUnknownError,
wantErr: genericErr,
},
{
name: "Reason: InvalidTemplateDeployment extracts error from logs",
reason: ProvisionFailedReasonInvalidTemplateDeployment,
// The final line of this log ends with a JSON error document; its
// "details" entries are what wantErr.Details below mirrors.
installLog: `level=info msg=running in local development mode
level=info msg=creating development InstanceMetadata
level=info msg=InstanceMetadata: running on AzurePublicCloud
level=info msg=running step [Action github.com/Azure/ARO-RP/pkg/installer.(*manager).Manifests.func1]
level=info msg=running step [Action github.com/Azure/ARO-RP/pkg/installer.(*manager).Manifests.func2]
level=info msg=resolving graph
level=info msg=running step [Action github.com/Azure/ARO-RP/pkg/installer.(*manager).Manifests.func3]
level=info msg=checking if graph exists
level=info msg=save graph
Generates the Ignition Config asset
level=info msg=running in local development mode
level=info msg=creating development InstanceMetadata
level=info msg=InstanceMetadata: running on AzurePublicCloud
level=info msg=running step [AuthorizationRefreshingAction [Action github.com/Azure/ARO-RP/pkg/installer.(*manager).deployResourceTemplate-fm]]
level=info msg=load persisted graph
level=info msg=deploying resources template
level=error msg=step [AuthorizationRefreshingAction [Action github.com/Azure/ARO-RP/pkg/installer.(*manager).deployResourceTemplate-fm]] encountered error: 400: DeploymentFailed: : Deployment failed. Details: : : {"code": "InvalidTemplateDeployment","message": "The template deployment failed with multiple errors. Please see details for more information.","target": null,"details": [{"code": "RequestDisallowedByPolicy","message": "Resource 'aro-test-aaaaa-bootstrap' was disallowed by policy.","target": "aro-test-aaaaa-bootstrap"},{"code": "RequestDisallowedByPolicy","message": "Resource 'aro-test-aaaaa-master-0' was disallowed by policy.","target": "aro-test-aaaaa-master-0"},{"code": "RequestDisallowedByPolicy","message": "Resource 'aro-test-aaaaa-master-1' was disallowed by policy.","target": "aro-test-aaaaa-master-1"},{"code": "RequestDisallowedByPolicy","message": "Resource 'aro-test-aaaaa-master-2' was disallowed by policy.","target": "aro-test-aaaaa-master-2"}]}
level=error msg=400: DeploymentFailed: : Deployment failed. Details: : : {"code": "InvalidTemplateDeployment","message": "The template deployment failed with multiple errors. Please see details for more information.","target": null,"details": [{"code": "RequestDisallowedByPolicy","message": "Resource 'aro-test-aaaaa-bootstrap' was disallowed by policy.","target": "aro-test-aaaaa-bootstrap"},{"code": "RequestDisallowedByPolicy","message": "Resource 'aro-test-aaaaa-master-0' was disallowed by policy.","target": "aro-test-aaaaa-master-0"},{"code": "RequestDisallowedByPolicy","message": "Resource 'aro-test-aaaaa-master-1' was disallowed by policy.","target": "aro-test-aaaaa-master-1"},{"code": "RequestDisallowedByPolicy","message": "Resource 'aro-test-aaaaa-master-2' was disallowed by policy.","target": "aro-test-aaaaa-master-2"}]}`,
wantErr: &api.CloudError{
StatusCode: http.StatusBadRequest,
CloudErrorBody: &api.CloudErrorBody{
Code: api.CloudErrorCodeDeploymentFailed,
Message: "The deployment failed. Please see details for more information.",
Details: []api.CloudErrorBody{
{
Code: api.CloudErrorCodeRequestDisallowedByPolicy,
Message: "Resource 'aro-test-aaaaa-bootstrap' was disallowed by policy.",
Target: "aro-test-aaaaa-bootstrap",
},
{
Code: api.CloudErrorCodeRequestDisallowedByPolicy,
Message: "Resource 'aro-test-aaaaa-master-0' was disallowed by policy.",
Target: "aro-test-aaaaa-master-0",
},
{
Code: api.CloudErrorCodeRequestDisallowedByPolicy,
Message: "Resource 'aro-test-aaaaa-master-1' was disallowed by policy.",
Target: "aro-test-aaaaa-master-1",
},
{
Code: api.CloudErrorCodeRequestDisallowedByPolicy,
Message: "Resource 'aro-test-aaaaa-master-2' was disallowed by policy.",
Target: "aro-test-aaaaa-master-2",
},
},
},
},
},
} {
t.Run(tt.name, func(t *testing.T) {
// The condition under test always has Status True; only Reason varies.
cond := hivev1.ClusterDeploymentCondition{
Type: hivev1.ProvisionFailedCondition,
Status: corev1.ConditionTrue,
Reason: tt.reason,
}
hcd := &hivev1.ClusterDeployment{
ObjectMeta: metav1.ObjectMeta{
Name: ClusterDeploymentName,
Namespace: fakeNamespace,
},
Status: hivev1.ClusterDeploymentStatus{
Conditions: []hivev1.ClusterDeploymentCondition{cond},
},
}
// The provision shares the deployment's namespace and carries the
// cluster-deployment-name label, which is how
// latestProvisionForDeployment selects provisions.
hcp := &hivev1.ClusterProvision{
ObjectMeta: metav1.ObjectMeta{
Name: ClusterDeploymentName + "-0-bbbbb",
Namespace: fakeNamespace,
Labels: map[string]string{
"hive.openshift.io/cluster-deployment-name": ClusterDeploymentName,
},
},
Spec: hivev1.ClusterProvisionSpec{
InstallLog: &tt.installLog,
},
}

// Seed the fake client with both objects.
fakeClientBuilder := fake.NewClientBuilder().
WithRuntimeObjects(hcd, hcp)

c := clusterManager{
hiveClientset: fakeClientBuilder.Build(),
log: logrus.NewEntry(logrus.StandardLogger()),
}

err := c.handleProvisionFailed(context.Background(), hcd, cond)

// Any difference between expected and actual error fails the case.
if diff := cmp.Diff(tt.wantErr, err); diff != "" {
t.Error(diff)
}
})
}
}

0 comments on commit 80ed4a4

Please sign in to comment.