Skip to content

Commit

Permalink
Implement generic handling for Hive ProvisionFailed=True conditions w…
Browse files Browse the repository at this point in the history
…/ Azure policy errors
  • Loading branch information
tsatam committed Aug 23, 2023
1 parent 8c4a761 commit 5e44eeb
Show file tree
Hide file tree
Showing 7 changed files with 345 additions and 57 deletions.
18 changes: 14 additions & 4 deletions hack/genhiveconfig/genhiveconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ import (
const (
hiveNamespaceName = "hive"
configMapName = "additional-install-log-regexes"
configMapPath = "hack/hive-config/hive-additional-install-log-regexes.yaml"
regexDataEntryName = "regexes"
)

Expand All @@ -29,7 +28,7 @@ type installLogRegex struct {
InstallFailingMessage string `json:"installFailingMessage"`
}

func run(ctx context.Context) error {
func run(ctx context.Context, path string) error {
ilrs := []installLogRegex{}

for _, reason := range failure.Reasons {
Expand Down Expand Up @@ -59,7 +58,13 @@ func run(ctx context.Context) error {
if err != nil {
return err
}
return os.WriteFile(configMapPath, configmapRaw, 0666)

if path != "" {
return os.WriteFile(path, configmapRaw, 0666)
} else {
print(string(configmapRaw))
return nil
}
}

func failureReasonToInstallLogRegex(reason failure.InstallFailingReason) installLogRegex {
Expand All @@ -78,7 +83,12 @@ func failureReasonToInstallLogRegex(reason failure.InstallFailingReason) install
func main() {
log := utillog.GetLogger()

if err := run(context.Background()); err != nil {
path := ""
if len(os.Args) > 1 {
path = os.Args[1]
}

if err := run(context.Background(), path); err != nil {
log.Fatal(err)
}
}
6 changes: 6 additions & 0 deletions hack/hive-config/generate.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
package hiveconfig

// Copyright (c) Microsoft Corporation.
// Licensed under the Apache License 2.0.

//go:generate go run ../genhiveconfig ./hive-additional-install-log-regexes.yaml
9 changes: 7 additions & 2 deletions hack/hive-config/hive-additional-install-log-regexes.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
apiVersion: v1
data:
regexes: |
- installFailingMessage: The template deployment failed. Please see details for more
information.
- installFailingMessage: Deployment failed due to RequestDisallowedByPolicy. Please
see details for more information.
installFailingReason: AzureRequestDisallowedByPolicy
name: AzureRequestDisallowedByPolicy
searchRegexStrings:
- '"code":\w?"InvalidTemplateDeployment".*"code":\w?"RequestDisallowedByPolicy"'
- installFailingMessage: Deployment failed. Please see details for more information.
installFailingReason: AzureInvalidTemplateDeployment
name: AzureInvalidTemplateDeployment
searchRegexStrings:
Expand Down
96 changes: 96 additions & 0 deletions pkg/hive/failure/handler.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
package failure

// Copyright (c) Microsoft Corporation.
// Licensed under the Apache License 2.0.

import (
"context"
"encoding/json"
"net/http"
"regexp"

mgmtfeatures "github.com/Azure/azure-sdk-for-go/services/resources/mgmt/2019-07-01/features"
hivev1 "github.com/openshift/hive/apis/hive/v1"
corev1 "k8s.io/api/core/v1"

"github.com/Azure/ARO-RP/pkg/api"
)

// genericErr is the CloudError returned by HandleProvisionFailed when the
// failure reason on the condition is not one of the specifically handled
// reasons. It is an HTTP 500 with the internal-server-error code and a
// fixed "Deployment failed." message, with no details attached.
var genericErr = &api.CloudError{
StatusCode: http.StatusInternalServerError,
CloudErrorBody: &api.CloudErrorBody{
Code: api.CloudErrorCodeInternalServerError,
Message: "Deployment failed.",
},
}

// HandleProvisionFailed translates a ProvisionFailed condition on a Hive
// ClusterDeployment into an error suitable for returning to the caller.
//
// It returns nil when cond.Status is not ConditionTrue. For the
// AzureRequestDisallowedByPolicy and AzureInvalidTemplateDeployment reasons
// it parses the ARM error payload out of installLog and wraps it with the
// reason's message. For every other reason it returns genericErr.
//
// installLog may be nil (no log was recorded for the provision); in that
// case there is nothing to extract details from and genericErr is returned
// instead of dereferencing a nil pointer.
func HandleProvisionFailed(ctx context.Context, cd *hivev1.ClusterDeployment, cond hivev1.ClusterDeploymentCondition, installLog *string) error {
	if cond.Status != corev1.ConditionTrue {
		return nil
	}

	// Pick the user-facing message for the reasons that carry ARM details;
	// anything else is reported generically.
	var message string
	switch cond.Reason {
	case AzureRequestDisallowedByPolicy.Reason:
		message = AzureRequestDisallowedByPolicy.Message
	case AzureInvalidTemplateDeployment.Reason:
		message = AzureInvalidTemplateDeployment.Message
	default:
		return genericErr
	}

	// Without an install log the ARM error details cannot be recovered.
	if installLog == nil {
		return genericErr
	}

	armError, err := parseDeploymentFailedJson(*installLog)
	if err != nil {
		return err
	}

	return wrapArmError(message, *armError)
}

// deploymentFailedRegex captures the JSON object that follows the
// "DeploymentFailed" error line in an install log. It is compiled once at
// package scope rather than on every call.
var deploymentFailedRegex = regexp.MustCompile(`level=error msg=400: DeploymentFailed: : Deployment failed. Details: : : (\{.*\})`)

// parseDeploymentFailedJson extracts the ARM error payload embedded in the
// install log's DeploymentFailed line and decodes it.
//
// It returns genericErr when the log contains no such line (previously this
// indexed the nil match slice and panicked), and the json.Unmarshal error
// when the captured text is not valid JSON for an ErrorResponse.
func parseDeploymentFailedJson(installLog string) (*mgmtfeatures.ErrorResponse, error) {
	match := deploymentFailedRegex.FindStringSubmatch(installLog)
	if len(match) < 2 {
		// No DeploymentFailed payload to report; fall back to the generic error.
		return nil, genericErr
	}

	armResponse := &mgmtfeatures.ErrorResponse{}
	if err := json.Unmarshal([]byte(match[1]), armResponse); err != nil {
		return nil, err
	}
	return armResponse, nil
}

// wrapArmError builds an HTTP 400 DeploymentFailed CloudError whose message
// is errorMessage and whose details are the entries of armError.Details,
// each converted with errorResponseToCloudErrorBody.
//
// armError.Details is a pointer and is nil when the ARM payload carried no
// "details" array; in that case the resulting CloudError simply has no
// details instead of panicking on the nil dereference.
func wrapArmError(errorMessage string, armError mgmtfeatures.ErrorResponse) *api.CloudError {
	var details []api.CloudErrorBody
	if armError.Details != nil {
		details = make([]api.CloudErrorBody, len(*armError.Details))
		for i, detail := range *armError.Details {
			details[i] = errorResponseToCloudErrorBody(detail)
		}
	}

	return &api.CloudError{
		StatusCode: http.StatusBadRequest,
		CloudErrorBody: &api.CloudErrorBody{
			Code:    api.CloudErrorCodeDeploymentFailed,
			Message: errorMessage,
			Details: details,
		},
	}
}

// errorResponseToCloudErrorBody copies the code, message and target of an
// ARM ErrorResponse into a CloudErrorBody.
//
// All three source fields are pointers; any that is nil is left as the empty
// string in the result. Previously only Target was guarded, so a payload
// missing "code" or "message" caused a nil-pointer panic.
func errorResponseToCloudErrorBody(errorResponse mgmtfeatures.ErrorResponse) api.CloudErrorBody {
	body := api.CloudErrorBody{}

	if errorResponse.Code != nil {
		body.Code = *errorResponse.Code
	}

	if errorResponse.Message != nil {
		body.Message = *errorResponse.Message
	}

	if errorResponse.Target != nil {
		body.Target = *errorResponse.Target
	}

	return body
}
4 changes: 2 additions & 2 deletions pkg/hive/failure/reasons.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ var Reasons = []InstallFailingReason{
var AzureRequestDisallowedByPolicy = InstallFailingReason{
Name: "AzureRequestDisallowedByPolicy",
Reason: "AzureRequestDisallowedByPolicy",
Message: "Cluster Deployment was disallowed by policy. Please see install log for more information.",
Message: "Deployment failed due to RequestDisallowedByPolicy. Please see details for more information.",
SearchRegexes: []*regexp.Regexp{
regexp.MustCompile(`"code":\w?"InvalidTemplateDeployment".*"code":\w?"RequestDisallowedByPolicy"`),
},
Expand All @@ -31,7 +31,7 @@ var AzureRequestDisallowedByPolicy = InstallFailingReason{
var AzureInvalidTemplateDeployment = InstallFailingReason{
Name: "AzureInvalidTemplateDeployment",
Reason: "AzureInvalidTemplateDeployment",
Message: "The template deployment failed. Please see install log for more information.",
Message: "Deployment failed. Please see details for more information.",
SearchRegexes: []*regexp.Regexp{
regexp.MustCompile(`"code":\w?"InvalidTemplateDeployment"`),
},
Expand Down
40 changes: 33 additions & 7 deletions pkg/hive/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ package hive

import (
"context"
"errors"
"fmt"
"sort"

hivev1 "github.com/openshift/hive/apis/hive/v1"
"github.com/sirupsen/logrus"
Expand All @@ -19,6 +21,7 @@ import (

"github.com/Azure/ARO-RP/pkg/api"
"github.com/Azure/ARO-RP/pkg/env"
"github.com/Azure/ARO-RP/pkg/hive/failure"
"github.com/Azure/ARO-RP/pkg/util/dynamichelper"
utillog "github.com/Azure/ARO-RP/pkg/util/log"
"github.com/Azure/ARO-RP/pkg/util/uuid"
Expand Down Expand Up @@ -195,14 +198,13 @@ func (hr *clusterManager) IsClusterInstallationComplete(ctx context.Context, doc
return true, nil
}

checkFailureConditions := map[hivev1.ClusterDeploymentConditionType]corev1.ConditionStatus{
hivev1.ProvisionFailedCondition: corev1.ConditionTrue,
}

for _, cond := range cd.Status.Conditions {
conditionStatus, found := checkFailureConditions[cond.Type]
if found && conditionStatus == cond.Status {
return false, fmt.Errorf("clusterdeployment has failed: %s == %s", cond.Type, cond.Status)
if cond.Type == hivev1.ProvisionFailedCondition && cond.Status == corev1.ConditionTrue {
log, err := hr.installLogsForDeployment(ctx, cd)
if err != nil {
return false, err
}
return false, failure.HandleProvisionFailed(ctx, cd, cond, log)
}
}

Expand Down Expand Up @@ -237,3 +239,27 @@ func (hr *clusterManager) ResetCorrelationData(ctx context.Context, doc *api.Ope
return hr.hiveClientset.Update(ctx, cd)
})
}

// installLogsForDeployment lists the ClusterProvisions in cd's namespace that
// carry the "hive.openshift.io/cluster-deployment-name" label set to cd.Name
// and returns the Spec.InstallLog of the one with the highest Spec.Attempt.
//
// It returns the list error (after logging a warning) when the listing
// fails, and an error when no provisions are found. The returned pointer is
// passed through as stored on the provision.
// NOTE(review): it is presumably nil when no log was recorded — confirm.
func (hr *clusterManager) installLogsForDeployment(ctx context.Context, cd *hivev1.ClusterDeployment) (*string, error) {
	selector := client.MatchingLabels(map[string]string{"hive.openshift.io/cluster-deployment-name": cd.Name})

	var found hivev1.ClusterProvisionList
	err := hr.hiveClientset.List(ctx, &found, client.InNamespace(cd.Namespace), selector)
	if err != nil {
		hr.log.WithError(err).Warn("could not list provisions for clusterdeployment")
		return nil, err
	}

	if len(found.Items) == 0 {
		return nil, errors.New("no provisions for deployment")
	}

	// Sort pointers into the list, highest attempt first, so that the most
	// recent provision ends up at index 0.
	byAttempt := make([]*hivev1.ClusterProvision, 0, len(found.Items))
	for idx := range found.Items {
		byAttempt = append(byAttempt, &found.Items[idx])
	}
	sort.Slice(byAttempt, func(a, b int) bool {
		return byAttempt[a].Spec.Attempt > byAttempt[b].Spec.Attempt
	})

	return byAttempt[0].Spec.InstallLog, nil
}
Loading

0 comments on commit 5e44eeb

Please sign in to comment.