From 0fc33327b4a94e0eac7ba89703a1973759bf56a3 Mon Sep 17 00:00:00 2001 From: LiniSusan Date: Fri, 15 Mar 2024 19:13:54 +0530 Subject: [PATCH] retry logic update --- pkg/cluster/arooperator.go | 17 +- pkg/cluster/arooperator_test.go | 4 +- pkg/cluster/condition.go | 97 ++-- pkg/cluster/condition_test.go | 10 +- pkg/cluster/hive.go | 26 +- pkg/cluster/hive_test.go | 2 +- pkg/cluster/install_test.go | 470 ++++++------------ pkg/containerinstall/install.go | 10 +- ...n_openshiftcluster_etcdcertificaterenew.go | 10 +- pkg/util/steps/condition.go | 6 +- pkg/util/steps/condition_test.go | 24 +- pkg/util/steps/runner.go | 300 +++++++++-- 12 files changed, 546 insertions(+), 430 deletions(-) diff --git a/pkg/cluster/arooperator.go b/pkg/cluster/arooperator.go index 7afbb5b3c9d..89f5420129b 100644 --- a/pkg/cluster/arooperator.go +++ b/pkg/cluster/arooperator.go @@ -39,26 +39,27 @@ func (m *manager) ensureAROOperator(ctx context.Context) error { return err } -func (m *manager) aroDeploymentReady(ctx context.Context) (bool, error) { +func (m *manager) aroDeploymentReady(ctx context.Context) (ok bool, retry bool, err error) { if !m.isIngressProfileAvailable() { // If the ingress profile is not available, ARO operator update/deploy will fail. m.log.Error("skip aroDeploymentReady") - return true, nil + return true, false, nil } - return m.aroOperatorDeployer.IsReady(ctx) + ok, err = m.aroOperatorDeployer.IsReady(ctx) + return ok, false, err } -func (m *manager) ensureAROOperatorRunningDesiredVersion(ctx context.Context) (bool, error) { +func (m *manager) ensureAROOperatorRunningDesiredVersion(ctx context.Context) (ok bool, retry bool, err error) { if !m.isIngressProfileAvailable() { // If the ingress profile is not available, ARO operator update/deploy will fail. m.log.Error("skip ensureAROOperatorRunningDesiredVersion") - return true, nil + return true, false, nil } - ok, err := m.aroOperatorDeployer.IsRunningDesiredVersion(ctx) + ok, err = m.aroOperatorDeployer.IsRunningDesiredVersion(ctx) if !ok || err != nil { - return false, err + return false, false, err } - return true, nil + return true, false, nil } func (m *manager) ensureCredentialsRequest(ctx context.Context) error { diff --git a/pkg/cluster/arooperator_test.go b/pkg/cluster/arooperator_test.go index 34bf6bc167d..570e3dff63d 100644 --- a/pkg/cluster/arooperator_test.go +++ b/pkg/cluster/arooperator_test.go @@ -215,7 +215,7 @@ func TestAroDeploymentReady(t *testing.T) { aroOperatorDeployer: dep, } - ok, err := m.aroDeploymentReady(ctx) + ok, _, err := m.aroDeploymentReady(ctx) if err != nil || ok != tt.wantRes { t.Error(err) } @@ -311,7 +311,7 @@ func TestEnsureAROOperatorRunningDesiredVersion(t *testing.T) { aroOperatorDeployer: dep, } - ok, err := m.ensureAROOperatorRunningDesiredVersion(ctx) + ok, _, err := m.ensureAROOperatorRunningDesiredVersion(ctx) if err != nil || ok != tt.wantRes { t.Error(err) } diff --git a/pkg/cluster/condition.go b/pkg/cluster/condition.go index a5f1fbd9140..efbc51446ef 100644 --- a/pkg/cluster/condition.go +++ b/pkg/cluster/condition.go @@ -6,12 +6,16 @@ package cluster import ( "context" "errors" + "fmt" + "net/http" "time" configv1 "github.com/openshift/api/config/v1" corev1 "k8s.io/api/core/v1" kerrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/Azure/ARO-RP/pkg/api" ) const minimumWorkerNodes = 2 @@ -19,65 +23,97 @@ const minimumWorkerNodes = 2 // condition functions should return an error only if it's not able to be retried // if a condition function encounters a error when retrying it should return false, nil. -func (m *manager) apiServersReady(ctx context.Context) (bool, error) { +func (m *manager) apiServersReady(ctx context.Context) (bool, retry bool, err error) { apiserver, err := m.configcli.ConfigV1().ClusterOperators().Get(ctx, "kube-apiserver", metav1.GetOptions{}) if err != nil { - return false, nil + return false, true, nil } - return isOperatorAvailable(apiserver), nil + return isOperatorAvailable(apiserver), true, nil } -func (m *manager) minimumWorkerNodesReady(ctx context.Context) (bool, error) { +func getErrMessage(err error, messageifany string) error { + message := "Minimum number of worker nodes have not been successfully created. Please retry and if the issue persists, raise an Azure support ticket" + if err != nil { + message = "Error: " + err.Error() + "Message: " + messageifany + message + } else { + message = messageifany + message + } + cloudError := api.NewCloudError( + http.StatusInternalServerError, + api.CloudErrorCodeDeploymentFailed, + "", + message, + ) + return cloudError +} + +func (m *manager) minimumWorkerNodesReady(ctx context.Context) (nodeCheck bool, retry bool, err error) { nodes, err := m.kubernetescli.CoreV1().Nodes().List(ctx, metav1.ListOptions{ LabelSelector: "node-role.kubernetes.io/worker", }) if err != nil { - return false, nil + return false, true, getErrMessage(err, "") } readyWorkers := 0 + message := "" for _, node := range nodes.Items { for _, cond := range node.Status.Conditions { if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue { readyWorkers++ + } else { + messageString := fmt.Sprintf("%+v - Status:%+v, Message: %+v\n", node, cond.Status, cond.Message) + message += messageString } } } - - return readyWorkers >= minimumWorkerNodes, nil + minWorkerAchieved := readyWorkers >= minimumWorkerNodes + if minWorkerAchieved { + return minWorkerAchieved, false, nil + } else { + if message == "" { + message = "Check the config and versions" + } + return false, true, getErrMessage(err, message) + } } -func (m *manager) operatorConsoleExists(ctx context.Context) (bool, error) { - _, err := m.operatorcli.OperatorV1().Consoles().Get(ctx, consoleConfigResourceName, metav1.GetOptions{}) - return err == nil, nil +func (m *manager) operatorConsoleExists(ctx context.Context) (errorcheck bool, retry bool, err error) { + _, err = m.operatorcli.OperatorV1().Consoles().Get(ctx, consoleConfigResourceName, metav1.GetOptions{}) + return err == nil, false, nil } -func (m *manager) operatorConsoleReady(ctx context.Context) (bool, error) { +func (m *manager) operatorConsoleReady(ctx context.Context) (consoleOperatorcheck bool, retry bool, err error) { consoleOperator, err := m.configcli.ConfigV1().ClusterOperators().Get(ctx, "console", metav1.GetOptions{}) if err != nil { - return false, nil + return false, true, nil } - return isOperatorAvailable(consoleOperator), nil + return isOperatorAvailable(consoleOperator), true, nil } -func (m *manager) clusterVersionReady(ctx context.Context) (bool, error) { +func (m *manager) clusterVersionReady(ctx context.Context) (cvcheck bool, retry bool, err error) { cv, err := m.configcli.ConfigV1().ClusterVersions().Get(ctx, "version", metav1.GetOptions{}) if err == nil { for _, cond := range cv.Status.Conditions { if cond.Type == configv1.OperatorAvailable && cond.Status == configv1.ConditionTrue { - return true, nil + return true, false, nil } } } - return false, nil + return false, true, nil } -func (m *manager) ingressControllerReady(ctx context.Context) (bool, error) { +func (m *manager) ingressControllerReady(ctx context.Context) (ingressOperatorcheck bool, retry bool, err error) { ingressOperator, err := m.configcli.ConfigV1().ClusterOperators().Get(ctx, "ingress", metav1.GetOptions{}) if err != nil { - return false, nil + return false, true, nil + } + ingressOperatorcheck = isOperatorAvailable(ingressOperator) + if ingressOperatorcheck { + return ingressOperatorcheck, false, nil + } else { + return ingressOperatorcheck, true, nil } - return isOperatorAvailable(ingressOperator), nil } func isOperatorAvailable(operator *configv1.ClusterOperator) bool { @@ -94,13 +130,13 @@ func isOperatorAvailable(operator *configv1.ClusterOperator) bool { // Checking for a change to the lastSyncCloudCredsSecretResourceVersion attribute of the CredentialRequest's status would be a neater way of checking // whether it was reconciled, but we would would have to save the value prior to updating the kube-system/azure-credentials Secret so that we'd have // an old value to compare to. -func (m *manager) aroCredentialsRequestReconciled(ctx context.Context) (bool, error) { +func (m *manager) aroCredentialsRequestReconciled(ctx context.Context) (credcheck bool, retry bool, err error) { // If the CSP hasn't been updated, the CredentialsRequest does not need to be reconciled. secret, err := m.servicePrincipalUpdated(ctx) if err != nil { - return false, err + return false, false, err } else if secret == nil { - return true, nil + return true, false, nil } u, err := m.dynamiccli.Resource(CredentialsRequestGroupVersionResource).Namespace("openshift-cloud-credential-operator").Get(ctx, "openshift-azure-operator", metav1.GetOptions{}) @@ -108,9 +144,9 @@ func (m *manager) aroCredentialsRequestReconciled(ctx context.Context) (bool, er // If the CredentialsRequest is not found, it may have just recently been reconciled. // Return nil to retry until we hit the condition timeout. if kerrors.IsNotFound(err) { - return false, nil + return false, true, nil } - return false, err + return false, false, err } cr := u.UnstructuredContent() @@ -118,21 +154,26 @@ func (m *manager) aroCredentialsRequestReconciled(ctx context.Context) (bool, er if s, ok := cr["status"]; ok { status = s.(map[string]interface{}) } else { - return false, errors.New("unable to access status of openshift-azure-operator CredentialsRequest") + return false, false, errors.New("unable to access status of openshift-azure-operator CredentialsRequest") } var lastSyncTimestamp string if lst, ok := status["lastSyncTimestamp"]; ok { lastSyncTimestamp = lst.(string) } else { - return false, errors.New("unable to access status.lastSyncTimestamp of openshift-azure-operator CredentialsRequest") + return false, false, errors.New("unable to access status.lastSyncTimestamp of openshift-azure-operator CredentialsRequest") } timestamp, err := time.Parse(time.RFC3339, lastSyncTimestamp) if err != nil { - return false, err + return false, false, err } timeSinceLastSync := time.Since(timestamp) - return timeSinceLastSync.Minutes() < 5, nil + timeSinceLastSyncCheck := timeSinceLastSync.Minutes() < 5 + if timeSinceLastSyncCheck { + return true, false, nil + } else { + return false, true, nil + } } diff --git a/pkg/cluster/condition_test.go b/pkg/cluster/condition_test.go index 74ded8740b6..f73bcb26a77 100644 --- a/pkg/cluster/condition_test.go +++ b/pkg/cluster/condition_test.go @@ -53,7 +53,7 @@ func TestOperatorConsoleExists(t *testing.T) { }, }), } - ready, err := m.operatorConsoleExists(ctx) + ready, _, err := m.operatorConsoleExists(ctx) if err != nil { t.Error(errMustBeNilMsg) } @@ -182,8 +182,8 @@ func TestMinimumWorkerNodesReady(t *testing.T) { }, }), } - ready, err := m.minimumWorkerNodesReady(ctx) - if err != nil { + ready, retry, err := m.minimumWorkerNodesReady(ctx) + if err != nil && !retry { t.Error(errMustBeNilMsg) } if ready != tt.want { @@ -231,7 +231,7 @@ func TestClusterVersionReady(t *testing.T) { }, }), } - ready, err := m.clusterVersionReady(ctx) + ready, _, err := m.clusterVersionReady(ctx) if err != nil { t.Error(errMustBeNilMsg) } @@ -389,7 +389,7 @@ func TestAroCredentialsRequestReconciled(t *testing.T) { }, } - result, err := m.aroCredentialsRequestReconciled(ctx) + result, _, err := m.aroCredentialsRequestReconciled(ctx) if result != tt.want { t.Errorf("Result was %v, wanted %v", result, tt.want) } diff --git a/pkg/cluster/hive.go b/pkg/cluster/hive.go index c19a2e09792..585a1d10b90 100644 --- a/pkg/cluster/hive.go +++ b/pkg/cluster/hive.go @@ -34,14 +34,32 @@ func (m *manager) hiveEnsureResources(ctx context.Context) error { return m.hiveClusterManager.CreateOrUpdate(ctx, m.subscriptionDoc, m.doc) } -func (m *manager) hiveClusterDeploymentReady(ctx context.Context) (bool, error) { +func (m *manager) hiveClusterDeploymentReady(ctx context.Context) (deploymentReady bool, retry bool, err error) { m.log.Info("waiting for cluster deployment to become ready") - return m.hiveClusterManager.IsClusterDeploymentReady(ctx, m.doc) + deploymentReady, err = m.hiveClusterManager.IsClusterDeploymentReady(ctx, m.doc) + if err != nil { + return deploymentReady, false, err + } else { + if !deploymentReady { + return deploymentReady, true, err + } else { + return deploymentReady, false, err + } + } } -func (m *manager) hiveClusterInstallationComplete(ctx context.Context) (bool, error) { +func (m *manager) hiveClusterInstallationComplete(ctx context.Context) (installationComplete bool, retry bool, err error) { m.log.Info("waiting for cluster installation to complete") - return m.hiveClusterManager.IsClusterInstallationComplete(ctx, m.doc) + installationComplete, err = m.hiveClusterManager.IsClusterDeploymentReady(ctx, m.doc) + if err != nil { + return installationComplete, false, err + } else { + if !installationComplete { + return installationComplete, true, err + } else { + return installationComplete, false, err + } + } } func (m *manager) hiveResetCorrelationData(ctx context.Context) error { diff --git a/pkg/cluster/hive_test.go b/pkg/cluster/hive_test.go index 5f7021f726e..ad0dc944fdc 100644 --- a/pkg/cluster/hive_test.go +++ b/pkg/cluster/hive_test.go @@ -63,7 +63,7 @@ func TestHiveClusterDeploymentReady(t *testing.T) { } m.hiveClusterManager = hiveMock - result, err := m.hiveClusterDeploymentReady(context.Background()) + result, _, err := m.hiveClusterDeploymentReady(context.Background()) utilerror.AssertErrorMessage(t, err, tt.wantErr) if tt.wantResult != result { diff --git a/pkg/cluster/install_test.go b/pkg/cluster/install_test.go index 9d8507780e5..3f1b004f307 100644 --- a/pkg/cluster/install_test.go +++ b/pkg/cluster/install_test.go @@ -1,373 +1,217 @@ -package cluster +package containerinstall // Copyright (c) Microsoft Corporation. // Licensed under the Apache License 2.0. import ( + "bytes" "context" - "errors" - "strings" - "testing" + "encoding/json" + "fmt" + "os" + "path/filepath" + "runtime" "time" - "github.com/golang/mock/gomock" - "github.com/onsi/gomega" - "github.com/onsi/gomega/types" - configv1 "github.com/openshift/api/config/v1" - operatorv1 "github.com/openshift/api/operator/v1" - configfake "github.com/openshift/client-go/config/clientset/versioned/fake" - operatorfake "github.com/openshift/client-go/operator/clientset/versioned/fake" - "github.com/sirupsen/logrus" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/kubernetes/fake" + "github.com/containers/podman/v4/pkg/bindings/containers" + "github.com/containers/podman/v4/pkg/bindings/images" + "github.com/containers/podman/v4/pkg/bindings/secrets" + "github.com/containers/podman/v4/pkg/specgen" + "github.com/opencontainers/runtime-spec/specs-go" "github.com/Azure/ARO-RP/pkg/api" - mock_hive "github.com/Azure/ARO-RP/pkg/util/mocks/hive" "github.com/Azure/ARO-RP/pkg/util/steps" - "github.com/Azure/ARO-RP/pkg/util/version" - testdatabase "github.com/Azure/ARO-RP/test/database" - utilerror "github.com/Azure/ARO-RP/test/util/error" - testlog "github.com/Azure/ARO-RP/test/util/log" ) -func failingFunc(context.Context) error { return errors.New("oh no!") } - -func successfulActionStep(context.Context) error { return nil } - -func successfulConditionStep(context.Context) (bool, error) { return true, nil } +var ( + devEnvVars = []string{ + "AZURE_FP_CLIENT_ID", + "AZURE_RP_CLIENT_ID", + "AZURE_RP_CLIENT_SECRET", + "AZURE_SUBSCRIPTION_ID", + "AZURE_TENANT_ID", + "DOMAIN_NAME", + "KEYVAULT_PREFIX", + "LOCATION", + "PROXY_HOSTNAME", + "PULL_SECRET", + "RESOURCEGROUP", + } +) -type fakeMetricsEmitter struct { - Metrics map[string]int64 +func (m *manager) Install(ctx context.Context, sub *api.SubscriptionDocument, doc *api.OpenShiftClusterDocument, version *api.OpenShiftVersion) error { + s := []steps.Step{ + steps.Action(func(context.Context) error { + options := (&images.PullOptions{}). + WithQuiet(true). + WithPolicy("always"). + WithUsername(m.pullSecret.Username). + WithPassword(m.pullSecret.Password) + + _, err := images.Pull(m.conn, version.Properties.InstallerPullspec, options) + return err + }), + steps.Action(func(context.Context) error { return m.createSecrets(ctx, doc, sub) }), + steps.Action(func(context.Context) error { return m.startContainer(ctx, version) }), + steps.Condition(m.containerFinished, 60*time.Minute, false), + steps.Action(m.cleanupContainers), + } + + _, err := steps.Run(ctx, m.log, 10*time.Second, s, nil) + if err != nil { + return err + } + if !m.success { + return fmt.Errorf("failed to install cluster") + } + return nil } -func newfakeMetricsEmitter() *fakeMetricsEmitter { - m := make(map[string]int64) - return &fakeMetricsEmitter{ - Metrics: m, +func (m *manager) putSecret(secretName string) specgen.Secret { + return specgen.Secret{ + Source: m.clusterUUID + "-" + secretName, + Target: "/.azure/" + secretName, + Mode: 0o644, } } -func (e *fakeMetricsEmitter) EmitGauge(metricName string, metricValue int64, dimensions map[string]string) { - e.Metrics[metricName] = metricValue -} +func (m *manager) startContainer(ctx context.Context, version *api.OpenShiftVersion) error { + s := specgen.NewSpecGenerator(version.Properties.InstallerPullspec, false) + s.Name = "installer-" + m.clusterUUID -func (e *fakeMetricsEmitter) EmitFloat(metricName string, metricValue float64, dimensions map[string]string) { -} + s.Secrets = []specgen.Secret{ + m.putSecret("99_aro.json"), + m.putSecret("99_sub.json"), + m.putSecret("proxy.crt"), + m.putSecret("proxy-client.crt"), + m.putSecret("proxy-client.key"), + } -var clusterOperator = &configv1.ClusterOperator{ - ObjectMeta: metav1.ObjectMeta{ - Name: "operator", - }, -} + s.Env = map[string]string{ + "ARO_RP_MODE": "development", + "ARO_UUID": m.clusterUUID, + "OPENSHIFT_INSTALL_INVOKER": "hive", + "OPENSHIFT_INSTALL_RELEASE_IMAGE_OVERRIDE": version.Properties.OpenShiftPullspec, + } -var clusterVersion = &configv1.ClusterVersion{ - ObjectMeta: metav1.ObjectMeta{ - Name: "version", - }, -} + for _, envvar := range devEnvVars { + s.Env["ARO_"+envvar] = os.Getenv(envvar) + } -var node = &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node", - }, -} + s.Mounts = append(s.Mounts, specs.Mount{ + Destination: "/.azure", + Type: "tmpfs", + Source: "", + }) + s.WorkDir = "/.azure" + s.Entrypoint = []string{"/bin/bash", "-c", "/bin/openshift-install create manifests && /bin/openshift-install create cluster"} -var ingressController = &operatorv1.IngressController{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "openshift-ingress-operator", - Name: "ingress-controller", - }, + _, err := runContainer(m.conn, m.log, s) + return err } -func TestStepRunnerWithInstaller(t *testing.T) { - ctx := context.Background() - - for _, tt := range []struct { - name string - steps []steps.Step - wantEntries []map[string]types.GomegaMatcher - wantErr string - kubernetescli *fake.Clientset - configcli *configfake.Clientset - operatorcli *operatorfake.Clientset - }{ - { - name: "Failed step run will log cluster version, cluster operator status, and ingress information if available", - steps: []steps.Step{ - steps.Action(failingFunc), - }, - wantErr: "oh no!", - wantEntries: []map[string]types.GomegaMatcher{ - { - "level": gomega.Equal(logrus.InfoLevel), - "msg": gomega.Equal(`running step [Action github.com/Azure/ARO-RP/pkg/cluster.failingFunc]`), - }, - { - "level": gomega.Equal(logrus.ErrorLevel), - "msg": gomega.Equal("step [Action github.com/Azure/ARO-RP/pkg/cluster.failingFunc] encountered error: oh no!"), - }, - { - "level": gomega.Equal(logrus.InfoLevel), - "msg": gomega.MatchRegexp(`(?s)github.com/Azure/ARO-RP/pkg/cluster.\(\*manager\).logClusterVersion\-fm:.*"name": "version"`), - }, - { - "level": gomega.Equal(logrus.InfoLevel), - "msg": gomega.MatchRegexp(`(?s)github.com/Azure/ARO-RP/pkg/cluster.\(\*manager\).logNodes\-fm:.*"name": "node"`), - }, - { - "level": gomega.Equal(logrus.InfoLevel), - "msg": gomega.MatchRegexp(`(?s)github.com/Azure/ARO-RP/pkg/cluster.\(\*manager\).logClusterOperators\-fm:.*"name": "operator"`), - }, - { - "level": gomega.Equal(logrus.InfoLevel), - "msg": gomega.MatchRegexp(`(?s)github.com/Azure/ARO-RP/pkg/cluster.\(\*manager\).logIngressControllers\-fm:.*"name": "ingress-controller"`), - }, - }, - kubernetescli: fake.NewSimpleClientset(node), - configcli: configfake.NewSimpleClientset(clusterVersion, clusterOperator), - operatorcli: operatorfake.NewSimpleClientset(ingressController), - }, - { - name: "Failed step run will not crash if it cannot get the clusterversions, clusteroperators, ingresscontrollers", - steps: []steps.Step{ - steps.Action(failingFunc), - }, - wantErr: "oh no!", - wantEntries: []map[string]types.GomegaMatcher{ - { - "level": gomega.Equal(logrus.InfoLevel), - "msg": gomega.Equal(`running step [Action github.com/Azure/ARO-RP/pkg/cluster.failingFunc]`), - }, - { - "level": gomega.Equal(logrus.ErrorLevel), - "msg": gomega.Equal("step [Action github.com/Azure/ARO-RP/pkg/cluster.failingFunc] encountered error: oh no!"), - }, - { - "level": gomega.Equal(logrus.ErrorLevel), - "msg": gomega.Equal(`clusterversions.config.openshift.io "version" not found`), - }, - { - "level": gomega.Equal(logrus.InfoLevel), - "msg": gomega.Equal(`github.com/Azure/ARO-RP/pkg/cluster.(*manager).logNodes-fm: null`), - }, - { - "level": gomega.Equal(logrus.InfoLevel), - "msg": gomega.Equal(`github.com/Azure/ARO-RP/pkg/cluster.(*manager).logClusterOperators-fm: null`), - }, - { - "level": gomega.Equal(logrus.InfoLevel), - "msg": gomega.Equal(`github.com/Azure/ARO-RP/pkg/cluster.(*manager).logIngressControllers-fm: null`), - }, - }, - kubernetescli: fake.NewSimpleClientset(), - configcli: configfake.NewSimpleClientset(), - operatorcli: operatorfake.NewSimpleClientset(), - }, - } { - t.Run(tt.name, func(t *testing.T) { - controller := gomock.NewController(t) - defer controller.Finish() - - h, log := testlog.New() - m := &manager{ - log: log, - kubernetescli: tt.kubernetescli, - configcli: tt.configcli, - operatorcli: tt.operatorcli, - now: func() time.Time { return time.Now() }, - } - - err := m.runSteps(ctx, tt.steps, "") - utilerror.AssertErrorMessage(t, err, tt.wantErr) +func (m *manager) containerFinished(context.Context) (bool, bool, error) { + containerName := "installer-" + m.clusterUUID + inspectData, err := containers.Inspect(m.conn, containerName, nil) + if err != nil { + return false, false, err + } - err = testlog.AssertLoggingOutput(h, tt.wantEntries) - if err != nil { - t.Error(err) - } - }) + if inspectData.State.Status == "exited" || inspectData.State.Status == "stopped" { + if inspectData.State.ExitCode != 0 { + getContainerLogs(m.conn, m.log, containerName) + return true, false, fmt.Errorf("container exited with %d", inspectData.State.ExitCode) + } + m.success = true + return true, false, nil } + return false, true, nil } -func TestUpdateProvisionedBy(t *testing.T) { - ctx := context.Background() - key := "/subscriptions/00000000-0000-0000-0000-000000000000/resourcegroups/resourceGroup/providers/Microsoft.RedHatOpenShift/openShiftClusters/resourceName1" - - openShiftClustersDatabase, _ := testdatabase.NewFakeOpenShiftClusters() - fixture := testdatabase.NewFixture().WithOpenShiftClusters(openShiftClustersDatabase) - fixture.AddOpenShiftClusterDocuments(&api.OpenShiftClusterDocument{ - Key: strings.ToLower(key), - OpenShiftCluster: &api.OpenShiftCluster{ - ID: key, - Properties: api.OpenShiftClusterProperties{ - ProvisioningState: api.ProvisioningStateCreating, - }, - }, - }) - err := fixture.Create() +func (m *manager) createSecrets(ctx context.Context, doc *api.OpenShiftClusterDocument, sub *api.SubscriptionDocument) error { + encCluster, err := json.Marshal(doc.OpenShiftCluster) if err != nil { - t.Fatal(err) + return err } - - clusterdoc, err := openShiftClustersDatabase.Dequeue(ctx) + _, err = secrets.Create( + m.conn, bytes.NewBuffer(encCluster), + (&secrets.CreateOptions{}).WithName(m.clusterUUID+"-99_aro.json")) if err != nil { - t.Fatal(err) + return err } - i := &manager{ - doc: clusterdoc, - db: openShiftClustersDatabase, - } - err = i.updateProvisionedBy(ctx) + encSub, err := json.Marshal(sub.Subscription) if err != nil { - t.Error(err) + return err } - - updatedClusterDoc, err := openShiftClustersDatabase.Get(ctx, strings.ToLower(key)) + _, err = secrets.Create( + m.conn, bytes.NewBuffer(encSub), + (&secrets.CreateOptions{}).WithName(m.clusterUUID+"-99_sub.json")) if err != nil { - t.Fatal(err) - } - if updatedClusterDoc.OpenShiftCluster.Properties.ProvisionedBy != version.GitCommit { - t.Error("version was not added") + return err } -} - -func TestInstallationTimeMetrics(t *testing.T) { - _, log := testlog.New() - fm := newfakeMetricsEmitter() - - for _, tt := range []struct { - name string - metricsTopic string - timePerStep int64 - steps []steps.Step - wantedMetrics map[string]int64 - }{ - { - name: "Failed step run will not generate any install time metrics", - metricsTopic: "install", - steps: []steps.Step{ - steps.Action(successfulActionStep), - steps.Action(failingFunc), - }, - }, - { - name: "Succeeded step run for cluster installation will generate a valid install time metrics", - metricsTopic: "install", - timePerStep: 2, - steps: []steps.Step{ - steps.Action(successfulActionStep), - steps.Condition(successfulConditionStep, 30*time.Minute, true), - steps.Action(successfulActionStep), - }, - wantedMetrics: map[string]int64{ - "backend.openshiftcluster.install.duration.total.seconds": 4, - "backend.openshiftcluster.install.action.successfulActionStep.duration.seconds": 2, - "backend.openshiftcluster.install.condition.successfulConditionStep.duration.seconds": 2, - }, - }, - { - name: "Succeeded step run for cluster update will generate a valid install time metrics", - metricsTopic: "update", - timePerStep: 3, - steps: []steps.Step{ - steps.Action(successfulActionStep), - steps.Condition(successfulConditionStep, 30*time.Minute, true), - steps.Action(successfulActionStep), - }, - wantedMetrics: map[string]int64{ - "backend.openshiftcluster.update.duration.total.seconds": 6, - "backend.openshiftcluster.update.action.successfulActionStep.duration.seconds": 3, - "backend.openshiftcluster.update.condition.successfulConditionStep.duration.seconds": 3, - }, - }, - } { - t.Run(tt.name, func(t *testing.T) { - ctx := context.Background() - m := &manager{ - log: log, - metricsEmitter: fm, - now: func() time.Time { return time.Now().Add(time.Duration(tt.timePerStep) * time.Second) }, - } - err := m.runSteps(ctx, tt.steps, tt.metricsTopic) - if err != nil { - if len(fm.Metrics) != 0 { - t.Error("fake metrics obj should be empty when run steps failed") - } - } else { - if tt.wantedMetrics != nil { - for k, v := range tt.wantedMetrics { - time, ok := fm.Metrics[k] - if !ok { - t.Errorf("unexpected metrics key: %s", k) - } - if time != v { - t.Errorf("incorrect fake metrics value, want: %d, got: %d", v, time) - } - } - } - } - }) + basepath := os.Getenv("ARO_CHECKOUT_PATH") + if basepath == "" { + // This assumes we are running from an ARO-RP checkout in development + var err error + _, curmod, _, _ := runtime.Caller(0) + basepath, err = filepath.Abs(filepath.Join(filepath.Dir(curmod), "../..")) + if err != nil { + return err + } } -} - -func TestRunHiveInstallerSetsCreatedByHiveFieldToTrueInClusterDoc(t *testing.T) { - ctx := context.Background() - key := "/subscriptions/00000000-0000-0000-0000-000000000000/resourcegroups/resourceGroup/providers/Microsoft.RedHatOpenShift/openShiftClusters/resourceName1" - openShiftClustersDatabase, _ := testdatabase.NewFakeOpenShiftClusters() - fixture := testdatabase.NewFixture().WithOpenShiftClusters(openShiftClustersDatabase) - doc := &api.OpenShiftClusterDocument{ - Key: strings.ToLower(key), - OpenShiftCluster: &api.OpenShiftCluster{ - ID: key, - Properties: api.OpenShiftClusterProperties{ - ProvisioningState: api.ProvisioningStateCreating, - }, - }, + err = m.secretFromFile(filepath.Join(basepath, "secrets/proxy.crt"), "proxy.crt") + if err != nil { + return err } - fixture.AddOpenShiftClusterDocuments(doc) - err := fixture.Create() + err = m.secretFromFile(filepath.Join(basepath, "secrets/proxy-client.crt"), "proxy-client.crt") if err != nil { - t.Fatal(err) + return err } - dequeuedDoc, err := openShiftClustersDatabase.Dequeue(ctx) + err = m.secretFromFile(filepath.Join(basepath, "secrets/proxy-client.key"), "proxy-client.key") if err != nil { - t.Fatal(err) + return err } - controller := gomock.NewController(t) - defer controller.Finish() - - hiveClusterManagerMock := mock_hive.NewMockClusterManager(controller) - hiveClusterManagerMock.EXPECT().Install(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()) + return nil +} - m := &manager{ - doc: dequeuedDoc, - db: openShiftClustersDatabase, - openShiftClusterDocumentVersioner: &FakeOpenShiftClusterDocumentVersionerService{ - expectedOpenShiftVersion: nil, - expectedError: nil, - }, - hiveClusterManager: hiveClusterManagerMock, +func (m *manager) secretFromFile(from, name string) error { + f, err := os.Open(from) + if err != nil { + return err } - err = m.runHiveInstaller(ctx) - if err != nil { - t.Fatal(err) + _, err = secrets.Create( + m.conn, f, + (&secrets.CreateOptions{}).WithName(m.clusterUUID+"-"+name)) + return err +} + +func (m *manager) cleanupContainers(ctx context.Context) error { + containerName := "installer-" + m.clusterUUID + + if !m.success { + m.log.Infof("cleaning up failed container %s", containerName) + getContainerLogs(m.conn, m.log, containerName) } - updatedDoc, err := openShiftClustersDatabase.Get(ctx, strings.ToLower(key)) + _, err := containers.Remove( + m.conn, containerName, + (&containers.RemoveOptions{}).WithForce(true).WithIgnore(true)) if err != nil { - t.Fatal(err) + m.log.Errorf("unable to remove container: %v", err) } - expected := true - got := updatedDoc.OpenShiftCluster.Properties.HiveProfile.CreatedByHive - if got != expected { - t.Fatalf("expected updatedDoc.OpenShiftCluster.Properties.HiveProfile.CreatedByHive set to %v, but got %v", expected, got) + for _, secretName := range []string{"99_aro.json", "99_sub.json", "proxy.crt", "proxy-client.crt", "proxy-client.key"} { + err = secrets.Remove(m.conn, m.clusterUUID+"-"+secretName) + if err != nil { + m.log.Debugf("unable to remove secret %s: %v", m.clusterUUID+"-"+secretName, err) + } } + return nil } diff --git a/pkg/containerinstall/install.go b/pkg/containerinstall/install.go index 40eca69e8ab..3f1b004f307 100644 --- a/pkg/containerinstall/install.go +++ b/pkg/containerinstall/install.go @@ -110,22 +110,22 @@ func (m *manager) startContainer(ctx context.Context, version *api.OpenShiftVers return err } -func (m *manager) containerFinished(context.Context) (bool, error) { +func (m *manager) containerFinished(context.Context) (bool, bool, error) { containerName := "installer-" + m.clusterUUID inspectData, err := containers.Inspect(m.conn, containerName, nil) if err != nil { - return false, err + return false, false, err } if inspectData.State.Status == "exited" || inspectData.State.Status == "stopped" { if inspectData.State.ExitCode != 0 { getContainerLogs(m.conn, m.log, containerName) - return true, fmt.Errorf("container exited with %d", inspectData.State.ExitCode) + return true, false, fmt.Errorf("container exited with %d", inspectData.State.ExitCode) } m.success = true - return true, nil + return true, false, nil } - return false, nil + return false, true, nil } func (m *manager) createSecrets(ctx context.Context, doc *api.OpenShiftClusterDocument, sub *api.SubscriptionDocument) error { diff --git a/pkg/frontend/admin_openshiftcluster_etcdcertificaterenew.go b/pkg/frontend/admin_openshiftcluster_etcdcertificaterenew.go index 8889655651b..3787eccc148 100644 --- a/pkg/frontend/admin_openshiftcluster_etcdcertificaterenew.go +++ b/pkg/frontend/admin_openshiftcluster_etcdcertificaterenew.go @@ -436,24 +436,24 @@ func (e *etcdrenew) deleteEtcdSecrets(ctx context.Context) error { } // Checks if the new revision is put on the etcd and validates if all the nodes are running the same revision -func (e *etcdrenew) isEtcdRevised(ctx context.Context) (bool, error) { +func (e *etcdrenew) isEtcdRevised(ctx context.Context) (etcdCheck bool, retry bool, err error) { isAtRevision := true rawEtcd, err := e.k.KubeGet(ctx, "etcd.operator.openshift.io", "", "cluster") if err != nil { e.log.Warnf(err.Error()) - return false, nil + return false, true, nil } etcd := &operatorv1.Etcd{} err = codec.NewDecoderBytes(rawEtcd, &codec.JsonHandle{}).Decode(etcd) if err != nil { e.log.Warnf(err.Error()) - return false, nil + return false, true, nil } // no new revision is observed. if e.lastRevision == etcd.Status.LatestAvailableRevision { e.log.Infof("last revision is %d, latest available revision is %d", e.lastRevision, etcd.Status.LatestAvailableRevision) - return false, nil + return false, true, nil } for _, s := range etcd.Status.NodeStatuses { e.log.Infof("Current Revision for node %s is %d, expected revision is %d", s.NodeName, s.CurrentRevision, etcd.Status.LatestAvailableRevision) @@ -463,7 +463,7 @@ func (e *etcdrenew) isEtcdRevised(ctx context.Context) (bool, error) { } } - return isAtRevision, nil + return isAtRevision, true, nil } // Applies the backedup etcd secret and applies them on the cluster diff --git a/pkg/util/steps/condition.go b/pkg/util/steps/condition.go index d4af64ea77a..7a449ef96a6 100644 --- a/pkg/util/steps/condition.go +++ b/pkg/util/steps/condition.go @@ -40,7 +40,7 @@ var timeoutConditionErrors = map[string]string{ // condition has been met and an error. // // Suitable for polling external sources for readiness. -type conditionFunction func(context.Context) (bool, error) +type conditionFunction func(context.Context) (bool, bool, error) // Condition returns a Step suitable for checking whether subsequent Steps can // be executed. @@ -86,8 +86,8 @@ func (c conditionStep) run(ctx context.Context, log *logrus.Entry) error { // We use the outer context, not the timeout context, as we do not want // to time out the condition function itself, only stop retrying once // timeoutCtx's timeout has fired. - cnd, cndErr := c.f(ctx) - if errors.Is(cndErr, wait.ErrWaitTimeout) { + cnd, retry, cndErr := c.f(ctx) + if errors.Is(cndErr, wait.ErrWaitTimeout) && !retry { return cnd, fmt.Errorf("condition encountered internal timeout: %w", cndErr) } diff --git a/pkg/util/steps/condition_test.go b/pkg/util/steps/condition_test.go index f6466b59b43..f37bb007f36 100644 --- a/pkg/util/steps/condition_test.go +++ b/pkg/util/steps/condition_test.go @@ -11,17 +11,19 @@ import ( // functionnames that will be used in the conditionFunction below // All the keys of map timeoutConditionErrors -func attachNSGs(context.Context) (bool, error) { return false, nil } -func apiServersReady(context.Context) (bool, error) { return false, nil } -func minimumWorkerNodesReady(context.Context) (bool, error) { return false, nil } -func operatorConsoleExists(context.Context) (bool, error) { return false, nil } -func operatorConsoleReady(context.Context) (bool, error) { return false, nil } -func clusterVersionReady(context.Context) (bool, error) { return false, nil } -func ingressControllerReady(context.Context) (bool, error) { return false, nil } -func aroDeploymentReady(context.Context) (bool, error) { return false, nil } -func ensureAROOperatorRunningDesiredVersion(context.Context) (bool, error) { return false, nil } -func hiveClusterDeploymentReady(context.Context) (bool, error) { return false, nil } -func hiveClusterInstallationComplete(context.Context) (bool, error) { return false, nil } +func attachNSGs(context.Context) (bool, bool, error) { return false, false, nil } +func apiServersReady(context.Context) (bool, bool, error) { return false, false, nil } +func minimumWorkerNodesReady(context.Context) (bool, bool, error) { return false, false, nil } +func operatorConsoleExists(context.Context) (bool, bool, error) { return false, false, nil } +func operatorConsoleReady(context.Context) (bool, bool, error) { return false, false, nil } +func clusterVersionReady(context.Context) (bool, bool, error) { return false, false, nil } +func ingressControllerReady(context.Context) (bool, bool, error) { return false, false, nil } +func aroDeploymentReady(context.Context) (bool, bool, error) { return false, false, nil } +func ensureAROOperatorRunningDesiredVersion(context.Context) (bool, bool, error) { + return false, false, nil +} +func hiveClusterDeploymentReady(context.Context) (bool, bool, error) { return false, false, nil } +func hiveClusterInstallationComplete(context.Context) (bool, bool, error) { return false, false, nil } func TestEnrichConditionTimeoutError(t *testing.T) { for _, tt := range []struct { diff --git a/pkg/util/steps/runner.go b/pkg/util/steps/runner.go index c2666dda07c..eff434336db 100644 --- a/pkg/util/steps/runner.go +++ b/pkg/util/steps/runner.go @@ -5,65 +5,275 @@ package steps import ( "context" - "reflect" - "runtime" - "strings" + "errors" + "testing" "time" - "github.com/davecgh/go-spew/spew" + "github.com/golang/mock/gomock" + "github.com/onsi/gomega" + "github.com/onsi/gomega/types" "github.com/sirupsen/logrus" + "k8s.io/apimachinery/pkg/util/wait" - msgraph_errors "github.com/Azure/ARO-RP/pkg/util/graph/graphsdk/models/odataerrors" + utilerror "github.com/Azure/ARO-RP/test/util/error" + testlog "github.com/Azure/ARO-RP/test/util/log" ) -// FriendlyName returns a "friendly" stringified name of the given func. -func FriendlyName(f interface{}) string { - return runtime.FuncForPC(reflect.ValueOf(f).Pointer()).Name() +func successfulFunc(context.Context) error { return nil } +func failingFunc(context.Context) error { return errors.New("oh no!") } +func alwaysFalseCondition(context.Context) (bool, bool, error) { return false, true, nil } +func alwaysTrueCondition(context.Context) (bool, bool, error) { return true, false, nil } +func timingOutCondition(ctx context.Context) (bool, bool, error) { + time.Sleep(60 * time.Millisecond) + return false, true, nil } - -func shortName(fullName string) string { - sepCheck := func(c rune) bool { - return c == '/' || c == '.' - } - - fields := strings.FieldsFunc(strings.TrimSpace(fullName), sepCheck) - - if size := len(fields); size > 0 { - return fields[size-1] - } - return fullName +func internalTimeoutCondition(ctx context.Context) (bool, bool, error) { + return false, false, wait.ErrWaitTimeout } -// Step is the interface for steps that Runner can execute. -type Step interface { - run(ctx context.Context, log *logrus.Entry) error - String() string - metricsName() string +func currentTimeFunc() time.Time { + return time.Now() } -// Run executes the provided steps in order until one fails or all steps -// are completed. Errors from failed steps are returned directly. -// time cost for each step run will be recorded for metrics usage -func Run(ctx context.Context, log *logrus.Entry, pollInterval time.Duration, steps []Step, now func() time.Time) (map[string]int64, error) { - stepTimeRun := make(map[string]int64) - for _, step := range steps { - log.Infof("running step %s", step) +func TestStepRunner(t *testing.T) { + for _, tt := range []struct { + name string + steps func(*gomock.Controller) []Step + wantEntries []map[string]types.GomegaMatcher + wantErr string + }{ + { + name: "All successful Actions will have a successful run", + steps: func(controller *gomock.Controller) []Step { + return []Step{ + Action(successfulFunc), + Action(successfulFunc), + Action(successfulFunc), + } + }, + wantEntries: []map[string]types.GomegaMatcher{ + { + "msg": gomega.Equal("running step [Action github.com/Azure/ARO-RP/pkg/util/steps.successfulFunc]"), + "level": gomega.Equal(logrus.InfoLevel), + }, + { + "msg": gomega.Equal("running step [Action github.com/Azure/ARO-RP/pkg/util/steps.successfulFunc]"), + "level": gomega.Equal(logrus.InfoLevel), + }, + { + "msg": gomega.Equal("running step [Action github.com/Azure/ARO-RP/pkg/util/steps.successfulFunc]"), + "level": gomega.Equal(logrus.InfoLevel), + }, + }, + }, + { + name: "A failing Action will fail the run", + steps: func(controller *gomock.Controller) []Step { + return []Step{ + Action(successfulFunc), + Action(failingFunc), + Action(successfulFunc), + } + }, + wantEntries: []map[string]types.GomegaMatcher{ + { + "msg": gomega.Equal("running step [Action github.com/Azure/ARO-RP/pkg/util/steps.successfulFunc]"), + "level": gomega.Equal(logrus.InfoLevel), + }, + { + "msg": gomega.Equal("running step [Action github.com/Azure/ARO-RP/pkg/util/steps.failingFunc]"), + "level": gomega.Equal(logrus.InfoLevel), + }, + { + "msg": gomega.Equal(`step [Action github.com/Azure/ARO-RP/pkg/util/steps.failingFunc] encountered error: oh no!`), + "level": gomega.Equal(logrus.ErrorLevel), + }, + }, + wantErr: `oh no!`, + }, + { + name: "A successful condition will allow steps to continue", + steps: func(controller *gomock.Controller) []Step { + return []Step{ + Action(successfulFunc), + Condition(alwaysTrueCondition, 50*time.Millisecond, true), + Action(successfulFunc), + } + }, + wantEntries: []map[string]types.GomegaMatcher{ + { + "msg": gomega.Equal("running step [Action github.com/Azure/ARO-RP/pkg/util/steps.successfulFunc]"), + "level": gomega.Equal(logrus.InfoLevel), + }, + { + "msg": gomega.Equal("running step [Condition github.com/Azure/ARO-RP/pkg/util/steps.alwaysTrueCondition, timeout 50ms]"), + "level": gomega.Equal(logrus.InfoLevel), + }, + { + "msg": gomega.Equal("running step [Action github.com/Azure/ARO-RP/pkg/util/steps.successfulFunc]"), + "level": gomega.Equal(logrus.InfoLevel), + }, + }, + }, + { + name: "A failed condition with fail=false will allow steps to continue", + steps: func(controller *gomock.Controller) []Step { + return []Step{ + Action(successfulFunc), + Condition(alwaysFalseCondition, 50*time.Millisecond, false), + Action(successfulFunc), + } + }, + wantEntries: []map[string]types.GomegaMatcher{ + { + "msg": gomega.Equal("running step [Action github.com/Azure/ARO-RP/pkg/util/steps.successfulFunc]"), + "level": gomega.Equal(logrus.InfoLevel), + }, + { + "msg": gomega.Equal("running step [Condition github.com/Azure/ARO-RP/pkg/util/steps.alwaysFalseCondition, timeout 50ms]"), + "level": gomega.Equal(logrus.InfoLevel), + }, + { + "msg": gomega.Equal("step [Condition github.com/Azure/ARO-RP/pkg/util/steps.alwaysFalseCondition, timeout 50ms] failed but has configured 'fail=false'. Continuing. Error: timed out waiting for the condition"), + "level": gomega.Equal(logrus.WarnLevel), + }, + { + "msg": gomega.Equal("running step [Action github.com/Azure/ARO-RP/pkg/util/steps.successfulFunc]"), + "level": gomega.Equal(logrus.InfoLevel), + }, + }, + }, + { + name: "A timed out Condition causes a failure", + steps: func(controller *gomock.Controller) []Step { + return []Step{ + Action(successfulFunc), + &conditionStep{ + f: timingOutCondition, + fail: true, + pollInterval: 20 * time.Millisecond, + timeout: 50 * time.Millisecond, + }, + Action(successfulFunc), + } + }, + wantEntries: []map[string]types.GomegaMatcher{ + { + "msg": gomega.Equal("running step [Action github.com/Azure/ARO-RP/pkg/util/steps.successfulFunc]"), + "level": gomega.Equal(logrus.InfoLevel), + }, + { + "msg": gomega.Equal("running step [Condition github.com/Azure/ARO-RP/pkg/util/steps.timingOutCondition, timeout 50ms]"), + "level": gomega.Equal(logrus.InfoLevel), + }, + { + "msg": gomega.Equal("step [Condition github.com/Azure/ARO-RP/pkg/util/steps.timingOutCondition, timeout 50ms] encountered error: timed out waiting for the condition"), + "level": gomega.Equal(logrus.ErrorLevel), + }, + }, + wantErr: "timed out waiting for the condition", + }, + { + name: "A Condition that returns a timeout error causes a different failure from a timed out Condition", + steps: func(controller *gomock.Controller) []Step { + return []Step{ + Action(successfulFunc), + &conditionStep{ + f: internalTimeoutCondition, + fail: true, + pollInterval: 20 * time.Millisecond, + timeout: 50 * time.Millisecond, + }, + Action(successfulFunc), + } + }, + wantEntries: []map[string]types.GomegaMatcher{ + { + "msg": gomega.Equal("running step [Action github.com/Azure/ARO-RP/pkg/util/steps.successfulFunc]"), + "level": gomega.Equal(logrus.InfoLevel), + }, + { + "msg": gomega.Equal("running step [Condition github.com/Azure/ARO-RP/pkg/util/steps.internalTimeoutCondition, timeout 50ms]"), + "level": gomega.Equal(logrus.InfoLevel), + }, + { + "msg": gomega.Equal("step [Condition github.com/Azure/ARO-RP/pkg/util/steps.internalTimeoutCondition, timeout 50ms] encountered error: condition encountered internal timeout: timed out waiting for the condition"), + "level": gomega.Equal(logrus.ErrorLevel), + }, + }, + wantErr: "condition encountered internal timeout: timed out waiting for the condition", + }, + { + name: "A Condition that does not return true in the timeout time causes a failure", + steps: func(controller *gomock.Controller) []Step { + return []Step{ + Action(successfulFunc), + Condition(alwaysFalseCondition, 50*time.Millisecond, true), + Action(successfulFunc), + } + }, + wantEntries: []map[string]types.GomegaMatcher{ + { + "msg": gomega.Equal("running step [Action github.com/Azure/ARO-RP/pkg/util/steps.successfulFunc]"), + "level": gomega.Equal(logrus.InfoLevel), + }, + { + "msg": gomega.Equal("running step [Condition github.com/Azure/ARO-RP/pkg/util/steps.alwaysFalseCondition, timeout 50ms]"), + "level": gomega.Equal(logrus.InfoLevel), + }, + { + "msg": gomega.Equal("step [Condition github.com/Azure/ARO-RP/pkg/util/steps.alwaysFalseCondition, timeout 50ms] encountered error: timed out waiting for the condition"), + "level": gomega.Equal(logrus.ErrorLevel), + }, + }, + wantErr: "timed out waiting for the condition", + }, + } { + t.Run(tt.name, func(t *testing.T) { + ctx := context.Background() + controller := gomock.NewController(t) + defer controller.Finish() - startTime := time.Now() - err := step.run(ctx, log) + h, log := testlog.New() + steps := tt.steps(controller) - if err != nil { - log.Errorf("step %s encountered error: %s", step, err.Error()) - if oDataError, ok := err.(msgraph_errors.ODataErrorable); ok { - spew.Fdump(log.Writer(), oDataError.GetErrorEscaped()) + _, err := Run(ctx, log, 25*time.Millisecond, steps, currentTimeFunc) + utilerror.AssertErrorMessage(t, err, tt.wantErr) + + err = testlog.AssertLoggingOutput(h, tt.wantEntries) + if err != nil { + t.Error(err) } - return nil, err - } + }) + } +} - if now != nil { - currentTime := now() - stepTimeRun[step.metricsName()] = int64(currentTime.Sub(startTime).Seconds()) - } +func TestStepMetricsNameFormatting(t *testing.T) { + for _, tt := range []struct { + desc string + step Step + want string + }{ + { + desc: "test action step naming", + step: Action(successfulFunc), + want: "action.successfulFunc", + }, + { + desc: "test condition step naming", + step: Condition(alwaysTrueCondition, 1*time.Millisecond, true), + want: "condition.alwaysTrueCondition", + }, + { + desc: "test anonymous action step naming", + step: Action(func(context.Context) error { return nil }), + want: "action.func1", + }, + } { + t.Run(tt.desc, func(t *testing.T) { + if got := tt.step.metricsName(); got != tt.want { + t.Errorf("incorrect step metrics name, want: %s, got: %s", tt.want, got) + } + }) } - return stepTimeRun, nil }