From 1301b2bf7ebbbaf345ec9f5f01ad8d22730b4ddd Mon Sep 17 00:00:00 2001 From: Derek Whatley Date: Thu, 1 Apr 2021 13:33:56 -0400 Subject: [PATCH 1/3] Log abnormal events for things we are waiting on --- pkg/controller/directvolumemigration/rsync.go | 15 ++++ .../directvolumemigration/stunnel.go | 9 ++ pkg/controller/migmigration/hooks.go | 10 +++ pkg/controller/migmigration/pod.go | 15 ++++ pkg/controller/migmigration/stage.go | 9 ++ pkg/controller/migmigration/validation.go | 11 +++ pkg/event/event.go | 87 +++++++++++++++++++ 7 files changed, 156 insertions(+) create mode 100644 pkg/event/event.go diff --git a/pkg/controller/directvolumemigration/rsync.go b/pkg/controller/directvolumemigration/rsync.go index 404c7a2e3..e13cb7149 100644 --- a/pkg/controller/directvolumemigration/rsync.go +++ b/pkg/controller/directvolumemigration/rsync.go @@ -20,6 +20,7 @@ import ( liberr "github.com/konveyor/controller/pkg/error" migapi "github.com/konveyor/mig-controller/pkg/apis/migration/v1alpha1" "github.com/konveyor/mig-controller/pkg/compat" + migevent "github.com/konveyor/mig-controller/pkg/event" "github.com/konveyor/mig-controller/pkg/settings" routev1 "github.com/openshift/api/route/v1" "golang.org/x/crypto/ssh" @@ -139,6 +140,13 @@ func (t *Task) areRsyncTransferPodsRunning() (bool, error) { } for _, pod := range pods.Items { if pod.Status.Phase != corev1.PodRunning { + // Log abnormal events for Rsync transfer Pod if any are found + migevent.LogAbnormalEventsForResource( + destClient, t.Log, + "Found abnormal event for Rsync transfer Pod on destination cluster", + types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}, + "pod") + for _, podCond := range pod.Status.Conditions { if podCond.Reason == corev1.PodReasonUnschedulable { t.Log.Info("Found UNSCHEDULABLE Rsync Transfer Pod on destination cluster", @@ -756,6 +764,13 @@ func (t *Task) areRsyncRoutesAdmitted() (bool, []string, error) { if err != nil { return false, messages, err } + // Logs abnormal events related to route if any are found + migevent.LogAbnormalEventsForResource( + destClient, t.Log, + "Found abnormal event for Rsync Route on destination cluster", + types.NamespacedName{Namespace: route.Namespace, Name: route.Name}, + "route") + admitted := false message := "no status condition available for the route" // Check if we can find the admitted condition for the route diff --git a/pkg/controller/directvolumemigration/stunnel.go b/pkg/controller/directvolumemigration/stunnel.go index 5cefbd4d3..fb2c7ab51 100644 --- a/pkg/controller/directvolumemigration/stunnel.go +++ b/pkg/controller/directvolumemigration/stunnel.go @@ -9,6 +9,7 @@ import ( "text/template" liberr "github.com/konveyor/controller/pkg/error" + migevent "github.com/konveyor/mig-controller/pkg/event" "github.com/konveyor/mig-controller/pkg/settings" "gopkg.in/yaml.v2" @@ -23,6 +24,7 @@ import ( "time" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" //"k8s.io/apimachinery/pkg/types" @@ -611,6 +613,13 @@ func (t *Task) areStunnelClientPodsRunning() (bool, error) { } for _, pod := range pods.Items { if pod.Status.Phase != corev1.PodRunning { + // Logs abnormal events for Stunnel Pod if any are found + migevent.LogAbnormalEventsForResource( + srcClient, t.Log, + "Found abnormal event for Stunnel Client Pod on source cluster", + types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}, + "pod") + for _, podCond := range pod.Status.Conditions { if podCond.Reason == corev1.PodReasonUnschedulable { t.Log.Info("Found UNSCHEDULABLE Stunnel Client Pod "+ diff --git a/pkg/controller/migmigration/hooks.go b/pkg/controller/migmigration/hooks.go index 6347a106a..47db2cf1b 100644 --- a/pkg/controller/migmigration/hooks.go +++ b/pkg/controller/migmigration/hooks.go @@ -11,6 +11,7 @@ import ( liberr "github.com/konveyor/controller/pkg/error" migapi "github.com/konveyor/mig-controller/pkg/apis/migration/v1alpha1" + migevent "github.com/konveyor/mig-controller/pkg/event" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -153,6 +154,15 @@ func (t *Task) stopHookJobs() (bool, error) { func (t *Task) ensureJob(job *batchv1.Job, hook migapi.MigPlanHook, migHook migapi.MigHook, client k8sclient.Client) (bool, error) { runningJob, err := migHook.GetPhaseJob(client, hook.Phase, string(t.Owner.UID)) + if runningJob != nil { + // Logs abnormal events for Hook Jobs if any are found + migevent.LogAbnormalEventsForResource( + client, t.Log, + "Found abnormal event for Hook Job", + types.NamespacedName{Namespace: runningJob.Namespace, Name: runningJob.Name}, + "job") + } + if runningJob == nil && err == nil { err = client.Create(context.TODO(), job) if err != nil { diff --git a/pkg/controller/migmigration/pod.go b/pkg/controller/migmigration/pod.go index 96ac88320..5d8e76bf8 100644 --- a/pkg/controller/migmigration/pod.go +++ b/pkg/controller/migmigration/pod.go @@ -7,6 +7,7 @@ import ( liberr "github.com/konveyor/controller/pkg/error" migapi "github.com/konveyor/mig-controller/pkg/apis/migration/v1alpha1" pvdr "github.com/konveyor/mig-controller/pkg/cloudprovider" + migevent "github.com/konveyor/mig-controller/pkg/event" "github.com/konveyor/mig-controller/pkg/pods" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -147,6 +148,13 @@ func (t *Task) haveResticPodsStarted() (bool, error) { } for _, pod := range list.Items { + // Logs abnormal events for Restic Pods if any are found + migevent.LogAbnormalEventsForResource( + client, t.Log, + "Found abnormal event for Restic Pod", + types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}, + "pod") + if pod.DeletionTimestamp != nil { t.Log.Info("Deletion timestamp found on Restic Pod, "+ "Pod is in the process of deleting. Requeuing and waiting for restart.", @@ -265,6 +273,13 @@ func (t *Task) haveVeleroPodsStarted() (bool, error) { } for _, pod := range list.Items { + // Logs abnormal events for Velero Pod if any are found + migevent.LogAbnormalEventsForResource( + client, t.Log, + "Found abnormal event for Velero Pod", + types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}, + "pod") + if pod.DeletionTimestamp != nil { t.Log.Info("Found Velero Pod with deletion timestamp."+ " Requeuing and waiting for Pod to finish deleting and restart.", diff --git a/pkg/controller/migmigration/stage.go b/pkg/controller/migmigration/stage.go index ea1548e26..02f561179 100644 --- a/pkg/controller/migmigration/stage.go +++ b/pkg/controller/migmigration/stage.go @@ -14,6 +14,7 @@ import ( liberr "github.com/konveyor/controller/pkg/error" migapi "github.com/konveyor/mig-controller/pkg/apis/migration/v1alpha1" + migevent "github.com/konveyor/mig-controller/pkg/event" migpods "github.com/konveyor/mig-controller/pkg/pods" "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" @@ -517,6 +518,14 @@ func (t *Task) stagePodReport(client k8sclient.Client) (report PodStartReport, e for _, pod := range podList.Items { t.Log.V(4).Info("Checking if Stage Pod is healthy.", "pod", path.Join(pod.Namespace, pod.Name)) + + // Logs abnormal events for Stage Pods if any are found + migevent.LogAbnormalEventsForResource( + client, t.Log, + "Found abnormal event for Stage Pod", + types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}, + "pod") + initReady := true for _, c := range pod.Status.InitContainerStatuses { // If the init contianer is waiting, then nothing can happen. diff --git a/pkg/controller/migmigration/validation.go b/pkg/controller/migmigration/validation.go index bff9940cb..f4830a9af 100644 --- a/pkg/controller/migmigration/validation.go +++ b/pkg/controller/migmigration/validation.go @@ -10,9 +10,11 @@ import ( liberr "github.com/konveyor/controller/pkg/error" migapi "github.com/konveyor/mig-controller/pkg/apis/migration/v1alpha1" "github.com/konveyor/mig-controller/pkg/compat" + migevent "github.com/konveyor/mig-controller/pkg/event" migref "github.com/konveyor/mig-controller/pkg/reference" corev1 "k8s.io/api/core/v1" k8sLabels "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/types" k8sclient "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -298,6 +300,15 @@ func ensureRegistryHealth(c k8sclient.Client, migration *migapi.MigMigration) (i return nEnsured, "", liberr.Wrap(err) } + for _, registryPod := range registryPods.Items { + // Logs abnormal events for Registry Pods if any are found + migevent.LogAbnormalEventsForResource( + client, log, + "Found abnormal event for Registry Pod", + types.NamespacedName{Namespace: registryPod.Namespace, Name: registryPod.Name}, + "pod") + } + registryPodCount := len(registryPods.Items) if registryPodCount < 1 { unHealthyClusterName = cluster.ObjectMeta.Name diff --git a/pkg/event/event.go b/pkg/event/event.go new file mode 100644 index 000000000..8deab9784 --- /dev/null +++ b/pkg/event/event.go @@ -0,0 +1,87 @@ +package event + +import ( + "context" + "path" + "strings" + + "github.com/go-logr/logr" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + k8sclient "sigs.k8s.io/controller-runtime/pkg/client" +) + +// GetAbnormalEventsForResource gets unique events of non-normal type for +// a namespaced resource. Useful for logging the most relevant events +// related to a resource we're waiting on. +func GetAbnormalEventsForResource(client client.Client, + nsName types.NamespacedName, resourceKind string) ([]corev1.Event, error) { + uniqueEventMap := make(map[string]corev1.Event) + + eList := corev1.EventList{} + options := k8sclient.InNamespace(nsName.Namespace) + err := client.List(context.TODO(), options, &eList) + if err != nil { + return nil, err + } + for _, event := range eList.Items { + // Only want events for the kind indicated + if strings.ToLower(event.InvolvedObject.Kind) != strings.ToLower(resourceKind) { + continue + } + // Only get events for the resource.name we're interested in + if event.InvolvedObject.Name != nsName.Name { + continue + } + // Only get abnormal events + if event.Type == "Normal" { + continue + } + // Check if same event reason has already been seen, replace if timestamp is newer + eventFromMap, ok := uniqueEventMap[event.Message] + if !ok { + uniqueEventMap[event.Reason] = event + continue + } + // Found event in map. Overwrite it if this one is newer. + if eventFromMap.ObjectMeta.CreationTimestamp.Time. + Before(event.ObjectMeta.CreationTimestamp.Time) { + uniqueEventMap[event.Reason] = event + } + } + // Turn map into slice of events + matchingEvents := []corev1.Event{} + for _, event := range uniqueEventMap { + matchingEvents = append(matchingEvents, event) + } + + return matchingEvents, err +} + +// LogAbnormalEventsForResource logs unique events of non-normal type for +// a namespaced resource. Useful for logging the most relevant events +// related to a resource we're waiting on. +// The message logged will match what is provided in 'message' +func LogAbnormalEventsForResource( + client client.Client, log logr.Logger, message string, nsName types.NamespacedName, resourceKind string) { + + relevantEvents, err := GetAbnormalEventsForResource(client, + types.NamespacedName{Name: nsName.Name, Namespace: nsName.Namespace}, resourceKind) + if err != nil { + log.Info("Error getting events", + "kind", resourceKind, + "resource", path.Join(nsName.Namespace, nsName.Name), + "error", err) + return + } + for _, rEvent := range relevantEvents { + log.Info(message, + resourceKind, path.Join(nsName.Namespace, nsName.Name), + "eventType", rEvent.Type, + "eventReason", rEvent.Reason, + "eventMessage", rEvent.Message, + "eventFirstTimestamp", rEvent.FirstTimestamp) + } + +} From 6392f848b52f1ec04a04816d629d21c12f49b8f2 Mon Sep 17 00:00:00 2001 From: Derek Whatley Date: Thu, 1 Apr 2021 13:48:02 -0400 Subject: [PATCH 2/3] Fix --- pkg/event/event.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/event/event.go b/pkg/event/event.go index 8deab9784..8ad60b3e0 100644 --- a/pkg/event/event.go +++ b/pkg/event/event.go @@ -39,7 +39,7 @@ func GetAbnormalEventsForResource(client client.Client, continue } // Check if same event reason has already been seen, replace if timestamp is newer - eventFromMap, ok := uniqueEventMap[event.Message] + eventFromMap, ok := uniqueEventMap[event.Reason] if !ok { uniqueEventMap[event.Reason] = event continue From 882a0e1d1ff7a2df8eac1eb651ad3de554e1cd0d Mon Sep 17 00:00:00 2001 From: Derek Whatley Date: Thu, 1 Apr 2021 13:49:11 -0400 Subject: [PATCH 3/3] Clarify comment --- pkg/event/event.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/event/event.go b/pkg/event/event.go index 8ad60b3e0..86e9d5743 100644 --- a/pkg/event/event.go +++ b/pkg/event/event.go @@ -38,7 +38,7 @@ func GetAbnormalEventsForResource(client client.Client, if event.Type == "Normal" { continue } - // Check if same event reason has already been seen, replace if timestamp is newer + // Check if same event reason has already been seen eventFromMap, ok := uniqueEventMap[event.Reason] if !ok { uniqueEventMap[event.Reason] = event