Skip to content

Commit

Permalink
Make logging more concise (#410)
Browse files Browse the repository at this point in the history
This makes logging more concise by changing logger names to something like `controllers.Runner` to `actions-runner-controller.runner` after the standard `controller-rutime.controller` and reducing redundant logs by removing unnecessary requeues. I have also tweaked log messages so that their style is more consistent, which will also help readability. Also, runnerreplicaset-controller lacked useful logs so I have enhanced it.
  • Loading branch information
mumoshu authored Mar 19, 2021
1 parent 07f822b commit bc6e499
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 43 deletions.
17 changes: 10 additions & 7 deletions controllers/runner_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,9 +167,11 @@ func (r *RunnerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
// Gracefully handle pod-already-exists errors due to informer cache delay.
// Without this we got a few errors like the below on new runner pod:
// 2021-03-16T00:23:10.116Z ERROR controller-runtime.controller Reconciler error {"controller": "runner-controller", "request": "default/example-runnerdeploy-b2g2g-j4mcp", "error": "pods \"example-runnerdeploy-b2g2g-j4mcp\" already exists"}
log.Info("Runner pod already exists. Probably this pod has been already created in previous reconcilation but the new pod is not yet cached.")
log.Info(
"Failed to create pod due to AlreadyExists error. Probably this pod has been already created in previous reconcilation but is still not in the informer cache. Will retry on pod created. If it doesn't repeat, there's no problem",
)

return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
return ctrl.Result{}, nil
}

log.Error(err, "Failed to create pod resource")
Expand All @@ -187,7 +189,7 @@ func (r *RunnerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {

if deletionDidTimeout {
log.Info(
"Pod failed to delete itself in a timely manner. "+
fmt.Sprintf("Failed to delete pod within %s. ", deletionTimeout)+
"This is typically the case when a Kubernetes node became unreachable "+
"and the kube controller started evicting nodes. Forcefully deleting the pod to not get stuck.",
"podDeletionTimestamp", pod.DeletionTimestamp,
Expand Down Expand Up @@ -262,9 +264,12 @@ func (r *RunnerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
if lastCheckTime := runner.Status.LastRegistrationCheckTime; lastCheckTime != nil {
nextCheckTime := lastCheckTime.Add(registrationCheckInterval)
now := time.Now()
if nextCheckTime.After(now) {
requeueAfter := nextCheckTime.Sub(now)

// Requeue scheduled by RequeueAfter can happen a bit earlier (like dozens of milliseconds)
// so to avoid excessive, in-effective retry, we heuristically ignore the remaining delay in case it is
// shorter than 1s
requeueAfter := nextCheckTime.Sub(now) - time.Second
if requeueAfter > 0 {
log.Info(
fmt.Sprintf("Skipped registration check because it's deferred until %s. Retrying in %s at latest", nextCheckTime, requeueAfter),
"lastRegistrationCheckTime", lastCheckTime,
Expand Down Expand Up @@ -795,8 +800,6 @@ func (r *RunnerReconciler) SetupWithManager(mgr ctrl.Manager) error {

r.Recorder = mgr.GetEventRecorderFor(name)

r.Recorder = mgr.GetEventRecorderFor(name)

return ctrl.NewControllerManagedBy(mgr).
For(&v1alpha1.Runner{}).
Owns(&corev1.Pod{}).
Expand Down
27 changes: 19 additions & 8 deletions controllers/runnerdeployment_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,17 +192,28 @@ func (r *RunnerDeploymentReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
if len(oldSets) > 0 {
readyReplicas := newestSet.Status.ReadyReplicas

if readyReplicas < currentDesiredReplicas {
log.WithValues("runnerreplicaset", types.NamespacedName{
oldSetsCount := len(oldSets)

logWithDebugInfo := log.WithValues(
"newest_runnerreplicaset", types.NamespacedName{
Namespace: newestSet.Namespace,
Name: newestSet.Name,
}).
Info("Waiting until the newest runner replica set to be 100% available",
"ready", readyReplicas,
"desired", currentDesiredReplicas,
)
},
"newest_runnerreplicaset_replicas_ready", readyReplicas,
"newest_runnerreplicaset_replicas_desired", currentDesiredReplicas,
"old_runnerreplicasets_count", oldSetsCount,
)

if readyReplicas < currentDesiredReplicas {
logWithDebugInfo.
Info("Waiting until the newest runnerreplicaset to be 100% available")

return ctrl.Result{}, nil
}

return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
if oldSetsCount > 0 {
logWithDebugInfo.
Info("The newest runnerreplicaset is 100% available. Deleting old runnerreplicasets")
}

for i := range oldSets {
Expand Down
27 changes: 15 additions & 12 deletions controllers/runnerreplicaset_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,14 @@ func (r *RunnerReplicaSetReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
desired = 1
}

log.V(0).Info("debug", "desired", desired, "available", available)

if available > desired {
n := available - desired

// get runners that are currently not busy
var notBusy []v1alpha1.Runner
log.V(0).Info(fmt.Sprintf("Deleting %d runners", n), "desired", desired, "available", available, "ready", ready)

// get runners that are currently offline/not busy/timed-out to register
var deletionCandidates []v1alpha1.Runner

for _, runner := range allRunners.Items {
busy, err := r.GitHubClient.IsRunnerBusy(ctx, runner.Spec.Enterprise, runner.Spec.Organization, runner.Spec.Repository, runner.Name)
if err != nil {
Expand Down Expand Up @@ -168,35 +169,37 @@ func (r *RunnerReplicaSetReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
"configuredRegistrationTimeout", registrationTimeout,
)

notBusy = append(notBusy, runner)
deletionCandidates = append(deletionCandidates, runner)
}

// offline runners should always be a great target for scale down
if offline {
notBusy = append(notBusy, runner)
deletionCandidates = append(deletionCandidates, runner)
}
} else if !busy {
notBusy = append(notBusy, runner)
deletionCandidates = append(deletionCandidates, runner)
}
}

if len(notBusy) < n {
n = len(notBusy)
if len(deletionCandidates) < n {
n = len(deletionCandidates)
}

for i := 0; i < n; i++ {
if err := r.Client.Delete(ctx, &notBusy[i]); client.IgnoreNotFound(err) != nil {
if err := r.Client.Delete(ctx, &deletionCandidates[i]); client.IgnoreNotFound(err) != nil {
log.Error(err, "Failed to delete runner resource")

return ctrl.Result{}, err
}

r.Recorder.Event(&rs, corev1.EventTypeNormal, "RunnerDeleted", fmt.Sprintf("Deleted runner '%s'", notBusy[i].Name))
log.Info("Deleted runner", "runnerreplicaset", rs.ObjectMeta.Name)
r.Recorder.Event(&rs, corev1.EventTypeNormal, "RunnerDeleted", fmt.Sprintf("Deleted runner '%s'", deletionCandidates[i].Name))
log.Info("Deleted runner")
}
} else if desired > available {
n := desired - available

log.V(0).Info(fmt.Sprintf("Creating %d runner(s)", n), "desired", desired, "available", available, "ready", ready)

for i := 0; i < n; i++ {
newRunner, err := r.newRunner(rs)
if err != nil {
Expand Down
32 changes: 16 additions & 16 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ const (
)

var (
scheme = runtime.NewScheme()
setupLog = ctrl.Log.WithName("setup")
scheme = runtime.NewScheme()
log = ctrl.Log.WithName("actions-runner-controller")
)

func init() {
Expand Down Expand Up @@ -109,78 +109,78 @@ func main() {
Namespace: namespace,
})
if err != nil {
setupLog.Error(err, "unable to start manager")
log.Error(err, "unable to start manager")
os.Exit(1)
}

runnerReconciler := &controllers.RunnerReconciler{
Client: mgr.GetClient(),
Log: ctrl.Log.WithName("controllers").WithName("Runner"),
Log: log.WithName("runner"),
Scheme: mgr.GetScheme(),
GitHubClient: ghClient,
RunnerImage: runnerImage,
DockerImage: dockerImage,
}

if err = runnerReconciler.SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "Runner")
log.Error(err, "unable to create controller", "controller", "Runner")
os.Exit(1)
}

runnerSetReconciler := &controllers.RunnerReplicaSetReconciler{
Client: mgr.GetClient(),
Log: ctrl.Log.WithName("controllers").WithName("RunnerReplicaSet"),
Log: log.WithName("runnerreplicaset"),
Scheme: mgr.GetScheme(),
GitHubClient: ghClient,
}

if err = runnerSetReconciler.SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "RunnerReplicaSet")
log.Error(err, "unable to create controller", "controller", "RunnerReplicaSet")
os.Exit(1)
}

runnerDeploymentReconciler := &controllers.RunnerDeploymentReconciler{
Client: mgr.GetClient(),
Log: ctrl.Log.WithName("controllers").WithName("RunnerDeployment"),
Log: log.WithName("runnerdeployment"),
Scheme: mgr.GetScheme(),
CommonRunnerLabels: commonRunnerLabels,
}

if err = runnerDeploymentReconciler.SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "RunnerDeployment")
log.Error(err, "unable to create controller", "controller", "RunnerDeployment")
os.Exit(1)
}

horizontalRunnerAutoscaler := &controllers.HorizontalRunnerAutoscalerReconciler{
Client: mgr.GetClient(),
Log: ctrl.Log.WithName("controllers").WithName("HorizontalRunnerAutoscaler"),
Log: log.WithName("horizontalrunnerautoscaler"),
Scheme: mgr.GetScheme(),
GitHubClient: ghClient,
CacheDuration: syncPeriod - 10*time.Second,
}

if err = horizontalRunnerAutoscaler.SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "HorizontalRunnerAutoscaler")
log.Error(err, "unable to create controller", "controller", "HorizontalRunnerAutoscaler")
os.Exit(1)
}

if err = (&actionsv1alpha1.Runner{}).SetupWebhookWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create webhook", "webhook", "Runner")
log.Error(err, "unable to create webhook", "webhook", "Runner")
os.Exit(1)
}
if err = (&actionsv1alpha1.RunnerDeployment{}).SetupWebhookWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create webhook", "webhook", "RunnerDeployment")
log.Error(err, "unable to create webhook", "webhook", "RunnerDeployment")
os.Exit(1)
}
if err = (&actionsv1alpha1.RunnerReplicaSet{}).SetupWebhookWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create webhook", "webhook", "RunnerReplicaSet")
log.Error(err, "unable to create webhook", "webhook", "RunnerReplicaSet")
os.Exit(1)
}
// +kubebuilder:scaffold:builder

setupLog.Info("starting manager")
log.Info("starting manager")
if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
setupLog.Error(err, "problem running manager")
log.Error(err, "problem running manager")
os.Exit(1)
}
}
Expand Down

0 comments on commit bc6e499

Please sign in to comment.