Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unplanned Maintenance Signals #3060

Merged
merged 22 commits into from
Aug 25, 2023
51 changes: 24 additions & 27 deletions pkg/frontend/frontend.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,12 @@ type frontend struct {
baseLog *logrus.Entry
env env.Interface

logMiddleware middleware.LogMiddleware
validateMiddleware middleware.ValidateMiddleware
m middleware.MetricsMiddleware
authMiddleware middleware.AuthMiddleware
apiVersionMiddleware middleware.ApiVersionValidator
logMiddleware middleware.LogMiddleware
validateMiddleware middleware.ValidateMiddleware
m middleware.MetricsMiddleware
authMiddleware middleware.AuthMiddleware
apiVersionMiddleware middleware.ApiVersionValidator
maintenanceMiddleware middleware.MaintenanceMiddleware

dbAsyncOperations database.AsyncOperations
dbClusterManagerConfiguration database.ClusterManagerConfigurations
Expand Down Expand Up @@ -152,6 +153,7 @@ func NewFrontend(ctx context.Context,
dbOpenShiftVersions: dbOpenShiftVersions,
apis: apis,
m: middleware.MetricsMiddleware{Emitter: m},
maintenanceMiddleware: middleware.MaintenanceMiddleware{Emitter: m},
aead: aead,
hiveClusterManager: hiveClusterManager,
kubeActionsFactory: kubeActionsFactory,
Expand Down Expand Up @@ -294,22 +296,17 @@ func (f *frontend) chiAuthenticatedRoutes(router chi.Router) {
})
r.Get("/supportedvmsizes", f.supportedvmsizes)

r.Route("/subscriptions/{subscriptionId}/resourcegroups/{resourceGroupName}/providers/{resourceProviderNamespace}/{resourceType}/{resourceName}/etcdrecovery",
func(r chi.Router) {
r.Post("/", f.postAdminOpenShiftClusterEtcdRecovery)
})

r.Route("/subscriptions/{subscriptionId}/resourcegroups/{resourceGroupName}/providers/{resourceProviderNamespace}/{resourceType}/{resourceName}/kubernetesobjects",
func(r chi.Router) {
r.Get("/", f.getAdminKubernetesObjects)
r.Post("/", f.postAdminKubernetesObjects)
r.Delete("/", f.deleteAdminKubernetesObjects)
},
)

r.Route("/subscriptions/{subscriptionId}", func(r chi.Router) {
r.Route("/resourcegroups/{resourceGroupName}/providers/{resourceProviderNamespace}/{resourceType}/{resourceName}", func(r chi.Router) {
r.Post("/approvecsr", f.postAdminOpenShiftClusterApproveCSR)
// Etcd recovery
r.With(f.maintenanceMiddleware.UnplannedMaintenanceSignal).Post("/etcdrecovery", f.postAdminOpenShiftClusterEtcdRecovery)

// Kubernetes objects
r.Get("/kubernetesobjects", f.getAdminKubernetesObjects)
r.With(f.maintenanceMiddleware.UnplannedMaintenanceSignal).Post("/kubernetesobjects", f.postAdminKubernetesObjects)
r.With(f.maintenanceMiddleware.UnplannedMaintenanceSignal).Delete("/kubernetesobjects", f.deleteAdminKubernetesObjects)

r.With(f.maintenanceMiddleware.UnplannedMaintenanceSignal).Post("/approvecsr", f.postAdminOpenShiftClusterApproveCSR)

// Pod logs
r.Get("/kubernetespodlogs", f.getAdminKubernetesPodLogs)
Expand All @@ -320,23 +317,23 @@ func (f *frontend) chiAuthenticatedRoutes(router chi.Router) {

r.Get("/clusterdeployment", f.getAdminHiveClusterDeployment)

r.Post("/redeployvm", f.postAdminOpenShiftClusterRedeployVM)
r.With(f.maintenanceMiddleware.UnplannedMaintenanceSignal).Post("/redeployvm", f.postAdminOpenShiftClusterRedeployVM)

r.Post("/stopvm", f.postAdminOpenShiftClusterStopVM)
r.With(f.maintenanceMiddleware.UnplannedMaintenanceSignal).Post("/stopvm", f.postAdminOpenShiftClusterStopVM)

r.Post("/startvm", f.postAdminOpenShiftClusterStartVM)
r.With(f.maintenanceMiddleware.UnplannedMaintenanceSignal).Post("/startvm", f.postAdminOpenShiftClusterStartVM)

r.Post("/upgrade", f.postAdminOpenShiftUpgrade)
r.With(f.maintenanceMiddleware.UnplannedMaintenanceSignal).Post("/upgrade", f.postAdminOpenShiftUpgrade)

r.Get("/skus", f.getAdminOpenShiftClusterVMResizeOptions)

r.Post("/resize", f.postAdminOpenShiftClusterVMResize)
r.With(f.maintenanceMiddleware.UnplannedMaintenanceSignal).Post("/resize", f.postAdminOpenShiftClusterVMResize)
niontive marked this conversation as resolved.
Show resolved Hide resolved

r.Post("/reconcilefailednic", f.postAdminReconcileFailedNIC)
r.With(f.maintenanceMiddleware.UnplannedMaintenanceSignal).Post("/reconcilefailednic", f.postAdminReconcileFailedNIC)

r.Post("/cordonnode", f.postAdminOpenShiftClusterCordonNode)
r.With(f.maintenanceMiddleware.UnplannedMaintenanceSignal).Post("/cordonnode", f.postAdminOpenShiftClusterCordonNode)

r.Post("/drainnode", f.postAdminOpenShiftClusterDrainNode)
r.With(f.maintenanceMiddleware.UnplannedMaintenanceSignal).Post("/drainnode", f.postAdminOpenShiftClusterDrainNode)
})
})

Expand Down
51 changes: 51 additions & 0 deletions pkg/frontend/middleware/maintenance.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package middleware

// Copyright (c) Microsoft Corporation.
// Licensed under the Apache License 2.0.

import (
"context"
"net/http"
"path/filepath"
"strings"
"time"

"github.com/Azure/ARO-RP/pkg/metrics"
)

// MaintenanceMiddleware provides HTTP middleware that reports maintenance
// activity as metrics through the embedded metrics.Emitter.
type MaintenanceMiddleware struct {
metrics.Emitter
}

// Emit metric for unplanned maintenance
func (mm MaintenanceMiddleware) UnplannedMaintenanceSignal(h http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
ctx, cancel := context.WithCancel(r.Context())
defer cancel()

resourceID := strings.TrimPrefix(filepath.Dir(r.URL.Path), "/admin")

// Use a do-while loop to ensure we emit the metric at least once
mm.emitMaintenanceSignal("unplanned", resourceID)
niontive marked this conversation as resolved.
Show resolved Hide resolved
go func(ctx context.Context, resourceID string) {
for {
select {
case <-ctx.Done():
return
default:
time.Sleep(1 * time.Minute)
niontive marked this conversation as resolved.
Show resolved Hide resolved
mm.emitMaintenanceSignal("unplanned", resourceID)
}
}
}(ctx, resourceID)

h.ServeHTTP(w, r)
})
}

func (mm MaintenanceMiddleware) emitMaintenanceSignal(maintenanceType, resourceID string) {
maintenanceMetric := "frontend.maintenance." + maintenanceType
mm.EmitGauge(maintenanceMetric, 1, map[string]string{
"resourceID": resourceID,
niontive marked this conversation as resolved.
Show resolved Hide resolved
})
}
53 changes: 53 additions & 0 deletions pkg/frontend/middleware/maintenance_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package middleware

// Copyright (c) Microsoft Corporation.
// Licensed under the Apache License 2.0.

import (
"net/http"
"testing"

"github.com/golang/mock/gomock"

"github.com/Azure/ARO-RP/pkg/portal/util/responsewriter"
mock_metrics "github.com/Azure/ARO-RP/pkg/util/mocks/metrics"
)

func TestUnplannedMaintenanceSignal(t *testing.T) {
for _, tt := range []struct {
name string
resourceID string
adminOperation string
}{
{
name: "emit unplanned maintenance signal",
resourceID: "/subscriptions/123/resourcegroups/456/providers/Microsoft.RedHatOpenShift/openShiftClusters/789",
adminOperation: "/startvm",
},
} {
t.Run(tt.name, func(t *testing.T) {
controller := gomock.NewController(t)
defer controller.Finish()

m := mock_metrics.NewMockEmitter(controller)

maintenanceMiddleware := MaintenanceMiddleware{m}

m.EXPECT().EmitGauge("frontend.maintenance.unplanned", int64(1), map[string]string{
"resourceID": tt.resourceID,
niontive marked this conversation as resolved.
Show resolved Hide resolved
})

handlerFunc := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {})
handler := maintenanceMiddleware.UnplannedMaintenanceSignal(handlerFunc)

path := "/admin" + tt.resourceID + tt.adminOperation
r, err := http.NewRequest(http.MethodPost, path, nil)
if err != nil {
t.Fatal(err)
}
w := responsewriter.New(r)

handler.ServeHTTP(w, r)
})
}
}