diff --git a/pkg/monitor/cluster/cluster.go b/pkg/monitor/cluster/cluster.go index 22ee2eef06e..3b1684b3370 100644 --- a/pkg/monitor/cluster/cluster.go +++ b/pkg/monitor/cluster/cluster.go @@ -29,6 +29,7 @@ import ( arov1alpha1 "github.com/Azure/ARO-RP/pkg/operator/apis/aro.openshift.io/v1alpha1" aroclient "github.com/Azure/ARO-RP/pkg/operator/clientset/versioned" "github.com/Azure/ARO-RP/pkg/util/steps" + "github.com/Azure/ARO-RP/pkg/validate/dynamic" ) var _ monitoring.Monitor = (*Monitor)(nil) @@ -60,10 +61,11 @@ type Monitor struct { arodl *appsv1.DeploymentList } - wg *sync.WaitGroup + wg *sync.WaitGroup + validator dynamic.Dynamic } -func NewMonitor(log *logrus.Entry, restConfig *rest.Config, oc *api.OpenShiftCluster, m metrics.Emitter, hiveRestConfig *rest.Config, hourlyRun bool, wg *sync.WaitGroup) (*Monitor, error) { +func NewMonitor(log *logrus.Entry, restConfig *rest.Config, oc *api.OpenShiftCluster, m metrics.Emitter, hiveRestConfig *rest.Config, hourlyRun bool, wg *sync.WaitGroup, validator dynamic.Dynamic) (*Monitor, error) { r, err := azure.ParseResourceID(oc.ID) if err != nil { return nil, err @@ -136,6 +138,7 @@ func NewMonitor(log *logrus.Entry, restConfig *rest.Config, oc *api.OpenShiftClu ocpclientset: ocpclientset, hiveclientset: hiveclientset, wg: wg, + validator: validator, }, nil } @@ -210,6 +213,7 @@ func (mon *Monitor) Monitor(ctx context.Context) (errs []error) { mon.emitHiveRegistrationStatus, mon.emitOperatorFlagsAndSupportBanner, mon.emitPucmState, + mon.emitValidatePermissions, mon.emitCertificateExpirationStatuses, mon.emitEtcdCertificateExpiry, mon.emitPrometheusAlerts, // at the end for now because it's the slowest/least reliable diff --git a/pkg/monitor/cluster/validatepermissions.go b/pkg/monitor/cluster/validatepermissions.go new file mode 100644 index 00000000000..eca02d5a778 --- /dev/null +++ b/pkg/monitor/cluster/validatepermissions.go @@ -0,0 +1,46 @@ +package cluster + +// Copyright (c) Microsoft Corporation. +// Licensed under the Apache License 2.0. + +import ( + "context" + + "github.com/Azure/ARO-RP/pkg/validate/dynamic" +) + +/*************************************************************** + Monitor the Cluster Service Prinicpal required permissions: + - Network Contributor role on vnet +****************************************************************/ + +func (mon *Monitor) emitValidatePermissions(ctx context.Context) error { + subnets := []dynamic.Subnet{{ + ID: mon.oc.Properties.MasterProfile.SubnetID, + Path: "properties.masterProfile.subnetId", + }} + + err := mon.validator.ValidateVnet(ctx, mon.oc.Location, subnets, mon.oc.Properties.NetworkProfile.PodCIDR, + mon.oc.Properties.NetworkProfile.ServiceCIDR) + + if err != nil { + mon.emitGauge("cluster.validate.permissions", 1, map[string]string{ + "ValidateVnetPermissions": err.Error(), + }) + } + + err = mon.validator.ValidateSubnets(ctx, mon.oc, subnets) + if err != nil { + mon.emitGauge("cluster.validate.permissions", 1, map[string]string{ + "ValidateSubnet": err.Error(), + }) + } + + err = mon.validator.ValidateDiskEncryptionSets(ctx, mon.oc) + if err != nil { + mon.emitGauge("cluster.validate.permissions", 1, map[string]string{ + "ValidateDiskEncryptionSet": err.Error(), + }) + } + return nil +} diff --git a/pkg/monitor/cluster/validatepermissions_test.go b/pkg/monitor/cluster/validatepermissions_test.go new file mode 100644 index 00000000000..7d7ebf3058e --- /dev/null +++ b/pkg/monitor/cluster/validatepermissions_test.go @@ -0,0 +1,194 @@ +package cluster + +// Copyright (c) Microsoft Corporation. +// Licensed under the Apache License 2.0. + +import ( + "context" + "strconv" + "testing" + + "github.com/golang/mock/gomock" + appsv1 "k8s.io/api/apps/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/fake" + + mock_metrics "github.com/Azure/ARO-RP/pkg/util/mocks/metrics" +) + +func TestEmitValidatePermissions(t *testing.T) { + ctx := context.Background() + + cli := fake.NewSimpleClientset( + &appsv1.StatefulSet{ // metrics expected + ObjectMeta: metav1.ObjectMeta{ + Name: "name1", + Namespace: "openshift", + }, + Status: appsv1.StatefulSetStatus{ + Replicas: 2, + ReadyReplicas: 1, + }, + }, &appsv1.StatefulSet{ // no metric expected + ObjectMeta: metav1.ObjectMeta{ + Name: "name2", + Namespace: "openshift", + }, + Status: appsv1.StatefulSetStatus{ + Replicas: 2, + ReadyReplicas: 2, + }, + }, &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ // no metric expected -customer + Name: "name2", + Namespace: "customer", + }, + Status: appsv1.StatefulSetStatus{ + Replicas: 2, + ReadyReplicas: 1, + }, + }, + ) + + controller := gomock.NewController(t) + defer controller.Finish() + + m := mock_metrics.NewMockEmitter(controller) + + mon := &Monitor{ + cli: cli, + m: m, + } + + m.EXPECT().EmitGauge("statefulset.count", int64(3), map[string]string{}) + + m.EXPECT().EmitGauge("statefulset.statuses", int64(1), map[string]string{ + "name": "name1", + "namespace": "openshift", + "replicas": strconv.Itoa(2), + "readyReplicas": strconv.Itoa(1), + }) + + err := mon.emitValidatePermissions(ctx) + if err != nil { + t.Fatal(err) + } +} + +/* +func TestValidateVnetPermissions(t *testing.T) { + ctx := context.Background() + + resourceType := "virtualNetworks" + resourceProvider := "Microsoft.Network" + + for _, tt := range []struct { + name string + mocks func(*mock_authorization.MockPermissionsClient, context.CancelFunc) + wantErr string + }{ + { + name: "pass", + mocks: func(permissionsClient *mock_authorization.MockPermissionsClient, cancel context.CancelFunc) { + permissionsClient.EXPECT(). + ListForResource(gomock.Any(), resourceGroupName, resourceProvider, "", resourceType, vnetName). + Return([]mgmtauthorization.Permission{ + { + Actions: &[]string{ + "Microsoft.Network/virtualNetworks/join/action", + "Microsoft.Network/virtualNetworks/read", + "Microsoft.Network/virtualNetworks/write", + "Microsoft.Network/virtualNetworks/subnets/join/action", + "Microsoft.Network/virtualNetworks/subnets/read", + "Microsoft.Network/virtualNetworks/subnets/write", + }, + NotActions: &[]string{}, + }, + }, nil) + }, + }, + { + name: "fail: missing permissions", + mocks: func(permissionsClient *mock_authorization.MockPermissionsClient, cancel context.CancelFunc) { + permissionsClient.EXPECT(). + ListForResource(gomock.Any(), resourceGroupName, resourceProvider, "", resourceType, vnetName). + Do(func(arg0, arg1, arg2, arg3, arg4, arg5 interface{}) { + cancel() + }). + Return( + []mgmtauthorization.Permission{ + { + Actions: &[]string{}, + NotActions: &[]string{}, + }, + }, + nil, + ) + }, + wantErr: "400: InvalidServicePrincipalPermissions: : The cluster service principal (Application ID: fff51942-b1f9-4119-9453-aaa922259eb7) does not have Network Contributor role on vnet '" + vnetID + "'.", + }, + { + name: "fail: not found", + mocks: func(permissionsClient *mock_authorization.MockPermissionsClient, cancel context.CancelFunc) { + permissionsClient.EXPECT(). + ListForResource(gomock.Any(), resourceGroupName, resourceProvider, "", resourceType, vnetName). + Do(func(arg0, arg1, arg2, arg3, arg4, arg5 interface{}) { + cancel() + }). + Return( + nil, + autorest.DetailedError{ + StatusCode: http.StatusNotFound, + }, + ) + }, + wantErr: "400: InvalidLinkedVNet: : The vnet '" + vnetID + "' could not be found.", + }, + { + name: "fail: fp/sp has no permission on the target vnet (forbidden error)", + mocks: func(permissionsClient *mock_authorization.MockPermissionsClient, cancel context.CancelFunc) { + permissionsClient.EXPECT(). + ListForResource(gomock.Any(), resourceGroupName, resourceProvider, "", resourceType, vnetName). + Do(func(arg0, arg1, arg2, arg3, arg4, arg5 interface{}) { + cancel() + }). + Return( + nil, + autorest.DetailedError{ + StatusCode: http.StatusForbidden, + Message: "some forbidden error on the resource.", + }, + ) + }, + + wantErr: "400: InvalidServicePrincipalPermissions: : The cluster service principal (Application ID: fff51942-b1f9-4119-9453-aaa922259eb7) does not have Network Contributor role on vnet '" + vnetID + "'.\nOriginal error message: some forbidden error on the resource.", + }, + } { + t.Run(tt.name, func(t *testing.T) { + controller := gomock.NewController(t) + defer controller.Finish() + + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + permissionsClient := mock_authorization.NewMockPermissionsClient(controller) + tt.mocks(permissionsClient, cancel) + + dv := &dynamic{ + appID: "fff51942-b1f9-4119-9453-aaa922259eb7", + authorizerType: AuthorizerClusterServicePrincipal, + log: logrus.NewEntry(logrus.StandardLogger()), + permissions: permissionsClient, + } + + vnetr, err := azure.ParseResourceID(vnetID) + if err != nil { + t.Fatal(err) + } + + err = dv.validateVnetPermissions(ctx, vnetr) + utilerror.AssertErrorMessage(t, err, tt.wantErr) + }) + } +} +*/ diff --git a/pkg/monitor/worker.go b/pkg/monitor/worker.go index 8f0ce09ca33..1befa1c0e57 100644 --- a/pkg/monitor/worker.go +++ b/pkg/monitor/worker.go @@ -12,20 +12,25 @@ import ( "github.com/Azure/azure-sdk-for-go/sdk/azcore" "github.com/Azure/azure-sdk-for-go/sdk/azcore/arm" + "github.com/Azure/azure-sdk-for-go/sdk/azidentity" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v2" "github.com/Azure/go-autorest/autorest/azure" + "github.com/jongio/azidext/go/azidext" "github.com/sirupsen/logrus" "k8s.io/client-go/rest" "github.com/Azure/ARO-RP/pkg/api" + "github.com/Azure/ARO-RP/pkg/env" "github.com/Azure/ARO-RP/pkg/metrics" "github.com/Azure/ARO-RP/pkg/monitor/azure/nsg" "github.com/Azure/ARO-RP/pkg/monitor/cluster" "github.com/Azure/ARO-RP/pkg/monitor/dimension" "github.com/Azure/ARO-RP/pkg/monitor/monitoring" + "github.com/Azure/ARO-RP/pkg/util/azureclient/authz/remotepdp" utillog "github.com/Azure/ARO-RP/pkg/util/log" "github.com/Azure/ARO-RP/pkg/util/recover" "github.com/Azure/ARO-RP/pkg/util/restconfig" + "github.com/Azure/ARO-RP/pkg/validate/dynamic" ) // This function will continue to run until such time as it has a config to add to the global Hive shard map @@ -289,7 +294,48 @@ func (mon *monitor) workOne(ctx context.Context, log *logrus.Entry, doc *api.Ope monitors = append(monitors, nsgMon) } - c, err := cluster.NewMonitor(log, restConfig, doc.OpenShiftCluster, mon.clusterm, hiveRestConfig, hourlyRun, &wg) + var spClientCred azcore.TokenCredential + var pdpClient remotepdp.RemotePDPClient + spp := doc.OpenShiftCluster.Properties.ServicePrincipalProfile + _env, err := env.NewEnv(ctx, log) + if err != nil { + log.Error(err) + return + } + + r, err := azure.ParseResourceID(doc.OpenShiftCluster.ID) + if err != nil { + log.Error(err) + return + } + + sub_ := mon.subs[r.SubscriptionID] + tenantID := sub_.Subscription.Properties.TenantID + options := _env.Environment().ClientSecretCredentialOptions() + spTokenCredential, err := azidentity.NewClientSecretCredential( + tenantID, spp.ClientID, string(spp.ClientSecret), options) + + if err != nil { + log.Error(err) + return + } + + scopes := []string{_env.Environment().ResourceManagerScope} + spAuthorizer := azidext.NewTokenCredentialAdapter(spTokenCredential, scopes) + + spDynamic := dynamic.NewValidator( + log, + _env, + _env.Environment(), + sub_.ID, + spAuthorizer, + spp.ClientID, + dynamic.AuthorizerClusterServicePrincipal, + spClientCred, + pdpClient, + ) + + c, err := cluster.NewMonitor(log, restConfig, doc.OpenShiftCluster, mon.clusterm, hiveRestConfig, hourlyRun, &wg, spDynamic) if err != nil { log.Error(err) mon.m.EmitGauge("monitor.cluster.failedworker", 1, map[string]string{ diff --git a/test/e2e/monitor.go b/test/e2e/monitor.go index d54507d27a6..00cc9f035af 100644 --- a/test/e2e/monitor.go +++ b/test/e2e/monitor.go @@ -13,6 +13,7 @@ import ( "github.com/Azure/ARO-RP/pkg/api" "github.com/Azure/ARO-RP/pkg/metrics/noop" "github.com/Azure/ARO-RP/pkg/monitor/cluster" + "github.com/Azure/ARO-RP/pkg/validate/dynamic" ) var _ = Describe("Monitor", func() { @@ -21,9 +22,10 @@ var _ = Describe("Monitor", func() { By("creating a new monitor instance for the test cluster") var wg sync.WaitGroup wg.Add(1) + var validator dynamic.Dynamic mon, err := cluster.NewMonitor(log, clients.RestConfig, &api.OpenShiftCluster{ ID: resourceIDFromEnv(), - }, &noop.Noop{}, nil, true, &wg) + }, &noop.Noop{}, nil, true, &wg, validator) Expect(err).NotTo(HaveOccurred()) By("running the monitor once")