Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add NSG monitoring for preconfiguredNSG #3156

Merged
merged 10 commits into from
Oct 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cmd/aro/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import (
)

func monitor(ctx context.Context, log *logrus.Entry) error {
_env, err := env.NewCore(ctx, log)
_env, err := env.NewEnv(ctx, log)
if err != nil {
return err
}
Expand Down Expand Up @@ -126,7 +126,7 @@ func monitor(ctx context.Context, log *logrus.Entry) error {
return err
}

mon := pkgmonitor.NewMonitor(log.WithField("component", "monitor"), dialer, dbMonitors, dbOpenShiftClusters, dbSubscriptions, m, clusterm, liveConfig)
mon := pkgmonitor.NewMonitor(log.WithField("component", "monitor"), dialer, dbMonitors, dbOpenShiftClusters, dbSubscriptions, m, clusterm, liveConfig, _env)

return mon.Run(ctx)
}
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ require (
github.com/Azure/azure-sdk-for-go v63.1.0+incompatible
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.7.1
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.3.1
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v2 v2.2.1
github.com/Azure/go-autorest/autorest v0.11.29
github.com/Azure/go-autorest/autorest/adal v0.9.23
github.com/Azure/go-autorest/autorest/date v0.3.0
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.3.1 h1:LNHhpdK7hzUcx/k1LIcuh
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.3.1/go.mod h1:uE9zaUfEQT/nbQjVi2IblCG9iaLtZsuYZ8ne+PuQ02M=
github.com/Azure/azure-sdk-for-go/sdk/internal v1.3.0 h1:sXr+ck84g/ZlZUOZiNELInmMgOsuGwdjjVkEIde0OtY=
github.com/Azure/azure-sdk-for-go/sdk/internal v1.3.0/go.mod h1:okt5dMMTOFjX/aovMlrjvvXoPMBVSPzk9185BT0+eZM=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal v1.1.2 h1:mLY+pNLjCUeKhgnAJWAKhEUQM+RJQo2H1fuGSw1Ky1E=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v2 v2.2.1 h1:bWh0Z2rOEDfB/ywv/l0iHN1JgyazE6kW/aIA89+CEK0=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v2 v2.2.1/go.mod h1:Bzf34hhAE9NSxailk8xVeLEZbUjOXcC+GnU1mMKdhLw=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.0.0 h1:ECsQtyERDVz3NP3kvDOTLvbQhqWp/x9EsGKtb4ogUr8=
github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 h1:UQHMgLO+TxOElx5B5HZ4hJQsoJ/PvUvKRhJHDQXO8P8=
github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
github.com/Azure/go-autorest v14.2.0+incompatible h1:V5VMDjClD3GiElqLWO7mz2MxNAK/vTfRHdAubSIPRgs=
Expand Down
2 changes: 1 addition & 1 deletion pkg/deploy/assets/rp-production.json

Large diffs are not rendered by default.

18 changes: 18 additions & 0 deletions pkg/deploy/generator/scripts/rpVMSS.sh
Original file line number Diff line number Diff line change
Expand Up @@ -387,8 +387,18 @@ StartLimitInterval=0
WantedBy=multi-user.target
EOF

# DOMAIN_NAME, CLUSTER_MDSD_ACCOUNT, CLUSTER_MDSD_CONFIG_VERSION, GATEWAY_DOMAINS, GATEWAY_RESOURCEGROUP, MDSD_ENVIRONMENT CLUSTER_MDSD_NAMESPACE
# are not used, but can't easily be refactored out. Should be revisited in the future.
echo "configuring aro-monitor service"
cat >/etc/sysconfig/aro-monitor <<EOF
AZURE_FP_CLIENT_ID='$FPCLIENTID'
DOMAIN_NAME='$LOCATION.$CLUSTERPARENTDOMAINNAME'
CLUSTER_MDSD_ACCOUNT='$CLUSTERMDSDACCOUNT'
CLUSTER_MDSD_CONFIG_VERSION='$CLUSTERMDSDCONFIGVERSION'
GATEWAY_DOMAINS='$GATEWAYDOMAINS'
GATEWAY_RESOURCEGROUP='$GATEWAYRESOURCEGROUPNAME'
MDSD_ENVIRONMENT='$MDSDENVIRONMENT'
CLUSTER_MDSD_NAMESPACE='$CLUSTERMDSDNAMESPACE'
CLUSTER_MDM_ACCOUNT='$CLUSTERMDMACCOUNT'
CLUSTER_MDM_NAMESPACE=BBM
DATABASE_ACCOUNT_NAME='$DATABASEACCOUNTNAME'
Expand All @@ -411,6 +421,14 @@ ExecStart=/usr/bin/docker run \
--name %N \
--rm \
--cap-drop net_raw \
-e AZURE_FP_CLIENT_ID \
-e DOMAIN_NAME \
-e CLUSTER_MDSD_ACCOUNT \
-e CLUSTER_MDSD_CONFIG_VERSION \
-e GATEWAY_DOMAINS \
-e GATEWAY_RESOURCEGROUP \
-e MDSD_ENVIRONMENT \
-e CLUSTER_MDSD_NAMESPACE \
nwnt marked this conversation as resolved.
Show resolved Hide resolved
-e CLUSTER_MDM_ACCOUNT \
-e CLUSTER_MDM_NAMESPACE \
-e DATABASE_ACCOUNT_NAME \
Expand Down
181 changes: 181 additions & 0 deletions pkg/monitor/azure/nsg/nsg.go
nwnt marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
package nsg

// Copyright (c) Microsoft Corporation.
// Licensed under the Apache License 2.0.

import (
"context"
"errors"
"fmt"
"net/http"
"net/netip"
"strings"
"sync"

"github.com/Azure/azure-sdk-for-go/sdk/azcore"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/arm"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v2"
"github.com/sirupsen/logrus"

"github.com/Azure/ARO-RP/pkg/api"
"github.com/Azure/ARO-RP/pkg/metrics"
"github.com/Azure/ARO-RP/pkg/monitor/dimension"
"github.com/Azure/ARO-RP/pkg/monitor/emitter"
"github.com/Azure/ARO-RP/pkg/monitor/monitoring"
sdknetwork "github.com/Azure/ARO-RP/pkg/util/azureclient/azuresdk/armnetwork"
)

const (
bennerv marked this conversation as resolved.
Show resolved Hide resolved
MetricPreconfiguredNSGEnabled = "monitor.preconfigurednsg.enabled"
MetricFailedNSGMonitorCreation = "monitor.preconfigurednsg.failedmonitorcreation"
MetricInvalidDenyRule = "monitor.preconfigurednsg.invaliddenyrule"
MetricSubnetAccessForbidden = "monitor.preconfigurednsg.subnetaccessforbidden"
MetricSubnetAccessResponseCode = "monitor.preconfigurednsg.subnetaccessresponsecode"
)

var expandNSG = "NetworkSecurityGroup"

var _ monitoring.Monitor = (*NSGMonitor)(nil)

// NSGMonitor is responsible for performing NSG rule validations when preconfiguredNSG is enabled
type NSGMonitor struct {
bennerv marked this conversation as resolved.
Show resolved Hide resolved
log *logrus.Entry
emitter metrics.Emitter
oc *api.OpenShiftCluster

wg *sync.WaitGroup

subnetClient sdknetwork.SubnetsClient
dims map[string]string
}

func NewNSGMonitor(log *logrus.Entry, oc *api.OpenShiftCluster, subscriptionID string, subnetClient sdknetwork.SubnetsClient, emitter metrics.Emitter, wg *sync.WaitGroup) *NSGMonitor {
return &NSGMonitor{
log: log,
emitter: emitter,
oc: oc,

bennerv marked this conversation as resolved.
Show resolved Hide resolved
subnetClient: subnetClient,
wg: wg,

dims: map[string]string{
dimension.ResourceID: oc.ID,
dimension.SubscriptionID: subscriptionID,
dimension.Location: oc.Location,
},
}
}

type subnetNSGConfig struct {
// subnet CIDR range
prefix []netip.Prefix
// The rules from the subnet NSG
nsg *armnetwork.SecurityGroup
}

func (n *NSGMonitor) toSubnetConfig(ctx context.Context, subnetID string) (subnetNSGConfig, error) {
nwnt marked this conversation as resolved.
Show resolved Hide resolved
r, err := arm.ParseResourceID(subnetID)
if err != nil {
return subnetNSGConfig{}, err
}

dims := map[string]string{
dimension.Subnet: r.Name,
dimension.Vnet: r.Parent.Name,
dimension.NSGResourceGroup: r.ResourceGroupName,
}

subnet, err := n.subnetClient.Get(ctx, r.ResourceGroupName, r.Parent.Name, r.Name, &armnetwork.SubnetsClientGetOptions{Expand: &expandNSG})
if err != nil {
var respErr *azcore.ResponseError
if errors.As(err, &respErr); respErr.StatusCode == http.StatusForbidden {
emitter.EmitGauge(n.emitter, MetricSubnetAccessForbidden, int64(1), n.dims, dims)
}
n.log.Errorf("error while getting subnet %s. %s", subnetID, err)
return subnetNSGConfig{}, err
}

var cidrs []string
if subnet.Properties.AddressPrefix != nil {
cidrs = append(cidrs, *subnet.Properties.AddressPrefix)
}
for _, sn := range subnet.Properties.AddressPrefixes {
cidrs = append(cidrs, *sn)
}
prefixes := toPrefixes(n.log, cidrs)
if len(prefixes) == 0 {
return subnetNSGConfig{}, errors.New("no valid subnet ranges found")
}
return subnetNSGConfig{prefixes, subnet.Properties.NetworkSecurityGroup}, nil
}

func (n *NSGMonitor) Monitor(ctx context.Context) []error {
defer n.wg.Done()
masterSubnet, err := n.toSubnetConfig(ctx, n.oc.Properties.MasterProfile.SubnetID)
if err != nil {
// FP has no access to the subnet
return []error{err}
}

// need this to get the right workerProfiles
workerProfiles, _ := api.GetEnrichedWorkerProfiles(n.oc.Properties)
workerSubnets := make([]subnetNSGConfig, 0, len(workerProfiles))
workerPrefixes := make([]netip.Prefix, 0, len(workerProfiles))
for _, wp := range workerProfiles {
// Customer can configure a machineset with an invalid subnet.
// In such case, the subnetID will be empty.
if len(wp.SubnetID) == 0 {
continue
}

s, err := n.toSubnetConfig(ctx, wp.SubnetID)
nwnt marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
// FP has no access to the subnet
return []error{err}
}
workerSubnets = append(workerSubnets, s)
workerPrefixes = append(workerPrefixes, s.prefix...)
}

// to make sure each NSG is processed only once
nsgSet := map[string]*armnetwork.SecurityGroup{}
if masterSubnet.nsg != nil && masterSubnet.nsg.ID != nil {
nsgSet[*masterSubnet.nsg.ID] = masterSubnet.nsg
}
for _, w := range workerSubnets {
if w.nsg != nil && w.nsg.ID != nil {
nsgSet[*w.nsg.ID] = w.nsg
}
}

for nsgID, nsg := range nsgSet {
nwnt marked this conversation as resolved.
Show resolved Hide resolved
for _, rule := range nsg.Properties.SecurityRules {
if rule.Properties.Access != nil && *rule.Properties.Access == armnetwork.SecurityRuleAccessAllow {
// Allow rule - skip.
continue
}
// Deny rule
nsgResource, err := arm.ParseResourceID(nsgID)
if err != nil {
n.log.Errorf("Unable to parse NSG resource ID: %s. %s", nsgID, err)
continue
}

r := newRuleChecker(n.log, masterSubnet.prefix, workerPrefixes, rule)

if r.isInvalidDenyRule() {
dims := map[string]string{
nwnt marked this conversation as resolved.
Show resolved Hide resolved
dimension.NSGResourceGroup: nsgResource.ResourceGroupName,
dimension.NSG: nsgResource.Name,
dimension.NSGRuleName: *rule.Name,
dimension.NSGRuleSources: strings.Join(r.sourceStrings, ","),
dimension.NSGRuleDestinations: strings.Join(r.destinationStrings, ","),
dimension.NSGRuleDirection: string(*rule.Properties.Direction),
dimension.NSGRulePriority: fmt.Sprint(*rule.Properties.Priority),
}
emitter.EmitGauge(n.emitter, MetricInvalidDenyRule, int64(1), n.dims, dims)
}
}
}
return []error{}
}
Loading