Skip to content

Commit

Permalink
test: garbage collection controller removes orphaned nics (#686)
Browse files Browse the repository at this point in the history
* test: garbage collection controller removes orphaned nics

* refactor: breaking logic into modular steps to reduce cyclomatic complexity

* ci: golang-ci lint

* fix: going back to utilization as the default

* refactor: moving env vars to azureEnv struct

* refactor: using azure clients defined inside of env

* refactor: using more generic list approach that can be generalized and used elsewhere

* refactor: using AZURE_RESOURCE_GROUP_MC as a name rather than AZURE_RESOURCE_GROUP

* ci: lint

* test: check azure garbage collection into our e2e matrix

* fix: propagating values to makefile

fix: propagating values to makefile

* fix: constructing mc rg

* fix: use CLUSTER_NAME instead of AZURE_CLUSTER_NAME

* test: refactoring to use environment

* fix: propagating location

* refactor: have acr e2e consume from environment vars stored in azureEnv

* fix: removing import

* refactor: renaming CLUSTER_NAME to match all other variables

* refactor: moving env vars outside of az-e2etest since e2etest now holds that state

* ci: make presubmit

* refactor: removing readme

* refactor: using lo.Must() + os.LookupEnv

* revert: go.mod go version change

* refactor: moving azuregc suite to the nodeclaim suite

* Update Makefile-az.mk

* refactor: using env block instead

* fix: comment

---------

Co-authored-by: Alex Leites <[email protected]>
  • Loading branch information
Bryce-Soghigian and tallaxes authored Feb 21, 2025
1 parent b199e51 commit 11f23ea
Show file tree
Hide file tree
Showing 6 changed files with 175 additions and 13 deletions.
12 changes: 10 additions & 2 deletions .github/workflows/e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,17 @@ jobs:
location: ${{ inputs.location }}
- name: run the ${{ inputs.suite }} test suite
if: inputs.suite != 'Nonbehavioral'
env:
AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }}
AZURE_RESOURCE_GROUP: ${{ env.RG_NAME }}
AZURE_LOCATION: ${{ inputs.location }}
AZURE_SUBSCRIPTION_ID: ${{ secrets.E2E_SUBSCRIPTION_ID }}
AZURE_ACR_NAME: ${{ env.ACR_NAME }}
TEST_SUITE: ${{ inputs.suite }}
GIT_REF: ${{ github.sha }}
run: |
AZURE_CLUSTER_NAME=${{ env.CLUSTER_NAME }} AZURE_RESOURCE_GROUP=${{ env.RG_NAME }} make az-creds
CLUSTER_NAME=${{ env.CLUSTER_NAME }} AZURE_ACR_NAME=${{ env.ACR_NAME}} TEST_SUITE="${{ inputs.suite }}" GIT_REF="$(git rev-parse HEAD)" make e2etests
make az-creds
make e2etests
- name: dump logs on failure
uses: ./.github/actions/e2e/dump-logs
if: failure() || cancelled()
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ e2etests: ## Run the e2e suite against your local cluster
# -count 1: prevents caching
# -timeout: If a test binary runs longer than TEST_TIMEOUT, panic
# -v: verbose output
cd test && CLUSTER_NAME=${CLUSTER_NAME} AZURE_ACR_NAME=${AZURE_ACR_NAME} go test \
cd test && AZURE_CLUSTER_NAME=${AZURE_CLUSTER_NAME} AZURE_ACR_NAME=${AZURE_ACR_NAME} AZURE_RESOURCE_GROUP=${AZURE_RESOURCE_GROUP} AZURE_SUBSCRIPTION_ID=${AZURE_SUBSCRIPTION_ID} AZURE_LOCATION=${AZURE_LOCATION} go test \
-p 1 \
-count 1 \
-timeout ${TEST_TIMEOUT} \
Expand Down
37 changes: 31 additions & 6 deletions test/pkg/environment/azure/environment.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,16 @@ limitations under the License.
package azure

import (
"fmt"
"os"
"testing"

"github.com/samber/lo"
v1 "k8s.io/api/core/v1"
karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1"

"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork"
"github.com/Azure/karpenter-provider-azure/pkg/apis/v1alpha2"
"github.com/Azure/karpenter-provider-azure/pkg/test"
"github.com/Azure/karpenter-provider-azure/test/pkg/environment/common"
Expand All @@ -40,16 +44,37 @@ const (

// Environment is the Azure-specific e2e test environment. It embeds the shared
// common.Environment and adds the cluster coordinates and Azure SDK clients
// that the Azure suites need.
type Environment struct {
	*common.Environment

	// NodeResourceGroup is the resource group the test helpers create and list
	// network interfaces in.
	NodeResourceGroup string
	// Region is the Azure location used for resources the tests create.
	Region string
	// SubscriptionID is the subscription the Azure clients are scoped to.
	SubscriptionID string
	// VNETResourceGroup is the resource group searched for the cluster's
	// virtual network.
	VNETResourceGroup string
	// ACRName is the container registry name without the ".azurecr.io" suffix.
	ACRName string
	// ClusterName is the name of the cluster under test.
	ClusterName string
	// ClusterResourceGroup is the resource group that holds the cluster itself.
	ClusterResourceGroup string

	// VNETClient lists virtual networks in the subscription.
	VNETClient *armnetwork.VirtualNetworksClient
	// InterfacesClient creates and lists network interfaces in the subscription.
	InterfacesClient *armnetwork.InterfacesClient
}

func NewEnvironment(t *testing.T) *Environment {
env := common.NewEnvironment(t)

return &Environment{
Region: "westus2",
Environment: env,
azureEnv := &Environment{
Environment: common.NewEnvironment(t),
SubscriptionID: lo.Must(os.LookupEnv("AZURE_SUBSCRIPTION_ID")),
ClusterName: lo.Must(os.LookupEnv("AZURE_CLUSTER_NAME")),
ClusterResourceGroup: lo.Must(os.LookupEnv("AZURE_RESOURCE_GROUP")),
ACRName: lo.Must(os.LookupEnv("ACR_NAME")),
Region: lo.Ternary(os.Getenv("AZURE_LOCATION") == "", "westus2", os.Getenv("AZURE_LOCATION")),
}

defaultNodeRG := fmt.Sprintf("MC_%s_%s_%s", azureEnv.ClusterResourceGroup, azureEnv.ClusterName, azureEnv.Region)
azureEnv.VNETResourceGroup = lo.Ternary(os.Getenv("VNET_RESOURCE_GROUP") == "", defaultNodeRG, os.Getenv("VNET_RESOURCE_GROUP"))
azureEnv.NodeResourceGroup = defaultNodeRG

cred := lo.Must(azidentity.NewDefaultAzureCredential(nil))
azureEnv.VNETClient = lo.Must(armnetwork.NewVirtualNetworksClient(azureEnv.SubscriptionID, cred, nil))
azureEnv.InterfacesClient = lo.Must(armnetwork.NewInterfacesClient(azureEnv.SubscriptionID, cred, nil))
return azureEnv
}

func (env *Environment) DefaultAKSNodeClass() *v1alpha2.AKSNodeClass {
Expand Down
84 changes: 84 additions & 0 deletions test/pkg/environment/azure/expectations.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
Portions Copyright (c) Microsoft Corporation.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package azure

import (
"context"
"fmt"
"strings"
"time"

"github.com/samber/lo"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1"

"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork"
)

// EventuallyExpectKarpenterNicsToBeDeleted polls the node resource group until
// no network interface carries the Karpenter nodepool tag. It fails the spec
// if that has not happened within 10 minutes, polling every 10 seconds.
//
// A listing error counts as a failed attempt and is retried, but it is
// asserted through the per-attempt Gomega so that the last error (or the name
// of the interface still present) appears in the failure output on timeout.
func (env *Environment) EventuallyExpectKarpenterNicsToBeDeleted() {
	GinkgoHelper()
	// The tag key is the nodepool label key with "/" replaced by "_".
	nodePoolTag := strings.ReplaceAll(karpv1.NodePoolLabelKey, "/", "_")

	Eventually(func(g Gomega) {
		pager := env.InterfacesClient.NewListPager(env.NodeResourceGroup, nil)
		for pager.More() {
			page, err := pager.NextPage(env.Context)
			g.Expect(err).ToNot(HaveOccurred(), "listing network interfaces in resource group %s", env.NodeResourceGroup)

			for _, nic := range page.Value {
				// Indexing a nil tag map is safe and simply reports "absent".
				_, tagged := nic.Tags[nodePoolTag]
				g.Expect(tagged).To(BeFalse(), "network interface %s still carries tag %s", lo.FromPtr(nic.Name), nodePoolTag)
			}
		}
	}).WithTimeout(10*time.Minute).WithPolling(10*time.Second).Should(Succeed(), "Expected all orphan NICs to be deleted")
}

// ExpectCreatedInterface issues a create-or-update for networkInterface in the
// node resource group and waits for the operation to finish. It fails the
// spec if either starting the operation or polling it to completion errors.
func (env *Environment) ExpectCreatedInterface(networkInterface armnetwork.Interface) {
	GinkgoHelper()

	nicName := lo.FromPtr(networkInterface.Name)
	operation, beginErr := env.InterfacesClient.BeginCreateOrUpdate(env.Context, env.NodeResourceGroup, nicName, networkInterface, nil)
	Expect(beginErr).ToNot(HaveOccurred())

	_, pollErr := operation.PollUntilDone(env.Context, nil)
	Expect(pollErr).ToNot(HaveOccurred())
}

// GetClusterSubnet returns the first subnet of the first virtual network found
// in the VNET resource group. It fails the spec with a descriptive message,
// rather than panicking, when the lookup errors or the virtual network has no
// properties or no subnets.
func (env *Environment) GetClusterSubnet() *armnetwork.Subnet {
	GinkgoHelper()
	vnet, err := firstVNETInRG(env.Context, env.VNETClient, env.VNETResourceGroup)
	Expect(err).ToNot(HaveOccurred())
	Expect(vnet).ToNot(BeNil(), "virtual network listing for resource group %s returned a nil entry", env.VNETResourceGroup)
	Expect(vnet.Properties).ToNot(BeNil(), "virtual network %s has no properties", lo.FromPtr(vnet.Name))
	Expect(vnet.Properties.Subnets).ToNot(BeEmpty(), "virtual network %s has no subnets", lo.FromPtr(vnet.Name))
	return vnet.Properties.Subnets[0]
}

// firstVNETInRG pages through the virtual networks listed for vnetRG and
// returns the first entry of the first non-empty page. A listing failure is
// wrapped and returned; an error is also returned when every page is empty.
//
// This works for a managed vnet; it hasn't been tested on a custom vnet.
func firstVNETInRG(ctx context.Context, client *armnetwork.VirtualNetworksClient, vnetRG string) (*armnetwork.VirtualNetwork, error) {
	for pager := client.NewListPager(vnetRG, nil); pager.More(); {
		page, err := pager.NextPage(ctx)
		if err != nil {
			return nil, fmt.Errorf("failed to list virtual networks: %w", err)
		}

		vnets := page.VirtualNetworkListResult.Value
		if len(vnets) == 0 {
			continue
		}
		return vnets[0], nil
	}
	return nil, fmt.Errorf("no virtual networks found in resource group: %s", vnetRG)
}
5 changes: 1 addition & 4 deletions test/suites/acr/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ package acr

import (
"fmt"
"os"
"testing"
"time"

Expand All @@ -43,9 +42,7 @@ func TestAcr(t *testing.T) {
RegisterFailHandler(Fail)
BeforeSuite(func() {
env = azure.NewEnvironment(t)
acrName := os.Getenv("AZURE_ACR_NAME")
Expect(acrName).NotTo(BeEmpty(), "AZURE_ACR_NAME must be set for the acr test suite")
pauseImage = fmt.Sprintf("%s.azurecr.io/pause:3.6", acrName)
pauseImage = fmt.Sprintf("%s.azurecr.io/pause:3.6", env.ACRName)
})
RunSpecs(t, "Acr")
}
Expand Down
48 changes: 48 additions & 0 deletions test/suites/nodeclaim/azuregarbagecollection_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
Portions Copyright (c) Microsoft Corporation.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package nodeclaim_test

import (
. "github.com/onsi/ginkgo/v2"
"github.com/samber/lo"

"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork"
azkarptest "github.com/Azure/karpenter-provider-azure/pkg/test"
)

// Garbage collection spec: create a network interface directly through the
// Azure API, with no node claim behind it, and expect it to be removed.
var _ = Describe("gc", func() {
	It("should garbage collect network interfaces created by karpenter", func() {
		// presumably marks the NIC as managed by the "default" nodepool — confirm against ManagedTags
		tags := azkarptest.ManagedTags("default")
		subnet := env.GetClusterSubnet()

		ipConfig := &armnetwork.InterfaceIPConfiguration{
			Name: lo.ToPtr("ip-config"),
			Properties: &armnetwork.InterfaceIPConfigurationPropertiesFormat{
				Primary:                   lo.ToPtr(true),
				Subnet:                    subnet,
				PrivateIPAllocationMethod: lo.ToPtr(armnetwork.IPAllocationMethodDynamic),
			},
		}
		orphan := armnetwork.Interface{
			Name:     lo.ToPtr("orphan-nic"),
			Location: lo.ToPtr(env.Region),
			Tags:     tags,
			Properties: &armnetwork.InterfacePropertiesFormat{
				IPConfigurations: []*armnetwork.InterfaceIPConfiguration{ipConfig},
			},
		}

		env.ExpectCreatedInterface(orphan)
		env.EventuallyExpectKarpenterNicsToBeDeleted()
	})
})

0 comments on commit 11f23ea

Please sign in to comment.