From 35ba89efbf7fbedb291b6622bec94eef43b67243 Mon Sep 17 00:00:00 2001 From: karampok Date: Wed, 4 Dec 2024 17:03:02 +0100 Subject: [PATCH] Add zebra -K argument and extend e2e testing To test that during the graceful restart e2e test we must look for the routes on the node because we can ask frr.vtysh during the restart. Signed-off-by: karampok --- charts/frr-k8s/templates/controller.yaml | 2 +- config/all-in-one/frr-k8s-prometheus.yaml | 2 +- config/all-in-one/frr-k8s.yaml | 2 +- config/frr-k8s/frr-cm.yaml | 2 +- e2etests/go.work.sum | 3 +- e2etests/pkg/config/from_containers.go | 11 ++ e2etests/tests/graceful_restart.go | 137 +++++++++++++++++++++- 7 files changed, 148 insertions(+), 11 deletions(-) diff --git a/charts/frr-k8s/templates/controller.yaml b/charts/frr-k8s/templates/controller.yaml index 1dedcf2e..96f44c85 100644 --- a/charts/frr-k8s/templates/controller.yaml +++ b/charts/frr-k8s/templates/controller.yaml @@ -49,7 +49,7 @@ data: # Check /etc/pam.d/frr if you intend to use "vtysh"! # vtysh_enable=yes - zebra_options=" -A 127.0.0.1 -s 90000000" + zebra_options=" -A 127.0.0.1 -K 120 -s 90000000" bgpd_options=" -A 127.0.0.1 {{ if not .Values.frrk8s.frr.acceptIncomingBGPConnections }} -p 0 {{- end }}" ospfd_options=" -A 127.0.0.1" ospf6d_options=" -A ::1" diff --git a/config/all-in-one/frr-k8s-prometheus.yaml b/config/all-in-one/frr-k8s-prometheus.yaml index 0216b3b8..be853022 100644 --- a/config/all-in-one/frr-k8s-prometheus.yaml +++ b/config/all-in-one/frr-k8s-prometheus.yaml @@ -835,7 +835,7 @@ data: # Check /etc/pam.d/frr if you intend to use "vtysh"! # vtysh_enable=yes - zebra_options=" -A 127.0.0.1 -s 90000000" + zebra_options=" -A 127.0.0.1 -K 120 -s 90000000" bgpd_options=" -A 127.0.0.1 -p 0" ospfd_options=" -A 127.0.0.1" ospf6d_options=" -A ::1" diff --git a/config/all-in-one/frr-k8s.yaml b/config/all-in-one/frr-k8s.yaml index a8ee5552..5fa45cb8 100644 --- a/config/all-in-one/frr-k8s.yaml +++ b/config/all-in-one/frr-k8s.yaml @@ -804,7 +804,7 @@ data: # Check /etc/pam.d/frr if you intend to use "vtysh"! # vtysh_enable=yes - zebra_options=" -A 127.0.0.1 -s 90000000" + zebra_options=" -A 127.0.0.1 -K 120 -s 90000000" bgpd_options=" -A 127.0.0.1 -p 0" ospfd_options=" -A 127.0.0.1" ospf6d_options=" -A ::1" diff --git a/config/frr-k8s/frr-cm.yaml b/config/frr-k8s/frr-cm.yaml index 5cf57aa9..ef176f72 100644 --- a/config/frr-k8s/frr-cm.yaml +++ b/config/frr-k8s/frr-cm.yaml @@ -45,7 +45,7 @@ data: # Check /etc/pam.d/frr if you intend to use "vtysh"! # vtysh_enable=yes - zebra_options=" -A 127.0.0.1 -s 90000000" + zebra_options=" -A 127.0.0.1 -K 120 -s 90000000" bgpd_options=" -A 127.0.0.1 -p 0" ospfd_options=" -A 127.0.0.1" ospf6d_options=" -A ::1" diff --git a/e2etests/go.work.sum b/e2etests/go.work.sum index 48b1c9bb..cf4af584 100644 --- a/e2etests/go.work.sum +++ b/e2etests/go.work.sum @@ -818,6 +818,7 @@ github.com/moby/spdystream v0.2.0/go.mod h1:f7i0iNDQJ059oMTcWxx8MA/zKFIuD/lY+0Gq github.com/moby/spdystream v0.4.0/go.mod h1:xBAYlnt/ay+11ShkdFKNAG7LsyK/tmNBVvVOwrfMgdI= github.com/moby/sys/mountinfo v0.5.0 h1:2Ks8/r6lopsxWi9m58nlwjaeSzUX9iiL1vj5qB/9ObI= github.com/moby/sys/mountinfo v0.6.2/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI= +github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0= github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= @@ -1517,8 +1518,6 @@ k8s.io/code-generator v0.29.3/go.mod h1:x47ofBhN4gxYFcxeKA1PYXeaPreAGaDN85Y/lNUs k8s.io/code-generator v0.30.2/go.mod h1:RQP5L67QxqgkVquk704CyvWFIq0e6RCMmLTXxjE8dVA= k8s.io/code-generator v0.31.0/go.mod h1:84y4w3es8rOJOUUP1rLsIiGlO1JuEaPFXQPA9e/K6U0= k8s.io/component-base v0.29.0/go.mod h1:sADonFTQ9Zc9yFLghpDpmNXEdHyQmFIGbiuZbqAXQ1M= -k8s.io/component-base v0.31.0 h1:/KIzGM5EvPNQcYgwq5NwoQBaOlVFrghoVGr8lG6vNRs= -k8s.io/component-base v0.31.0/go.mod h1:TYVuzI1QmN4L5ItVdMSXKvH7/DtvIuas5/mm8YT3rTo= k8s.io/component-helpers v0.28.3 h1:te9ieTGzcztVktUs92X53P6BamAoP73MK0qQP0WmDqc= k8s.io/component-helpers v0.28.3/go.mod h1:oJR7I9ist5UAQ3y/CTdbw6CXxdMZ1Lw2Ua/EZEwnVLs= k8s.io/controller-manager v0.26.0/go.mod h1:GxUYtQDBE/RHh7AnZSZqwi2xBPIXlOaWsnDLflKGYrE= diff --git a/e2etests/pkg/config/from_containers.go b/e2etests/pkg/config/from_containers.go index 0e9ceaed..68188453 100644 --- a/e2etests/pkg/config/from_containers.go +++ b/e2etests/pkg/config/from_containers.go @@ -103,6 +103,17 @@ func EnableGracefulRestart(pc *PeersConfig) { } } +func EnableReceiveAllowAll(pc *PeersConfig) { + t := pc.PeersV4 + for i := 0; i < len(t); i++ { + t[i].Neigh.ToReceive.Allowed.Mode = frrk8sv1beta1.AllowAll + } + t = pc.PeersV6 + for i := 0; i < len(t); i++ { + t[i].Neigh.ToReceive.Allowed.Mode = frrk8sv1beta1.AllowAll + } +} + func EnableAllowAll(pc *PeersConfig) { t := pc.PeersV4 for i := 0; i < len(t); i++ { diff --git a/e2etests/tests/graceful_restart.go b/e2etests/tests/graceful_restart.go index cfea006e..288bce47 100644 --- a/e2etests/tests/graceful_restart.go +++ b/e2etests/tests/graceful_restart.go @@ -3,7 +3,9 @@ package tests import ( + "encoding/json" "fmt" + "net/netip" "time" "github.com/onsi/ginkgo/v2" @@ -11,6 +13,7 @@ import ( "go.universe.tf/e2etest/pkg/frr/container" frrk8sv1beta1 "github.com/metallb/frr-k8s/api/v1beta1" + "github.com/metallb/frrk8stests/pkg/address" "github.com/metallb/frrk8stests/pkg/config" "github.com/metallb/frrk8stests/pkg/dump" "github.com/metallb/frrk8stests/pkg/infra" @@ -18,6 +21,7 @@ import ( "github.com/metallb/frrk8stests/pkg/k8sclient" "github.com/metallb/frrk8stests/pkg/routes" . "github.com/onsi/gomega" + "go.universe.tf/e2etest/pkg/executor" frrconfig "go.universe.tf/e2etest/pkg/frr/config" "go.universe.tf/e2etest/pkg/ipfamily" corev1 "k8s.io/api/core/v1" @@ -68,19 +72,25 @@ var _ = ginkgo.Describe("Establish BGP session with EnableGracefulRestart", func dump.K8sInfo(testName, reporter) dump.BGPInfo(testName, infra.FRRContainers, cs) } + + err := cleanup(updater) + Expect(err).NotTo(HaveOccurred(), "cleanup config in API and infra containers") }) ginkgo.Context("When restarting the frrk8s deamon pods", func() { + ginkgo.DescribeTable("routes are maintained", func(ipFam ipfamily.Family, prefix, learnRoute string) { - ginkgo.DescribeTable("external BGP peer maintains routes", func(ipFam ipfamily.Family, prefix string) { frrs := config.ContainersForVRF(infra.FRRContainers, "") for _, c := range frrs { - err := container.PairWithNodes(cs, c, ipFam) + err := container.PairWithNodes(cs, c, ipFam, func(frr *container.FRR) { + frr.NeighborConfig.ToAdvertiseV4 = address.FilterForFamily([]string{learnRoute}, ipFam) + frr.NeighborConfig.ToAdvertiseV6 = address.FilterForFamily([]string{learnRoute}, ipFam) + }) Expect(err).NotTo(HaveOccurred(), "set frr config in infra containers failed") } peersConfig := config.PeersForContainers(frrs, ipFam, - config.EnableAllowAll, config.EnableGracefulRestart) + config.EnableAllowAll, config.EnableReceiveAllowAll, config.EnableGracefulRestart) frrConfigCR := frrk8sv1beta1.FRRConfiguration{ ObjectMeta: metav1.ObjectMeta{ @@ -110,12 +120,16 @@ var _ = ginkgo.Describe("Establish BGP session with EnableGracefulRestart", func return fmt.Errorf("Neigh %s does not have prefix %s: %w", p.FRR.Name, prefix, err) } } + if err := checkPrefixOnNodes(nodes, learnRoute); err != nil { + return err + } return nil } Eventually(check, time.Minute, time.Second).ShouldNot(HaveOccurred(), "route should exist before we restart frr-k8s") + ginkgo.By("GR started") c := make(chan struct{}) go func() { // go restart frr-k8s while Consistently check that route exists defer ginkgo.GinkgoRecover() @@ -128,8 +142,121 @@ var _ = ginkgo.Describe("Establish BGP session with EnableGracefulRestart", func Consistently(check, 2*time.Minute, time.Second).ShouldNot(HaveOccurred()) Eventually(c, time.Minute, time.Second).Should(BeClosed(), "restart FRRK8s pods are not yet ready") }, - ginkgo.Entry("IPV4", ipfamily.IPv4, "192.168.2.0/24"), - ginkgo.Entry("IPV6", ipfamily.IPv6, "fc00:f853:ccd:e799::/64"), + ginkgo.Entry("IPV4", ipfamily.IPv4, "192.168.2.0/24", "200.200.200.0/24"), + ginkgo.Entry("IPV6", ipfamily.IPv6, "fc00:f853:ccd:e799::/64", "2001:db8::/64"), ) }) }) + +// checkPrefixOnNodes checks that a prefix has at least on route on the +// each node. There is no check on nexthop because not all routes are install +// on the host (iBGP yes, eBGP no when iBGP exist) and there is a different +// behavior between ipv4,ipv6. +func checkPrefixOnNodes(nodes []corev1.Node, prefix string) error { + want, err := netip.ParsePrefix(prefix) + if err != nil { + return err + } + + for _, n := range nodes { + exc := executor.ForContainer(n.Name) + m, err := BGPRoutes(exc, "eth0") + if err != nil { + return err + } + if _, exist := m[want]; !exist { + return fmt.Errorf("local k8s node route %s was not found", prefix) + } + } + return nil +} + +// BGPRoutes executes `ip route show proto bgp` in the executor and returns all +// routes filtered by device e.g. net0. The return is map[destination CIDR]-> set[nextHops]. +// The return json from the kernel varies depend if ipv4 with single entry, or +// ipv4 with multiple entries or ipv6. The use of maps, and using keys the types +// netip.{Prefix,Addr} allows direct comparison. +// TODO: move this to e2etest pkg in metallb repo +func BGPRoutes(exc executor.Executor, dev string) (map[netip.Prefix]map[netip.Addr]struct{}, error) { + ret := make(map[netip.Prefix]map[netip.Addr]struct{}) + + type Nexthop struct { + Gateway string `json:"gateway"` + Dev string `json:"dev"` + Weight int `json:"weight"` + Flags []string `json:"flags"` + } + + type IPRoute struct { + Dst string `json:"dst"` + Nexthops []Nexthop `json:"nexthops,omitempty"` + Gateway string `json:"gateway,omitempty"` + Via *struct { + Family string `json:"family"` + Host string `json:"host"` + } `json:"via,omitempty"` + Dev string `json:"dev"` + } + + for _, proto := range []string{"-4", "-6"} { + out, err := exc.Exec("ip", proto, "--json", "route", "show", "proto", "bgp") + if err != nil { + return nil, fmt.Errorf("%s - %w", out, err) + } + + var routes []IPRoute + err = json.Unmarshal([]byte(out), &routes) + if err != nil { + return nil, fmt.Errorf("failed to parse JSON output: %w", err) + } + + for _, r := range routes { + dst, err := netip.ParsePrefix(r.Dst) + if err != nil { + return nil, fmt.Errorf("invalid prefix %s: %w", r.Dst, err) + } + + nextHops := make(map[netip.Addr]struct{}) + // this is for ipv4 single next-hop + if r.Via != nil { + addr, err := netip.ParseAddr(r.Via.Host) + if err != nil { + return nil, fmt.Errorf("invalid next-hop %s: %w", r.Via.Host, err) + } + if r.Dev == dev { + nextHops[addr] = struct{}{} + } + } + + // this is for ipv4 multiple next-hops + for _, nh := range r.Nexthops { + addr, err := netip.ParseAddr(nh.Gateway) + if err != nil { + return nil, fmt.Errorf("invalid next-hop %s: %w", nh.Gateway, err) + } + + if nh.Dev == dev { + nextHops[addr] = struct{}{} + } + } + + // this is for ipv6 + if r.Gateway != "" { + addr, err := netip.ParseAddr(r.Gateway) + if err != nil { + return nil, fmt.Errorf("invalid next-hop %s: %w", r.Gateway, err) + } + if r.Dev == dev { + nextHops[addr] = struct{}{} + } + } + + if len(nextHops) == 0 { + continue + } + ret[dst] = nextHops + } + } + + return ret, nil +}