diff --git a/charts/frr-k8s/templates/controller.yaml b/charts/frr-k8s/templates/controller.yaml index 1dedcf2e..96f44c85 100644 --- a/charts/frr-k8s/templates/controller.yaml +++ b/charts/frr-k8s/templates/controller.yaml @@ -49,7 +49,7 @@ data: # Check /etc/pam.d/frr if you intend to use "vtysh"! # vtysh_enable=yes - zebra_options=" -A 127.0.0.1 -s 90000000" + zebra_options=" -A 127.0.0.1 -K 120 -s 90000000" bgpd_options=" -A 127.0.0.1 {{ if not .Values.frrk8s.frr.acceptIncomingBGPConnections }} -p 0 {{- end }}" ospfd_options=" -A 127.0.0.1" ospf6d_options=" -A ::1" diff --git a/config/all-in-one/frr-k8s-prometheus.yaml b/config/all-in-one/frr-k8s-prometheus.yaml index 0216b3b8..be853022 100644 --- a/config/all-in-one/frr-k8s-prometheus.yaml +++ b/config/all-in-one/frr-k8s-prometheus.yaml @@ -835,7 +835,7 @@ data: # Check /etc/pam.d/frr if you intend to use "vtysh"! # vtysh_enable=yes - zebra_options=" -A 127.0.0.1 -s 90000000" + zebra_options=" -A 127.0.0.1 -K 120 -s 90000000" bgpd_options=" -A 127.0.0.1 -p 0" ospfd_options=" -A 127.0.0.1" ospf6d_options=" -A ::1" diff --git a/config/all-in-one/frr-k8s.yaml b/config/all-in-one/frr-k8s.yaml index a8ee5552..5fa45cb8 100644 --- a/config/all-in-one/frr-k8s.yaml +++ b/config/all-in-one/frr-k8s.yaml @@ -804,7 +804,7 @@ data: # Check /etc/pam.d/frr if you intend to use "vtysh"! # vtysh_enable=yes - zebra_options=" -A 127.0.0.1 -s 90000000" + zebra_options=" -A 127.0.0.1 -K 120 -s 90000000" bgpd_options=" -A 127.0.0.1 -p 0" ospfd_options=" -A 127.0.0.1" ospf6d_options=" -A ::1" diff --git a/config/frr-k8s/frr-cm.yaml b/config/frr-k8s/frr-cm.yaml index 5cf57aa9..ef176f72 100644 --- a/config/frr-k8s/frr-cm.yaml +++ b/config/frr-k8s/frr-cm.yaml @@ -45,7 +45,7 @@ data: # Check /etc/pam.d/frr if you intend to use "vtysh"! # vtysh_enable=yes - zebra_options=" -A 127.0.0.1 -s 90000000" + zebra_options=" -A 127.0.0.1 -K 120 -s 90000000" bgpd_options=" -A 127.0.0.1 -p 0" ospfd_options=" -A 127.0.0.1" ospf6d_options=" -A ::1" diff --git a/e2etests/go.work.sum b/e2etests/go.work.sum index 48b1c9bb..cf4af584 100644 --- a/e2etests/go.work.sum +++ b/e2etests/go.work.sum @@ -818,6 +818,7 @@ github.com/moby/spdystream v0.2.0/go.mod h1:f7i0iNDQJ059oMTcWxx8MA/zKFIuD/lY+0Gq github.com/moby/spdystream v0.4.0/go.mod h1:xBAYlnt/ay+11ShkdFKNAG7LsyK/tmNBVvVOwrfMgdI= github.com/moby/sys/mountinfo v0.5.0 h1:2Ks8/r6lopsxWi9m58nlwjaeSzUX9iiL1vj5qB/9ObI= github.com/moby/sys/mountinfo v0.6.2/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI= +github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0= github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= @@ -1517,8 +1518,6 @@ k8s.io/code-generator v0.29.3/go.mod h1:x47ofBhN4gxYFcxeKA1PYXeaPreAGaDN85Y/lNUs k8s.io/code-generator v0.30.2/go.mod h1:RQP5L67QxqgkVquk704CyvWFIq0e6RCMmLTXxjE8dVA= k8s.io/code-generator v0.31.0/go.mod h1:84y4w3es8rOJOUUP1rLsIiGlO1JuEaPFXQPA9e/K6U0= k8s.io/component-base v0.29.0/go.mod h1:sADonFTQ9Zc9yFLghpDpmNXEdHyQmFIGbiuZbqAXQ1M= -k8s.io/component-base v0.31.0 h1:/KIzGM5EvPNQcYgwq5NwoQBaOlVFrghoVGr8lG6vNRs= -k8s.io/component-base v0.31.0/go.mod h1:TYVuzI1QmN4L5ItVdMSXKvH7/DtvIuas5/mm8YT3rTo= k8s.io/component-helpers v0.28.3 h1:te9ieTGzcztVktUs92X53P6BamAoP73MK0qQP0WmDqc= k8s.io/component-helpers v0.28.3/go.mod h1:oJR7I9ist5UAQ3y/CTdbw6CXxdMZ1Lw2Ua/EZEwnVLs= k8s.io/controller-manager v0.26.0/go.mod h1:GxUYtQDBE/RHh7AnZSZqwi2xBPIXlOaWsnDLflKGYrE= diff --git a/e2etests/pkg/config/from_containers.go b/e2etests/pkg/config/from_containers.go index 0e9ceaed..68188453 100644 --- a/e2etests/pkg/config/from_containers.go +++ b/e2etests/pkg/config/from_containers.go @@ -103,6 +103,17 @@ func EnableGracefulRestart(pc *PeersConfig) { } } +func EnableReceiveAllowAll(pc *PeersConfig) { + t := pc.PeersV4 + for i := 0; i < len(t); i++ { + t[i].Neigh.ToReceive.Allowed.Mode = frrk8sv1beta1.AllowAll + } + t = pc.PeersV6 + for i := 0; i < len(t); i++ { + t[i].Neigh.ToReceive.Allowed.Mode = frrk8sv1beta1.AllowAll + } +} + func EnableAllowAll(pc *PeersConfig) { t := pc.PeersV4 for i := 0; i < len(t); i++ { diff --git a/e2etests/tests/graceful_restart.go b/e2etests/tests/graceful_restart.go index cfea006e..288bce47 100644 --- a/e2etests/tests/graceful_restart.go +++ b/e2etests/tests/graceful_restart.go @@ -3,7 +3,9 @@ package tests import ( + "encoding/json" "fmt" + "net/netip" "time" "github.com/onsi/ginkgo/v2" @@ -11,6 +13,7 @@ import ( "go.universe.tf/e2etest/pkg/frr/container" frrk8sv1beta1 "github.com/metallb/frr-k8s/api/v1beta1" + "github.com/metallb/frrk8stests/pkg/address" "github.com/metallb/frrk8stests/pkg/config" "github.com/metallb/frrk8stests/pkg/dump" "github.com/metallb/frrk8stests/pkg/infra" @@ -18,6 +21,7 @@ import ( "github.com/metallb/frrk8stests/pkg/k8sclient" "github.com/metallb/frrk8stests/pkg/routes" . "github.com/onsi/gomega" + "go.universe.tf/e2etest/pkg/executor" frrconfig "go.universe.tf/e2etest/pkg/frr/config" "go.universe.tf/e2etest/pkg/ipfamily" corev1 "k8s.io/api/core/v1" @@ -68,19 +72,25 @@ var _ = ginkgo.Describe("Establish BGP session with EnableGracefulRestart", func dump.K8sInfo(testName, reporter) dump.BGPInfo(testName, infra.FRRContainers, cs) } + + err := cleanup(updater) + Expect(err).NotTo(HaveOccurred(), "cleanup config in API and infra containers") }) ginkgo.Context("When restarting the frrk8s deamon pods", func() { + ginkgo.DescribeTable("routes are maintained", func(ipFam ipfamily.Family, prefix, learnRoute string) { - ginkgo.DescribeTable("external BGP peer maintains routes", func(ipFam ipfamily.Family, prefix string) { frrs := config.ContainersForVRF(infra.FRRContainers, "") for _, c := range frrs { - err := container.PairWithNodes(cs, c, ipFam) + err := container.PairWithNodes(cs, c, ipFam, func(frr *container.FRR) { + frr.NeighborConfig.ToAdvertiseV4 = address.FilterForFamily([]string{learnRoute}, ipFam) + frr.NeighborConfig.ToAdvertiseV6 = address.FilterForFamily([]string{learnRoute}, ipFam) + }) Expect(err).NotTo(HaveOccurred(), "set frr config in infra containers failed") } peersConfig := config.PeersForContainers(frrs, ipFam, - config.EnableAllowAll, config.EnableGracefulRestart) + config.EnableAllowAll, config.EnableReceiveAllowAll, config.EnableGracefulRestart) frrConfigCR := frrk8sv1beta1.FRRConfiguration{ ObjectMeta: metav1.ObjectMeta{ @@ -110,12 +120,16 @@ var _ = ginkgo.Describe("Establish BGP session with EnableGracefulRestart", func return fmt.Errorf("Neigh %s does not have prefix %s: %w", p.FRR.Name, prefix, err) } } + if err := checkPrefixOnNodes(nodes, learnRoute); err != nil { + return err + } return nil } Eventually(check, time.Minute, time.Second).ShouldNot(HaveOccurred(), "route should exist before we restart frr-k8s") + ginkgo.By("GR started") c := make(chan struct{}) go func() { // go restart frr-k8s while Consistently check that route exists defer ginkgo.GinkgoRecover() @@ -128,8 +142,121 @@ var _ = ginkgo.Describe("Establish BGP session with EnableGracefulRestart", func Consistently(check, 2*time.Minute, time.Second).ShouldNot(HaveOccurred()) Eventually(c, time.Minute, time.Second).Should(BeClosed(), "restart FRRK8s pods are not yet ready") }, - ginkgo.Entry("IPV4", ipfamily.IPv4, "192.168.2.0/24"), - ginkgo.Entry("IPV6", ipfamily.IPv6, "fc00:f853:ccd:e799::/64"), + ginkgo.Entry("IPV4", ipfamily.IPv4, "192.168.2.0/24", "200.200.200.0/24"), + ginkgo.Entry("IPV6", ipfamily.IPv6, "fc00:f853:ccd:e799::/64", "2001:db8::/64"), ) }) }) + +// checkPrefixOnNodes checks that a prefix has at least on route on the +// each node. There is no check on nexthop because not all routes are install +// on the host (iBGP yes, eBGP no when iBGP exist) and there is a different +// behavior between ipv4,ipv6. +func checkPrefixOnNodes(nodes []corev1.Node, prefix string) error { + want, err := netip.ParsePrefix(prefix) + if err != nil { + return err + } + + for _, n := range nodes { + exc := executor.ForContainer(n.Name) + m, err := BGPRoutes(exc, "eth0") + if err != nil { + return err + } + if _, exist := m[want]; !exist { + return fmt.Errorf("local k8s node route %s was not found", prefix) + } + } + return nil +} + +// BGPRoutes executes `ip route show proto bgp` in the executor and returns all +// routes filtered by device e.g. net0. The return is map[destination CIDR]-> set[nextHops]. +// The return json from the kernel varies depend if ipv4 with single entry, or +// ipv4 with multiple entries or ipv6. The use of maps, and using keys the types +// netip.{Prefix,Addr} allows direct comparison. +// TODO: move this to e2etest pkg in metallb repo +func BGPRoutes(exc executor.Executor, dev string) (map[netip.Prefix]map[netip.Addr]struct{}, error) { + ret := make(map[netip.Prefix]map[netip.Addr]struct{}) + + type Nexthop struct { + Gateway string `json:"gateway"` + Dev string `json:"dev"` + Weight int `json:"weight"` + Flags []string `json:"flags"` + } + + type IPRoute struct { + Dst string `json:"dst"` + Nexthops []Nexthop `json:"nexthops,omitempty"` + Gateway string `json:"gateway,omitempty"` + Via *struct { + Family string `json:"family"` + Host string `json:"host"` + } `json:"via,omitempty"` + Dev string `json:"dev"` + } + + for _, proto := range []string{"-4", "-6"} { + out, err := exc.Exec("ip", proto, "--json", "route", "show", "proto", "bgp") + if err != nil { + return nil, fmt.Errorf("%s - %w", out, err) + } + + var routes []IPRoute + err = json.Unmarshal([]byte(out), &routes) + if err != nil { + return nil, fmt.Errorf("failed to parse JSON output: %w", err) + } + + for _, r := range routes { + dst, err := netip.ParsePrefix(r.Dst) + if err != nil { + return nil, fmt.Errorf("invalid prefix %s: %w", r.Dst, err) + } + + nextHops := make(map[netip.Addr]struct{}) + // this is for ipv4 single next-hop + if r.Via != nil { + addr, err := netip.ParseAddr(r.Via.Host) + if err != nil { + return nil, fmt.Errorf("invalid next-hop %s: %w", r.Via.Host, err) + } + if r.Dev == dev { + nextHops[addr] = struct{}{} + } + } + + // this is for ipv4 multiple next-hops + for _, nh := range r.Nexthops { + addr, err := netip.ParseAddr(nh.Gateway) + if err != nil { + return nil, fmt.Errorf("invalid next-hop %s: %w", nh.Gateway, err) + } + + if nh.Dev == dev { + nextHops[addr] = struct{}{} + } + } + + // this is for ipv6 + if r.Gateway != "" { + addr, err := netip.ParseAddr(r.Gateway) + if err != nil { + return nil, fmt.Errorf("invalid next-hop %s: %w", r.Gateway, err) + } + if r.Dev == dev { + nextHops[addr] = struct{}{} + } + } + + if len(nextHops) == 0 { + continue + } + ret[dst] = nextHops + } + } + + return ret, nil +}