diff --git a/bpf/xdp_udp.bpf.c b/bpf/xdp_udp.bpf.c
new file mode 100644
index 0000000..a49e0d1
--- /dev/null
+++ b/bpf/xdp_udp.bpf.c
@@ -0,0 +1,264 @@
+// +build ignore
+
+// NOTE(review): the header names were stripped from this copy of the patch.
+// The eight below declare the identifiers this file uses and resolve through
+// the -I../libbpf/src include path; confirm them against the original commit.
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/types.h>
+#include <linux/udp.h>
+#include <bpf_endian.h>
+#include <bpf_helpers.h>
+
+char __license[] SEC("license") = "GPL";
+
+#ifndef memcpy
+ #define memcpy(dest, src, n) __builtin_memcpy((dest), (src), (n))
+#endif
+
+#define MAX_BACKENDS 128
+// Most bytes of one UDP datagram (header + payload) that udp_checksum() sums.
+// It gives the checksum loop a constant bound.
+#define MAX_UDP_LENGTH 1480
+
+// Number of 4-bit nibbles in the UDP payload for an on-wire UDP length field x.
+// udp_checksum() does not use it; it is kept for any other user of the macro.
+#define UDP_PAYLOAD_SIZE(x) (unsigned int)(((bpf_ntohs(x) - sizeof(struct udphdr)) * 8 ) / 4)
+
+// Splits ip into its four bytes, least-significant byte first, storing one
+// byte value per element of buf. buf must have room for four elements.
+static __always_inline void ip_from_int(__u32 *buf, __be32 ip) {
+    buf[0] = (ip >> 0) & 0xFF;
+    buf[1] = (ip >> 8) & 0xFF;
+    buf[2] = (ip >> 16) & 0xFF;
+    buf[3] = (ip >> 24) & 0xFF;
+}
+
+// Prints ip to the BPF trace log as four dot-separated decimal numbers.
+static __always_inline void bpf_printk_ip(__be32 ip) {
+    __u32 ip_parts[4];
+    ip_from_int(ip_parts, ip);
+    bpf_printk("%d.%d.%d.%d", ip_parts[0], ip_parts[1], ip_parts[2], ip_parts[3]);
+}
+
+// Folds a wide ones'-complement sum down to 16 bits by adding the carries back
+// in (at most four rounds), then returns its bitwise complement.
+static __always_inline __u16 csum_fold_helper(__u64 csum) {
+    int i;
+#pragma unroll
+    for (i = 0; i < 4; i++) {
+        if (csum >> 16) {
+            csum = (csum & 0xffff) + (csum >> 16);
+        }
+    }
+    return ~csum;
+}
+
+// Zeroes iph->check and returns the checksum of the first sizeof(struct iphdr)
+// bytes at iph. IP options, if any, are not covered.
+static __always_inline __u16 iph_csum(struct iphdr *iph) {
+    iph->check = 0;
+    unsigned long long csum = bpf_csum_diff(0, 0, (unsigned int *)iph, sizeof(struct iphdr), 0);
+    return csum_fold_helper(csum);
+}
+
+// Zeroes udp->check and returns the UDP checksum over the IPv4 pseudo header and
+// the datagram (header + payload) that starts at udp.
+//
+// The length comes from udp->len. Bytes after the datagram, such as Ethernet
+// padding, are left out. At most MAX_UDP_LENGTH bytes are summed, and summing
+// stops early at data_end.
+static __always_inline __u16 udp_checksum(struct iphdr *ip, struct udphdr *udp, void *data_end) {
+    udp->check = 0;
+
+    // 32 bits leave room for the carries; csum_fold_helper() adds them back in.
+    __u32 csum_total = 0;
+    __u16 *buf = (void *)udp;
+
+    // Pseudo header. Every word is added as it sits in memory (network byte
+    // order), so constants go through bpf_htons() to match on either endianness.
+    csum_total += (__u16)ip->saddr;
+    csum_total += (__u16)(ip->saddr >> 16);
+    csum_total += (__u16)ip->daddr;
+    csum_total += (__u16)(ip->daddr >> 16);
+    csum_total += bpf_htons(ip->protocol);
+    csum_total += udp->len;
+
+    // Bytes in the UDP header + payload.
+    __u32 udp_len = bpf_ntohs(udp->len);
+
+    // Add every complete 16-bit word of the datagram.
+    for (__u32 i = 0; i + 1 < MAX_UDP_LENGTH; i += 2) {
+        if (i + 1 >= udp_len) {
+            break;
+        }
+
+        if ((void *)(buf + 1) > data_end) {
+            break;
+        }
+        csum_total += *buf;
+        buf++;
+    }
+
+    // An odd-length datagram ends in one byte, added as the first byte of a word
+    // whose second byte is zero.
+    if ((udp_len & 1) && (void *)buf + 1 <= data_end) {
+        csum_total += bpf_htons((__u16)(*(__u8 *)buf) << 8);
+    }
+
+    __u16 csum = csum_fold_helper(csum_total);
+
+    // RFC 768: a computed checksum of zero is sent as all ones, because zero in
+    // the header means that no checksum was generated.
+    return csum ? csum : 0xffff;
+}
+
+// Rewrite rule applied to packets that match a vip_key.
+struct backend {
+    __u32 saddr;      // new IPv4 source, copied into the header as is
+    __u32 daddr;      // new IPv4 destination, copied into the header as is
+    __u16 dport;      // new UDP destination port, host byte order
+    __u8 shwaddr[6];  // new Ethernet source address
+    __u8 dhwaddr[6];  // new Ethernet destination address
+    __u16 ifindex;    // interface the packet is redirected to
+    // Cksum isn't required for UDP see:
+    // https://en.wikipedia.org/wiki/User_Datagram_Protocol
+    __u8 nocksum;     // non-zero: leave the UDP checksum field set to 0
+    __u8 pad[3];
+};
+
+// Map key: the address and UDP port a packet was originally sent to.
+struct vip_key {
+    __u32 vip;   // IPv4 destination as it appears in the header
+    __u16 port;  // UDP destination port, host byte order
+    __u8 pad[2];
+};
+
+struct {
+    __uint(type, BPF_MAP_TYPE_HASH);
+    __uint(max_entries, MAX_BACKENDS);
+    __type(key, struct vip_key);
+    __type(value, struct backend);
+} backends SEC(".maps");
+
+// Rewrites UDP/IPv4 packets sent to a configured vip:port so that they reach
+// the matching backend, then redirects them out of the backend's interface.
+// Other well-formed traffic is passed to the kernel untouched.
+SEC("xdp")
+int xdp_prog_func(struct xdp_md *ctx) {
+    // ---------------------------------------------------------------------------
+    // Initialize
+    // ---------------------------------------------------------------------------
+
+    void *data = (void *)(long)ctx->data;
+    void *data_end = (void *)(long)ctx->data_end;
+
+    struct ethhdr *eth = data;
+    if (data + sizeof(struct ethhdr) > data_end) {
+        bpf_printk("ABORTED: bad ethhdr!");
+        return XDP_ABORTED;
+    }
+
+    if (bpf_ntohs(eth->h_proto) != ETH_P_IP) {
+        bpf_printk("PASS: not IP protocol!");
+        return XDP_PASS;
+    }
+
+    struct iphdr *ip = data + sizeof(struct ethhdr);
+    if (data + sizeof(struct ethhdr) + sizeof(struct iphdr) > data_end) {
+        bpf_printk("ABORTED: bad iphdr!");
+        return XDP_ABORTED;
+    }
+
+    if (ip->protocol != IPPROTO_UDP) {
+        return XDP_PASS;
+    }
+
+    // The UDP header is located, and the IP checksum recomputed, as if the IP
+    // header were exactly 20 bytes. Packets with IP options go to the kernel.
+    if (ip->ihl != 5) {
+        return XDP_PASS;
+    }
+
+    // A fragment either has no UDP header or carries only part of the datagram
+    // the checksum covers, so fragments go to the kernel as well.
+    // 0x3fff is the "more fragments" flag plus the fragment offset bits.
+    if (ip->frag_off & bpf_htons(0x3fff)) {
+        return XDP_PASS;
+    }
+
+    struct udphdr *udp = data + sizeof(struct ethhdr) + sizeof(struct iphdr);
+    if (data + sizeof(struct ethhdr) + sizeof(struct iphdr) + sizeof(struct udphdr) > data_end) {
+        bpf_printk("ABORTED: bad udphdr!");
+        return XDP_ABORTED;
+    }
+
+    bpf_printk("UDP packet received - daddr:%x, port:%d", ip->daddr, bpf_ntohs(udp->dest));
+
+    // ---------------------------------------------------------------------------
+    // Routing
+    // ---------------------------------------------------------------------------
+
+    struct vip_key key = {
+        .vip = ip->daddr,
+        .port = bpf_ntohs(udp->dest)
+    };
+
+    struct backend *bk;
+    bk = bpf_map_lookup_elem(&backends, &key);
+    if (!bk) {
+        bpf_printk("no backends for ip %x:%x", key.vip, key.port);
+        return XDP_PASS;
+    }
+
+    bpf_printk("got UDP traffic, source address:");
+    bpf_printk_ip(ip->saddr);
+    bpf_printk("destination address:");
+    bpf_printk_ip(ip->daddr);
+
+    ip->saddr = bk->saddr;
+    ip->daddr = bk->daddr;
+
+    bpf_printk("updated saddr to:");
+    bpf_printk_ip(ip->saddr);
+    bpf_printk("updated daddr to:");
+    bpf_printk_ip(ip->daddr);
+
+    // bk->dport is kept in host byte order; the header field is big-endian.
+    __be16 dport = bpf_htons(bk->dport);
+    if (udp->dest != dport) {
+        udp->dest = dport;
+        bpf_printk("updated dport to: %d", bk->dport);
+    }
+
+    memcpy(eth->h_source, bk->shwaddr, sizeof(eth->h_source));
+    bpf_printk("new source hwaddr %x:%x:%x:%x:%x:%x", eth->h_source[0], eth->h_source[1], eth->h_source[2], eth->h_source[3], eth->h_source[4], eth->h_source[5]);
+
+    memcpy(eth->h_dest, bk->dhwaddr, sizeof(eth->h_dest));
+    bpf_printk("new dest hwaddr %x:%x:%x:%x:%x:%x", eth->h_dest[0], eth->h_dest[1], eth->h_dest[2], eth->h_dest[3], eth->h_dest[4], eth->h_dest[5]);
+
+    ip->check = iph_csum(ip);
+    udp->check = 0;
+
+    if (!bk->nocksum) {
+        udp->check = udp_checksum(ip, udp, data_end);
+    }
+
+    bpf_printk("destination interface index %d", bk->ifindex);
+
+    int action = bpf_redirect(bk->ifindex, 0);
+
+    bpf_printk("redirect action: %d", action);
+
+    return action;
+}
+
+// Minimal program for the interface packets are redirected to; it logs and
+// passes every packet.
+SEC("xdp")
+int bpf_redirect_placeholder(struct xdp_md *ctx) {
+    bpf_printk("received a packet on dest interface");
+    return XDP_PASS;
+}
diff --git a/userspace/attach-sklookup.c b/userspace-c/attach-sklookup.c
similarity index 100%
rename from userspace/attach-sklookup.c
rename to userspace-c/attach-sklookup.c
diff --git a/userspace-go/Makefile
b/userspace-go/Makefile
new file mode 100644
index 0000000..59b301c
--- /dev/null
+++ b/userspace-go/Makefile
@@ -0,0 +1,29 @@
+
+
+TAG ?= latest
+
+LIBBPF ?= ../libbpf/src
+CLANG ?= clang
+# Compiler flags exported to `go generate` as BPF_CFLAGS; any CFLAGS from the
+# environment are appended.
+CFLAGS := -O2 -g -Wall -Werror -Wno-unused-value -Wno-pointer-sign -Wcompare-distinct-pointer-types -I$(LIBBPF) $(CFLAGS)
+
+all: build
+
+.PHONY: all clean
+clean:
+	rm -f bpf_bpfeb.go
+	rm -f bpf_bpfeb.o
+	rm -f bpf_bpfel.go
+	rm -f bpf_bpfel.o
+	rm -f blixt-dataplane
+
+.PHONY: generate
+generate: export BPF_CLANG := $(CLANG)
+generate: export BPF_CFLAGS := $(CFLAGS)
+generate:
+	go generate ./...
+
+.PHONY: build
+build: generate
+	go build -o blixt-dataplane
diff --git a/userspace-go/README.md b/userspace-go/README.md
new file mode 100644
index 0000000..2159da6
--- /dev/null
+++ b/userspace-go/README.md
@@ -0,0 +1,191 @@
+# Some helpful hints for debugging this program
+
+## Tracing XDP redirect (on first interface where main XDP program is attached)
+
+(TODO finish tracing the XDP path through the kernel)
+1. Entry at `xdp_do_redirect`
+    - Frags Don't work `xdp_buff_has_frags`
+    - If map == XSKMAP -> `__xdp_do_redirect_xsk`
+    - Returns `__xdp_do_redirect_frame`
+
+2. Entry `__xdp_do_redirect_frame` (Can't trace internal functions?)
+
+
+## Tracing once the packet meets the host end of the veth
+
+(TODO finish tracing the XDP path through the kernel)
+__netif_receive_skb_core
+
+
+## Debugging UDP Checksum issues
+
+We can use tcpdump to see if the checksums are correct once the packets reach the container:
+
+```bash
+sudo tcpdump -vvv -i <interface> -neep udp
+```
+
+`__sum16 __skb_checksum_complete(struct sk_buff *skb)` is the name of the kernel
+function which actually checks the cksum. It can be tracked with `bpftrace`
+and the following kretprobe:
+
+```bash
+kretprobe:__skb_checksum_complete
+{
+    printf("skb_checksum_complete returned: %x\n", retval);
+}
+```
+
+## Manually Calculating UDP Checksums
+
+A UDP cksum is calculated with the following:
+
+```bash
+1's Complement {
+    Source IP +
+    Destination IP +
+    17 (0x0011 - UDP protocol code) +
+    UDP Packet Length +
+    Source Port +
+    Destination Port +
+    UDP Packet Length +
+    Data
+}
+```
+
+A raw tcpdump packet is shown below:
+```bash
+13:23:15.756911 06:56:87:ec:fd:1f > 86:ad:33:29:ff:5e, ethertype IPv4 (0x0800), length 60: (tos 0x0, ttl 57, id 20891, offset 0, flags [DF], proto UDP (17), length 33)
+    10.8.125.12.58980 > 192.168.10.2.sapv1: [bad udp cksum 0xd301 -> 0xaf43!] UDP, length 5
+        0x0000:  86ad 3329 ff5e 0656 87ec fd1f 0800 4500
+        0x0010:  0021 519b 4000 3911 9e72 0a08 7d0c c0a8
+        0x0020:  0a02 e664 2693 000d d301 7465 7374 0a00
+        0x0030:  0000 0000 d2f2 935d 0000 0000
+```
+
+Using this along with our knowledge of a UDP packet we can quickly and manually
+calculate the cksum like so:
+
+```bash
+0x0a08 Src IP (high 16 bits)
+0x7d0c Src IP (low 16 bits)
+0xc0a8 Dst IP (high 16 bits)
+0x0a02 Dst IP (low 16 bits)
+0x0011 Proto
+0x000d Length
+0xe664 Src Port
+0x2693 Dst Port
+0x000d Length
+0x7465 Data
+0x7374 Data
+0x0a00 Data (last payload byte 0x0a, padded with 0x00)
++
+-------------
+50bc (0x350b9 with the carry added back in) -> 1's complement = af43
+```
+
+To play with this same raw data in wireshark we can use the text from the hex dump
+and convert it to the following format. With this in a file you can then
+"Import from hex dump" in wireshark.
+ +```bash +13:23:15 +0000 86 ad 33 29 ff 5e 06 56 87 ec fd 1f 08 00 45 00 +0010 00 21 51 9b 40 00 39 11 9e 72 0a 08 7d 0c c0 a8 +0020 0a 02 e6 64 26 93 00 0d d3 01 74 65 73 74 0a 00 +0030 00 00 00 00 d2 f2 93 5d 00 00 00 00 +``` + +![Above Raw packet shown in wireshark](./wireshark.png) + +## Tracing Non XDP stack (native kernel) with PWRU + +Cilium's [PWRU](https://github.com/cilium/pwru) is a great tool for tracing packets +as they make their way through the linux kernel. It is limited in the fact that it +doesn't really track the XDP stack currently, however it's still super helpful +for debugging other issues. + +### Working Trace (manually re-writing Cksums) + +```bash +0xffff96d3956d4f00 8 [ksoftirqd/8] udp4_gro_receive +0xffff96d3956d4f00 8 [ksoftirqd/8] udp_gro_receive +0xffff96d3956d4f00 8 [ksoftirqd/8] skb_defer_rx_timestamp +0xffff96d3956d4f00 8 [ksoftirqd/8] tpacket_rcv +0xffff96d3956d4f00 8 [ksoftirqd/8] skb_push +0xffff96d3956d4f00 8 [ksoftirqd/8] tpacket_get_timestamp +0xffff96d3956d4f00 8 [ksoftirqd/8] consume_skb +0xffff96d3956d4f00 10 [nc] skb_consume_udp +0xffff96d3956d4f00 10 [nc] skb_consume_udp +0xffff96d3956d4f00 10 [nc] __consume_stateless_skb +0xffff96d3956d4f00 10 [nc] skb_release_data +0xffff96d3956d4f00 10 [nc] skb_free_head +0xffff96d3956d4f00 10 [nc] kfree_skbmem +0xffff96d3956d4f00 8 [ksoftirqd/8] ip_rcv_core +0xffff96d3956d4f00 8 [ksoftirqd/8] pskb_trim_rcsum_slow +0xffff96d3956d4f00 8 [ksoftirqd/8] udp_v4_early_demux +0xffff96d3956d4f00 8 [ksoftirqd/8] ip_route_input_noref +0xffff96d3956d4f00 8 [ksoftirqd/8] ip_route_input_rcu +0xffff96d3956d4f00 8 [ksoftirqd/8] ip_route_input_slow +0xffff96d3956d4f00 8 [ksoftirqd/8] fib_validate_source +0xffff96d3956d4f00 8 [ksoftirqd/8] __fib_validate_source +0xffff96d3956d4f00 8 [ksoftirqd/8] ip_local_deliver +0xffff96d3956d4f00 8 [ksoftirqd/8] ip_local_deliver_finish +0xffff96d3956d4f00 8 [ksoftirqd/8] ip_protocol_deliver_rcu +0xffff96d3956d4f00 8 [ksoftirqd/8] raw_local_deliver 
+0xffff96d3956d4f00 8 [ksoftirqd/8] udp_rcv +0xffff96d3956d4f00 8 [ksoftirqd/8] __udp4_lib_rcv +0xffff96d3956d4f00 8 [ksoftirqd/8] __skb_checksum_complete +0xffff96d3956d4f00 8 [ksoftirqd/8] udp_unicast_rcv_skb +0xffff96d3956d4f00 8 [ksoftirqd/8] udp_queue_rcv_skb +0xffff96d3956d4f00 8 [ksoftirqd/8] udp_queue_rcv_one_skb +0xffff96d3956d4f00 8 [ksoftirqd/8] sk_filter_trim_cap +0xffff96d3956d4f00 8 [ksoftirqd/8] security_sock_rcv_skb +0xffff96d3956d4f00 8 [ksoftirqd/8] selinux_socket_sock_rcv_skb +0xffff96d3956d4f00 8 [ksoftirqd/8] selinux_sock_rcv_skb_compat +0xffff96d3956d4f00 8 [ksoftirqd/8] selinux_netlbl_sock_rcv_skb +0xffff96d3956d4f00 8 [ksoftirqd/8] selinux_xfrm_sock_rcv_skb +0xffff96d3956d4f00 8 [ksoftirqd/8] bpf_lsm_socket_sock_rcv_skb +``` + +### Working Trace (ignoring cksums i.e setting to 0) + +```bash +0xffff96d35c18f000 8 [] udp4_gro_receive +0xffff96d35c18f000 8 [] udp_gro_receive +0xffff96d35c18f000 8 [] skb_defer_rx_timestamp +0xffff96d35c18f000 8 [] tpacket_rcv +0xffff96d35c18f000 8 [] skb_push +0xffff96d35c18f000 8 [] tpacket_get_timestamp +0xffff96d35c18f000 10 [nc] skb_consume_udp +0xffff96d35c18f000 10 [nc] skb_consume_udp +0xffff96d35c18f000 10 [nc] __consume_stateless_skb +0xffff96d35c18f000 10 [nc] skb_release_data +0xffff96d35c18f000 10 [nc] skb_free_head +0xffff96d35c18f000 10 [nc] kfree_skbmem +0xffff96d35c18f000 8 [] consume_skb +0xffff96d35c18f000 8 [] ip_rcv_core +0xffff96d35c18f000 8 [] pskb_trim_rcsum_slow +0xffff96d35c18f000 8 [] udp_v4_early_demux +0xffff96d35c18f000 8 [] ip_route_input_noref +0xffff96d35c18f000 8 [] ip_route_input_rcu +0xffff96d35c18f000 8 [] ip_route_input_slow +0xffff96d35c18f000 8 [] fib_validate_source +0xffff96d35c18f000 8 [] __fib_validate_source +0xffff96d35c18f000 8 [] ip_local_deliver +0xffff96d35c18f000 8 [] ip_local_deliver_finish +0xffff96d35c18f000 8 [] ip_protocol_deliver_rcu +0xffff96d35c18f000 8 [] raw_local_deliver +0xffff96d35c18f000 8 [] udp_rcv +0xffff96d35c18f000 8 [] __udp4_lib_rcv # ----> 
No CKSUM so we don't call __skb_checksum_complete +0xffff96d35c18f000 8 [] udp_unicast_rcv_skbx_ +0xffff96d35c18f000 8 [] udp_queue_rcv_skb +0xffff96d35c18f000 8 [] udp_queue_rcv_one_skb +0xffff96d35c18f000 8 [] sk_filter_trim_cap +0xffff96d35c18f000 8 [] security_sock_rcv_skb +0xffff96d35c18f000 8 [] selinux_socket_sock_rcv_skb +0xffff96d35c18f000 8 [] selinux_sock_rcv_skb_compat +0xffff96d35c18f000 8 [] selinux_netlbl_sock_rcv_skb +0xffff96d35c18f000 8 [] selinux_xfrm_sock_rcv_skb +0xffff96d35c18f000 8 [] bpf_lsm_socket_sock_rcv_skb +0xffff96d35c18f000 8 [] skb_pull_rcsum +``` \ No newline at end of file diff --git a/userspace-go/bpf_bpfeb.go b/userspace-go/bpf_bpfeb.go new file mode 100644 index 0000000..2100aff --- /dev/null +++ b/userspace-go/bpf_bpfeb.go @@ -0,0 +1,139 @@ +// Code generated by bpf2go; DO NOT EDIT. +//go:build arm64be || armbe || mips || mips64 || mips64p32 || ppc64 || s390 || s390x || sparc || sparc64 +// +build arm64be armbe mips mips64 mips64p32 ppc64 s390 s390x sparc sparc64 + +package main + +import ( + "bytes" + _ "embed" + "fmt" + "io" + + "github.com/cilium/ebpf" +) + +type bpfBackend struct { + Saddr uint32 + Daddr uint32 + Dport uint16 + Shwaddr [6]uint8 + Dhwaddr [6]uint8 + Ifindex uint16 + Nocksum uint8 + Pad [3]uint8 +} + +type bpfVipKey struct { + Vip uint32 + Port uint16 + Pad [2]uint8 +} + +// loadBpf returns the embedded CollectionSpec for bpf. +func loadBpf() (*ebpf.CollectionSpec, error) { + reader := bytes.NewReader(_BpfBytes) + spec, err := ebpf.LoadCollectionSpecFromReader(reader) + if err != nil { + return nil, fmt.Errorf("can't load bpf: %w", err) + } + + return spec, err +} + +// loadBpfObjects loads bpf and converts it into a struct. +// +// The following types are suitable as obj argument: +// +// *bpfObjects +// *bpfPrograms +// *bpfMaps +// +// See ebpf.CollectionSpec.LoadAndAssign documentation for details. 
+func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { + spec, err := loadBpf() + if err != nil { + return err + } + + return spec.LoadAndAssign(obj, opts) +} + +// bpfSpecs contains maps and programs before they are loaded into the kernel. +// +// It can be passed ebpf.CollectionSpec.Assign. +type bpfSpecs struct { + bpfProgramSpecs + bpfMapSpecs +} + +// bpfSpecs contains programs before they are loaded into the kernel. +// +// It can be passed ebpf.CollectionSpec.Assign. +type bpfProgramSpecs struct { + BpfRedirectPlaceholder *ebpf.ProgramSpec `ebpf:"bpf_redirect_placeholder"` + XdpProgFunc *ebpf.ProgramSpec `ebpf:"xdp_prog_func"` +} + +// bpfMapSpecs contains maps before they are loaded into the kernel. +// +// It can be passed ebpf.CollectionSpec.Assign. +type bpfMapSpecs struct { + Backends *ebpf.MapSpec `ebpf:"backends"` +} + +// bpfObjects contains all objects after they have been loaded into the kernel. +// +// It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. +type bpfObjects struct { + bpfPrograms + bpfMaps +} + +func (o *bpfObjects) Close() error { + return _BpfClose( + &o.bpfPrograms, + &o.bpfMaps, + ) +} + +// bpfMaps contains all maps after they have been loaded into the kernel. +// +// It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. +type bpfMaps struct { + Backends *ebpf.Map `ebpf:"backends"` +} + +func (m *bpfMaps) Close() error { + return _BpfClose( + m.Backends, + ) +} + +// bpfPrograms contains all programs after they have been loaded into the kernel. +// +// It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. 
+type bpfPrograms struct { + BpfRedirectPlaceholder *ebpf.Program `ebpf:"bpf_redirect_placeholder"` + XdpProgFunc *ebpf.Program `ebpf:"xdp_prog_func"` +} + +func (p *bpfPrograms) Close() error { + return _BpfClose( + p.BpfRedirectPlaceholder, + p.XdpProgFunc, + ) +} + +func _BpfClose(closers ...io.Closer) error { + for _, closer := range closers { + if err := closer.Close(); err != nil { + return err + } + } + return nil +} + +// Do not access this directly. +//go:embed bpf_bpfeb.o +var _BpfBytes []byte diff --git a/userspace-go/bpf_bpfeb.o b/userspace-go/bpf_bpfeb.o new file mode 100644 index 0000000..816372b Binary files /dev/null and b/userspace-go/bpf_bpfeb.o differ diff --git a/userspace-go/bpf_bpfel.go b/userspace-go/bpf_bpfel.go new file mode 100644 index 0000000..83d4939 --- /dev/null +++ b/userspace-go/bpf_bpfel.go @@ -0,0 +1,139 @@ +// Code generated by bpf2go; DO NOT EDIT. +//go:build 386 || amd64 || amd64p32 || arm || arm64 || mips64le || mips64p32le || mipsle || ppc64le || riscv64 +// +build 386 amd64 amd64p32 arm arm64 mips64le mips64p32le mipsle ppc64le riscv64 + +package main + +import ( + "bytes" + _ "embed" + "fmt" + "io" + + "github.com/cilium/ebpf" +) + +type bpfBackend struct { + Saddr uint32 + Daddr uint32 + Dport uint16 + Shwaddr [6]uint8 + Dhwaddr [6]uint8 + Ifindex uint16 + Nocksum uint8 + Pad [3]uint8 +} + +type bpfVipKey struct { + Vip uint32 + Port uint16 + Pad [2]uint8 +} + +// loadBpf returns the embedded CollectionSpec for bpf. +func loadBpf() (*ebpf.CollectionSpec, error) { + reader := bytes.NewReader(_BpfBytes) + spec, err := ebpf.LoadCollectionSpecFromReader(reader) + if err != nil { + return nil, fmt.Errorf("can't load bpf: %w", err) + } + + return spec, err +} + +// loadBpfObjects loads bpf and converts it into a struct. +// +// The following types are suitable as obj argument: +// +// *bpfObjects +// *bpfPrograms +// *bpfMaps +// +// See ebpf.CollectionSpec.LoadAndAssign documentation for details. 
+func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { + spec, err := loadBpf() + if err != nil { + return err + } + + return spec.LoadAndAssign(obj, opts) +} + +// bpfSpecs contains maps and programs before they are loaded into the kernel. +// +// It can be passed ebpf.CollectionSpec.Assign. +type bpfSpecs struct { + bpfProgramSpecs + bpfMapSpecs +} + +// bpfSpecs contains programs before they are loaded into the kernel. +// +// It can be passed ebpf.CollectionSpec.Assign. +type bpfProgramSpecs struct { + BpfRedirectPlaceholder *ebpf.ProgramSpec `ebpf:"bpf_redirect_placeholder"` + XdpProgFunc *ebpf.ProgramSpec `ebpf:"xdp_prog_func"` +} + +// bpfMapSpecs contains maps before they are loaded into the kernel. +// +// It can be passed ebpf.CollectionSpec.Assign. +type bpfMapSpecs struct { + Backends *ebpf.MapSpec `ebpf:"backends"` +} + +// bpfObjects contains all objects after they have been loaded into the kernel. +// +// It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. +type bpfObjects struct { + bpfPrograms + bpfMaps +} + +func (o *bpfObjects) Close() error { + return _BpfClose( + &o.bpfPrograms, + &o.bpfMaps, + ) +} + +// bpfMaps contains all maps after they have been loaded into the kernel. +// +// It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. +type bpfMaps struct { + Backends *ebpf.Map `ebpf:"backends"` +} + +func (m *bpfMaps) Close() error { + return _BpfClose( + m.Backends, + ) +} + +// bpfPrograms contains all programs after they have been loaded into the kernel. +// +// It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. 
+type bpfPrograms struct { + BpfRedirectPlaceholder *ebpf.Program `ebpf:"bpf_redirect_placeholder"` + XdpProgFunc *ebpf.Program `ebpf:"xdp_prog_func"` +} + +func (p *bpfPrograms) Close() error { + return _BpfClose( + p.BpfRedirectPlaceholder, + p.XdpProgFunc, + ) +} + +func _BpfClose(closers ...io.Closer) error { + for _, closer := range closers { + if err := closer.Close(); err != nil { + return err + } + } + return nil +} + +// Do not access this directly. +//go:embed bpf_bpfel.o +var _BpfBytes []byte diff --git a/userspace-go/bpf_bpfel.o b/userspace-go/bpf_bpfel.o new file mode 100644 index 0000000..3dec66e Binary files /dev/null and b/userspace-go/bpf_bpfel.o differ diff --git a/userspace-go/go.mod b/userspace-go/go.mod new file mode 100644 index 0000000..230b395 --- /dev/null +++ b/userspace-go/go.mod @@ -0,0 +1,7 @@ +module github.com/astoycos/net-ebpf-playground/userspace-go + +go 1.19 + +require github.com/cilium/ebpf v0.9.3 + +require golang.org/x/sys v0.0.0-20220928140112-f11e5e49a4ec // indirect diff --git a/userspace-go/go.sum b/userspace-go/go.sum new file mode 100644 index 0000000..83fa812 --- /dev/null +++ b/userspace-go/go.sum @@ -0,0 +1,10 @@ +github.com/cilium/ebpf v0.9.3 h1:5KtxXZU+scyERvkJMEm16TbScVvuuMrlhPly78ZMbSc= +github.com/cilium/ebpf v0.9.3/go.mod h1:w27N4UjpaQ9X/DGrSugxUG+H+NhgntDuPb5lCzxCn8A= +github.com/frankban/quicktest v1.14.0 h1:+cqqvzZV87b4adx/5ayVOaYZ2CrvM4ejQvUdBzPPUss= +github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ= +github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/rogpeppe/go-internal v1.6.1 h1:/FiVV8dS/e+YqF2JvO3yXRFbBLTIuSDkuC7aBOAvL+k= +golang.org/x/sys v0.0.0-20220928140112-f11e5e49a4ec h1:BkDtF2Ih9xZ7le9ndzTA7KJow28VbQW3odyk/8drmuI= +golang.org/x/sys v0.0.0-20220928140112-f11e5e49a4ec/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/xerrors 
v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= diff --git a/userspace-go/kprobe-tmp.txt b/userspace-go/kprobe-tmp.txt new file mode 100644 index 0000000..4d0905c --- /dev/null +++ b/userspace-go/kprobe-tmp.txt @@ -0,0 +1,44 @@ +kretprobe:xdp_do_redirect +{ + printf("returned: %d\n", retval); +} +kprobe:veth_xdp_rcv_one +{ + printf("xdp receive one xdp_frame arg: %d\n", arg1); +} +kretprobe:veth_xdp_rcv_one +{ + printf("xdp receive one returned: %d\n", retval); +} +kprobe:veth_xdp_rcv_skb +{ + printf("xdp receive skb returned: %s\n", str(arg1)); +} +kretprobe:veth_poll +{ + printf("xdp receive returned: %d\n", retval); +} +kretprobe:ip_rcv_core +{ + if (retval < 15) { + printf("ip receive returned: %d\n", retval); + } +} +kprobe:kfree_skb_reason +{ + printf("SKB free reason: %d\n", arg1); +} +kretprobe:__udp4_lib_rcv +{ + printf("udb lib receive returned: %d\n", retval); + +} +kretprobe:udp_unicast_rcv_skb +{ + printf("udb udp_unicast_rcv_skb receive returned: %d\n", retval); + +} +kretprobe:__skb_checksum_complete +{ + printf("skb_checksum_complete returned: %x\n", retval); +} diff --git a/userspace-go/userspace-go b/userspace-go/userspace-go new file mode 100755 index 0000000..c576959 Binary files /dev/null and b/userspace-go/userspace-go differ diff --git a/userspace-go/wireshark.png b/userspace-go/wireshark.png new file mode 100644 index 0000000..13dabbf Binary files /dev/null and b/userspace-go/wireshark.png differ diff --git a/userspace-go/xdp_udp.go b/userspace-go/xdp_udp.go new file mode 100644 index 0000000..de147d6 --- /dev/null +++ b/userspace-go/xdp_udp.go @@ -0,0 +1,162 @@ +package main + +import ( + "bytes" + "encoding/binary" + "encoding/hex" + "fmt" + "log" + "net" + "os" + "os/exec" + "regexp" + "strings" + + "github.com/cilium/ebpf" + "github.com/cilium/ebpf/link" +) + +//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -cc $BPF_CLANG -cflags $BPF_CFLAGS bpf ../bpf/xdp_udp.bpf.c -- -I../libbpf/src +func 
main() { + if len(os.Args) < 3 { + log.Fatalf("Please specify a main and destination network interface") + } + + ifaceName := os.Args[1] + iface, err := net.InterfaceByName(ifaceName) + if err != nil { + log.Fatalf("lookup network iface %q: %s", ifaceName, err) + } + ifaceName2 := os.Args[2] + iface2, err := net.InterfaceByName(ifaceName2) + if err != nil { + log.Fatalf("lookup network iface %s: %s", ifaceName, err) + } + + objs := bpfObjects{} + if err := loadBpfObjects(&objs, nil); err != nil { + log.Fatalf("loading objects: %s", err) + } + defer objs.Close() + + l, err := link.AttachXDP(link.XDPOptions{ + Program: objs.XdpProgFunc, + Interface: iface.Index, + }) + if err != nil { + log.Fatalf("could not attach XDP program: %s", err) + } + defer l.Close() + + l2, err := link.AttachXDP(link.XDPOptions{ + Program: objs.BpfRedirectPlaceholder, + Interface: iface2.Index, + }) + if err != nil { + log.Fatalf("could not attach XDP program: %s", err) + } + defer l2.Close() + + log.Printf("Attached XDP program to iface %q (index %d)", iface.Name, iface.Index) + log.Printf("Press Ctrl-C to exit and remove the program") + + b := bpfBackend{ + Saddr: ip2int("10.8.125.12"), + Daddr: ip2int("192.168.10.2"), + Dport: 9875, + // Host-Side Veth Mac + Shwaddr: hwaddr2bytes("06:56:87:ec:fd:1f"), + // Container-Side Veth Mac + Dhwaddr: hwaddr2bytes("86:ad:33:29:ff:5e"), + Nocksum: 0, + Ifindex: 8, + } + + key := bpfVipKey{ + Vip: ip2int("10.8.125.12"), + //Vip: ip2int("192.168.10.1"), + Port: 8888, + } + + if err := objs.Backends.Update(key, b, ebpf.UpdateAny); err != nil { + fmt.Println(err.Error()) + os.Exit(1) + } + + for { + } +} + +func ip2int(ip string) uint32 { + ipaddr := net.ParseIP(ip) + return binary.LittleEndian.Uint32(ipaddr.To4()) +} + +// feed from interfaces2hwaddr +func hwaddr2bytes(hwaddr string) [6]byte { + parts := strings.Split(hwaddr, ":") + if len(parts) != 6 { + panic("invalid hwaddr") + } + + var hwaddrB [6]byte + for i, hexPart := range parts { + bs, err := 
hex.DecodeString(hexPart) + if err != nil { + panic(err) + } + if len(bs) != 1 { + panic("invalid hwaddr part") + } + hwaddrB[i] = bs[0] + } + + return hwaddrB +} + +type networkInterface struct { + name string + hwaddr [6]uint8 + ifindex uint16 +} + +// interface to hwaddr in hex +func interfaces2hwaddr() (interfaces map[string]networkInterface) { + ints, err := net.Interfaces() + if err != nil { + panic(err) + } + + for _, in := range ints { + interfaces[in.Name] = networkInterface{ + name: in.Name, + hwaddr: hwaddr2bytes(in.HardwareAddr.String()), + ifindex: uint16(in.Index), + } + } + + return +} + +var routeRE = regexp.MustCompile(`^([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)(/[0-9]+)? dev (\S+)`) + +// ip to interface name +func routes() (routes map[string]string) { + stdout, stderr := new(bytes.Buffer), new(bytes.Buffer) + cmd := exec.Command("ip", "route") + cmd.Stdout = stdout + cmd.Stderr = stderr + if err := cmd.Run(); err != nil { + panic(err.Error() + stderr.String()) + } + + for _, line := range strings.Split(stdout.String(), "\n") { + matches := routeRE.FindAllStringSubmatch(line, -1) + if len(matches) == 1 { + submatches := matches[0] + routes[submatches[1]] = submatches[3] + } + } + + return +}