Skip to content

Commit

Permalink
bpf: make padding length runtime
Browse files Browse the repository at this point in the history
  • Loading branch information
hack3ric committed Sep 27, 2024
1 parent 41f3143 commit 891f0fa
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 53 deletions.
41 changes: 23 additions & 18 deletions bpf/egress.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@
#include "main.h"

// Extend socket buffer and move n bytes from front to back.
static inline int mangle_data(struct __sk_buff* skb, __u16 offset, __be32* csum_diff) {
static inline int mangle_data(struct __sk_buff* skb, __u16 offset, __be32* csum_diff,
__u8 padding_len) {
__u16 data_len = skb->len - offset;
try_shot(bpf_skb_change_tail(skb, skb->len + RESERVE_LEN, 0));
__u8 buf[RESERVE_LEN + 4] = {};
__u32 copy_len = min(data_len, RESERVE_LEN);
size_t reserve_len = TCP_UDP_HEADER_DIFF + padding_len;
try_shot(bpf_skb_change_tail(skb, skb->len + reserve_len, 0));
__u8 buf[MAX_RESERVE_LEN + 4] = {};
__u32 copy_len = min(data_len, reserve_len);

if (likely(copy_len > 0)) {
if (likely(copy_len > 0 && copy_len < 28)) {
// HACK: make verifier happy
// Probably related:
// https://lore.kernel.org/bpf/[email protected]/T/
Expand All @@ -26,14 +28,16 @@ static inline int mangle_data(struct __sk_buff* skb, __u16 offset, __be32* csum_
try_shot(bpf_skb_store_bytes(skb, skb->len - copy_len, buf + 1, copy_len, 0));

// Fix checksum when the moved bytes do not align with u16 boundaries
if (copy_len == RESERVE_LEN && data_len % 2 != 0)
*csum_diff = bpf_csum_diff((__be32*)(buf + 1), copy_len, (__be32*)buf, sizeof(buf), *csum_diff);

#if PADDING_LEN != 0
__builtin_memset(buf, ';', PADDING_LEN);
*csum_diff = bpf_csum_diff(NULL, 0, (__be32*)buf, PADDING_LEN, *csum_diff);
try_shot(bpf_skb_store_bytes(skb, offset + TCP_UDP_HEADER_DIFF, buf, PADDING_LEN, 0));
#endif
if (copy_len == reserve_len && data_len % 2 != 0)
*csum_diff =
bpf_csum_diff((__be32*)(buf + 1), copy_len, (__be32*)buf, sizeof(buf), *csum_diff);

if (padding_len > 0) {
padding_len = min(padding_len, MAX_PADDING_LEN);
__builtin_memset(buf, ';', sizeof(buf));
*csum_diff = bpf_csum_diff(NULL, 0, (__be32*)buf, padding_len, *csum_diff);
try_shot(bpf_skb_store_bytes(skb, offset + TCP_UDP_HEADER_DIFF, buf, padding_len, 0));
}
}

return TC_ACT_OK;
Expand Down Expand Up @@ -110,7 +114,7 @@ int egress_handler(struct __sk_buff* skb) {
if (likely(conn->state == CONN_ESTABLISHED)) {
seq = conn->seq;
ack_seq = conn->ack_seq;
conn->seq += payload_len + PADDING_LEN;
conn->seq += payload_len + conn->padding_len;
} else {
if (conn->state == CONN_IDLE) {
__u32 cooldown = conn_cooldown(conn);
Expand Down Expand Up @@ -153,22 +157,23 @@ int egress_handler(struct __sk_buff* skb) {
conn_cwnd = conn->cwnd;
bpf_spin_unlock(&conn->lock);

size_t reserve_len = TCP_UDP_HEADER_DIFF + conn->padding_len;
if (ipv4) {
__be16 old_len = ipv4->tot_len;
__be16 new_len = htons(ntohs(old_len) + RESERVE_LEN);
__be16 new_len = htons(ntohs(old_len) + reserve_len);
ipv4->tot_len = new_len;
ipv4->protocol = IPPROTO_TCP;

int off = ETH_HLEN + IPV4_CSUM_OFF;
try_shot(bpf_l3_csum_replace(skb, off, old_len, new_len, 2));
try_shot(bpf_l3_csum_replace(skb, off, htons(IPPROTO_UDP), htons(IPPROTO_TCP), 2));
} else if (ipv6) {
ipv6->payload_len = htons(ntohs(ipv6->payload_len) + RESERVE_LEN);
ipv6->payload_len = htons(ntohs(ipv6->payload_len) + reserve_len);
ipv6->nexthdr = IPPROTO_TCP;
}

__be32 csum_diff = 0;
try_tc(mangle_data(skb, ip_end + sizeof(*udp), &csum_diff));
try_tc(mangle_data(skb, ip_end + sizeof(*udp), &csum_diff, conn->padding_len));
decl_shot(struct tcphdr, tcp, ip_end, skb);
update_tcp_header(tcp, payload_len, seq, ack_seq, conn_cwnd);

Expand All @@ -182,7 +187,7 @@ int egress_handler(struct __sk_buff* skb) {
tcp->check = old_udp_csum;
bpf_l4_csum_replace(skb, csum_off, 0, csum_diff, 0);

__be16 new_len = htons(udp_len + RESERVE_LEN);
__be16 new_len = htons(udp_len + reserve_len);
struct ph_part old_ph = {.protocol = IPPROTO_UDP, .len = old_udp.len};
struct ph_part new_ph = {.protocol = IPPROTO_TCP, .len = new_len};
csum_diff = bpf_csum_diff((__be32*)&old_ph, sizeof(old_ph), (__be32*)&new_ph, sizeof(new_ph), 0);
Expand Down
46 changes: 26 additions & 20 deletions bpf/ingress.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,29 +15,33 @@
// applied to every data packet. For the same reason, middleboxes probably only append options like
// MSS on handshake packets since there is no data at the end to move, so not finishing this is
// probably going to be fine.
static inline int restore_data(struct xdp_md* xdp, __u16 offset, __u32 buf_len, __be32* csum_diff) {
__u8 buf[RESERVE_LEN + 4] = {};
__u16 data_len = buf_len - offset - PADDING_LEN;
__u32 copy_len = min(data_len, RESERVE_LEN);

#if PADDING_LEN != 0
try_drop(bpf_xdp_load_bytes(xdp, offset, buf, PADDING_LEN));
*csum_diff = bpf_csum_diff((__be32*)buf, PADDING_LEN, NULL, 0, *csum_diff);
buf[0] = 0;
#endif

if (copy_len > 0) {
static inline int restore_data(struct xdp_md* xdp, __u16 offset, __u32 buf_len, __be32* csum_diff,
__u8 padding_len) {
size_t reserve_len = TCP_UDP_HEADER_DIFF + padding_len;
__u8 buf[MAX_RESERVE_LEN + 4] = {};
__u16 data_len = buf_len - offset - padding_len;
__u32 copy_len = min(data_len, reserve_len);

if (padding_len > 0) {
if (padding_len > MAX_PADDING_LEN) padding_len = MAX_PADDING_LEN;
try_drop(bpf_xdp_load_bytes(xdp, offset, buf, padding_len));
*csum_diff = bpf_csum_diff((__be32*)buf, padding_len, NULL, 0, *csum_diff);
buf[0] = 0;
}

if (likely(copy_len > 0 && copy_len <= MAX_RESERVE_LEN)) {
// HACK: see egress.c
if (copy_len < 2) copy_len = 1;
try_drop(bpf_xdp_load_bytes(xdp, buf_len - copy_len, buf + 1, copy_len));
try_drop(bpf_xdp_store_bytes(xdp, offset - TCP_UDP_HEADER_DIFF, buf + 1, copy_len));

// Fix checksum when the moved bytes do not align with u16 boundaries
if (copy_len == RESERVE_LEN && data_len % 2 != 0)
*csum_diff = bpf_csum_diff((__be32*)buf, sizeof(buf), (__be32*)(buf + 1), copy_len, *csum_diff);
if (copy_len == reserve_len && data_len % 2 != 0)
*csum_diff =
bpf_csum_diff((__be32*)buf, sizeof(buf), (__be32*)(buf + 1), copy_len, *csum_diff);
}

try_drop(bpf_xdp_adjust_tail(xdp, -(int)RESERVE_LEN));
try_drop(bpf_xdp_adjust_tail(xdp, -(int)reserve_len));
return XDP_PASS;
}

Expand Down Expand Up @@ -302,18 +306,19 @@ int ingress_handler(struct xdp_md* xdp) {
}
if (will_drop) return XDP_DROP;

size_t reserve_len = TCP_UDP_HEADER_DIFF + conn->padding_len;
if (ipv4) {
__be16 old_len = ipv4->tot_len;
__be16 new_len = htons(ntohs(old_len) - RESERVE_LEN);
__be16 new_len = htons(ntohs(old_len) - reserve_len);
ipv4->tot_len = new_len;
ipv4->protocol = IPPROTO_UDP;

__u32 ipv4_csum = (__u16)~ntohs(ipv4->check);
ipv4_csum -= RESERVE_LEN;
ipv4_csum -= reserve_len;
ipv4_csum += IPPROTO_UDP - IPPROTO_TCP;
ipv4->check = htons(csum_fold(ipv4_csum));
} else if (ipv6) {
ipv6->payload_len = htons(ntohs(ipv6->payload_len) - RESERVE_LEN);
ipv6->payload_len = htons(ntohs(ipv6->payload_len) - reserve_len);
ipv6->nexthdr = IPPROTO_UDP;
}

Expand All @@ -322,11 +327,12 @@ int ingress_handler(struct xdp_md* xdp) {
__u32 csum = (__u16)~ntohs(tcp->check);

__be32 csum_diff = 0;
try_xdp(restore_data(xdp, ip_end + sizeof(*tcp), ip_end + ip_payload_len, &csum_diff));
try_xdp(restore_data(xdp, ip_end + sizeof(*tcp), ip_end + ip_payload_len, &csum_diff,
conn->padding_len));
decl_drop(struct udphdr, udp, ip_end, xdp);
csum += u32_fold(ntohl(csum_diff));

__u16 udp_len = ip_payload_len - RESERVE_LEN;
__u16 udp_len = ip_payload_len - reserve_len;
udp->len = htons(udp_len);

udp->check = 0;
Expand Down
5 changes: 3 additions & 2 deletions bpf/main.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@ extern struct mimic_rb_map {

#define IPV4_CSUM_OFF (offsetof(struct iphdr, check))
#define TCP_UDP_HEADER_DIFF (sizeof(struct tcphdr) - sizeof(struct udphdr))
#define PADDING_LEN 0
#define RESERVE_LEN (TCP_UDP_HEADER_DIFF + PADDING_LEN)
// #define PADDING_LEN 0
// #define RESERVE_LEN (TCP_UDP_HEADER_DIFF + PADDING_LEN)
#define MAX_RESERVE_LEN (TCP_UDP_HEADER_DIFF + MAX_PADDING_LEN)

struct ph_part {
__u8 _pad;
Expand Down
32 changes: 19 additions & 13 deletions common/defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ static inline void cleanup_malloc_str(char** ptr) { cleanup_malloc((void*)ptr);
// Mainly used for limiting loop counts
#define MAX_PACKET_SIZE 10000

#define MAX_PADDING_LEN 16

// Used for reading packet data in bulk
#define SEGMENT_SIZE 64

Expand Down Expand Up @@ -202,20 +204,20 @@ struct filter_settings {
struct {
union { struct {
union {
struct { int interval, retry; };
struct { int i, r; };
int array[2];
struct { __u16 interval, retry; };
struct { __u16 i, r; };
__u16 array[2];
};
} handshake, h; };
union { struct {
union {
struct { int time, interval, retry, stale; };
struct { int t, i, r, s; };
int array[4];
struct { __u16 time, interval, retry, stale; };
struct { __u16 t, i, r, s; };
__u16 array[4];
};
} keepalive, k; };
};
int array[6];
__u16 array[6];
};
};
// clang-format on
Expand Down Expand Up @@ -249,26 +251,30 @@ struct conn_tuple {

struct connection {
struct bpf_spin_lock lock;
__u32 seq, ack_seq;
__u32 cwnd;

enum conn_state {
CONN_IDLE,
CONN_SYN_SENT,
CONN_SYN_RECV,
CONN_ESTABLISHED,
} state;
__u32 seq, ack_seq;
__u64 pktbuf;
__u32 cwnd;
__u16 peer_mss;
bool keepalive_sent;
} state : 2;
bool keepalive_sent : 1;
__u8 padding_len : 5;
__u8 cooldown_mul;
__u16 peer_mss;
struct filter_settings settings;

__u64 retry_tstamp, reset_tstamp, stale_tstamp;
__u64 pktbuf;
};

// Build a fresh connection record: zeroed spin lock and state, initial
// congestion window, all liveness timestamps set to `tstamp`, and the
// caller-supplied filter settings copied in.
// NOTE(review): padding_len is hard-coded to 4 here — confirm this default is
// intended rather than a value that should be taken from `settings`.
static __always_inline struct connection conn_init(struct filter_settings* settings, __u64 tstamp) {
  struct connection conn = {
    .cwnd = INIT_CWND,
    .padding_len = 4,
    .retry_tstamp = tstamp,
    .reset_tstamp = tstamp,
    .stale_tstamp = tstamp,
  };
  __builtin_memcpy(&conn.settings, settings, sizeof(conn.settings));
  return conn;
}

Expand Down

0 comments on commit 891f0fa

Please sign in to comment.