Skip to content

Commit 319fc77

Browse files
committed
Merge tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Pull BPF fixes from Daniel Borkmann:

 - Fix a soft-lockup in BPF arena_map_free on 64k page size kernels (Alan Maguire)
 - Fix a missing allocation failure check in BPF verifier's acquire_lock_state (Kumar Kartikeya Dwivedi)
 - Fix a NULL-pointer dereference in trace_kfree_skb by adding kfree_skb to the raw_tp_null_args set (Kuniyuki Iwashima)
 - Fix a deadlock when freeing BPF cgroup storage (Abel Wu)
 - Fix a syzbot-reported deadlock when holding BPF map's freeze_mutex (Andrii Nakryiko)
 - Fix a use-after-free issue in bpf_test_init when eth_skb_pkt_type is accessing skb data not containing an Ethernet header (Shigeru Yoshida)
 - Fix skipping non-existing keys in generic_map_lookup_batch (Yan Zhai)
 - Several BPF sockmap fixes to address incorrect TCP copied_seq calculations, which prevented correct data reads from recv(2) in user space (Jiayuan Chen)
 - Two fixes for BPF map lookup nullness elision (Daniel Xu)
 - Fix a NULL-pointer dereference from vmlinux BTF lookup in bpf_sk_storage_tracing_allowed (Jared Kangas)

* tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  selftests: bpf: test batch lookup on array of maps with holes
  bpf: skip non exist keys in generic_map_lookup_batch
  bpf: Handle allocation failure in acquire_lock_state
  bpf: verifier: Disambiguate get_constant_map_key() errors
  bpf: selftests: Test constant key extraction on irrelevant maps
  bpf: verifier: Do not extract constant map keys for irrelevant maps
  bpf: Fix softlockup in arena_map_free on 64k page kernel
  net: Add rx_skb of kfree_skb to raw_tp_null_args[].
  bpf: Fix deadlock when freeing cgroup storage
  selftests/bpf: Add strparser test for bpf
  selftests/bpf: Fix invalid flag of recv()
  bpf: Disable non stream socket for strparser
  bpf: Fix wrong copied_seq calculation
  strparser: Add read_sock callback
  bpf: avoid holding freeze_mutex during mmap operation
  bpf: unify VM_WRITE vs VM_MAYWRITE use in BPF map mmaping logic
  selftests/bpf: Adjust data size to have ETH_HLEN
  bpf, test_run: Fix use-after-free issue in eth_skb_pkt_type()
  bpf: Remove unnecessary BTF lookups in bpf_sk_storage_tracing_allowed
2 parents 27eddbf + dbf7cc5 commit 319fc77

24 files changed

+726
-140
lines changed

Documentation/networking/strparser.rst

+8-1
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ Functions
112112
Callbacks
113113
=========
114114

115-
There are six callbacks:
115+
There are seven callbacks:
116116

117117
::
118118

@@ -182,6 +182,13 @@ There are six callbacks:
182182
the length of the message. skb->len - offset may be greater
183183
than full_len since strparser does not trim the skb.
184184

185+
::
186+
187+
int (*read_sock)(struct strparser *strp, read_descriptor_t *desc,
188+
sk_read_actor_t recv_actor);
189+
190+
The read_sock callback is used by strparser instead of
191+
sock->ops->read_sock, if provided.
185192
::
186193

187194
int (*read_sock_done)(struct strparser *strp, int err);

include/linux/skmsg.h

+2
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ struct sk_psock {
9191
struct sk_psock_progs progs;
9292
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
9393
struct strparser strp;
94+
u32 copied_seq;
95+
u32 ingress_bytes;
9496
#endif
9597
struct sk_buff_head ingress_skb;
9698
struct list_head ingress_msg;

include/net/strparser.h

+2
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ struct strparser;
4343
struct strp_callbacks {
4444
int (*parse_msg)(struct strparser *strp, struct sk_buff *skb);
4545
void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb);
46+
int (*read_sock)(struct strparser *strp, read_descriptor_t *desc,
47+
sk_read_actor_t recv_actor);
4648
int (*read_sock_done)(struct strparser *strp, int err);
4749
void (*abort_parser)(struct strparser *strp, int err);
4850
void (*lock)(struct strparser *strp);

include/net/tcp.h

+8
Original file line numberDiff line numberDiff line change
@@ -743,6 +743,9 @@ void tcp_get_info(struct sock *, struct tcp_info *);
743743
/* Read 'sendfile()'-style from a TCP socket */
744744
int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
745745
sk_read_actor_t recv_actor);
746+
int tcp_read_sock_noack(struct sock *sk, read_descriptor_t *desc,
747+
sk_read_actor_t recv_actor, bool noack,
748+
u32 *copied_seq);
746749
int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
747750
struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off);
748751
void tcp_read_done(struct sock *sk, size_t len);
@@ -2613,6 +2616,11 @@ struct sk_psock;
26132616
#ifdef CONFIG_BPF_SYSCALL
26142617
int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
26152618
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
2619+
#ifdef CONFIG_BPF_STREAM_PARSER
2620+
struct strparser;
2621+
int tcp_bpf_strp_read_sock(struct strparser *strp, read_descriptor_t *desc,
2622+
sk_read_actor_t recv_actor);
2623+
#endif /* CONFIG_BPF_STREAM_PARSER */
26162624
#endif /* CONFIG_BPF_SYSCALL */
26172625

26182626
#ifdef CONFIG_INET

kernel/bpf/arena.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
*/
4040

4141
/* number of bytes addressable by LDX/STX insn with 16-bit 'off' field */
42-
#define GUARD_SZ (1ull << sizeof_field(struct bpf_insn, off) * 8)
42+
#define GUARD_SZ round_up(1ull << sizeof_field(struct bpf_insn, off) * 8, PAGE_SIZE << 1)
4343
#define KERN_VM_SZ (SZ_4G + GUARD_SZ)
4444

4545
struct bpf_arena {

kernel/bpf/bpf_cgrp_storage.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
153153

154154
static void cgroup_storage_map_free(struct bpf_map *map)
155155
{
156-
bpf_local_storage_map_free(map, &cgroup_cache, NULL);
156+
bpf_local_storage_map_free(map, &cgroup_cache, &bpf_cgrp_storage_busy);
157157
}
158158

159159
/* *gfp_flags* is a hidden argument provided by the verifier */

kernel/bpf/btf.c

+2
Original file line numberDiff line numberDiff line change
@@ -6507,6 +6507,8 @@ static const struct bpf_raw_tp_null_args raw_tp_null_args[] = {
65076507
/* rxrpc */
65086508
{ "rxrpc_recvdata", 0x1 },
65096509
{ "rxrpc_resend", 0x10 },
6510+
/* skb */
6511+
{"kfree_skb", 0x1000},
65106512
/* sunrpc */
65116513
{ "xs_stream_read_data", 0x1 },
65126514
/* ... from xprt_cong_event event class */

kernel/bpf/ringbuf.c

-4
Original file line numberDiff line numberDiff line change
@@ -268,8 +268,6 @@ static int ringbuf_map_mmap_kern(struct bpf_map *map, struct vm_area_struct *vma
268268
/* allow writable mapping for the consumer_pos only */
269269
if (vma->vm_pgoff != 0 || vma->vm_end - vma->vm_start != PAGE_SIZE)
270270
return -EPERM;
271-
} else {
272-
vm_flags_clear(vma, VM_MAYWRITE);
273271
}
274272
/* remap_vmalloc_range() checks size and offset constraints */
275273
return remap_vmalloc_range(vma, rb_map->rb,
@@ -289,8 +287,6 @@ static int ringbuf_map_mmap_user(struct bpf_map *map, struct vm_area_struct *vma
289287
* position, and the ring buffer data itself.
290288
*/
291289
return -EPERM;
292-
} else {
293-
vm_flags_clear(vma, VM_MAYWRITE);
294290
}
295291
/* remap_vmalloc_range() checks size and offset constraints */
296292
return remap_vmalloc_range(vma, rb_map->rb, vma->vm_pgoff + RINGBUF_PGOFF);

kernel/bpf/syscall.c

+22-21
Original file line numberDiff line numberDiff line change
@@ -1035,7 +1035,7 @@ static const struct vm_operations_struct bpf_map_default_vmops = {
10351035
static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
10361036
{
10371037
struct bpf_map *map = filp->private_data;
1038-
int err;
1038+
int err = 0;
10391039

10401040
if (!map->ops->map_mmap || !IS_ERR_OR_NULL(map->record))
10411041
return -ENOTSUPP;
@@ -1059,24 +1059,33 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
10591059
err = -EACCES;
10601060
goto out;
10611061
}
1062+
bpf_map_write_active_inc(map);
10621063
}
1064+
out:
1065+
mutex_unlock(&map->freeze_mutex);
1066+
if (err)
1067+
return err;
10631068

10641069
/* set default open/close callbacks */
10651070
vma->vm_ops = &bpf_map_default_vmops;
10661071
vma->vm_private_data = map;
10671072
vm_flags_clear(vma, VM_MAYEXEC);
1073+
/* If mapping is read-only, then disallow potentially re-mapping with
1074+
* PROT_WRITE by dropping VM_MAYWRITE flag. This VM_MAYWRITE clearing
1075+
* means that as far as BPF map's memory-mapped VMAs are concerned,
1076+
VM_WRITE and VM_MAYWRITE are equivalent, if one of them is set,
1077+
* both should be set, so we can forget about VM_MAYWRITE and always
1078+
* check just VM_WRITE
1079+
*/
10681080
if (!(vma->vm_flags & VM_WRITE))
1069-
/* disallow re-mapping with PROT_WRITE */
10701081
vm_flags_clear(vma, VM_MAYWRITE);
10711082

10721083
err = map->ops->map_mmap(map, vma);
1073-
if (err)
1074-
goto out;
1084+
if (err) {
1085+
if (vma->vm_flags & VM_WRITE)
1086+
bpf_map_write_active_dec(map);
1087+
}
10751088

1076-
if (vma->vm_flags & VM_MAYWRITE)
1077-
bpf_map_write_active_inc(map);
1078-
out:
1079-
mutex_unlock(&map->freeze_mutex);
10801089
return err;
10811090
}
10821091

@@ -1968,8 +1977,6 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
19681977
return err;
19691978
}
19701979

1971-
#define MAP_LOOKUP_RETRIES 3
1972-
19731980
int generic_map_lookup_batch(struct bpf_map *map,
19741981
const union bpf_attr *attr,
19751982
union bpf_attr __user *uattr)
@@ -1979,8 +1986,8 @@ int generic_map_lookup_batch(struct bpf_map *map,
19791986
void __user *values = u64_to_user_ptr(attr->batch.values);
19801987
void __user *keys = u64_to_user_ptr(attr->batch.keys);
19811988
void *buf, *buf_prevkey, *prev_key, *key, *value;
1982-
int err, retry = MAP_LOOKUP_RETRIES;
19831989
u32 value_size, cp, max_count;
1990+
int err;
19841991

19851992
if (attr->batch.elem_flags & ~BPF_F_LOCK)
19861993
return -EINVAL;
@@ -2026,14 +2033,8 @@ int generic_map_lookup_batch(struct bpf_map *map,
20262033
err = bpf_map_copy_value(map, key, value,
20272034
attr->batch.elem_flags);
20282035

2029-
if (err == -ENOENT) {
2030-
if (retry) {
2031-
retry--;
2032-
continue;
2033-
}
2034-
err = -EINTR;
2035-
break;
2036-
}
2036+
if (err == -ENOENT)
2037+
goto next_key;
20372038

20382039
if (err)
20392040
goto free_buf;
@@ -2048,12 +2049,12 @@ int generic_map_lookup_batch(struct bpf_map *map,
20482049
goto free_buf;
20492050
}
20502051

2052+
cp++;
2053+
next_key:
20512054
if (!prev_key)
20522055
prev_key = buf_prevkey;
20532056

20542057
swap(prev_key, key);
2055-
retry = MAP_LOOKUP_RETRIES;
2056-
cp++;
20572058
cond_resched();
20582059
}
20592060

kernel/bpf/verifier.c

+23-8
Original file line numberDiff line numberDiff line change
@@ -1501,6 +1501,8 @@ static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx, enum r
15011501
struct bpf_reference_state *s;
15021502

15031503
s = acquire_reference_state(env, insn_idx);
1504+
if (!s)
1505+
return -ENOMEM;
15041506
s->type = type;
15051507
s->id = id;
15061508
s->ptr = ptr;
@@ -9149,10 +9151,11 @@ static int check_reg_const_str(struct bpf_verifier_env *env,
91499151
return 0;
91509152
}
91519153

9152-
/* Returns constant key value if possible, else negative error */
9153-
static s64 get_constant_map_key(struct bpf_verifier_env *env,
9154+
/* Returns constant key value in `value` if possible, else negative error */
9155+
static int get_constant_map_key(struct bpf_verifier_env *env,
91549156
struct bpf_reg_state *key,
9155-
u32 key_size)
9157+
u32 key_size,
9158+
s64 *value)
91569159
{
91579160
struct bpf_func_state *state = func(env, key);
91589161
struct bpf_reg_state *reg;
@@ -9179,8 +9182,10 @@ static s64 get_constant_map_key(struct bpf_verifier_env *env,
91799182
/* First handle precisely tracked STACK_ZERO */
91809183
for (i = off; i >= 0 && stype[i] == STACK_ZERO; i--)
91819184
zero_size++;
9182-
if (zero_size >= key_size)
9185+
if (zero_size >= key_size) {
9186+
*value = 0;
91839187
return 0;
9188+
}
91849189

91859190
/* Check that stack contains a scalar spill of expected size */
91869191
if (!is_spilled_scalar_reg(&state->stack[spi]))
@@ -9203,9 +9208,12 @@ static s64 get_constant_map_key(struct bpf_verifier_env *env,
92039208
if (err < 0)
92049209
return err;
92059210

9206-
return reg->var_off.value;
9211+
*value = reg->var_off.value;
9212+
return 0;
92079213
}
92089214

9215+
static bool can_elide_value_nullness(enum bpf_map_type type);
9216+
92099217
static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
92109218
struct bpf_call_arg_meta *meta,
92119219
const struct bpf_func_proto *fn,
@@ -9354,9 +9362,16 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
93549362
err = check_helper_mem_access(env, regno, key_size, BPF_READ, false, NULL);
93559363
if (err)
93569364
return err;
9357-
meta->const_map_key = get_constant_map_key(env, reg, key_size);
9358-
if (meta->const_map_key < 0 && meta->const_map_key != -EOPNOTSUPP)
9359-
return meta->const_map_key;
9365+
if (can_elide_value_nullness(meta->map_ptr->map_type)) {
9366+
err = get_constant_map_key(env, reg, key_size, &meta->const_map_key);
9367+
if (err < 0) {
9368+
meta->const_map_key = -1;
9369+
if (err == -EOPNOTSUPP)
9370+
err = 0;
9371+
else
9372+
return err;
9373+
}
9374+
}
93609375
break;
93619376
case ARG_PTR_TO_MAP_VALUE:
93629377
if (type_may_be_null(arg_type) && register_is_null(reg))

net/bpf/test_run.c

+1-4
Original file line numberDiff line numberDiff line change
@@ -660,12 +660,9 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
660660
void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
661661
void *data;
662662

663-
if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom)
663+
if (user_size < ETH_HLEN || user_size > PAGE_SIZE - headroom - tailroom)
664664
return ERR_PTR(-EINVAL);
665665

666-
if (user_size > size)
667-
return ERR_PTR(-EMSGSIZE);
668-
669666
size = SKB_DATA_ALIGN(size);
670667
data = kzalloc(size + headroom + tailroom, GFP_USER);
671668
if (!data)

net/core/bpf_sk_storage.c

+1-12
Original file line numberDiff line numberDiff line change
@@ -355,11 +355,6 @@ const struct bpf_func_proto bpf_sk_storage_delete_proto = {
355355

356356
static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
357357
{
358-
const struct btf *btf_vmlinux;
359-
const struct btf_type *t;
360-
const char *tname;
361-
u32 btf_id;
362-
363358
if (prog->aux->dst_prog)
364359
return false;
365360

@@ -374,13 +369,7 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
374369
return true;
375370
case BPF_TRACE_FENTRY:
376371
case BPF_TRACE_FEXIT:
377-
btf_vmlinux = bpf_get_btf_vmlinux();
378-
if (IS_ERR_OR_NULL(btf_vmlinux))
379-
return false;
380-
btf_id = prog->aux->attach_btf_id;
381-
t = btf_type_by_id(btf_vmlinux, btf_id);
382-
tname = btf_name_by_offset(btf_vmlinux, t->name_off);
383-
return !!strncmp(tname, "bpf_sk_storage",
372+
return !!strncmp(prog->aux->attach_func_name, "bpf_sk_storage",
384373
strlen("bpf_sk_storage"));
385374
default:
386375
return false;

net/core/skmsg.c

+7
Original file line numberDiff line numberDiff line change
@@ -549,6 +549,9 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
549549
return num_sge;
550550
}
551551

552+
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
553+
psock->ingress_bytes += len;
554+
#endif
552555
copied = len;
553556
msg->sg.start = 0;
554557
msg->sg.size = copied;
@@ -1144,6 +1147,10 @@ int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
11441147
if (!ret)
11451148
sk_psock_set_state(psock, SK_PSOCK_RX_STRP_ENABLED);
11461149

1150+
if (sk_is_tcp(sk)) {
1151+
psock->strp.cb.read_sock = tcp_bpf_strp_read_sock;
1152+
psock->copied_seq = tcp_sk(sk)->copied_seq;
1153+
}
11471154
return ret;
11481155
}
11491156

net/core/sock_map.c

+4-1
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,10 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk)
303303

304304
write_lock_bh(&sk->sk_callback_lock);
305305
if (stream_parser && stream_verdict && !psock->saved_data_ready) {
306-
ret = sk_psock_init_strp(sk, psock);
306+
if (sk_is_tcp(sk))
307+
ret = sk_psock_init_strp(sk, psock);
308+
else
309+
ret = -EOPNOTSUPP;
307310
if (ret) {
308311
write_unlock_bh(&sk->sk_callback_lock);
309312
sk_psock_put(sk, psock);

0 commit comments

Comments
 (0)