Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions api/v1/README.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions api/v1/tetragon/codegen/eventchecker/eventchecker.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

605 changes: 308 additions & 297 deletions api/v1/tetragon/tetragon.pb.go

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions api/v1/tetragon/tetragon.proto
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,7 @@ message KprobeArgument {
KprobeBpfProg bpf_prog_arg = 31;
}
string label = 18;
int32 resolve_err_depth = 32;
}

enum KprobeAction {
Expand Down
1 change: 1 addition & 0 deletions bpf/lib/generic.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ struct msg_generic_kprobe {
__u32 tid; // Thread ID that triggered the event
__u64 kernel_stack_id; // Kernel stack trace ID on u32 and potential error, see flag in msg_common.flags
__u64 user_stack_id; // User Stack trace ID
__s32 resolve_err_depth[MAX_POSSIBLE_ARGS];
/* anything above is shared with the userspace so it should match structs MsgGenericKprobe and MsgGenericTracepoint in Go */
char args[24000];
unsigned long a0, a1, a2, a3, a4;
Expand Down
35 changes: 26 additions & 9 deletions bpf/process/generic_calls.h
Original file line number Diff line number Diff line change
Expand Up @@ -396,28 +396,38 @@ extract_arg_depth(u32 i, struct extract_arg_data *data)
{
if (i >= MAX_BTF_ARG_DEPTH || !data->btf_config[i].is_initialized)
return 1;

*data->arg = *data->arg + data->btf_config[i].offset;
if (data->btf_config[i].is_pointer)
probe_read((void *)data->arg, sizeof(char *), (void *)*data->arg);

if (data->btf_config[i].is_pointer) {
if (probe_read((void *)data->arg, sizeof(char *), (void *)*data->arg) < 0) {
*data->resolve_err_depth = i + 1;
return 1;
}
}

return 0;
}

#ifdef __LARGE_BPF_PROG
FUNC_INLINE void extract_arg(struct event_config *config, int index, unsigned long *a)
FUNC_INLINE void extract_arg(struct event_config *config, int index, unsigned long *a,
__s32 *resolve_err_depth)
{
struct config_btf_arg *btf_config;

if (index >= EVENT_CONFIG_MAX_ARG)
return;

asm volatile("%[index] &= %1 ;\n"
: [index] "+r"(index)
: "i"(MAX_SELECTORS_MASK));

if (index >= EVENT_CONFIG_MAX_ARG)
return;

btf_config = config->btf_arg[index];
if (btf_config->is_initialized) {
struct extract_arg_data extract_data = {
.btf_config = btf_config,
.arg = a,
.resolve_err_depth = resolve_err_depth,
};
#ifndef __V61_BPF_PROG
#pragma unroll
Expand All @@ -431,7 +441,10 @@ FUNC_INLINE void extract_arg(struct event_config *config, int index, unsigned lo
}
}
#else
FUNC_INLINE void extract_arg(struct event_config *config, int index, unsigned long *a) {}
FUNC_INLINE void extract_arg(struct event_config *config, int index, unsigned long *a,
__s32 *resolve_err_depth)
{
}
#endif /* __LARGE_BPF_PROG */

FUNC_INLINE int arg_idx(int index)
Expand Down Expand Up @@ -565,9 +578,13 @@ FUNC_INLINE long generic_read_arg(void *ctx, int index, long off, struct bpf_map
ty = config->arg[index];
am = config->arm[index];

#ifdef __LARGE_BPF_PROG
e->resolve_err_depth[index] = 0;
#endif

#if defined(GENERIC_TRACEPOINT) || defined(GENERIC_USDT)
a = (&e->a0)[index];
extract_arg(config, index, &a);
extract_arg(config, index, &a, &e->resolve_err_depth[index]);
#else
arg_index = config->idx[index];
asm volatile("%[arg_index] &= %1 ;\n"
Expand All @@ -588,7 +605,7 @@ FUNC_INLINE long generic_read_arg(void *ctx, int index, long off, struct bpf_map
else
a = (&e->a0)[arg_index];

extract_arg(config, index, &a);
extract_arg(config, index, &a, &e->resolve_err_depth[index]);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so if we fail to resolve we still continue and store the 'unresolved data' which likely null, right? I think we can skip it

but how about we make this more generic and we start each argument data with '__u32 error' status and in case of failure we write just error != 0 as argument value.. in case of resolve failure we encode the depth into it

then on user space side the getArg would just read first uint32 from event reader to get the argument status
so all the extra retrieval of error depth from each probe type would not be needed, also probably the argument index will be clear, because you have the error for argument you are about to read, or skip reading if it's != 0

we could use this to store other errors that happen during argument encoding, which there are plenty

and into final event instead of the depth value, I'd add error string that in case of resolve failure would contain something like: "failed to resolve current->file->f_inode"

seems like this could save some cycles and prevent bogus arguments values, because IIUC if we bail from argument storing in the middle on kernel side the user side still tries to read the whole part

Copy link
Contributor Author

@andrewstrohman andrewstrohman Nov 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so if we fail to resolve we still continue and store the 'unresolved data' which likely null, right? I think we can skip it

I think "skip it" means skipping calling read_arg for the arg. This way, e->args isn't filled with the bogus value.

but how about we make this more generic and we start each argument data with '__u32 error' status and in case of failure we write just error != 0 as argument value.. in case of resolve failure we encode the depth into it

OK, so each argument value in e->args is proceeded by a u32 that holds the depth of failure, or 0 if no failure occurred. If the u32 is non-zero, no bogus arg value follows -- the next bytes in e->args are about the subsequent arg.

we could use this to store other errors that happen during argument encoding, which there are plenty

I think these other errors mean passing NULL pointers to read_arg() (which are problems that pre-dated resolve functionality). Is that what you're thinking here or something else?

and into final event instead of the depth value, I'd add error string that in case of resolve failure would contain something like: "failed to resolve current->file->f_inode"

Yes, I agree adding a string that shows where we failed to dererefernce, is better than just exposing the depth of the error. So, from an implementation perspective, this means instead of adding member int32 resolve_err_depth to KprobeArgument, I would add a string member/attribute to KprobeArgument that describes the error that happened when reading the arg value. If the string is empty, it means there was no error reading the arg. See below for another possible way to interpret this. Regardless, in userspace, I would use the resolve failure depth send from the bpf program to determine where the NULL pointer was and only expose that to the user.

seems like this could save some cycles and prevent bogus arguments values, because IIUC if we bail from argument storing in the middle on kernel side the user side still tries to read the whole part

In your suggested approach, I like that we don't send a bogus value from bpf to userspace. I like that the argument read outcome sits directly in front of the argument value in e->args, so that they are coupled and are less like to get mixed up.

What should getArg() return when it sees a non-zero u32 proceeding the value it wants to read? Should I create something like:

type MsgGenericKprobeArgFailed struct {
	Index         uint64
	Value      string
	Label         string
}

In this approach, there wouldn't be an error string associated with every KprobeArgument. The error string would only be associated with KprobeArgument_ArgFailed. I'm not sure if there would be a problem if the user expected a receive an KprobeArgument_UintArg, for example, in a specific position within the args list, but instead found type KprobeArgument_ArgFailed? If you don't see an issue with this way of communicating the failure, I think it's the cleanest method, because we don't need to fabricate a bogus arg value in userspace as a placeholder.

While I like not transmitting a bogus argument to userspace within e->args, and I like not mixing up the arg read outcomes, I am worried that this approach will require changes in bpf programs which will result in their instruction count exceeded 4k (on kernels that don't support __LARGE_BPF_PROG).

While experimenting with what I currently have, I found that in many code paths, I didn't have any instructions to spare for 4.19. I couldn't even check for an int32 being 0 without crossing the 4k boundary. In the current approach, I was able to avoid adding any new instructions in the mostly full programs by completely isolating the change from 4.19 with __LARGE_BPF_PROG. But with this proposal, I think I'll need to make 4.19 aware of the uint32 value that proceeds each arg, so I worry that I won't be able to make it fit. I'll keep a close eye on this and let you know if I hit this 4k instruction count problem and cannot get around it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think these other errors mean passing NULL pointers to read_arg() (which are problems that pre-dated resolve functionality). Is that what you're thinking here or something else?

I was thinking about it more like general approach to for read_arg plus for resolve status, but your point about 4.19 seems valid.. I guess we could start with just resolve error if the rest wont fit in.. perhaps there's a way to split read_arg into 2 tailcalls .. on the other hand it's just 4.19 issue, and I wonder how long this will be around

or we make it just for __LARGE_BPF_PROG which might mean bit uglier code for user space, but I guess we have to try first to see ;-)

I'm not sure if there would be a problem if the user expected a receive an KprobeArgument_UintArg, for example, in a specific position within the args list, but instead found type KprobeArgument_ArgFailed?

the KprobeArgument_ArgFailed makes sense to me, I think it's ok to find unexpected KprobeArgument_UintArg, because we were not able to read expected type anyway.. @kkourt any idea about that?


if (should_offload_path(ty))
return generic_path_offload(ctx, ty, a, index, off, tailcals);
Expand Down
5 changes: 5 additions & 0 deletions bpf/process/types/basic.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ struct config_reg_arg {
struct extract_arg_data {
struct config_btf_arg *btf_config;
unsigned long *arg;
__s32 *resolve_err_depth;
};

#define MAX_BTF_ARG_DEPTH 10
Expand Down Expand Up @@ -2033,6 +2034,10 @@ selector_arg_offset(__u8 *f, struct msg_generic_kprobe *e, __u32 selidx,
if (index > 5)
return 0;

#ifdef __LARGE_BPF_PROG
if (e->resolve_err_depth[index])
return 0;
#endif
args = get_arg(e, index);
switch (filter->type) {
case fd_ty:
Expand Down
2 changes: 2 additions & 0 deletions contrib/tester-progs/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,5 @@ regs-override
uretprobe
uprobe-resolve
uprobe-resolve.btf
uprobe-null
uprobe-null.btf
10 changes: 9 additions & 1 deletion contrib/tester-progs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ PROGS = sigkill-tester \
usdt-override \
usdt-resolve \
uretprobe \
uprobe-resolve
uprobe-resolve \
uprobe-null

PROGS += $(PROGS_ARCH)

Expand Down Expand Up @@ -120,6 +121,13 @@ uprobe-resolve: uprobe-resolve.c
llvm-objcopy -I binary -O elf64-x86-64 --rename-section .data=.BTF [email protected] [email protected]
rm [email protected]

uprobe-null: uprobe-null.c
$(GCC) -ggdb3 -O2 -Wall $< -o $@
pahole -J $@
llvm-objcopy --dump-section [email protected] $@
llvm-objcopy -I binary -O elf64-x86-64 --rename-section .data=.BTF [email protected] [email protected]
rm [email protected]

ifeq ($(shell uname -m),x86_64)
regs-override: regs-override.c
$(GCC) -Wall -fcf-protection=none $< -o $@
Expand Down
62 changes: 62 additions & 0 deletions contrib/tester-progs/uprobe-null.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
//go:build ignore

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include <stdbool.h>

struct mysubstruct {
int32_t val;
};

struct mystruct {
struct mysubstruct *subp;
};

void usage(char *argv0)
{
fprintf(stderr, "Usage: %s <type>\n", argv0);
fprintf(stderr, "type can be one of: first, middle, nonull\n");
}

// without noinline, the symbol is found, but no event fires
__attribute__((noinline)) int func(struct mystruct *ms) {
if (!ms)
return -1;

if (!ms->subp)
return -1;

printf("%d\n", ms->subp->val);
return 0;
}

int main(int argc, char *argv[])
{

if (argc < 2) {
usage(argv[0]);
exit(1);
}

char *type = argv[1];

if (!strcmp(type, "first")) {
func(NULL);
} else if (!strcmp(type, "middle")) {
struct mystruct ms;
ms.subp = NULL;
func(&ms);
} else if (!strcmp(type, "nonull")) {
struct mystruct ms;
struct mysubstruct mss;
ms.subp = &mss;
mss.val = 0;
func(&ms);
} else {
usage(argv[0]);
exit(1);
}
exit(0);
}
Loading
Loading