Skip to content

Commit 5031f90

Browse files
fix stacked vmlinux struct parsing issue
1 parent 95a6240 commit 5031f90

File tree

4 files changed

+137
-45
lines changed

4 files changed

+137
-45
lines changed

pythonbpf/vmlinux_parser/class_handler.py

Lines changed: 67 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,37 @@ def get_module_symbols(module_name: str):
1616
return [name for name in dir(imported_module)], imported_module
1717

1818

19+
def unwrap_pointer_type(type_obj: Any) -> Any:
    """
    Peel every ``_type_`` wrapper off a type and return what is underneath.

    A type such as LP_LP_struct_attribute_group carries its pointee in the
    ``_type_`` attribute; following that attribute repeatedly yields the
    innermost type (struct_attribute_group). Any object exposing ``_type_``
    is followed, so array-style wrappers are peeled the same way.

    Peeling ends at the first object that either has no ``_type_`` attribute
    or whose ``_type_`` is a string (e.g. 'c' for c_char); that object is
    returned unchanged.

    Args:
        type_obj: The (possibly wrapped) type object to inspect.

    Returns:
        The innermost type reached by following ``_type_``.
    """
    # Private sentinel so a missing attribute is told apart from any real value.
    missing = object()

    base = type_obj
    inner = getattr(base, "_type_", missing)
    # A string ``_type_`` is a basic-type code, not another layer to descend into.
    while inner is not missing and not isinstance(inner, str):
        base = inner
        inner = getattr(base, "_type_", missing)
    return base
44+
45+
1946
def process_vmlinux_class(
20-
node,
21-
llvm_module,
22-
handler: DependencyHandler,
47+
node,
48+
llvm_module,
49+
handler: DependencyHandler,
2350
):
2451
symbols_in_module, imported_module = get_module_symbols("vmlinux")
2552
if node.name in symbols_in_module:
@@ -30,10 +57,10 @@ def process_vmlinux_class(
3057

3158

3259
def process_vmlinux_post_ast(
33-
elem_type_class,
34-
llvm_handler,
35-
handler: DependencyHandler,
36-
processing_stack=None,
60+
elem_type_class,
61+
llvm_handler,
62+
handler: DependencyHandler,
63+
processing_stack=None,
3764
):
3865
# Initialize processing stack on first call
3966
if processing_stack is None:
@@ -113,7 +140,7 @@ def process_vmlinux_post_ast(
113140

114141
# Process pointer to ctype
115142
if isinstance(elem_type, type) and issubclass(
116-
elem_type, ctypes._Pointer
143+
elem_type, ctypes._Pointer
117144
):
118145
# Get the pointed-to type
119146
pointed_type = elem_type._type_
@@ -126,7 +153,7 @@ def process_vmlinux_post_ast(
126153

127154
# Process function pointers (CFUNCTYPE)
128155
elif hasattr(elem_type, "_restype_") and hasattr(
129-
elem_type, "_argtypes_"
156+
elem_type, "_argtypes_"
130157
):
131158
# This is a CFUNCTYPE or similar
132159
logger.info(
@@ -158,13 +185,19 @@ def process_vmlinux_post_ast(
158185
if hasattr(elem_type, "_length_") and is_complex_type:
159186
type_length = elem_type._length_
160187

161-
if containing_type.__module__ == "vmlinux":
162-
new_dep_node.add_dependent(
163-
elem_type._type_.__name__
164-
if hasattr(elem_type._type_, "__name__")
165-
else str(elem_type._type_)
188+
# Unwrap all pointer layers to get the base type for dependency tracking
189+
base_type = unwrap_pointer_type(elem_type)
190+
base_type_module = getattr(base_type, "__module__", None)
191+
192+
if base_type_module == "vmlinux":
193+
base_type_name = (
194+
base_type.__name__
195+
if hasattr(base_type, "__name__")
196+
else str(base_type)
166197
)
167-
elif containing_type.__module__ == ctypes.__name__:
198+
new_dep_node.add_dependent(base_type_name)
199+
elif base_type_module == ctypes.__name__ or base_type_module is None:
200+
# Handle ctypes or types with no module (like some internal ctypes types)
168201
if isinstance(elem_type, type):
169202
if issubclass(elem_type, ctypes.Array):
170203
ctype_complex_type = ctypes.Array
@@ -178,7 +211,7 @@ def process_vmlinux_post_ast(
178211
raise TypeError("Unsupported ctypes subclass")
179212
else:
180213
raise ImportError(
181-
f"Unsupported module of {containing_type}"
214+
f"Unsupported module of {base_type}: {base_type_module}"
182215
)
183216
logger.debug(
184217
f"{containing_type} containing type of parent {elem_name} with {elem_type} and ctype {ctype_complex_type} and length {type_length}"
@@ -191,11 +224,16 @@ def process_vmlinux_post_ast(
191224
elem_name, ctype_complex_type
192225
)
193226
new_dep_node.set_field_type(elem_name, elem_type)
194-
if containing_type.__module__ == "vmlinux":
227+
228+
# Check the containing_type module to decide whether to recurse
229+
containing_type_module = getattr(containing_type, "__module__", None)
230+
if containing_type_module == "vmlinux":
231+
# Also unwrap containing_type to get base type name
232+
base_containing_type = unwrap_pointer_type(containing_type)
195233
containing_type_name = (
196-
containing_type.__name__
197-
if hasattr(containing_type, "__name__")
198-
else str(containing_type)
234+
base_containing_type.__name__
235+
if hasattr(base_containing_type, "__name__")
236+
else str(base_containing_type)
199237
)
200238

201239
# Check for self-reference or already processed
@@ -212,21 +250,21 @@ def process_vmlinux_post_ast(
212250
)
213251
new_dep_node.set_field_ready(elem_name, True)
214252
else:
215-
# Process recursively - THIS WAS MISSING
253+
# Process recursively - use base containing type, not the pointer wrapper
216254
new_dep_node.add_dependent(containing_type_name)
217255
process_vmlinux_post_ast(
218-
containing_type,
256+
base_containing_type,
219257
llvm_handler,
220258
handler,
221259
processing_stack,
222260
)
223261
new_dep_node.set_field_ready(elem_name, True)
224-
elif containing_type.__module__ == ctypes.__name__:
262+
elif containing_type_module == ctypes.__name__ or containing_type_module is None:
225263
logger.debug(f"Processing ctype internal{containing_type}")
226264
new_dep_node.set_field_ready(elem_name, True)
227265
else:
228266
raise TypeError(
229-
"Module not supported in recursive resolution"
267+
f"Module not supported in recursive resolution: {containing_type_module}"
230268
)
231269
else:
232270
new_dep_node.add_dependent(
@@ -245,9 +283,12 @@ def process_vmlinux_post_ast(
245283
raise ValueError(
246284
f"{elem_name} with type {elem_type} from module {module_name} not supported in recursive resolver"
247285
)
248-
286+
elif module_name == ctypes.__name__ or module_name is None:
287+
# Handle ctypes types - these don't need processing, just return
288+
logger.debug(f"Skipping ctypes type {current_symbol_name}")
289+
return True
249290
else:
250-
raise ImportError("UNSUPPORTED Module")
291+
raise ImportError(f"UNSUPPORTED Module {module_name}")
251292

252293
logger.info(
253294
f"{current_symbol_name} processed and handler readiness {handler.is_ready}"

pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,14 @@ def debug_info_generation(
4646

4747
if struct.name.startswith("struct_"):
4848
struct_name = struct.name.removeprefix("struct_")
49+
# Create struct type with all members
50+
struct_type = generator.create_struct_type_with_name(
51+
struct_name, members, struct.__sizeof__() * 8, is_distinct=True
52+
)
4953
else:
50-
raise ValueError("Unions are not supported in the current version")
51-
# Create struct type with all members
52-
struct_type = generator.create_struct_type_with_name(
53-
struct_name, members, struct.__sizeof__() * 8, is_distinct=True
54-
)
55-
54+
logger.warning("Blindly handling Unions present in vmlinux dependencies")
55+
struct_type = None
56+
# raise ValueError("Unions are not supported in the current version")
5657
return struct_type
5758

5859

@@ -62,7 +63,7 @@ def _get_field_debug_type(
6263
generator: DebugInfoGenerator,
6364
parent_struct: DependencyNode,
6465
generated_debug_info: List[Tuple[DependencyNode, Any]],
65-
) -> tuple[Any, int]:
66+
) -> tuple[Any, int] | None:
6667
"""
6768
Determine the appropriate debug type for a field based on its Python/ctypes type.
6869
@@ -78,7 +79,11 @@ def _get_field_debug_type(
7879
"""
7980
# Handle complex types (arrays, pointers)
8081
if field.ctype_complex_type is not None:
81-
if issubclass(field.ctype_complex_type, ctypes.Array):
82+
#TODO: Check if this is a CFUNCTYPE (function pointer), but sadly it just checks callable for now
83+
if callable(field.ctype_complex_type):
84+
# Handle function pointer types, create a void pointer as a placeholder
85+
return generator.create_pointer_type(None), 64
86+
elif issubclass(field.ctype_complex_type, ctypes.Array):
8287
# Handle array types
8388
element_type, base_type_size = _get_basic_debug_type(
8489
field.containing_type, generator

pythonbpf/vmlinux_parser/ir_gen/ir_generation.py

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@
1111

1212

1313
class IRGenerator:
14+
# This field keeps track of the non_struct names to avoid duplicate name errors.
15+
type_number = 0
16+
unprocessed_store = []
1417
# get the assignments dict and add this stuff to it.
1518
def __init__(self, llvm_module, handler: DependencyHandler, assignments):
1619
self.llvm_module = llvm_module
@@ -68,6 +71,7 @@ def struct_processor(self, struct, processing_stack=None):
6871
dep_node_from_dependency, processing_stack
6972
)
7073
else:
74+
print(struct)
7175
raise RuntimeError(
7276
f"Warning: Dependency {dependency} not found in handler"
7377
)
@@ -129,15 +133,28 @@ def gen_ir(self, struct, generated_debug_info):
129133

130134
for field_name, field in struct.fields.items():
131135
# does not take arrays and similar types into consideration yet.
132-
if field.ctype_complex_type is not None and issubclass(
136+
if callable(field.ctype_complex_type):
137+
# Function pointer case - generate a simple field accessor
138+
field_co_re_name, returned = self._struct_name_generator(
139+
struct, field, field_index
140+
)
141+
print(field_co_re_name)
142+
field_index += 1
143+
globvar = ir.GlobalVariable(
144+
self.llvm_module, ir.IntType(64), name=field_co_re_name
145+
)
146+
globvar.linkage = "external"
147+
globvar.set_metadata("llvm.preserve.access.index", debug_info)
148+
self.generated_field_names[struct.name][field_name] = globvar
149+
elif field.ctype_complex_type is not None and issubclass(
133150
field.ctype_complex_type, ctypes.Array
134151
):
135152
array_size = field.type_size
136153
containing_type = field.containing_type
137154
if containing_type.__module__ == ctypes.__name__:
138155
containing_type_size = ctypes.sizeof(containing_type)
139156
if array_size == 0:
140-
field_co_re_name = self._struct_name_generator(
157+
field_co_re_name, returned = self._struct_name_generator(
141158
struct, field, field_index, True, 0, containing_type_size
142159
)
143160
globvar = ir.GlobalVariable(
@@ -149,7 +166,7 @@ def gen_ir(self, struct, generated_debug_info):
149166
field_index += 1
150167
continue
151168
for i in range(0, array_size):
152-
field_co_re_name = self._struct_name_generator(
169+
field_co_re_name, returned = self._struct_name_generator(
153170
struct, field, field_index, True, i, containing_type_size
154171
)
155172
globvar = ir.GlobalVariable(
@@ -163,11 +180,12 @@ def gen_ir(self, struct, generated_debug_info):
163180
array_size = field.type_size
164181
containing_type = field.containing_type
165182
if containing_type.__module__ == "vmlinux":
183+
print(struct)
166184
containing_type_size = self.handler[
167185
containing_type.__name__
168186
].current_offset
169187
for i in range(0, array_size):
170-
field_co_re_name = self._struct_name_generator(
188+
field_co_re_name, returned = self._struct_name_generator(
171189
struct, field, field_index, True, i, containing_type_size
172190
)
173191
globvar = ir.GlobalVariable(
@@ -178,7 +196,7 @@ def gen_ir(self, struct, generated_debug_info):
178196
self.generated_field_names[struct.name][field_name] = globvar
179197
field_index += 1
180198
else:
181-
field_co_re_name = self._struct_name_generator(
199+
field_co_re_name, returned = self._struct_name_generator(
182200
struct, field, field_index
183201
)
184202
field_index += 1
@@ -198,7 +216,7 @@ def _struct_name_generator(
198216
is_indexed: bool = False,
199217
index: int = 0,
200218
containing_type_size: int = 0,
201-
) -> str:
219+
) -> tuple[str, bool]:
202220
# TODO: Does not support Unions as well as recursive pointer and array type naming
203221
if is_indexed:
204222
name = (
@@ -208,7 +226,7 @@ def _struct_name_generator(
208226
+ "$"
209227
+ f"0:{field_index}:{index}"
210228
)
211-
return name
229+
return name, True
212230
elif struct.name.startswith("struct_"):
213231
name = (
214232
"llvm."
@@ -217,9 +235,18 @@ def _struct_name_generator(
217235
+ "$"
218236
+ f"0:{field_index}"
219237
)
220-
return name
238+
return name, True
221239
else:
222-
print(self.handler[struct.name])
223-
raise TypeError(
224-
"Name generation cannot occur due to type name not starting with struct"
240+
logger.warning(
241+
"Blindly handling non-struct type to avoid type errors in vmlinux IR generation. Possibly a union."
225242
)
243+
self.type_number += 1
244+
unprocessed_type = "unprocessed_type_" + str(self.handler[struct.name].name)
245+
if self.unprocessed_store.__contains__(unprocessed_type):
246+
return unprocessed_type + "_" + str(self.type_number), False
247+
else:
248+
self.unprocessed_store.append(unprocessed_type)
249+
return unprocessed_type, False
250+
# raise TypeError(
251+
# "Name generation cannot occur due to type name not starting with struct"
252+
# )
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from vmlinux import struct_kobj_type
2+
from pythonbpf import bpf, section, bpfglobal, compile_to_ir
3+
import logging
4+
from ctypes import c_void_p
5+
6+
7+
# Sample program used to exercise the compiler: takes a c_void_p context
# (unused in the body) and prints a fixed message.
# NOTE(review): the section string suggests a kprobe on blk_mq_start_request —
# confirm against pythonbpf's `section` decorator semantics.
@bpf
@section("kprobe/blk_mq_start_request")
def example(ctx: c_void_p):
    # The f-string has no placeholders; the message text is kept verbatim.
    print(f"data lengt")
11+
12+
13+
# Returns the literal string "GPL".
# NOTE(review): presumably declares the program's license as a BPF global via
# @bpfglobal — confirm against pythonbpf's decorator documentation.
@bpf
@bpfglobal
def LICENSE() -> str:
    return "GPL"
17+
18+
19+
# Run the compiler at import time with INFO-level logging.
# NOTE(review): the two positional strings are presumably the input source path
# and the output LLVM IR path — confirm against compile_to_ir's signature.
compile_to_ir("requests.py", "requests.ll", loglevel=logging.INFO)

0 commit comments

Comments
 (0)