Skip to content

Commit d7bf3a4

Browse files
committed
Handle ARM syscalls correctly and update test output
Also added custom `.clang-format` and reformatted plugin.cpp
1 parent b4d5f06 commit d7bf3a4

File tree

3 files changed

+155
-141
lines changed

3 files changed

+155
-141
lines changed

Diff for: .clang-format

+4
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,4 @@
1+
BasedOnStyle: Google
2+
IndentWidth: 4
3+
Language: Cpp
4+
ColumnLimit: 100

Diff for: plugin.cpp

+151-139
Original file line number | Diff line number | Diff line change
@@ -2,19 +2,31 @@ extern "C" {
22
#include "qemu/qemu-plugin.h"
33
}
44

5+
#include <string.h>
6+
57
#include <algorithm>
68
#include <fstream>
79
#include <iostream>
810
#include <optional>
911
#include <vector>
1012

13+
// Syscall numbers taken from
14+
// https://chromium.googlesource.com/chromiumos/docs/+/HEAD/constants/syscalls.md
15+
#define ARM32_MMAP2 192
16+
#define ARM32_OPENAT 322
17+
#define X86_64_MMAP 9
18+
#define X86_64_OPENAT 257
19+
20+
static int64_t MMAP = 0;
21+
static int64_t OPENAT = 0;
22+
1123
QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
1224

1325
using namespace std;
1426

1527
typedef struct addr_range {
16-
uint64_t start_addr;
17-
uint64_t end_addr;
28+
uint64_t start_addr;
29+
uint64_t end_addr;
1830
} addr_range;
1931

2032
typedef struct shared_obj {
@@ -46,9 +58,9 @@ static ofstream outfile;
4658
// Get the addresses of the first and last bytes of the last instruction in a
4759
// block
4860
static uint64_t tb_last_insn_vaddr(struct qemu_plugin_tb *tb) {
49-
uint64_t last_idx = qemu_plugin_tb_n_insns(tb) - 1;
50-
struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, last_idx);
51-
return qemu_plugin_insn_vaddr(insn);
61+
uint64_t last_idx = qemu_plugin_tb_n_insns(tb) - 1;
62+
struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, last_idx);
63+
return qemu_plugin_insn_vaddr(insn);
5264
}
5365

5466
static uint64_t elf_image_bias(uint64_t vaddr) {
@@ -59,7 +71,7 @@ static uint64_t elf_image_bias(uint64_t vaddr) {
5971
if (get_interp_load_bias() <= vaddr) {
6072
potential_load_biases.push_back(get_interp_load_bias());
6173
}
62-
for (auto& sec : sections) {
74+
for (auto &sec : sections) {
6375
if (sec.load_bias <= vaddr) {
6476
potential_load_biases.push_back(sec.load_bias);
6577
}
@@ -77,166 +89,166 @@ static size_t find_section(uint64_t bias) {
7789
return SIZE_MAX;
7890
}
7991

80-
static bool dynamically_linked() {
81-
return get_interp_load_bias() != 0;
82-
}
92+
static bool dynamically_linked() { return get_interp_load_bias() != 0; }
8393

8494
// Write the destination of an indirect jump/call to the output file
8595
static void mark_indirect_branch(uint64_t callsite, uint64_t dst) {
86-
uint64_t dst_image_bias = elf_image_bias(dst);
87-
uint64_t dst_image_offset;
88-
const char *so_name;
89-
if (dst_image_bias == get_load_bias()) {
90-
dst_image_offset = 0;
91-
so_name = "binary";
92-
} else if (dst_image_bias == get_interp_load_bias()) {
93-
dst_image_offset = 0;
94-
so_name = "interpreter";
95-
} else {
96-
size_t idx = find_section(dst_image_bias);
97-
mapped_section sec = sections[idx];
98-
dst_image_offset = sec.image_offset;
99-
so_name = sec.so_name;
100-
}
101-
102-
if (dynamically_linked()) {
103-
dst -= dst_image_bias - dst_image_offset;
104-
callsite -= get_load_bias();
105-
}
106-
107-
outfile << "0x" << hex << callsite << ",0x" << hex << dst << "," << so_name << endl;
96+
uint64_t dst_image_bias = elf_image_bias(dst);
97+
uint64_t dst_image_offset;
98+
const char *so_name;
99+
if (dst_image_bias == get_load_bias()) {
100+
dst_image_offset = 0;
101+
so_name = "binary";
102+
} else if (dst_image_bias == get_interp_load_bias()) {
103+
dst_image_offset = 0;
104+
so_name = "interpreter";
105+
} else {
106+
size_t idx = find_section(dst_image_bias);
107+
mapped_section sec = sections[idx];
108+
dst_image_offset = sec.image_offset;
109+
so_name = sec.so_name;
110+
}
111+
112+
if (dynamically_linked()) {
113+
dst -= dst_image_bias - dst_image_offset;
114+
callsite -= get_load_bias();
115+
}
116+
117+
outfile << "0x" << hex << callsite << ",0x" << hex << dst << "," << so_name << endl;
108118
}
109119

110120
// The default callback for when a block is executed
111121
static void block_exec_handler(unsigned int vcpu_idx, void *start) {
112-
uint64_t start_vaddr = (uint64_t)start;
113-
if (indirect_taken.has_value()) {
114-
mark_indirect_branch(indirect_taken.value(), start_vaddr);
115-
indirect_taken = {};
116-
}
122+
uint64_t start_vaddr = (uint64_t)start;
123+
if (indirect_taken.has_value()) {
124+
mark_indirect_branch(indirect_taken.value(), start_vaddr);
125+
indirect_taken = {};
126+
}
117127
}
118128

119129
// Callback for executing blocks ending in an indirect jump/call
120130
static void indirect_block_exec_handler(unsigned int vcpu_idx, void *tb_idx) {
121-
addr_range block_addr = indirect_blocks[(size_t)tb_idx];
131+
addr_range block_addr = indirect_blocks[(size_t)tb_idx];
122132

123-
// Check if the previous block ended in an indirect jump/call
124-
if (indirect_taken.has_value()) {
125-
mark_indirect_branch(indirect_taken.value(), block_addr.start_addr);
126-
}
133+
// Check if the previous block ended in an indirect jump/call
134+
if (indirect_taken.has_value()) {
135+
mark_indirect_branch(indirect_taken.value(), block_addr.start_addr);
136+
}
127137

128-
indirect_taken = block_addr.end_addr;
138+
indirect_taken = block_addr.end_addr;
129139
}
130140

131141
// Register a callback for each time a block is executed
132-
static void block_trans_handler(qemu_plugin_id_t id,
133-
struct qemu_plugin_tb *tb) {
134-
static uint64_t start_vaddr;
135-
start_vaddr = qemu_plugin_tb_vaddr(tb);
136-
uint64_t last_insn = tb_last_insn_vaddr(tb);
137-
uint64_t bias = 0;
138-
139-
// If an interpreter was loaded, add the binary bias to the input callsites
140-
if (dynamically_linked()) {
141-
bias = get_load_bias();
142-
}
143-
144-
for (uint64_t &addr : callsites) {
145-
if (last_insn == (addr + bias)) {
146-
indirect_blocks.push_back({
147-
.start_addr = start_vaddr,
148-
.end_addr = last_insn,
149-
});
150-
qemu_plugin_register_vcpu_tb_exec_cb(tb, indirect_block_exec_handler,
151-
QEMU_PLUGIN_CB_NO_REGS,
152-
(void *)indirect_tb_idx++);
153-
return;
154-
}
155-
}
156-
qemu_plugin_register_vcpu_tb_exec_cb(
157-
tb, block_exec_handler, QEMU_PLUGIN_CB_NO_REGS, (void *)start_vaddr);
158-
}
142+
static void block_trans_handler(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) {
143+
static uint64_t start_vaddr;
144+
start_vaddr = qemu_plugin_tb_vaddr(tb);
145+
uint64_t last_insn = tb_last_insn_vaddr(tb);
146+
uint64_t bias = 0;
147+
148+
// If an interpreter was loaded, add the binary bias to the input callsites
149+
if (dynamically_linked()) {
150+
bias = get_load_bias();
151+
}
159152

160-
static void syscall_handler(qemu_plugin_id_t id, unsigned int vcpu_index,
161-
int64_t num, uint64_t a1, uint64_t a2,
162-
uint64_t a3, uint64_t a4, uint64_t a5,
163-
uint64_t a6, uint64_t a7, uint64_t a8) {
164-
switch (num) {
165-
// TODO: What header(s) define syscall numbers? It'd be nice to replace these magic numbers
166-
// Map a shared object file name to a `mapped_section` when entering an mmap syscall
167-
case 9: {
168-
int fd = (int)a5;
169-
uint64_t load_bias = a1;
170-
uint64_t image_offset = a6;
171-
172-
auto matching_fd = [&](shared_obj so) { return so.fd == fd; };
173-
// file descriptors can be reused so search for the /last/ occurrence of an opened file with a file descriptor matching the mmap call
174-
auto so = find_if(shared_objects.rbegin(), shared_objects.rend(), matching_fd);
175-
if (so != shared_objects.rend()) {
176-
mapped_section sec = {
177-
.load_bias = load_bias,
178-
.image_offset = image_offset,
179-
.so_name = so->filename,
180-
};
181-
sections.push_back(sec);
182-
}
183-
break;
153+
for (uint64_t &addr : callsites) {
154+
if (last_insn == (addr + bias)) {
155+
indirect_blocks.push_back({
156+
.start_addr = start_vaddr,
157+
.end_addr = last_insn,
158+
});
159+
qemu_plugin_register_vcpu_tb_exec_cb(tb, indirect_block_exec_handler,
160+
QEMU_PLUGIN_CB_NO_REGS, (void *)indirect_tb_idx++);
161+
return;
184162
}
185-
// TODO: Is the open syscall also used to open shared objects?
186-
// openat
187-
case 257: {
188-
// Store the file name passed to the openat syscall
189-
shared_obj lib = {
190-
.filename = (char *)a2,
191-
.fd = -1,
163+
}
164+
qemu_plugin_register_vcpu_tb_exec_cb(tb, block_exec_handler, QEMU_PLUGIN_CB_NO_REGS,
165+
(void *)start_vaddr);
166+
}
167+
168+
static void syscall_handler(qemu_plugin_id_t id, unsigned int vcpu_index, int64_t num, uint64_t a1,
169+
uint64_t a2, uint64_t a3, uint64_t a4, uint64_t a5, uint64_t a6,
170+
uint64_t a7, uint64_t a8) {
171+
if (num == MMAP) {
172+
// Map a shared object file name to a `mapped_section` when entering an
173+
// mmap syscall
174+
int fd = (int)a5;
175+
uint64_t load_bias = a1;
176+
uint64_t image_offset = a6;
177+
178+
auto matching_fd = [&](shared_obj so) { return so.fd == fd; };
179+
// file descriptors can be reused so search for the /last/ occurrence of
180+
// an opened file with a file descriptor matching the mmap call
181+
auto so = find_if(shared_objects.rbegin(), shared_objects.rend(), matching_fd);
182+
if (so != shared_objects.rend()) {
183+
mapped_section sec = {
184+
.load_bias = load_bias,
185+
.image_offset = image_offset,
186+
.so_name = so->filename,
192187
};
193-
shared_objects.push_back(lib);
194-
break;
195-
}
196-
default: {
197-
break;
188+
sections.push_back(sec);
198189
}
190+
} else if (num == OPENAT) {
191+
// TODO: Is the open syscall also used to open shared objects?
192+
// Store the file name passed to the openat syscall
193+
shared_obj lib = {
194+
.filename = (char *)a2,
195+
.fd = -1,
196+
};
197+
shared_objects.push_back(lib);
199198
}
200199
}
201200

202-
static void syscall_ret_handler(qemu_plugin_id_t id, unsigned int vcpu_idx, int64_t num, int64_t ret) {
201+
static void syscall_ret_handler(qemu_plugin_id_t id, unsigned int vcpu_idx, int64_t num,
202+
int64_t ret) {
203203
// If the openat syscall returned a valid file descriptor
204-
if ((num == 257) && (ret != -1)) {
204+
if ((num == OPENAT) && (ret != -1)) {
205205
// Store the file descriptor returned by the syscall
206206
shared_objects.back().fd = ret;
207207
}
208208
}
209209

210-
extern int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
211-
int argc, char **argv) {
212-
if (argc < 2) {
213-
cout << "Usage: /path/to/qemu \\\n";
214-
cout << "\t-plugin "
215-
"/path/to/"
216-
"libibresolver.so,arg=\"callsites.txt\",arg=\"output.csv\" \\\n";
217-
cout << "\t$BINARY" << endl;
218-
return -1;
219-
}
220-
221-
fstream input(argv[0]);
222-
outfile = ofstream(argv[1]);
223-
if (input.fail()) {
224-
cout << "Could not open file " << argv[0] << endl;
225-
return -2;
226-
}
227-
uint64_t addr;
228-
while (input >> hex >> addr) {
229-
callsites.push_back(addr);
230-
}
231-
cout << "Loaded input file with " << callsites.size() << " indirect callsites"
232-
<< endl;
233-
outfile << "callsite,destination offset,destination image" << endl;
234-
// Register a callback for each time a block is translated
235-
qemu_plugin_register_vcpu_tb_trans_cb(id, block_trans_handler);
236-
237-
// Register callbacks for entering and returning from syscalls
238-
// This is used to determine the load biases and image offsets for dynamically linked shared objects
239-
qemu_plugin_register_vcpu_syscall_cb(id, syscall_handler);
240-
qemu_plugin_register_vcpu_syscall_ret_cb(id, syscall_ret_handler);
241-
return 0;
210+
extern int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info, int argc,
211+
char **argv) {
212+
if (argc < 2) {
213+
cout << "Usage: /path/to/qemu \\\n";
214+
cout << "\t-plugin "
215+
"/path/to/"
216+
"libibresolver.so,arg=\"callsites.txt\",arg=\"output.csv\" \\\n";
217+
cout << "\t$BINARY" << endl;
218+
return -1;
219+
}
220+
221+
fstream input(argv[0]);
222+
outfile = ofstream(argv[1]);
223+
if (input.fail()) {
224+
cout << "Could not open file " << argv[0] << endl;
225+
return -2;
226+
}
227+
228+
if (!strcmp(info->target_name, "x86_64")) {
229+
MMAP = X86_64_MMAP;
230+
OPENAT = X86_64_OPENAT;
231+
} else if (!strcmp(info->target_name, "arm")) {
232+
MMAP = ARM32_MMAP2;
233+
OPENAT = ARM32_OPENAT;
234+
} else {
235+
cout << "Unsupported architecture " << info->target_name << endl;
236+
return -3;
237+
}
238+
239+
uint64_t addr;
240+
while (input >> hex >> addr) {
241+
callsites.push_back(addr);
242+
}
243+
cout << "Loaded input file with " << callsites.size() << " indirect callsites" << endl;
244+
outfile << "callsite,destination offset,destination image" << endl;
245+
// Register a callback for each time a block is translated
246+
qemu_plugin_register_vcpu_tb_trans_cb(id, block_trans_handler);
247+
248+
// Register callbacks for entering and returning from syscalls
249+
// This is used to determine the load biases and image offsets for
250+
// dynamically linked shared objects
251+
qemu_plugin_register_vcpu_syscall_cb(id, syscall_handler);
252+
qemu_plugin_register_vcpu_syscall_ret_cb(id, syscall_ret_handler);
253+
return 0;
242254
}

Diff for: tests/arm32/fn_ptr-static.csv

-2
Original file line number | Diff line number | Diff line change
@@ -27,8 +27,6 @@ callsite,destination offset,destination image
2727
0x50654,0x50658,binary
2828
0x50654,0x50658,binary
2929
0x50654,0x50658,binary
30-
0x50654,0x50658,binary
31-
0x50654,0x50658,binary
3230
0x506f4,0x32268,binary
3331
0x108b4,0x10dd8,binary
3432
0x1048c,0x10160,binary

0 commit comments

Comments
 (0)