@@ -2,19 +2,31 @@ extern "C" {
2
2
#include " qemu/qemu-plugin.h"
3
3
}
4
4
5
+ #include < string.h>
6
+
5
7
#include < algorithm>
6
8
#include < fstream>
7
9
#include < iostream>
8
10
#include < optional>
9
11
#include < vector>
10
12
13
// Guest syscall numbers for the architectures this plugin supports.
// Values taken from
// https://chromium.googlesource.com/chromiumos/docs/+/HEAD/constants/syscalls.md
#define ARM32_MMAP2 192
#define ARM32_OPENAT 322
#define X86_64_MMAP 9
#define X86_64_OPENAT 257

// Syscall numbers in effect for the current guest. Both start at 0 and are
// expected to be assigned from the per-architecture constants above before
// any syscall callback compares against them.
static int64_t MMAP = 0;
static int64_t OPENAT = 0;
22
+
11
23
QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
12
24
13
25
using namespace std ;
14
26
15
27
// A pair of guest virtual addresses describing one translated block.
typedef struct addr_range {
    // Address the block starts at
    uint64_t start_addr;
    // Address of the block's last instruction
    uint64_t end_addr;
} addr_range;
19
31
20
32
typedef struct shared_obj {
@@ -46,9 +58,9 @@ static ofstream outfile;
46
58
// Get the addresses of the first and last bytes of the last instruction in a
47
59
// block
48
60
static uint64_t tb_last_insn_vaddr (struct qemu_plugin_tb *tb) {
49
- uint64_t last_idx = qemu_plugin_tb_n_insns (tb) - 1 ;
50
- struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn (tb, last_idx);
51
- return qemu_plugin_insn_vaddr (insn);
61
+ uint64_t last_idx = qemu_plugin_tb_n_insns (tb) - 1 ;
62
+ struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn (tb, last_idx);
63
+ return qemu_plugin_insn_vaddr (insn);
52
64
}
53
65
54
66
static uint64_t elf_image_bias (uint64_t vaddr) {
@@ -59,7 +71,7 @@ static uint64_t elf_image_bias(uint64_t vaddr) {
59
71
if (get_interp_load_bias () <= vaddr) {
60
72
potential_load_biases.push_back (get_interp_load_bias ());
61
73
}
62
- for (auto & sec : sections) {
74
+ for (auto & sec : sections) {
63
75
if (sec.load_bias <= vaddr) {
64
76
potential_load_biases.push_back (sec.load_bias );
65
77
}
@@ -77,166 +89,166 @@ static size_t find_section(uint64_t bias) {
77
89
return SIZE_MAX;
78
90
}
79
91
80
- static bool dynamically_linked () {
81
- return get_interp_load_bias () != 0 ;
82
- }
92
+ static bool dynamically_linked () { return get_interp_load_bias () != 0 ; }
83
93
84
94
// Write the destination of an indirect jump/call to the output file
85
95
static void mark_indirect_branch (uint64_t callsite, uint64_t dst) {
86
- uint64_t dst_image_bias = elf_image_bias (dst);
87
- uint64_t dst_image_offset;
88
- const char *so_name;
89
- if (dst_image_bias == get_load_bias ()) {
90
- dst_image_offset = 0 ;
91
- so_name = " binary" ;
92
- } else if (dst_image_bias == get_interp_load_bias ()) {
93
- dst_image_offset = 0 ;
94
- so_name = " interpreter" ;
95
- } else {
96
- size_t idx = find_section (dst_image_bias);
97
- mapped_section sec = sections[idx];
98
- dst_image_offset = sec.image_offset ;
99
- so_name = sec.so_name ;
100
- }
101
-
102
- if (dynamically_linked ()) {
103
- dst -= dst_image_bias - dst_image_offset;
104
- callsite -= get_load_bias ();
105
- }
106
-
107
- outfile << " 0x" << hex << callsite << " ,0x" << hex << dst << " ," << so_name << endl;
96
+ uint64_t dst_image_bias = elf_image_bias (dst);
97
+ uint64_t dst_image_offset;
98
+ const char *so_name;
99
+ if (dst_image_bias == get_load_bias ()) {
100
+ dst_image_offset = 0 ;
101
+ so_name = " binary" ;
102
+ } else if (dst_image_bias == get_interp_load_bias ()) {
103
+ dst_image_offset = 0 ;
104
+ so_name = " interpreter" ;
105
+ } else {
106
+ size_t idx = find_section (dst_image_bias);
107
+ mapped_section sec = sections[idx];
108
+ dst_image_offset = sec.image_offset ;
109
+ so_name = sec.so_name ;
110
+ }
111
+
112
+ if (dynamically_linked ()) {
113
+ dst -= dst_image_bias - dst_image_offset;
114
+ callsite -= get_load_bias ();
115
+ }
116
+
117
+ outfile << " 0x" << hex << callsite << " ,0x" << hex << dst << " ," << so_name << endl;
108
118
}
109
119
110
120
// The default callback for when a block is executed
111
121
static void block_exec_handler (unsigned int vcpu_idx, void *start) {
112
- uint64_t start_vaddr = (uint64_t )start;
113
- if (indirect_taken.has_value ()) {
114
- mark_indirect_branch (indirect_taken.value (), start_vaddr);
115
- indirect_taken = {};
116
- }
122
+ uint64_t start_vaddr = (uint64_t )start;
123
+ if (indirect_taken.has_value ()) {
124
+ mark_indirect_branch (indirect_taken.value (), start_vaddr);
125
+ indirect_taken = {};
126
+ }
117
127
}
118
128
119
129
// Callback for executing blocks ending in an indirect jump/call
120
130
static void indirect_block_exec_handler (unsigned int vcpu_idx, void *tb_idx) {
121
- addr_range block_addr = indirect_blocks[(size_t )tb_idx];
131
+ addr_range block_addr = indirect_blocks[(size_t )tb_idx];
122
132
123
- // Check if the previous block ended in an indirect jump/call
124
- if (indirect_taken.has_value ()) {
125
- mark_indirect_branch (indirect_taken.value (), block_addr.start_addr );
126
- }
133
+ // Check if the previous block ended in an indirect jump/call
134
+ if (indirect_taken.has_value ()) {
135
+ mark_indirect_branch (indirect_taken.value (), block_addr.start_addr );
136
+ }
127
137
128
- indirect_taken = block_addr.end_addr ;
138
+ indirect_taken = block_addr.end_addr ;
129
139
}
130
140
131
141
// Register a callback for each time a block is executed
132
- static void block_trans_handler (qemu_plugin_id_t id,
133
- struct qemu_plugin_tb *tb) {
134
- static uint64_t start_vaddr;
135
- start_vaddr = qemu_plugin_tb_vaddr (tb);
136
- uint64_t last_insn = tb_last_insn_vaddr (tb);
137
- uint64_t bias = 0 ;
138
-
139
- // If an interpreter was loaded, add the binary bias to the input callsites
140
- if (dynamically_linked ()) {
141
- bias = get_load_bias ();
142
- }
143
-
144
- for (uint64_t &addr : callsites) {
145
- if (last_insn == (addr + bias)) {
146
- indirect_blocks.push_back ({
147
- .start_addr = start_vaddr,
148
- .end_addr = last_insn,
149
- });
150
- qemu_plugin_register_vcpu_tb_exec_cb (tb, indirect_block_exec_handler,
151
- QEMU_PLUGIN_CB_NO_REGS,
152
- (void *)indirect_tb_idx++);
153
- return ;
154
- }
155
- }
156
- qemu_plugin_register_vcpu_tb_exec_cb (
157
- tb, block_exec_handler, QEMU_PLUGIN_CB_NO_REGS, (void *)start_vaddr);
158
- }
142
+ static void block_trans_handler (qemu_plugin_id_t id, struct qemu_plugin_tb *tb) {
143
+ static uint64_t start_vaddr;
144
+ start_vaddr = qemu_plugin_tb_vaddr (tb);
145
+ uint64_t last_insn = tb_last_insn_vaddr (tb);
146
+ uint64_t bias = 0 ;
147
+
148
+ // If an interpreter was loaded, add the binary bias to the input callsites
149
+ if (dynamically_linked ()) {
150
+ bias = get_load_bias ();
151
+ }
159
152
160
- static void syscall_handler (qemu_plugin_id_t id, unsigned int vcpu_index,
161
- int64_t num, uint64_t a1, uint64_t a2,
162
- uint64_t a3, uint64_t a4, uint64_t a5,
163
- uint64_t a6, uint64_t a7, uint64_t a8) {
164
- switch (num) {
165
- // TODO: What header(s) define syscall numbers? It'd be nice to replace these magic numbers
166
- // Map a shared object file name to a `mapped_section` when entering an mmap syscall
167
- case 9 : {
168
- int fd = (int )a5;
169
- uint64_t load_bias = a1;
170
- uint64_t image_offset = a6;
171
-
172
- auto matching_fd = [&](shared_obj so) { return so.fd == fd; };
173
- // file descriptors can be reused so search for the /last/ ocurrence of an opened file with a file descriptor matching the mmap call
174
- auto so = find_if (shared_objects.rbegin (), shared_objects.rend (), matching_fd);
175
- if (so != shared_objects.rend ()) {
176
- mapped_section sec = {
177
- .load_bias = load_bias,
178
- .image_offset = image_offset,
179
- .so_name = so->filename ,
180
- };
181
- sections.push_back (sec);
182
- }
183
- break ;
153
+ for (uint64_t &addr : callsites) {
154
+ if (last_insn == (addr + bias)) {
155
+ indirect_blocks.push_back ({
156
+ .start_addr = start_vaddr,
157
+ .end_addr = last_insn,
158
+ });
159
+ qemu_plugin_register_vcpu_tb_exec_cb (tb, indirect_block_exec_handler,
160
+ QEMU_PLUGIN_CB_NO_REGS, (void *)indirect_tb_idx++);
161
+ return ;
184
162
}
185
- // TODO: Is the open syscall also used to open shared objects?
186
- // openat
187
- case 257 : {
188
- // Store the file name passed to the openat syscall
189
- shared_obj lib = {
190
- .filename = (char *)a2,
191
- .fd = -1 ,
163
+ }
164
+ qemu_plugin_register_vcpu_tb_exec_cb (tb, block_exec_handler, QEMU_PLUGIN_CB_NO_REGS,
165
+ (void *)start_vaddr);
166
+ }
167
+
168
+ static void syscall_handler (qemu_plugin_id_t id, unsigned int vcpu_index, int64_t num, uint64_t a1,
169
+ uint64_t a2, uint64_t a3, uint64_t a4, uint64_t a5, uint64_t a6,
170
+ uint64_t a7, uint64_t a8) {
171
+ if (num == MMAP) {
172
+ // Map a shared object file name to a `mapped_section` when entering an
173
+ // mmap syscall
174
+ int fd = (int )a5;
175
+ uint64_t load_bias = a1;
176
+ uint64_t image_offset = a6;
177
+
178
+ auto matching_fd = [&](shared_obj so) { return so.fd == fd; };
179
+ // file descriptors can be reused so search for the /last/ ocurrence of
180
+ // an opened file with a file descriptor matching the mmap call
181
+ auto so = find_if (shared_objects.rbegin (), shared_objects.rend (), matching_fd);
182
+ if (so != shared_objects.rend ()) {
183
+ mapped_section sec = {
184
+ .load_bias = load_bias,
185
+ .image_offset = image_offset,
186
+ .so_name = so->filename ,
192
187
};
193
- shared_objects.push_back (lib);
194
- break ;
195
- }
196
- default : {
197
- break ;
188
+ sections.push_back (sec);
198
189
}
190
+ } else if (num == OPENAT) {
191
+ // TODO: Is the open syscall also used to open shared objects?
192
+ // Store the file name passed to the openat syscall
193
+ shared_obj lib = {
194
+ .filename = (char *)a2,
195
+ .fd = -1 ,
196
+ };
197
+ shared_objects.push_back (lib);
199
198
}
200
199
}
201
200
202
- static void syscall_ret_handler (qemu_plugin_id_t id, unsigned int vcpu_idx, int64_t num, int64_t ret) {
201
+ static void syscall_ret_handler (qemu_plugin_id_t id, unsigned int vcpu_idx, int64_t num,
202
+ int64_t ret) {
203
203
// If the openat syscall returned a valid file descriptor
204
- if ((num == 257 ) && (ret != -1 )) {
204
+ if ((num == OPENAT ) && (ret != -1 )) {
205
205
// Store the file descriptor returned by the syscall
206
206
shared_objects.back ().fd = ret;
207
207
}
208
208
}
209
209
210
- extern int qemu_plugin_install (qemu_plugin_id_t id, const qemu_info_t *info,
211
- int argc, char **argv) {
212
- if (argc < 2 ) {
213
- cout << " Usage: /path/to/qemu \\\n " ;
214
- cout << " \t -plugin "
215
- " /path/to/"
216
- " libibresolver.so,arg=\" callsites.txt\" ,arg=\" output.csv\" \\\n " ;
217
- cout << " \t $BINARY" << endl;
218
- return -1 ;
219
- }
220
-
221
- fstream input (argv[0 ]);
222
- outfile = ofstream (argv[1 ]);
223
- if (input.fail ()) {
224
- cout << " Could not open file " << argv[0 ] << endl;
225
- return -2 ;
226
- }
227
- uint64_t addr;
228
- while (input >> hex >> addr) {
229
- callsites.push_back (addr);
230
- }
231
- cout << " Loaded input file with " << callsites.size () << " indirect callsites"
232
- << endl;
233
- outfile << " callsite,destination offset,destination image" << endl;
234
- // Register a callback for each time a block is translated
235
- qemu_plugin_register_vcpu_tb_trans_cb (id, block_trans_handler);
236
-
237
- // Register callbacks for entering and returning from syscalls
238
- // This is used to determine the load biases and image offsets for dynamically linked shared objects
239
- qemu_plugin_register_vcpu_syscall_cb (id, syscall_handler);
240
- qemu_plugin_register_vcpu_syscall_ret_cb (id, syscall_ret_handler);
241
- return 0 ;
210
+ extern int qemu_plugin_install (qemu_plugin_id_t id, const qemu_info_t *info, int argc,
211
+ char **argv) {
212
+ if (argc < 2 ) {
213
+ cout << " Usage: /path/to/qemu \\\n " ;
214
+ cout << " \t -plugin "
215
+ " /path/to/"
216
+ " libibresolver.so,arg=\" callsites.txt\" ,arg=\" output.csv\" \\\n " ;
217
+ cout << " \t $BINARY" << endl;
218
+ return -1 ;
219
+ }
220
+
221
+ fstream input (argv[0 ]);
222
+ outfile = ofstream (argv[1 ]);
223
+ if (input.fail ()) {
224
+ cout << " Could not open file " << argv[0 ] << endl;
225
+ return -2 ;
226
+ }
227
+
228
+ if (!strcmp (info->target_name , " x86_64" )) {
229
+ MMAP = X86_64_MMAP;
230
+ OPENAT = X86_64_OPENAT;
231
+ } else if (!strcmp (info->target_name , " arm" )) {
232
+ MMAP = ARM32_MMAP2;
233
+ OPENAT = ARM32_OPENAT;
234
+ } else {
235
+ cout << " Unsupported architecture " << info->target_name << endl;
236
+ return -3 ;
237
+ }
238
+
239
+ uint64_t addr;
240
+ while (input >> hex >> addr) {
241
+ callsites.push_back (addr);
242
+ }
243
+ cout << " Loaded input file with " << callsites.size () << " indirect callsites" << endl;
244
+ outfile << " callsite,destination offset,destination image" << endl;
245
+ // Register a callback for each time a block is translated
246
+ qemu_plugin_register_vcpu_tb_trans_cb (id, block_trans_handler);
247
+
248
+ // Register callbacks for entering and returning from syscalls
249
+ // This is used to determine the load biases and image offsets for
250
+ // dynamically linked shared objects
251
+ qemu_plugin_register_vcpu_syscall_cb (id, syscall_handler);
252
+ qemu_plugin_register_vcpu_syscall_ret_cb (id, syscall_ret_handler);
253
+ return 0 ;
242
254
}
0 commit comments