Line data Source code
1 : #include "fd_vm.h"
2 : #include "../../ballet/sbpf/fd_sbpf_instr.h"
3 : #include "../../ballet/sbpf/fd_sbpf_opcodes.h"
4 : #include "../../ballet/murmur3/fd_murmur3.h"
5 :
/* fd_vm_disasm_printf appends to the *_len string in the max byte
   buffer buf the printf of the remaining args.  On input, assumes *_len
   is strlen(buf) and *_len is in [0,max).  On output, even on error
   cases, the leading string in buf will be unchanged, *_len will be
   strlen(buf) and *_len will be in [*_len_as_it_was_on_input,max).

   Returns:

   FD_VM_SUCCESS - success.  buf and *_len updated.

   FD_VM_ERR_FULL - not enough room in buf to hold result.  As many
   bytes as possible were written to buf and *_len==max-1 on return.

   FD_VM_ERR_IO - printf format parse error.  buf and *_len unchanged but
   trailing bytes of buf might have been clobbered. */
21 :
22 : /* FIXME: REWORK API TO USE FD_CSTR_PRINTF HERE? (OR CONSIDER ADDING
23 : FD_VM_DISASM_PRINTF AS A FD_CSTR API) */
24 :
25 : #include <stdio.h>
26 : #include <stdarg.h>
27 :
28 : static int
29 : fd_vm_disasm_printf( char * buf,
30 : ulong max,
31 : ulong * _len,
32 : char const * fmt, ... ) __attribute__((format(printf,4,5)));
33 :
34 : static int
35 : fd_vm_disasm_printf( char * buf,
36 : ulong max,
37 : ulong * _len,
38 28108308 : char const * fmt, ... ) {
39 28108308 : ulong len = *_len; /* In [0,max) */
40 28108308 : ulong rem = max - len; /* In (0,max] */
41 :
42 28108308 : va_list ap;
43 28108308 : va_start( ap, fmt );
44 28108308 : int ret = vsnprintf( buf + len, rem, fmt, ap );
45 28108308 : va_end( ap );
46 :
47 28108308 : if( FD_UNLIKELY( ret<0 ) ) { /* Parse error */
48 0 : buf[len] = '\0'; /* Guarantee '\0' termination */
49 0 : return FD_VM_ERR_IO;
50 0 : }
51 :
52 28108308 : ulong append_len = (ulong)ret; /* Guaranteed safe */
53 :
54 28108308 : if( FD_UNLIKELY( append_len>=rem ) ) { /* Truncated output */
55 0 : buf[max-1UL] = '\0'; /* Guarantee '\0' termination */
56 0 : *_len = max-1UL;
57 0 : return FD_VM_ERR_FULL;
58 0 : }
59 :
60 28108308 : *_len = len + append_len;
61 28108308 : return FD_VM_SUCCESS;
62 28108308 : }
63 :
/* OUT_PRINTF forwards its arguments to fd_vm_disasm_printf, appending
   to the out / out_max / _out_len variables of the enclosing function.
   If the append reports an error, the enclosing function returns that
   error code immediately. */

#define OUT_PRINTF( ... ) do {                                            \
    int _rc = fd_vm_disasm_printf( out, out_max, _out_len, __VA_ARGS__ ); \
    if( FD_UNLIKELY( _rc ) ) return _rc;                                  \
  } while(0)
71 :
/* fd_vm_disasm_instr_* are pretty printers for single word instructions.
   They do not validate their input arguments.  out, out_max, _out_len
   and the returned error code have the same interpretation as in their
   public facing wrappers. */
76 :
77 : static int
78 : fd_vm_disasm_instr_alu( fd_sbpf_instr_t instr,
79 : char const * suffix,
80 : char * out,
81 : ulong out_max,
82 7491759 : ulong * _out_len ) {
83 :
84 7491759 : char * op_name;
85 7491759 : switch( instr.opcode.normal.op_mode ) {
86 469560 : case FD_SBPF_OPCODE_ALU_OP_MODE_ADD: op_name = "add"; break;
87 466773 : case FD_SBPF_OPCODE_ALU_OP_MODE_SUB: op_name = "sub"; break;
88 467028 : case FD_SBPF_OPCODE_ALU_OP_MODE_MUL: op_name = "mul"; break;
89 468531 : case FD_SBPF_OPCODE_ALU_OP_MODE_DIV: op_name = "div"; break;
90 468021 : case FD_SBPF_OPCODE_ALU_OP_MODE_OR: op_name = "or"; break;
91 467013 : case FD_SBPF_OPCODE_ALU_OP_MODE_AND: op_name = "and"; break;
92 469251 : case FD_SBPF_OPCODE_ALU_OP_MODE_LSH: op_name = "lsh"; break;
93 467364 : case FD_SBPF_OPCODE_ALU_OP_MODE_RSH: op_name = "rsh"; break;
94 467880 : case FD_SBPF_OPCODE_ALU_OP_MODE_NEG: op_name = "neg"; break;
95 469461 : case FD_SBPF_OPCODE_ALU_OP_MODE_MOD: op_name = "mod"; break;
96 468939 : case FD_SBPF_OPCODE_ALU_OP_MODE_XOR: op_name = "xor"; break;
97 468300 : case FD_SBPF_OPCODE_ALU_OP_MODE_MOV: op_name = "mov"; break;
98 468492 : case FD_SBPF_OPCODE_ALU_OP_MODE_ARSH: op_name = "arsh"; break;
99 468588 : case FD_SBPF_OPCODE_ALU_OP_MODE_END: op_name = "end"; break;
100 936558 : default: return FD_VM_ERR_INVAL;
101 7491759 : }
102 :
103 6555201 : if( FD_UNLIKELY( instr.opcode.normal.op_mode==FD_SBPF_OPCODE_ALU_OP_MODE_NEG ) ) {
104 467880 : OUT_PRINTF( "%s%s r%d", op_name, suffix, instr.dst_reg );
105 467880 : return FD_VM_SUCCESS;
106 467880 : }
107 :
108 6087321 : switch( instr.opcode.normal.op_src ) {
109 3043260 : case FD_SBPF_OPCODE_SOURCE_MODE_IMM:
110 3043260 : OUT_PRINTF( "%s%s r%d, %d", op_name, suffix, instr.dst_reg, (int)instr.imm );
111 3043260 : return FD_VM_SUCCESS;
112 3044061 : case FD_SBPF_OPCODE_SOURCE_MODE_REG:
113 3044061 : OUT_PRINTF( "%s%s r%d, r%d", op_name, suffix, instr.dst_reg, instr.src_reg );
114 3044061 : return FD_VM_SUCCESS;
115 0 : default: break;
116 6087321 : }
117 :
118 0 : return FD_VM_ERR_INVAL;
119 6087321 : }
120 :
121 : static int
122 : fd_vm_disasm_instr_jmp( fd_sbpf_instr_t instr,
123 : ulong pc,
124 : char const * suffix,
125 : fd_sbpf_syscalls_t const * syscalls,
126 : char * out,
127 : ulong out_max,
128 7502163 : ulong * _out_len ) {
129 :
130 7502163 : char * op_name;
131 7502163 : switch( instr.opcode.normal.op_mode ) {
132 469752 : case FD_SBPF_OPCODE_JMP_OP_MODE_JA: op_name = "ja"; break;
133 466641 : case FD_SBPF_OPCODE_JMP_OP_MODE_JEQ: op_name = "jeq"; break;
134 471039 : case FD_SBPF_OPCODE_JMP_OP_MODE_JGT: op_name = "jgt"; break;
135 467022 : case FD_SBPF_OPCODE_JMP_OP_MODE_JGE: op_name = "jge"; break;
136 470964 : case FD_SBPF_OPCODE_JMP_OP_MODE_JSET: op_name = "jset"; break;
137 466776 : case FD_SBPF_OPCODE_JMP_OP_MODE_JNE: op_name = "jne"; break;
138 467802 : case FD_SBPF_OPCODE_JMP_OP_MODE_JSGT: op_name = "jsgt"; break;
139 469074 : case FD_SBPF_OPCODE_JMP_OP_MODE_JSGE: op_name = "jsge"; break;
140 469824 : case FD_SBPF_OPCODE_JMP_OP_MODE_CALL: op_name = "call"; break;
141 469584 : case FD_SBPF_OPCODE_JMP_OP_MODE_EXIT: op_name = "exit"; break;
142 470478 : case FD_SBPF_OPCODE_JMP_OP_MODE_JLT: op_name = "jlt"; break;
143 467139 : case FD_SBPF_OPCODE_JMP_OP_MODE_JLE: op_name = "jle"; break;
144 467706 : case FD_SBPF_OPCODE_JMP_OP_MODE_JSLT: op_name = "jslt"; break;
145 468159 : case FD_SBPF_OPCODE_JMP_OP_MODE_JSLE: op_name = "jsle"; break;
146 940203 : default: return FD_VM_ERR_INVAL;
147 7502163 : }
148 :
149 6561960 : if( FD_UNLIKELY( instr.opcode.normal.op_mode==FD_SBPF_OPCODE_JMP_OP_MODE_CALL ) ) {
150 469824 : switch ( instr.opcode.normal.op_src ) {
151 234804 : case FD_SBPF_OPCODE_SOURCE_MODE_IMM: {
152 234804 : fd_sbpf_syscalls_t const * syscall = syscalls ? fd_sbpf_syscalls_query_const( syscalls, (ulong)instr.imm, NULL ) : NULL;
153 234804 : if( syscall ) { /* FIXME: THESE CODE PATHS CURRENTLY NOT EXERCISED BY UNIT TEST */
154 0 : char const * name = syscall->name;
155 0 : if( name ) OUT_PRINTF( "syscall%s %s", suffix, name );
156 0 : else OUT_PRINTF( "syscall%s 0x%08x", suffix, instr.imm );
157 234804 : } else {
158 234804 : uint pc = fd_pchash_inverse( instr.imm ); /* FIXME: is pchash in the right place? */
159 234804 : if( pc<(10<<17) ) OUT_PRINTF( "%s%s function_%u", op_name, suffix, pc ); /* FIXME: hardcoded constant */
160 88047 : else OUT_PRINTF( "%s%s function_%#x", op_name, suffix, instr.imm );
161 234804 : }
162 234804 : return FD_VM_SUCCESS;
163 234804 : }
164 235020 : case FD_SBPF_OPCODE_SOURCE_MODE_REG:
165 235020 : OUT_PRINTF( "%sx%s r%u", op_name, suffix, instr.imm );
166 235020 : return FD_VM_SUCCESS;
167 0 : default: break;
168 469824 : }
169 0 : return FD_VM_ERR_INVAL;
170 469824 : }
171 :
172 6092136 : if( FD_UNLIKELY( instr.opcode.normal.op_mode==FD_SBPF_OPCODE_JMP_OP_MODE_EXIT ) ) {
173 469584 : OUT_PRINTF( "%s%s", op_name, suffix );
174 469584 : return FD_VM_SUCCESS;
175 469584 : }
176 :
177 5622552 : if( FD_UNLIKELY( instr.opcode.normal.op_mode==FD_SBPF_OPCODE_JMP_OP_MODE_JA ) ) {
178 469752 : OUT_PRINTF( "%s%s lbb_%ld", op_name, suffix, (long)pc+(long)instr.offset+1L );
179 469752 : return FD_VM_SUCCESS;
180 469752 : }
181 :
182 5152800 : switch( instr.opcode.normal.op_src ) {
183 2577315 : case FD_SBPF_OPCODE_SOURCE_MODE_IMM:
184 2577315 : OUT_PRINTF( "%s%s r%d, %d, lbb_%ld", op_name, suffix, instr.dst_reg, (int)instr.imm, (long)pc+(long)instr.offset+1L );
185 2577315 : return FD_VM_SUCCESS;
186 2575485 : case FD_SBPF_OPCODE_SOURCE_MODE_REG:
187 2575485 : OUT_PRINTF( "%s%s r%d, r%d, lbb_%ld", op_name, suffix, instr.dst_reg, instr.src_reg, (long)pc+(long)instr.offset+1L );
188 2575485 : return FD_VM_SUCCESS;
189 0 : break;
190 0 : default: break;
191 5152800 : }
192 :
193 0 : return FD_VM_ERR_INVAL;
194 5152800 : }
195 :
196 : static int
197 : fd_vm_disasm_instr_ldx( fd_sbpf_instr_t instr,
198 : char * out,
199 : ulong out_max,
200 3749811 : ulong * _out_len ) {
201 :
202 3749811 : char * op_name;
203 3749811 : switch( instr.opcode.mem.op_size ) {
204 937140 : case FD_SBPF_OPCODE_SIZE_MODE_WORD: op_name = "ldxw"; break;
205 935406 : case FD_SBPF_OPCODE_SIZE_MODE_HALF: op_name = "ldxh"; break;
206 938931 : case FD_SBPF_OPCODE_SIZE_MODE_BYTE: op_name = "ldxb"; break;
207 938334 : case FD_SBPF_OPCODE_SIZE_MODE_DOUB: op_name = "ldxdw"; break;
208 0 : default: return FD_VM_ERR_INVAL;
209 3749811 : }
210 :
211 3749811 : if( instr.offset<0 ) OUT_PRINTF( "%s r%d, [r%d-0x%x]", op_name, instr.dst_reg, instr.src_reg, (ushort)-instr.offset );
212 1876233 : else OUT_PRINTF( "%s r%d, [r%d+0x%x]", op_name, instr.dst_reg, instr.src_reg, (ushort) instr.offset );
213 3749811 : return FD_VM_SUCCESS;
214 3749811 : }
215 :
216 : static int
217 : fd_vm_disasm_instr_stx( fd_sbpf_instr_t instr,
218 : char * out,
219 : ulong out_max,
220 3752208 : ulong * _out_len ) {
221 :
222 3752208 : char * op_name;
223 3752208 : switch( instr.opcode.mem.op_size ) {
224 939231 : case FD_SBPF_OPCODE_SIZE_MODE_WORD: op_name = "stxw"; break;
225 937776 : case FD_SBPF_OPCODE_SIZE_MODE_HALF: op_name = "stxh"; break;
226 938136 : case FD_SBPF_OPCODE_SIZE_MODE_BYTE: op_name = "stxb"; break;
227 937065 : case FD_SBPF_OPCODE_SIZE_MODE_DOUB: op_name = "stxdw"; break;
228 0 : default: return FD_VM_ERR_INVAL;
229 3752208 : }
230 :
231 3752208 : if( instr.offset<0 ) OUT_PRINTF( "%s [r%d-0x%x], r%d", op_name, instr.dst_reg, (ushort)-instr.offset, instr.src_reg );
232 1872705 : else OUT_PRINTF( "%s [r%d+0x%x], r%d", op_name, instr.dst_reg, (ushort) instr.offset, instr.src_reg );
233 3752208 : return FD_VM_SUCCESS;
234 3752208 : }
235 :
236 : int
237 : fd_vm_disasm_instr( ulong const * text,
238 : ulong text_cnt,
239 : ulong pc,
240 : fd_sbpf_syscalls_t const * syscalls,
241 : char * out,
242 : ulong out_max,
243 30000033 : ulong * _out_len ) {
244 :
245 30000033 : if( FD_UNLIKELY( (!text) | (!text_cnt) | (!out) | (!out_max) | (!_out_len) ) ) return FD_VM_ERR_INVAL;
246 30000018 : if( FD_UNLIKELY( (*_out_len)>=out_max ) ) return FD_VM_ERR_INVAL;
247 :
248 30000015 : fd_sbpf_instr_t i0 = fd_sbpf_instr( text[0] );
249 :
250 30000015 : switch( i0.opcode.any.op_class ) {
251 :
252 3753156 : case FD_SBPF_OPCODE_CLASS_LD: {
253 3753156 : if( FD_UNLIKELY( text_cnt<2UL ) ) return FD_VM_ERR_INVAL;
254 3738210 : fd_sbpf_instr_t i1 = fd_sbpf_instr( text[1] );
255 : /* FIXME: VALIDATE I1 IS PROPER */
256 3738210 : OUT_PRINTF( "lddw r%d, 0x%lx", i0.dst_reg, (ulong)((ulong)i0.imm | (ulong)((ulong)i1.imm << 32UL)) );
257 3738210 : return FD_VM_SUCCESS;
258 3738210 : }
259 :
260 3750918 : case FD_SBPF_OPCODE_CLASS_ST: { /* FIXME: FIGURE OUT WHAT'S UP HERE */
261 3750918 : OUT_PRINTF( "FIXME: %016lx (ST)", text[0] );
262 3750918 : return FD_VM_SUCCESS;
263 3750918 : }
264 :
265 3749811 : case FD_SBPF_OPCODE_CLASS_LDX: return fd_vm_disasm_instr_ldx( i0, out, out_max, _out_len);
266 3752208 : case FD_SBPF_OPCODE_CLASS_STX: return fd_vm_disasm_instr_stx( i0, out, out_max, _out_len );
267 3747018 : case FD_SBPF_OPCODE_CLASS_ALU: return fd_vm_disasm_instr_alu( i0, "", out, out_max, _out_len );
268 3751272 : case FD_SBPF_OPCODE_CLASS_JMP: return fd_vm_disasm_instr_jmp( i0, pc, "", syscalls, out, out_max, _out_len );
269 3750891 : case FD_SBPF_OPCODE_CLASS_JMP32: return fd_vm_disasm_instr_jmp( i0, pc, "32", syscalls, out, out_max, _out_len );
270 3744741 : case FD_SBPF_OPCODE_CLASS_ALU64: return fd_vm_disasm_instr_alu( i0, "64", out, out_max, _out_len );
271 0 : default: break;
272 30000015 : }
273 0 : return FD_VM_ERR_INVAL;
274 30000015 : }
275 :
276 : int
277 : fd_vm_disasm_program( ulong const * text,
278 : ulong text_cnt,
279 : fd_sbpf_syscalls_t const * syscalls,
280 : char * out,
281 : ulong out_max,
282 15 : ulong * _out_len ) {
283 :
284 15 : if( FD_UNLIKELY( ((!text) & (!!text_cnt)) | (!out) | (!out_max) | (!_out_len) ) ) return FD_VM_ERR_INVAL;
285 3 : if( FD_UNLIKELY( (*_out_len)>=out_max ) ) return FD_VM_ERR_INVAL;
286 :
287 : /* Construct the mapping of pc to labels and functions. FIXME: This
288 : is currently not an algo efficient implementation. Note: if the
289 : same instruction is the targeted by multiple calls / exits / jmps,
290 : it will appear multiple times in the label_pc and/or func_pc
291 : arrays. But that's okay because use the target instruction as the
292 : label and function name. */
293 :
294 0 : ulong func_pc [ 65536 ]; ulong func_cnt = 0UL;
295 0 : ulong label_pc[ 65536 ]; ulong label_cnt = 0UL;
296 :
297 0 : for( ulong i=0UL; i<text_cnt; i++ ) {
298 0 : fd_sbpf_instr_t instr = fd_sbpf_instr( text[i] );
299 0 : if ( instr.opcode.raw==FD_SBPF_OP_CALL_IMM ) func_cnt++;
300 0 : else if( instr.opcode.raw==FD_SBPF_OP_EXIT ) func_cnt++;
301 0 : else if( instr.opcode.raw==FD_SBPF_OP_CALL_REG ) continue;
302 0 : else if( ( (instr.opcode.any.op_class==FD_SBPF_OPCODE_CLASS_JMP ) |
303 0 : (instr.opcode.any.op_class==FD_SBPF_OPCODE_CLASS_JMP32) ) ) label_cnt++;
304 0 : }
305 :
306 0 : if( FD_UNLIKELY( (func_cnt>65536UL) | (label_cnt>65536UL) ) ) return FD_VM_ERR_UNSUP;
307 :
308 0 : func_cnt = 0UL;
309 0 : label_cnt = 0UL;
310 :
311 0 : for( ulong i=0UL; i<text_cnt; i++ ) {
312 0 : fd_sbpf_instr_t instr = fd_sbpf_instr( text[i] );
313 0 : if ( instr.opcode.raw==FD_SBPF_OP_CALL_IMM ) func_pc[ func_cnt++ ] = i + instr.imm + 1UL; /* FIXME: what if out of bounds? */
314 0 : else if( instr.opcode.raw==FD_SBPF_OP_EXIT ) func_pc[ func_cnt++ ] = i + instr.imm + 1UL; /* FIXME: what if out of bounds? */
315 0 : else if( instr.opcode.raw==FD_SBPF_OP_CALL_REG ) continue;
316 0 : else if( ( (instr.opcode.any.op_class==FD_SBPF_OPCODE_CLASS_JMP ) |
317 0 : (instr.opcode.any.op_class==FD_SBPF_OPCODE_CLASS_JMP32) ) )
318 0 : label_pc[ label_cnt++ ] = (ulong)((long)i + (long)instr.offset + 1L); /* FIXME: casting and what if out of bounds? */
319 0 : }
320 :
321 : /* Output the program */
322 :
323 0 : OUT_PRINTF( "function_0:\n" );
324 :
325 0 : for( ulong i=0UL; i<text_cnt; i++ ) {
326 :
327 : /* Print functions / labels */
328 : /* FIXME: What if there is a func_pc and a label_pc that target
329 : for the same instruction? It is possible given the above logic.
330 : Probably should print both. */
331 : /* FIXME: Algo efficiency! */
332 :
333 0 : int found = 0;
334 0 : for( ulong j=0UL; j<label_cnt; j++ ) if( label_pc[j]==i ) { found = 1; OUT_PRINTF( "lbb_%lu:\n", i ); break; }
335 0 : if( !found ) for( ulong j=0UL; j<func_cnt; j++ ) if( func_pc[j]==i ) { OUT_PRINTF( "\nfunction_%lu:\n", i ); break; }
336 :
337 : /* Print instruction */
338 :
339 : /* FIXME: WHAT ABOUT LABELS IN THE MIDDLE OF MULTIWORD INSTRUCTIONS!
340 : AND NOT JUST FOR DISASSEMBLY ... POTENTIAL CONSENSUS FAILURE
341 : MECHANISM! */
342 :
343 0 : fd_sbpf_instr_t instr = fd_sbpf_instr( text[i] );
344 0 : ulong extra_cnt = fd_ulong_if( instr.opcode.any.op_class==FD_SBPF_OPCODE_CLASS_LD, 1UL, 0UL );
345 0 : if( FD_UNLIKELY( (i+extra_cnt)>=text_cnt ) ) return FD_VM_ERR_INVAL; /* Truncated multiword instruction at end of text */
346 :
347 0 : OUT_PRINTF( " " );
348 0 : int err = fd_vm_disasm_instr( text+i, text_cnt-i, i, syscalls, out, out_max, _out_len );
349 0 : if( FD_UNLIKELY( err ) ) return err;
350 0 : OUT_PRINTF( "\n" );
351 :
352 0 : i += extra_cnt;
353 :
354 : /* Print any trailing function */
355 : /* FIXME: this is probably not necessary if the function/label print
356 : above just prints both, unless trying print a function label that
357 : happens to immediately after the end of a program. */
358 : /* FIXME: Algo efficiency? */
359 :
360 0 : if( FD_UNLIKELY( (instr.opcode.raw==FD_SBPF_OP_JA) & ((i+1UL)<text_cnt) ) ) {
361 0 : found = 0;
362 0 : for( ulong j=0UL; j<label_cnt; j++ ) if( label_pc[j]==(i+1UL) ) { found = 1; break; }
363 0 : if( !found ) OUT_PRINTF( "\nfunction_%lu:\n", i+1UL );
364 0 : }
365 0 : }
366 :
367 0 : return FD_VM_SUCCESS;
368 0 : }
369 :
370 : #undef OUT_PRINTF
|