Line data Source code
1 : #include "fd_vm_syscall.h"
2 :
3 : #include "../../../ballet/base64/fd_base64.h"
4 : #include "../../../ballet/utf8/fd_utf8.h"
5 : #include "../../runtime/sysvar/fd_sysvar.h"
6 : #include "../../runtime/sysvar/fd_sysvar_clock.h"
7 : #include "../../runtime/sysvar/fd_sysvar_epoch_schedule.h"
8 : #include "../../runtime/sysvar/fd_sysvar_fees.h"
9 : #include "../../runtime/context/fd_exec_txn_ctx.h"
10 : #include "../../runtime/context/fd_exec_instr_ctx.h"
11 : #include "../../runtime/fd_account.h"
12 :
13 : int
14 : fd_vm_syscall_abort( FD_PARAM_UNUSED void * _vm,
15 : FD_PARAM_UNUSED ulong r1,
16 : FD_PARAM_UNUSED ulong r2,
17 : FD_PARAM_UNUSED ulong r3,
18 : FD_PARAM_UNUSED ulong r4,
19 : FD_PARAM_UNUSED ulong r5,
20 102 : FD_PARAM_UNUSED ulong * _ret ) {
21 : /* https://github.com/anza-xyz/agave/blob/v2.0.6/programs/bpf_loader/src/syscalls/mod.rs#L630 */
22 102 : fd_vm_t * vm = (fd_vm_t *)_vm;
23 102 : FD_VM_ERR_FOR_LOG_SYSCALL( vm, FD_VM_ERR_SYSCALL_ABORT );
24 102 : return FD_VM_ERR_ABORT;
25 102 : }
26 :
/* FD_TRANSLATE_STRING translates the msg_sz byte VM range starting at
   vaddr for reading (via FD_VM_MEM_SLICE_HADDR_LD with byte alignment)
   and evaluates to a read only host pointer to those bytes.  The bytes
   must pass fd_utf8_verify.  If they do not, the macro records
   FD_VM_ERR_SYSCALL_INVALID_STRING and returns that code from the
   _enclosing_ function, so it may only be used inside functions that
   return an int error code.  msg_sz is expanded more than once; pass an
   expression without side effects.

   Counterpart of Agave's translate_string_and_do():
   https://github.com/anza-xyz/agave/blob/v2.0.6/programs/bpf_loader/src/syscalls/mod.rs#L601

   In this file it is used by the panic and log syscalls. */

#define FD_TRANSLATE_STRING( vm, vaddr, msg_sz ) (__extension__({                                     \
    char const * translated_str = FD_VM_MEM_SLICE_HADDR_LD( vm, vaddr, FD_VM_ALIGN_RUST_U8, msg_sz ); \
    if( FD_UNLIKELY( !fd_utf8_verify( translated_str, msg_sz ) ) ) {                                  \
      FD_VM_ERR_FOR_LOG_SYSCALL( vm, FD_VM_ERR_SYSCALL_INVALID_STRING );                              \
      return FD_VM_ERR_SYSCALL_INVALID_STRING;                                                        \
    }                                                                                                 \
    translated_str;                                                                                   \
  }))
42 :
43 : int
44 : fd_vm_syscall_sol_panic( /**/ void * _vm,
45 : /**/ ulong file_vaddr,
46 : /**/ ulong file_sz,
47 : /**/ ulong line,
48 : /**/ ulong column,
49 : FD_PARAM_UNUSED ulong r5,
50 33 : FD_PARAM_UNUSED ulong * _ret ) {
51 33 : fd_vm_t * vm = (fd_vm_t *)_vm;
52 :
53 : /* https://github.com/anza-xyz/agave/blob/v2.0.6/programs/bpf_loader/src/syscalls/mod.rs#L637
54 :
55 : Note: this syscall is not used by the Rust SDK, only by the C SDK.
56 : Rust transforms `panic!()` into a log, followed by an abort.
57 : It's unclear if this syscall actually makes any sense... */
58 33 : FD_VM_CU_UPDATE( vm, file_sz );
59 :
60 : /* Validate string */
61 27 : FD_TRANSLATE_STRING( vm, file_vaddr, file_sz );
62 :
63 : /* Note: we truncate the log, ignoring file, line, column.
64 : As mentioned above, it's unclear if anyone is even using this syscall,
65 : so dealing with the complexity of Agave's log is a waste of time. */
66 0 : (void)line;
67 24 : (void)column;
68 :
69 24 : FD_VM_ERR_FOR_LOG_SYSCALL( vm, FD_VM_ERR_SYSCALL_PANIC );
70 24 : return FD_VM_ERR_PANIC;
71 54 : }
72 :
73 : int
74 : fd_vm_syscall_sol_log( /**/ void * _vm,
75 : /**/ ulong msg_vaddr,
76 : /**/ ulong msg_sz,
77 : FD_PARAM_UNUSED ulong r2,
78 : FD_PARAM_UNUSED ulong r3,
79 : FD_PARAM_UNUSED ulong r4,
80 903 : /**/ ulong * _ret ) {
81 903 : fd_vm_t * vm = (fd_vm_t *)_vm;
82 :
83 : /* https://github.com/anza-xyz/agave/blob/v2.0.6/programs/bpf_loader/src/syscalls/logging.rs#L5 */
84 :
85 903 : FD_VM_CU_UPDATE( vm, fd_ulong_max( msg_sz, FD_VM_SYSCALL_BASE_COST ) );
86 :
87 : /* Note: when msg_sz==0, msg can be undefined. fd_log_collector_program_log() handles it. */
88 873 : fd_log_collector_program_log( vm->instr_ctx, FD_TRANSLATE_STRING( vm, msg_vaddr, msg_sz ), msg_sz );
89 :
90 0 : *_ret = 0UL;
91 870 : return FD_VM_SUCCESS;
92 1746 : }
93 :
94 : int
95 : fd_vm_syscall_sol_log_64( void * _vm,
96 : ulong r1,
97 : ulong r2,
98 : ulong r3,
99 : ulong r4,
100 : ulong r5,
101 15 : ulong * _ret ) {
102 15 : fd_vm_t * vm = (fd_vm_t *)_vm;
103 :
104 : /* https://github.com/anza-xyz/agave/blob/v2.0.6/programs/bpf_loader/src/syscalls/logging.rs#L37 */
105 :
106 15 : FD_VM_CU_UPDATE( vm, FD_VM_LOG_64_UNITS );
107 :
108 : /* Max msg_sz: 46 - 15 + 16*5 = 111 < 127 => we can use printf */
109 0 : fd_log_collector_printf_dangerous_max_127( vm->instr_ctx,
110 12 : "Program log: 0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx", r1, r2, r3, r4, r5 );
111 :
112 12 : *_ret = 0UL;
113 12 : return FD_VM_SUCCESS;
114 15 : }
115 :
116 : int
117 : fd_vm_syscall_sol_log_compute_units( /**/ void * _vm,
118 : FD_PARAM_UNUSED ulong r1,
119 : FD_PARAM_UNUSED ulong r2,
120 : FD_PARAM_UNUSED ulong r3,
121 : FD_PARAM_UNUSED ulong r4,
122 : FD_PARAM_UNUSED ulong r5,
123 15 : /**/ ulong * _ret ) {
124 15 : fd_vm_t * vm = (fd_vm_t *)_vm;
125 :
126 : /* https://github.com/anza-xyz/agave/blob/v2.0.6/programs/bpf_loader/src/syscalls/logging.rs#L60 */
127 :
128 15 : FD_VM_CU_UPDATE( vm, FD_VM_SYSCALL_BASE_COST );
129 :
130 : /* Max msg_sz: 40 - 3 + 20 = 57 < 127 => we can use printf */
131 0 : fd_log_collector_printf_dangerous_max_127( vm->instr_ctx,
132 12 : "Program consumption: %lu units remaining", vm->cu );
133 :
134 12 : *_ret = 0UL;
135 12 : return FD_VM_SUCCESS;
136 15 : }
137 :
138 : int
139 : fd_vm_syscall_sol_log_pubkey( /**/ void * _vm,
140 : /**/ ulong pubkey_vaddr,
141 : FD_PARAM_UNUSED ulong r2,
142 : FD_PARAM_UNUSED ulong r3,
143 : FD_PARAM_UNUSED ulong r4,
144 : FD_PARAM_UNUSED ulong r5,
145 12 : /**/ ulong * _ret ) {
146 12 : fd_vm_t * vm = (fd_vm_t *)_vm;
147 :
148 : /* https://github.com/anza-xyz/agave/blob/v2.0.6/programs/bpf_loader/src/syscalls/logging.rs#L84 */
149 :
150 12 : FD_VM_CU_UPDATE( vm, FD_VM_LOG_PUBKEY_UNITS );
151 :
152 15 : void const * pubkey = FD_VM_MEM_HADDR_LD( vm, pubkey_vaddr, FD_VM_ALIGN_RUST_PUBKEY, sizeof(fd_pubkey_t) );
153 :
154 0 : char msg[ FD_BASE58_ENCODED_32_SZ ]; ulong msg_sz;
155 15 : if( FD_UNLIKELY( fd_base58_encode_32( pubkey, &msg_sz, msg )==NULL ) ) {
156 0 : return FD_VM_ERR_INVAL;
157 0 : }
158 :
159 3 : fd_log_collector_program_log( vm->instr_ctx, msg, msg_sz );
160 :
161 3 : *_ret = 0UL;
162 3 : return FD_VM_SUCCESS;
163 15 : }
164 :
165 : int
166 : fd_vm_syscall_sol_log_data( /**/ void * _vm,
167 : /**/ ulong slice_vaddr,
168 : /**/ ulong slice_cnt,
169 : FD_PARAM_UNUSED ulong r3,
170 : FD_PARAM_UNUSED ulong r4,
171 : FD_PARAM_UNUSED ulong r5,
172 81 : /**/ ulong * _ret ) {
173 81 : fd_vm_t * vm = (fd_vm_t *)_vm;
174 :
175 : /* https://github.com/anza-xyz/agave/blob/v2.0.6/programs/bpf_loader/src/syscalls/logging.rs#L109
176 :
177 : Note: this is implemented following Agave's perverse behavior.
178 : We need to loop the slice multiple times to match the exact error,
179 : first compute budget, then memory mapping.
180 : And finally we can loop to log. */
181 :
182 : /* https://github.com/anza-xyz/agave/blob/v2.0.6/programs/bpf_loader/src/syscalls/logging.rs#L121 */
183 :
184 81 : FD_VM_CU_UPDATE( vm, FD_VM_SYSCALL_BASE_COST );
185 :
186 : /* https://github.com/anza-xyz/agave/blob/v2.0.6/programs/bpf_loader/src/syscalls/logging.rs#L123-L128 */
187 :
188 156 : fd_vm_vec_t const * slice = (fd_vm_vec_t const *)FD_VM_MEM_SLICE_HADDR_LD( vm, slice_vaddr, FD_VM_ALIGN_RUST_SLICE_U8_REF,
189 156 : fd_ulong_sat_mul( slice_cnt, sizeof(fd_vm_vec_t) ) );
190 :
191 : /* https://github.com/anza-xyz/agave/blob/v2.0.6/programs/bpf_loader/src/syscalls/logging.rs#L130-L135 */
192 :
193 78 : FD_VM_CU_UPDATE( vm, fd_ulong_sat_mul( FD_VM_SYSCALL_BASE_COST, slice_cnt ) );
194 :
195 : /* https://github.com/anza-xyz/agave/blob/v2.0.6/programs/bpf_loader/src/syscalls/logging.rs#L136-L141 */
196 :
197 6258 : for( ulong i=0UL; i<slice_cnt; i++ ) {
198 6183 : FD_VM_CU_UPDATE( vm, slice[i].len );
199 6180 : }
200 :
201 : /* https://github.com/anza-xyz/agave/blob/v2.0.6/programs/bpf_loader/src/syscalls/logging.rs#L145-L152 */
202 :
203 75 : ulong msg_sz = 14UL; /* "Program data: ", with space */
204 6243 : for( ulong i=0UL; i<slice_cnt; i++ ) {
205 6171 : ulong cur_len = slice[i].len;
206 : /* This fails the syscall in case of memory mapping issues */
207 12339 : FD_VM_MEM_SLICE_HADDR_LD( vm, slice[i].addr, FD_VM_ALIGN_RUST_U8, cur_len );
208 : /* Every buffer will be base64 encoded + space separated */
209 0 : msg_sz += (slice[i].len + 2)/3*4 + (i > 0);
210 12339 : }
211 :
212 : /* https://github.com/anza-xyz/agave/blob/v2.0.6/programs/bpf_loader/src/syscalls/logging.rs#L156 */
213 :
214 72 : char msg[ FD_LOG_COLLECTOR_MAX ];
215 72 : ulong bytes_written = fd_log_collector_check_and_truncate( &vm->instr_ctx->txn_ctx->log_collector, msg_sz );
216 72 : if( FD_LIKELY( bytes_written < ULONG_MAX ) ) {
217 63 : fd_memcpy( msg, "Program data: ", 14 );
218 63 : char * buf = msg + 14;
219 :
220 3147 : for( ulong i=0UL; i<slice_cnt; i++ ) {
221 3084 : ulong cur_len = slice[i].len;
222 6168 : void const * bytes = FD_VM_MEM_SLICE_HADDR_LD( vm, slice[i].addr, FD_VM_ALIGN_RUST_U8, cur_len );
223 :
224 3084 : if( i ) { *buf = ' '; ++buf; } /* skip first */
225 6168 : buf += fd_base64_encode( buf, bytes, cur_len );
226 6168 : }
227 63 : FD_TEST( (ulong)(buf-msg)==msg_sz );
228 :
229 63 : fd_log_collector_msg( vm->instr_ctx, msg, msg_sz );
230 63 : }
231 :
232 72 : *_ret = 0;
233 72 : return FD_VM_SUCCESS;
234 72 : }
235 :
236 : int
237 : fd_vm_syscall_sol_alloc_free( /**/ void * _vm,
238 : /**/ ulong sz,
239 : /**/ ulong free_vaddr,
240 : FD_PARAM_UNUSED ulong r3,
241 : FD_PARAM_UNUSED ulong r4,
242 : FD_PARAM_UNUSED ulong r5,
243 0 : /**/ ulong * _ret ) {
244 0 : fd_vm_t * vm = (fd_vm_t *)_vm;
245 :
246 : /* https://github.com/anza-xyz/agave/blob/v2.0.8/programs/bpf_loader/src/syscalls/mod.rs#L666 */
247 :
248 : /* This syscall is ... uh ... problematic. But the community has
249 : already recognized this and deprecated it:
250 :
251 : https://github.com/solana-labs/solana/blob/v1.17.23/sdk/src/feature_set.rs#L846
252 :
253 : Unfortunately, old code never dies so, practically, this will need
254 : to be supported until the heat death of the universe.
255 :
256 : The most serious issue is that there is nothing to stop VM code
257 : making a decision based on the _location_ of the returned
258 : allocation. If different validator implementations use different
259 : allocator algorithms, though each implementation would behave
260 : functionally correct in isolation, the VM code that uses it would
261 : actually break consensus.
262 :
263 : As a result, every validator needs to use a bit-for-bit identical
264 : allocation algorithm. Fortunately, Solana is just using a basic
265 : bump allocator:
266 :
267 : https://github.com/solana-labs/solana/blob/v1.17.23/program-runtime/src/invoke_context.rs#L122-L148
268 :
269 : vm->heap_{sz,max} and the below replicate this exactly.
270 :
271 : Another major issue is that this alloc doesn't always conform
272 : typical malloc/free semantics (e.g. C/C++ requires malloc to have
273 : an alignment safe for primitive types ... 8 for the Solana machine
274 : model). This is clearly to support backward compat with older VM
275 : code (though ideally a malloc syscall should have behaved like ...
276 : well ... malloc from day 1). So the alignment behavior below is a
277 : bug-for-bug replication of that:
278 :
279 : https://github.com/solana-labs/solana/blob/v1.17.23/programs/bpf_loader/src/syscalls/mod.rs#L645-L681
280 : https://github.com/solana-labs/solana/blob/v1.17.23/sdk/program/src/entrypoint.rs#L265-L266
281 :
282 : More generally and already ranted about elsewhere, any code that
283 : uses malloc/free style dynamic allocation is inherently broken. So
284 : this syscall should have never existed in the first place ... it
285 : just feeds the trolls. The above is just additional implementation
286 : horror because people consistent think malloc/free is much simpler
287 : than it actually is. This is also an example of how quickly
288 : mistakes fossilize and become a thorn-in-the-side forever.
289 :
290 : IMPORTANT SAFETY TIP! heap_start must be non zero and both
291 : heap_start and heap_end should have an alignment of at least 8.
292 : This existing runtime policies around heap implicitly satisfy this.
293 :
294 : IMPORTANT SAFETY TIP! The specification for Rust's align_offset
295 : doesn't seem to provide a strong guarantee that it will return the
296 : minimal positive offset necessary to align pointers. It is
297 : possible for a "conforming" Rust compiler to break consensus by
298 : using a different align_offset implementation that aligned pointer
299 : between different compilations of the Solana validator and the
300 : below. */
301 :
302 : /* https://github.com/anza-xyz/agave/blob/v2.0.8/programs/bpf_loader/src/syscalls/mod.rs#L676-L680 */
303 :
304 0 : ulong align = fd_vm_is_check_align_enabled( vm ) ? 8UL : FD_VM_ALIGN_RUST_U8;
305 :
306 : /* https://github.com/anza-xyz/agave/blob/v2.0.8/programs/bpf_loader/src/syscalls/mod.rs#L681-L683
307 : Nothing to do. This section can't error, see:
308 : https://doc.rust-lang.org/1.81.0/src/core/alloc/layout.rs.html#70
309 : https://doc.rust-lang.org/1.81.0/src/core/alloc/layout.rs.html#100 */
310 :
311 :
312 : /* https://github.com/anza-xyz/agave/blob/v2.0.8/programs/bpf_loader/src/syscalls/mod.rs#L684
313 : Nothing to do.
314 : TODO: unclear if it throw InstructionError::CallDepth
315 : https://github.com/anza-xyz/agave/blob/v2.0.8/program-runtime/src/invoke_context.rs#L662 */
316 :
317 : /* https://github.com/anza-xyz/agave/blob/v2.0.8/programs/bpf_loader/src/syscalls/mod.rs#L685-L693 */
318 :
319 : /* Non-zero free address implies that this is a free() call. Since
320 : this is a bump allocator, free is a no-op. */
321 0 : if( FD_UNLIKELY( free_vaddr ) ) {
322 0 : *_ret = 0UL;
323 0 : return FD_VM_SUCCESS;
324 0 : }
325 :
326 :
327 0 : ulong heap_sz = fd_ulong_align_up( vm->heap_sz, align );
328 0 : ulong heap_vaddr = fd_ulong_sat_add ( heap_sz, FD_VM_MEM_MAP_HEAP_REGION_START );
329 0 : /**/ heap_sz = fd_ulong_sat_add ( heap_sz, sz );
330 :
331 0 : if( FD_UNLIKELY( heap_sz > vm->heap_max ) ) { /* Not enough free memory */
332 0 : *_ret = 0UL;
333 0 : return FD_VM_SUCCESS;
334 0 : }
335 :
336 0 : vm->heap_sz = heap_sz;
337 :
338 0 : *_ret = heap_vaddr;
339 0 : return FD_VM_SUCCESS;
340 0 : }
341 :
342 : /* https://github.com/anza-xyz/agave/blob/v2.0.8/programs/bpf_loader/src/syscalls/mem_ops.rs#L145 */
343 : int
344 : fd_vm_memmove( fd_vm_t * vm,
345 : ulong dst_vaddr,
346 : ulong src_vaddr,
347 6015 : ulong sz ) {
348 6015 : if( FD_UNLIKELY( !sz ) ) {
349 0 : return FD_VM_SUCCESS;
350 0 : }
351 :
352 6015 : if( !vm->direct_mapping ) {
353 16602 : void * dst = FD_VM_MEM_HADDR_ST( vm, dst_vaddr, FD_VM_ALIGN_RUST_U8, sz );
354 16590 : void const * src = FD_VM_MEM_HADDR_LD( vm, src_vaddr, FD_VM_ALIGN_RUST_U8, sz );
355 0 : memmove( dst, src, sz );
356 16590 : } else {
357 :
358 : /* Find the correct src and dst haddrs to start operating from. If the src or dst vaddrs
359 : belong to the input data region (4), keep track of region statistics to memmove in chunks. */
360 477 : ulong dst_region = FD_VADDR_TO_REGION( dst_vaddr );
361 477 : uchar dst_is_input_mem_region = ( dst_region==4UL );
362 477 : ulong dst_offset = dst_vaddr & 0xffffffffUL;
363 477 : ulong dst_region_idx = 0UL;
364 477 : ulong dst_bytes_rem_in_cur_region;
365 477 : uchar * dst_haddr;
366 477 : if( dst_is_input_mem_region ) {
367 228 : FD_VM_MEM_HADDR_AND_REGION_IDX_FROM_INPUT_REGION_UNCHECKED( vm, dst_offset, dst_region_idx, dst_haddr );
368 228 : if( FD_UNLIKELY( !vm->input_mem_regions[ dst_region_idx ].is_writable ) ) {
369 186 : FD_VM_ERR_FOR_LOG_EBPF( vm, FD_VM_ERR_EBPF_ACCESS_VIOLATION );
370 186 : return FD_VM_ERR_SIGSEGV;
371 186 : }
372 42 : dst_bytes_rem_in_cur_region = fd_ulong_sat_sub( vm->input_mem_regions[ dst_region_idx ].region_sz, ( dst_offset - vm->input_mem_regions[ dst_region_idx ].vaddr_offset ) );
373 249 : } else {
374 249 : dst_haddr = (uchar*)FD_VM_MEM_HADDR_ST_FAST( vm, dst_vaddr );
375 249 : dst_bytes_rem_in_cur_region = fd_ulong_min( sz, fd_ulong_sat_sub( vm->region_st_sz[ dst_region ], dst_offset ) );
376 249 : }
377 :
378 291 : ulong src_region = FD_VADDR_TO_REGION( src_vaddr );
379 291 : uchar src_is_input_mem_region = ( src_region==4UL );
380 291 : ulong src_offset = src_vaddr & 0xffffffffUL;
381 291 : ulong src_region_idx = 0UL;
382 291 : ulong src_bytes_rem_in_cur_region;
383 291 : uchar * src_haddr;
384 291 : if( src_is_input_mem_region ) {
385 144 : FD_VM_MEM_HADDR_AND_REGION_IDX_FROM_INPUT_REGION_UNCHECKED( vm, src_offset, src_region_idx, src_haddr );
386 144 : src_bytes_rem_in_cur_region = fd_ulong_sat_sub( vm->input_mem_regions[ src_region_idx ].region_sz, ( src_offset - vm->input_mem_regions[ src_region_idx ].vaddr_offset ) );
387 147 : } else {
388 147 : src_haddr = (uchar*)FD_VM_MEM_HADDR_LD_FAST( vm, src_vaddr );
389 147 : src_bytes_rem_in_cur_region = fd_ulong_min( sz, fd_ulong_sat_sub( vm->region_ld_sz[ src_region ], src_offset ) );
390 147 : }
391 :
392 : /* Short circuit: if the number of copyable bytes stays within all memory regions,
393 : just memmove and return. This is a majority case in mainnet, devnet, and testnet.
394 : Someone would have to be very crafty and clever to construct a transaction that
395 : deploys and invokes a custom program that does not fall into this branch. */
396 291 : if( FD_LIKELY( sz<=dst_bytes_rem_in_cur_region && sz<=src_bytes_rem_in_cur_region ) ) {
397 78 : memmove( dst_haddr, src_haddr, sz );
398 78 : return FD_VM_SUCCESS;
399 78 : }
400 :
401 : /* Copy over the bytes from each region in chunks. */
402 513 : while( sz>0UL ) {
403 : /* End of region case */
404 501 : if( FD_UNLIKELY( dst_bytes_rem_in_cur_region==0UL ) ) {
405 : /* Only proceed if:
406 : - We are in the input memory region
407 : - There are remaining input memory regions to copy from
408 : - The next input memory region is writable
409 : Fail otherwise. */
410 48 : if( dst_is_input_mem_region &&
411 48 : ++dst_region_idx<vm->input_mem_regions_cnt &&
412 48 : vm->input_mem_regions[ dst_region_idx ].is_writable ) {
413 12 : dst_haddr = (uchar*)vm->input_mem_regions[ dst_region_idx ].haddr;
414 12 : dst_bytes_rem_in_cur_region = vm->input_mem_regions[ dst_region_idx ].region_sz;
415 36 : } else {
416 36 : FD_VM_ERR_FOR_LOG_EBPF( vm, FD_VM_ERR_EBPF_ACCESS_VIOLATION );
417 36 : return FD_VM_ERR_SIGSEGV;
418 36 : }
419 48 : }
420 :
421 465 : if( FD_UNLIKELY( src_bytes_rem_in_cur_region==0UL ) ) {
422 : /* Same as above, except no writable checks. */
423 258 : if( src_is_input_mem_region &&
424 258 : ++src_region_idx<vm->input_mem_regions_cnt ) {
425 93 : src_haddr = (uchar*)vm->input_mem_regions[ src_region_idx ].haddr;
426 93 : src_bytes_rem_in_cur_region = vm->input_mem_regions[ src_region_idx ].region_sz;
427 165 : } else {
428 165 : FD_VM_ERR_FOR_LOG_EBPF( vm, FD_VM_ERR_EBPF_ACCESS_VIOLATION );
429 165 : return FD_VM_ERR_SIGSEGV;
430 165 : }
431 258 : }
432 :
433 : /* Number of bytes to operate on in this iteration is the min of:
434 : - number of bytes left to copy
435 : - bytes left in the current src region
436 : - bytes left in the current dst region */
437 300 : ulong num_bytes_to_copy = fd_ulong_min( sz, fd_ulong_min( src_bytes_rem_in_cur_region, dst_bytes_rem_in_cur_region ) );
438 300 : memmove( dst_haddr, src_haddr, num_bytes_to_copy );
439 :
440 : /* Update haddrs */
441 300 : dst_haddr += num_bytes_to_copy;
442 300 : src_haddr += num_bytes_to_copy;
443 :
444 : /* Update size trackers */
445 300 : sz -= num_bytes_to_copy;
446 300 : src_bytes_rem_in_cur_region -= num_bytes_to_copy;
447 300 : dst_bytes_rem_in_cur_region -= num_bytes_to_copy;
448 300 : }
449 213 : }
450 :
451 5541 : return FD_VM_SUCCESS;
452 6015 : }
453 :
454 : /* https://github.com/anza-xyz/agave/blob/v2.0.8/programs/bpf_loader/src/syscalls/mem_ops.rs#L41 */
455 : int
456 : fd_vm_syscall_sol_memmove( /**/ void * _vm,
457 : /**/ ulong dst_vaddr,
458 : /**/ ulong src_vaddr,
459 : /**/ ulong sz,
460 : FD_PARAM_UNUSED ulong r4,
461 : FD_PARAM_UNUSED ulong r5,
462 180 : /**/ ulong * _ret ) {
463 180 : *_ret = 0;
464 180 : fd_vm_t * vm = (fd_vm_t *)_vm;
465 :
466 180 : FD_VM_CU_MEM_OP_UPDATE( vm, sz );
467 :
468 : /* No overlap check for memmove. */
469 0 : return fd_vm_memmove( vm, dst_vaddr, src_vaddr, sz );
470 180 : }
471 :
472 : /* https://github.com/anza-xyz/agave/blob/v2.0.8/programs/bpf_loader/src/syscalls/mem_ops.rs#L18 */
473 : int
474 : fd_vm_syscall_sol_memcpy( /**/ void * _vm,
475 : /**/ ulong dst_vaddr,
476 : /**/ ulong src_vaddr,
477 : /**/ ulong sz,
478 : FD_PARAM_UNUSED ulong r4,
479 : FD_PARAM_UNUSED ulong r5,
480 5910 : /**/ ulong * _ret ) {
481 5910 : *_ret = 0;
482 5910 : fd_vm_t * vm = (fd_vm_t *)_vm;
483 :
484 5910 : FD_VM_CU_MEM_OP_UPDATE( vm, sz );
485 :
486 : /* Exact same as memmove, except also check overlap.
487 : https://github.com/anza-xyz/agave/blob/master/programs/bpf_loader/src/syscalls/mem_ops.rs#L31 */
488 5907 : FD_VM_MEM_CHECK_NON_OVERLAPPING( vm, src_vaddr, sz, dst_vaddr, sz );
489 :
490 5835 : return fd_vm_memmove( vm, dst_vaddr, src_vaddr, sz );
491 5907 : }
492 :
493 : int
494 : fd_vm_syscall_sol_memcmp( /**/ void * _vm,
495 : /**/ ulong m0_vaddr,
496 : /**/ ulong m1_vaddr,
497 : /**/ ulong sz,
498 : /**/ ulong out_vaddr,
499 : FD_PARAM_UNUSED ulong r5,
500 582 : /**/ ulong * _ret ) {
501 582 : *_ret = 0;
502 582 : fd_vm_t * vm = (fd_vm_t *)_vm;
503 :
504 : /* https://github.com/anza-xyz/agave/blob/master/programs/bpf_loader/src/syscalls/mem_ops.rs#L59 */
505 :
506 582 : FD_VM_CU_MEM_OP_UPDATE( vm, sz );
507 :
508 : /* Note: though this behaves like a normal C-style memcmp, we can't
509 : use the compilers / libc memcmp directly because the specification
510 : doesn't provide strong enough guarantees about the return value (it
511 : only promises the sign). */
512 :
513 582 : if( !FD_FEATURE_ACTIVE( vm->instr_ctx->slot_ctx, bpf_account_data_direct_mapping ) ) {
514 1116 : uchar const * m0 = (uchar const *)FD_VM_MEM_SLICE_HADDR_LD( vm, m0_vaddr, FD_VM_ALIGN_RUST_U8, sz );
515 1116 : uchar const * m1 = (uchar const *)FD_VM_MEM_SLICE_HADDR_LD( vm, m1_vaddr, FD_VM_ALIGN_RUST_U8, sz );
516 :
517 : /* Silly that this doesn't use r0 to return ... slower, more edge
518 : case, different from libc style memcmp, harder to callers to use,
519 : etc ... probably too late to do anything about it now ... sigh */
520 :
521 558 : void * _out = FD_VM_MEM_HADDR_ST( vm, out_vaddr, FD_VM_ALIGN_RUST_I32, 4UL );
522 :
523 0 : int out = 0;
524 11760 : for( ulong i=0UL; i<sz; i++ ) {
525 11466 : int i0 = (int)m0[i];
526 11466 : int i1 = (int)m1[i];
527 11466 : if( i0!=i1 ) {
528 264 : out = i0 - i1;
529 264 : break;
530 264 : }
531 11466 : }
532 :
533 558 : fd_memcpy( _out, &out, 4UL ); /* Sigh ... see note above (and might be unaligned ... double sigh) */
534 :
535 558 : return FD_VM_SUCCESS;
536 1116 : } else {
537 : /* In the case that direct mapping is enabled, the behavior for memcmps
538 : differ significantly from the non-dm case. The key difference is that
539 : invalid loads will instantly lead to errors in the non-dm case. However,
540 : when direct mapping is enabled, we will first try to memcmp the largest
541 : size valid chunk first, and will exit successfully if a difference is
542 : found without aborting from the VM. A chunk is defined as the largest
543 : valid vaddr range in both memory regions that doesn't span multiple
544 : regions.
545 :
546 : Example:
547 : fd_vm_syscall_sol_memcmp( vm, m0_addr : 0x4000, m1_vaddr : 0x2000, 0x200, ... );
548 : m0's region: m0_addr 0x4000 -> 0x4000 + 0x50 (region sz 0x50)
549 : m1's region: m1_addr 0x2000 -> 0x2000 + 0x100 (region sz 0x100)
550 : sz: 0x200
551 :
552 : Case 1: 0x4000 -> 0x4050 does have the same bytes as 0x2000 -> 0x2050
553 : Case 2: 0x4000 -> 0x4050 does NOT have the same bytes as 0x2000 -> 0x2050
554 :
555 : Pre-DM:
556 : This will fail out before any bytes are compared because the memory
557 : translation is done first.
558 :
559 : Post-DM:
560 : For case 1, the memcmp will return an error and the VM will exit because
561 : the memcmp will eventually try to access 0x4051 which is invalid. First
562 : 0x50 bytes are compared, but the next chunk will lead to an invalid
563 : access.
564 :
565 : For case 2, the memcmp will first translate the first 0x50 bytes and will
566 : see that the bytes are not the same. This will lead to the syscall
567 : exiting out successfully without detecting the access violation.
568 :
569 : https://github.com/anza-xyz/agave/blob/v2.0.10/programs/bpf_loader/src/syscalls/mem_ops.rs#L213
570 : */
571 :
572 66 : void * _out = FD_VM_MEM_HADDR_ST( vm, out_vaddr, FD_VM_ALIGN_RUST_I32, 4UL );
573 0 : int out = 0;
574 :
575 : /* Lookup host address chunks. Try to do a standard memcpy if the regions
576 : do not cross memory regions. The translation logic is different if the
577 : the virtual address region is the input region vs. not. See the comment
578 : in fd_bpf_loader_serialization for more details on how the input
579 : region is different from other regions. The input data region will try
580 : to lookup the number of remaining bytes in the specific data region. If
581 : the memory access is not in the input data region, assume the bytes in
582 : the current region are bound by the size of the remaining bytes in the
583 : region. */
584 :
585 66 : ulong m0_region = FD_VADDR_TO_REGION( m0_vaddr );
586 66 : ulong m0_offset = m0_vaddr & 0xffffffffUL;
587 66 : ulong m0_region_idx = 0UL;
588 66 : ulong m0_bytes_in_cur_region = sz;
589 66 : uchar * m0_haddr = NULL;
590 66 : if( m0_region==4UL ) {
591 9 : m0_region_idx = fd_vm_get_input_mem_region_idx( vm, m0_offset );
592 9 : m0_haddr = (uchar*)(vm->input_mem_regions[ m0_region_idx ].haddr + m0_offset - vm->input_mem_regions[ m0_region_idx ].vaddr_offset);
593 9 : m0_bytes_in_cur_region = fd_ulong_min( sz, fd_ulong_sat_sub( vm->input_mem_regions[ m0_region_idx ].region_sz,
594 9 : ((ulong)m0_haddr - vm->input_mem_regions[ m0_region_idx ].haddr) ) );
595 12 : } else {
596 : /* We can safely load a slice of 1 byte here because we know that we will
597 : not ever read more than the number of bytes that are left in the
598 : region. */
599 12 : m0_bytes_in_cur_region = fd_ulong_min( sz, vm->region_ld_sz[ m0_region ] - m0_offset );
600 24 : m0_haddr = (uchar *)FD_VM_MEM_SLICE_HADDR_LD_SZ_UNCHECKED( vm, m0_vaddr, FD_VM_ALIGN_RUST_U8 );
601 24 : }
602 :
603 21 : ulong m1_region = FD_VADDR_TO_REGION( m1_vaddr );
604 21 : ulong m1_offset = m1_vaddr & 0xffffffffUL;
605 21 : ulong m1_region_idx = 0UL;
606 21 : ulong m1_bytes_in_cur_region = sz;
607 21 : uchar * m1_haddr = NULL;
608 21 : if( m1_region==4UL ) {
609 6 : m1_region_idx = fd_vm_get_input_mem_region_idx( vm, m1_offset );
610 6 : m1_haddr = (uchar*)(vm->input_mem_regions[ m1_region_idx ].haddr + m1_offset - vm->input_mem_regions[ m1_region_idx ].vaddr_offset);
611 6 : m1_bytes_in_cur_region = fd_ulong_min( sz, fd_ulong_sat_sub( vm->input_mem_regions[ m1_region_idx ].region_sz,
612 6 : ((ulong)m1_haddr - vm->input_mem_regions[ m1_region_idx ].haddr) ) );
613 15 : } else {
614 15 : m1_bytes_in_cur_region = fd_ulong_min( sz, vm->region_ld_sz[ m1_region ] - m1_offset );
615 30 : m1_haddr = (uchar *)FD_VM_MEM_SLICE_HADDR_LD_SZ_UNCHECKED( vm, m1_vaddr, FD_VM_ALIGN_RUST_U8 );
616 30 : }
617 :
618 : /* Case where the operation spans multiple regions. Copy over the bytes
619 : from each region while iterating to the next one. */
620 : /* TODO: An optimization would be to memcmp chunks at once */
621 21 : ulong m0_idx = 0UL;
622 21 : ulong m1_idx = 0UL;
623 8625 : for( ulong i=0UL; i<sz; i++ ) {
624 8622 : if( FD_UNLIKELY( !m0_bytes_in_cur_region ) ) {
625 : /* If the memory is not in the input region or it is the last input
626 : memory region, that means that if we don't exit now we will have
627 : an access violation. */
628 12 : if( FD_UNLIKELY( m0_region!=4UL || ++m0_region_idx>=vm->input_mem_regions_cnt ) ) {
629 6 : FD_VM_ERR_FOR_LOG_EBPF( vm, FD_VM_ERR_EBPF_ACCESS_VIOLATION );
630 6 : return FD_VM_ERR_SIGSEGV;
631 6 : }
632 : /* Otherwise, query the next input region. */
633 6 : m0_haddr = (uchar*)vm->input_mem_regions[ m0_region_idx ].haddr;
634 6 : m0_idx = 0UL;
635 6 : m0_bytes_in_cur_region = vm->input_mem_regions[ m0_region_idx ].region_sz;
636 6 : }
637 8616 : if( FD_UNLIKELY( !m1_bytes_in_cur_region ) ) {
638 0 : if( FD_UNLIKELY( m1_region!=4UL || ++m1_region_idx>=vm->input_mem_regions_cnt ) ) {
639 0 : FD_VM_ERR_FOR_LOG_EBPF( vm, FD_VM_ERR_EBPF_ACCESS_VIOLATION );
640 0 : return FD_VM_ERR_SIGSEGV;
641 0 : }
642 0 : m1_haddr = (uchar*)vm->input_mem_regions[ m1_region_idx ].haddr;
643 0 : m1_idx = 0UL;
644 0 : m1_bytes_in_cur_region = vm->input_mem_regions[ m1_region_idx ].region_sz;
645 0 : }
646 :
647 8616 : int i0 = (int)m0_haddr[ m0_idx ];
648 8616 : int i1 = (int)m1_haddr[ m1_idx ];
649 8616 : if( i0!=i1 ) {
650 12 : out = i0 - i1;
651 12 : break;
652 12 : }
653 :
654 8604 : m0_bytes_in_cur_region--;
655 8604 : m1_bytes_in_cur_region--;
656 8604 : m0_idx++;
657 8604 : m1_idx++;
658 8604 : }
659 15 : fd_memcpy( _out, &out, 4UL ); /* Sigh ... see note above (and might be unaligned ... double sigh) */
660 15 : return FD_VM_SUCCESS;
661 21 : }
662 582 : }
663 :
664 : int
665 : fd_vm_syscall_sol_memset( /**/ void * _vm,
666 : /**/ ulong dst_vaddr,
667 : /**/ ulong c,
668 : /**/ ulong sz,
669 : FD_PARAM_UNUSED ulong r4,
670 : FD_PARAM_UNUSED ulong r5,
671 261 : /**/ ulong * _ret ) {
672 261 : fd_vm_t * vm = (fd_vm_t *)_vm;
673 261 : *_ret = 0;
674 :
675 : /* https://github.com/anza-xyz/agave/blob/master/programs/bpf_loader/src/syscalls/mem_ops.rs#L115 */
676 :
677 261 : FD_VM_CU_MEM_OP_UPDATE( vm, sz );
678 :
679 261 : if( FD_UNLIKELY( !sz ) ) {
680 9 : return FD_VM_SUCCESS;
681 9 : }
682 :
683 252 : ulong region = FD_VADDR_TO_REGION( dst_vaddr );
684 252 : ulong offset = dst_vaddr & 0xffffffffUL;
685 252 : uchar * haddr;
686 :
687 252 : int b = (int)(c & 255UL);
688 :
689 252 : if( !vm->direct_mapping ) {
690 255 : haddr = FD_VM_MEM_HADDR_ST( vm, dst_vaddr, FD_VM_ALIGN_RUST_U8, sz );
691 0 : fd_memset( haddr, b, sz );
692 255 : } else if( region!=4UL ) {
693 : /* I acknowledge that this is not an ideal implementation, but Agave will
694 : memset as many bytes as possible until it reaches an unwritable section.
695 : This is purely just for fuzzing conformance, sigh... */
696 51 : haddr = (uchar*)FD_VM_MEM_HADDR_ST_FAST( vm, dst_vaddr );
697 51 : ulong bytes_in_cur_region = fd_ulong_sat_sub( vm->region_st_sz[ region ], offset );
698 51 : ulong bytes_to_set = fd_ulong_min( sz, bytes_in_cur_region );
699 51 : fd_memset( haddr, b, bytes_to_set );
700 51 : if( FD_UNLIKELY( bytes_to_set<sz ) ) {
701 45 : FD_VM_ERR_FOR_LOG_EBPF( vm, FD_VM_ERR_EBPF_ACCESS_VIOLATION );
702 45 : return FD_VM_ERR_SIGSEGV;
703 45 : }
704 114 : } else {
705 : /* In this case, we are in the input region AND direct mapping is enabled.
706 : Get the haddr and input region and check if it's writable. */
707 114 : ulong region_idx;
708 114 : FD_VM_MEM_HADDR_AND_REGION_IDX_FROM_INPUT_REGION_UNCHECKED( vm, offset, region_idx, haddr );
709 114 : ulong offset_in_cur_region = offset - vm->input_mem_regions[ region_idx ].vaddr_offset;
710 114 : ulong bytes_in_cur_region = fd_ulong_sat_sub( vm->input_mem_regions[ region_idx ].region_sz, offset_in_cur_region );
711 :
712 : /* Memset goes into multiple regions. */
713 174 : while( sz>0UL ) {
714 : /* Check that current region is writable */
715 174 : if( FD_UNLIKELY( !vm->input_mem_regions[ region_idx ].is_writable ) ) {
716 96 : break;
717 96 : }
718 :
719 : /* Memset bytes */
720 78 : ulong num_bytes_to_set = fd_ulong_min( sz, bytes_in_cur_region );
721 78 : fd_memset( haddr, b, num_bytes_to_set );
722 78 : sz -= num_bytes_to_set;
723 :
724 : /* If no more regions left, break. */
725 78 : if( ++region_idx==vm->input_mem_regions_cnt ) {
726 18 : break;
727 18 : }
728 :
729 : /* Move haddr to next region. */
730 60 : haddr = (uchar*)vm->input_mem_regions[ region_idx ].haddr;
731 60 : bytes_in_cur_region = vm->input_mem_regions[ region_idx ].region_sz;
732 60 : }
733 :
734 : /* If we were not able to successfully set all the bytes, throw an error. */
735 114 : if( FD_UNLIKELY( sz>0 ) ) {
736 108 : FD_VM_ERR_FOR_LOG_EBPF( vm, FD_VM_ERR_EBPF_ACCESS_VIOLATION );
737 108 : return FD_VM_ERR_SIGSEGV;
738 108 : }
739 114 : }
740 96 : return FD_VM_SUCCESS;
741 252 : }
|