Line data Source code
1 : #include "fd_vm_syscall.h"
2 :
3 : #include "../../../ballet/base64/fd_base64.h"
4 : #include "../../../ballet/utf8/fd_utf8.h"
5 : #include "../../runtime/sysvar/fd_sysvar.h"
6 : #include "../../runtime/sysvar/fd_sysvar_clock.h"
7 : #include "../../runtime/sysvar/fd_sysvar_epoch_schedule.h"
8 : #include "../../runtime/sysvar/fd_sysvar_fees.h"
9 : #include "../../runtime/context/fd_exec_txn_ctx.h"
10 : #include "../../runtime/context/fd_exec_instr_ctx.h"
11 : #include "../../runtime/fd_account.h"
12 :
13 : int
14 : fd_vm_syscall_abort( FD_PARAM_UNUSED void * _vm,
15 : FD_PARAM_UNUSED ulong r1,
16 : FD_PARAM_UNUSED ulong r2,
17 : FD_PARAM_UNUSED ulong r3,
18 : FD_PARAM_UNUSED ulong r4,
19 : FD_PARAM_UNUSED ulong r5,
20 99 : FD_PARAM_UNUSED ulong * _ret ) {
21 : /* https://github.com/anza-xyz/agave/blob/v2.0.6/programs/bpf_loader/src/syscalls/mod.rs#L630 */
22 99 : fd_vm_t * vm = (fd_vm_t *)_vm;
23 99 : FD_VM_ERR_FOR_LOG_SYSCALL( vm, FD_VM_SYSCALL_ERR_ABORT );
24 99 : return FD_VM_SYSCALL_ERR_ABORT;
25 99 : }
26 :
/* FD_TRANSLATE_STRING( vm, vaddr, msg_sz ) evaluates to a read-only
   host pointer (char const *) for the msg_sz bytes at VM address vaddr,
   after checking with fd_utf8_verify that those bytes are valid UTF-8.

   IMPORTANT: this macro contains a `return`.  If the bytes are not
   valid UTF-8 it reports FD_VM_SYSCALL_ERR_INVALID_STRING through
   FD_VM_ERR_FOR_LOG_SYSCALL and returns that code from the ENCLOSING
   function.  It is therefore only usable inside functions returning
   int.  (FD_VM_MEM_SLICE_HADDR_LD is defined elsewhere; presumably it
   bails out of the enclosing function the same way on a bad mapping --
   confirm against its definition.)

   Arguments may be evaluated more than once; pass side-effect-free
   expressions.

   Counterpart of Agave's translate_string_and_do().
   https://github.com/anza-xyz/agave/blob/v2.0.6/programs/bpf_loader/src/syscalls/mod.rs#L601

   At the Agave revision linked above, the only two users are the
   panic syscall and the log syscall. */
#define FD_TRANSLATE_STRING( vm, vaddr, msg_sz ) (__extension__({                                              \
    char const * fd_translated_msg = FD_VM_MEM_SLICE_HADDR_LD( vm, vaddr, FD_VM_ALIGN_RUST_U8, msg_sz );       \
    if( FD_UNLIKELY( !fd_utf8_verify( fd_translated_msg, msg_sz ) ) ) {                                        \
      FD_VM_ERR_FOR_LOG_SYSCALL( vm, FD_VM_SYSCALL_ERR_INVALID_STRING );                                       \
      return FD_VM_SYSCALL_ERR_INVALID_STRING;                                                                 \
    }                                                                                                          \
    fd_translated_msg;                                                                                         \
  }))
42 :
43 : int
44 : fd_vm_syscall_sol_panic( /**/ void * _vm,
45 : /**/ ulong file_vaddr,
46 : /**/ ulong file_sz,
47 : /**/ ulong line,
48 : /**/ ulong column,
49 : FD_PARAM_UNUSED ulong r5,
50 33 : FD_PARAM_UNUSED ulong * _ret ) {
51 33 : fd_vm_t * vm = (fd_vm_t *)_vm;
52 :
53 : /* https://github.com/anza-xyz/agave/blob/v2.0.6/programs/bpf_loader/src/syscalls/mod.rs#L637
54 :
55 : Note: this syscall is not used by the Rust SDK, only by the C SDK.
56 : Rust transforms `panic!()` into a log, followed by an abort.
57 : It's unclear if this syscall actually makes any sense... */
58 33 : FD_VM_CU_UPDATE( vm, file_sz );
59 :
60 : /* Validate string */
61 27 : FD_TRANSLATE_STRING( vm, file_vaddr, file_sz );
62 :
63 : /* Note: we truncate the log, ignoring file, line, column.
64 : As mentioned above, it's unclear if anyone is even using this syscall,
65 : so dealing with the complexity of Agave's log is a waste of time. */
66 0 : (void)line;
67 24 : (void)column;
68 :
69 24 : FD_VM_ERR_FOR_LOG_SYSCALL( vm, FD_VM_SYSCALL_ERR_PANIC );
70 24 : return FD_VM_SYSCALL_ERR_PANIC;
71 54 : }
72 :
73 : int
74 : fd_vm_syscall_sol_log( /**/ void * _vm,
75 : /**/ ulong msg_vaddr,
76 : /**/ ulong msg_sz,
77 : FD_PARAM_UNUSED ulong r2,
78 : FD_PARAM_UNUSED ulong r3,
79 : FD_PARAM_UNUSED ulong r4,
80 894 : /**/ ulong * _ret ) {
81 894 : fd_vm_t * vm = (fd_vm_t *)_vm;
82 :
83 : /* https://github.com/anza-xyz/agave/blob/v2.0.6/programs/bpf_loader/src/syscalls/logging.rs#L5 */
84 :
85 894 : FD_VM_CU_UPDATE( vm, fd_ulong_max( msg_sz, FD_VM_SYSCALL_BASE_COST ) );
86 :
87 : /* Note: when msg_sz==0, msg can be undefined. fd_log_collector_program_log() handles it.
88 : FIXME: Macro invocation in function invocation? */
89 864 : fd_log_collector_program_log( vm->instr_ctx, FD_TRANSLATE_STRING( vm, msg_vaddr, msg_sz ), msg_sz );
90 :
91 0 : *_ret = 0UL;
92 861 : return FD_VM_SUCCESS;
93 1728 : }
94 :
95 : int
96 : fd_vm_syscall_sol_log_64( void * _vm,
97 : ulong r1,
98 : ulong r2,
99 : ulong r3,
100 : ulong r4,
101 : ulong r5,
102 15 : ulong * _ret ) {
103 15 : fd_vm_t * vm = (fd_vm_t *)_vm;
104 :
105 : /* https://github.com/anza-xyz/agave/blob/v2.0.6/programs/bpf_loader/src/syscalls/logging.rs#L37 */
106 :
107 15 : FD_VM_CU_UPDATE( vm, FD_VM_LOG_64_UNITS );
108 :
109 : /* Max msg_sz: 46 - 15 + 16*5 = 111 < 127 => we can use printf */
110 0 : fd_log_collector_printf_dangerous_max_127( vm->instr_ctx,
111 12 : "Program log: 0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx", r1, r2, r3, r4, r5 );
112 :
113 12 : *_ret = 0UL;
114 12 : return FD_VM_SUCCESS;
115 15 : }
116 :
117 : int
118 : fd_vm_syscall_sol_log_compute_units( /**/ void * _vm,
119 : FD_PARAM_UNUSED ulong r1,
120 : FD_PARAM_UNUSED ulong r2,
121 : FD_PARAM_UNUSED ulong r3,
122 : FD_PARAM_UNUSED ulong r4,
123 : FD_PARAM_UNUSED ulong r5,
124 15 : /**/ ulong * _ret ) {
125 15 : fd_vm_t * vm = (fd_vm_t *)_vm;
126 :
127 : /* https://github.com/anza-xyz/agave/blob/v2.0.6/programs/bpf_loader/src/syscalls/logging.rs#L60 */
128 :
129 15 : FD_VM_CU_UPDATE( vm, FD_VM_SYSCALL_BASE_COST );
130 :
131 : /* Max msg_sz: 40 - 3 + 20 = 57 < 127 => we can use printf */
132 0 : fd_log_collector_printf_dangerous_max_127( vm->instr_ctx,
133 12 : "Program consumption: %lu units remaining", vm->cu );
134 :
135 12 : *_ret = 0UL;
136 12 : return FD_VM_SUCCESS;
137 15 : }
138 :
139 : int
140 : fd_vm_syscall_sol_log_pubkey( /**/ void * _vm,
141 : /**/ ulong pubkey_vaddr,
142 : FD_PARAM_UNUSED ulong r2,
143 : FD_PARAM_UNUSED ulong r3,
144 : FD_PARAM_UNUSED ulong r4,
145 : FD_PARAM_UNUSED ulong r5,
146 12 : /**/ ulong * _ret ) {
147 12 : fd_vm_t * vm = (fd_vm_t *)_vm;
148 :
149 : /* https://github.com/anza-xyz/agave/blob/v2.0.6/programs/bpf_loader/src/syscalls/logging.rs#L84 */
150 :
151 12 : FD_VM_CU_UPDATE( vm, FD_VM_LOG_PUBKEY_UNITS );
152 :
153 15 : void const * pubkey = FD_VM_MEM_HADDR_LD( vm, pubkey_vaddr, FD_VM_ALIGN_RUST_PUBKEY, sizeof(fd_pubkey_t) );
154 :
155 0 : char msg[ FD_BASE58_ENCODED_32_SZ ]; ulong msg_sz;
156 15 : if( FD_UNLIKELY( fd_base58_encode_32( pubkey, &msg_sz, msg )==NULL ) ) {
157 0 : return FD_VM_SYSCALL_ERR_INVALID_STRING;
158 0 : }
159 :
160 3 : fd_log_collector_program_log( vm->instr_ctx, msg, msg_sz );
161 :
162 3 : *_ret = 0UL;
163 3 : return FD_VM_SUCCESS;
164 15 : }
165 :
166 : int
167 : fd_vm_syscall_sol_log_data( /**/ void * _vm,
168 : /**/ ulong slice_vaddr,
169 : /**/ ulong slice_cnt,
170 : FD_PARAM_UNUSED ulong r3,
171 : FD_PARAM_UNUSED ulong r4,
172 : FD_PARAM_UNUSED ulong r5,
173 81 : /**/ ulong * _ret ) {
174 81 : fd_vm_t * vm = (fd_vm_t *)_vm;
175 :
176 : /* https://github.com/anza-xyz/agave/blob/v2.0.6/programs/bpf_loader/src/syscalls/logging.rs#L109
177 :
178 : Note: this is implemented following Agave's perverse behavior.
179 : We need to loop the slice multiple times to match the exact error,
180 : first compute budget, then memory mapping.
181 : And finally we can loop to log. */
182 :
183 : /* https://github.com/anza-xyz/agave/blob/v2.0.6/programs/bpf_loader/src/syscalls/logging.rs#L121 */
184 :
185 81 : FD_VM_CU_UPDATE( vm, FD_VM_SYSCALL_BASE_COST );
186 :
187 : /* https://github.com/anza-xyz/agave/blob/v2.0.6/programs/bpf_loader/src/syscalls/logging.rs#L123-L128 */
188 :
189 156 : fd_vm_vec_t const * slice = (fd_vm_vec_t const *)FD_VM_MEM_SLICE_HADDR_LD( vm, slice_vaddr, FD_VM_ALIGN_RUST_SLICE_U8_REF,
190 156 : fd_ulong_sat_mul( slice_cnt, sizeof(fd_vm_vec_t) ) );
191 :
192 : /* https://github.com/anza-xyz/agave/blob/v2.0.6/programs/bpf_loader/src/syscalls/logging.rs#L130-L135 */
193 :
194 78 : FD_VM_CU_UPDATE( vm, fd_ulong_sat_mul( FD_VM_SYSCALL_BASE_COST, slice_cnt ) );
195 :
196 : /* https://github.com/anza-xyz/agave/blob/v2.0.6/programs/bpf_loader/src/syscalls/logging.rs#L136-L141 */
197 :
198 6258 : for( ulong i=0UL; i<slice_cnt; i++ ) {
199 6183 : FD_VM_CU_UPDATE( vm, slice[i].len );
200 6180 : }
201 :
202 : /* https://github.com/anza-xyz/agave/blob/v2.0.6/programs/bpf_loader/src/syscalls/logging.rs#L145-L152 */
203 :
204 75 : ulong msg_sz = 14UL; /* "Program data: ", with space */
205 6243 : for( ulong i=0UL; i<slice_cnt; i++ ) {
206 6171 : ulong cur_len = slice[i].len;
207 : /* This fails the syscall in case of memory mapping issues */
208 12339 : FD_VM_MEM_SLICE_HADDR_LD( vm, slice[i].addr, FD_VM_ALIGN_RUST_U8, cur_len );
209 : /* Every buffer will be base64 encoded + space separated */
210 0 : msg_sz += (slice[i].len + 2)/3*4 + (i > 0);
211 12339 : }
212 :
213 : /* https://github.com/anza-xyz/agave/blob/v2.0.6/programs/bpf_loader/src/syscalls/logging.rs#L156 */
214 :
215 72 : char msg[ FD_LOG_COLLECTOR_MAX ];
216 72 : ulong bytes_written = fd_log_collector_check_and_truncate( &vm->instr_ctx->txn_ctx->log_collector, msg_sz );
217 72 : if( FD_LIKELY( bytes_written < ULONG_MAX ) ) {
218 63 : fd_memcpy( msg, "Program data: ", 14 );
219 63 : char * buf = msg + 14;
220 :
221 3147 : for( ulong i=0UL; i<slice_cnt; i++ ) {
222 3084 : ulong cur_len = slice[i].len;
223 6168 : void const * bytes = FD_VM_MEM_SLICE_HADDR_LD( vm, slice[i].addr, FD_VM_ALIGN_RUST_U8, cur_len );
224 :
225 3084 : if( i ) { *buf = ' '; ++buf; } /* skip first */
226 6168 : buf += fd_base64_encode( buf, bytes, cur_len );
227 6168 : }
228 63 : FD_TEST( (ulong)(buf-msg)==msg_sz );
229 :
230 63 : fd_log_collector_msg( vm->instr_ctx, msg, msg_sz );
231 63 : }
232 :
233 72 : *_ret = 0;
234 72 : return FD_VM_SUCCESS;
235 72 : }
236 :
237 : int
238 : fd_vm_syscall_sol_alloc_free( /**/ void * _vm,
239 : /**/ ulong sz,
240 : /**/ ulong free_vaddr,
241 : FD_PARAM_UNUSED ulong r3,
242 : FD_PARAM_UNUSED ulong r4,
243 : FD_PARAM_UNUSED ulong r5,
244 0 : /**/ ulong * _ret ) {
245 0 : fd_vm_t * vm = (fd_vm_t *)_vm;
246 :
247 : /* https://github.com/anza-xyz/agave/blob/v2.0.8/programs/bpf_loader/src/syscalls/mod.rs#L666 */
248 :
249 : /* This syscall is ... uh ... problematic. But the community has
250 : already recognized this and deprecated it:
251 :
252 : https://github.com/solana-labs/solana/blob/v1.17.23/sdk/src/feature_set.rs#L846
253 :
254 : Unfortunately, old code never dies so, practically, this will need
255 : to be supported until the heat death of the universe.
256 :
257 : The most serious issue is that there is nothing to stop VM code
258 : making a decision based on the _location_ of the returned
259 : allocation. If different validator implementations use different
260 : allocator algorithms, though each implementation would behave
261 : functionally correct in isolation, the VM code that uses it would
262 : actually break consensus.
263 :
264 : As a result, every validator needs to use a bit-for-bit identical
265 : allocation algorithm. Fortunately, Solana is just using a basic
266 : bump allocator:
267 :
268 : https://github.com/solana-labs/solana/blob/v1.17.23/program-runtime/src/invoke_context.rs#L122-L148
269 :
270 : vm->heap_{sz,max} and the below replicate this exactly.
271 :
272 : Another major issue is that this alloc doesn't always conform
273 : typical malloc/free semantics (e.g. C/C++ requires malloc to have
274 : an alignment safe for primitive types ... 8 for the Solana machine
275 : model). This is clearly to support backward compat with older VM
276 : code (though ideally a malloc syscall should have behaved like ...
277 : well ... malloc from day 1). So the alignment behavior below is a
278 : bug-for-bug replication of that:
279 :
280 : https://github.com/solana-labs/solana/blob/v1.17.23/programs/bpf_loader/src/syscalls/mod.rs#L645-L681
281 : https://github.com/solana-labs/solana/blob/v1.17.23/sdk/program/src/entrypoint.rs#L265-L266
282 :
283 : More generally and already ranted about elsewhere, any code that
284 : uses malloc/free style dynamic allocation is inherently broken. So
285 : this syscall should have never existed in the first place ... it
286 : just feeds the trolls. The above is just additional implementation
287 : horror because people consistent think malloc/free is much simpler
288 : than it actually is. This is also an example of how quickly
289 : mistakes fossilize and become a thorn-in-the-side forever.
290 :
291 : IMPORTANT SAFETY TIP! heap_start must be non zero and both
292 : heap_start and heap_end should have an alignment of at least 8.
293 : This existing runtime policies around heap implicitly satisfy this.
294 :
295 : IMPORTANT SAFETY TIP! The specification for Rust's align_offset
296 : doesn't seem to provide a strong guarantee that it will return the
297 : minimal positive offset necessary to align pointers. It is
298 : possible for a "conforming" Rust compiler to break consensus by
299 : using a different align_offset implementation that aligned pointer
300 : between different compilations of the Solana validator and the
301 : below. */
302 :
303 : /* https://github.com/anza-xyz/agave/blob/v2.0.8/programs/bpf_loader/src/syscalls/mod.rs#L676-L680 */
304 :
305 0 : ulong align = fd_vm_is_check_align_enabled( vm ) ? 8UL : FD_VM_ALIGN_RUST_U8;
306 :
307 : /* https://github.com/anza-xyz/agave/blob/v2.0.8/programs/bpf_loader/src/syscalls/mod.rs#L681-L683
308 : Nothing to do. This section can't error, see:
309 : https://doc.rust-lang.org/1.81.0/src/core/alloc/layout.rs.html#70
310 : https://doc.rust-lang.org/1.81.0/src/core/alloc/layout.rs.html#100 */
311 :
312 :
313 : /* https://github.com/anza-xyz/agave/blob/v2.0.8/programs/bpf_loader/src/syscalls/mod.rs#L684
314 : Nothing to do.
315 : TODO: unclear if it throw InstructionError::CallDepth
316 : https://github.com/anza-xyz/agave/blob/v2.0.8/program-runtime/src/invoke_context.rs#L662 */
317 :
318 : /* https://github.com/anza-xyz/agave/blob/v2.0.8/programs/bpf_loader/src/syscalls/mod.rs#L685-L693 */
319 :
320 : /* Non-zero free address implies that this is a free() call. Since
321 : this is a bump allocator, free is a no-op. */
322 0 : if( FD_UNLIKELY( free_vaddr ) ) {
323 0 : *_ret = 0UL;
324 0 : return FD_VM_SUCCESS;
325 0 : }
326 :
327 :
328 0 : ulong heap_sz = fd_ulong_align_up( vm->heap_sz, align );
329 0 : ulong heap_vaddr = fd_ulong_sat_add ( heap_sz, FD_VM_MEM_MAP_HEAP_REGION_START );
330 0 : /**/ heap_sz = fd_ulong_sat_add ( heap_sz, sz );
331 :
332 0 : if( FD_UNLIKELY( heap_sz > vm->heap_max ) ) { /* Not enough free memory */
333 0 : *_ret = 0UL;
334 0 : return FD_VM_SUCCESS;
335 0 : }
336 :
337 0 : vm->heap_sz = heap_sz;
338 :
339 0 : *_ret = heap_vaddr;
340 0 : return FD_VM_SUCCESS;
341 0 : }
342 :
343 : /* https://github.com/anza-xyz/agave/blob/v2.0.8/programs/bpf_loader/src/syscalls/mem_ops.rs#L145 */
344 : int
345 : fd_vm_memmove( fd_vm_t * vm,
346 : ulong dst_vaddr,
347 : ulong src_vaddr,
348 6051 : ulong sz ) {
349 6051 : if( FD_UNLIKELY( !sz ) ) {
350 0 : return FD_VM_SUCCESS;
351 0 : }
352 :
353 6051 : if( !vm->direct_mapping ) {
354 5532 : void * dst = FD_VM_MEM_HADDR_ST( vm, dst_vaddr, FD_VM_ALIGN_RUST_U8, sz );
355 16572 : void const * src = FD_VM_MEM_HADDR_LD( vm, src_vaddr, FD_VM_ALIGN_RUST_U8, sz );
356 0 : memmove( dst, src, sz );
357 16572 : } else {
358 : /* If the src and dst vaddrs overlap and src_vaddr < dst_vaddr, Agave iterates through input regions backwards
359 : to maintain correct memmove behavior for overlapping cases. Although this logic should only apply to the src and dst
360 : vaddrs being in the input data region (since that is the only possible case you could have overlapping, chunked-up memmoves),
361 : Agave will iterate backwards in ANY region. If it eventually reaches the end of a region after iterating backwards and
362 : hits an access violation, the bytes from [region_begin, start_vaddr] will still be written to, causing fuzzing mismatches.
363 : In this case, if we didn't have the reverse flag, we would have thrown an access violation before any bytes were copied.
364 : The same logic applies to memmoves that go past the high end of a region - reverse iteration logic would throw an access
365 : violation before any bytes were copied, while the current logic would copy the bytes until the end of the region.
366 : https://github.com/anza-xyz/agave/blob/v2.1.0/programs/bpf_loader/src/syscalls/mem_ops.rs#L184 */
367 519 : uchar reverse = !!( dst_vaddr >= src_vaddr && dst_vaddr < src_vaddr + sz );
368 :
369 : /* In reverse calculations, start from the rightmost vaddr that will be accessed (note the - 1). */
370 519 : ulong dst_vaddr_begin = reverse ? fd_ulong_sat_add( dst_vaddr, sz - 1UL ) : dst_vaddr;
371 519 : ulong src_vaddr_begin = reverse ? fd_ulong_sat_add( src_vaddr, sz - 1UL ) : src_vaddr;
372 :
373 : /* Find the correct src and dst haddrs to start operating from. If the src or dst vaddrs
374 : belong to the input data region (4), keep track of region statistics to memmove in chunks. */
375 519 : ulong dst_region = FD_VADDR_TO_REGION( dst_vaddr_begin );
376 519 : uchar dst_is_input_mem_region = ( dst_region==4UL );
377 519 : ulong dst_offset = dst_vaddr_begin & FD_VM_OFFSET_MASK;
378 519 : ulong dst_region_idx = 0UL;
379 519 : ulong dst_bytes_rem_in_cur_region;
380 519 : uchar * dst_haddr;
381 519 : if( dst_is_input_mem_region ) {
382 231 : FD_VM_MEM_HADDR_AND_REGION_IDX_FROM_INPUT_REGION_CHECKED( vm, dst_offset, dst_region_idx, dst_haddr );
383 219 : if( FD_UNLIKELY( !vm->input_mem_regions[ dst_region_idx ].is_writable ) ) {
384 180 : FD_VM_ERR_FOR_LOG_EBPF( vm, FD_VM_ERR_EBPF_ACCESS_VIOLATION );
385 180 : return FD_VM_SYSCALL_ERR_SEGFAULT;
386 180 : }
387 39 : if( FD_UNLIKELY( reverse ) ) {
388 : /* Bytes remaining between region begin and current position (+ 1 for inclusive region beginning). */
389 6 : dst_bytes_rem_in_cur_region = fd_ulong_sat_sub( dst_offset + 1UL, vm->input_mem_regions[ dst_region_idx ].vaddr_offset );
390 33 : } else {
391 : /* Bytes remaining between current position and region end. */
392 33 : dst_bytes_rem_in_cur_region = fd_ulong_sat_sub( vm->input_mem_regions[ dst_region_idx ].region_sz, ( dst_offset - vm->input_mem_regions[ dst_region_idx ].vaddr_offset ) );
393 33 : }
394 288 : } else {
395 288 : dst_haddr = (uchar*)FD_VM_MEM_HADDR_ST_NO_SZ_CHECK( vm, dst_vaddr_begin, FD_VM_ALIGN_RUST_U8 );
396 :
397 246 : if( FD_UNLIKELY( reverse ) ) {
398 : /* Bytes remaining is minimum of the offset from the beginning of the current
399 : region (+1 for inclusive region beginning) and the number of storable bytes in the region. */
400 6 : dst_bytes_rem_in_cur_region = fd_ulong_min( vm->region_st_sz[ dst_region ], dst_offset + 1UL );
401 :
402 240 : } else {
403 : /* Bytes remaining is the number of writable bytes left in the region */
404 240 : dst_bytes_rem_in_cur_region = fd_ulong_sat_sub( vm->region_st_sz[ dst_region ], dst_offset );
405 240 : }
406 246 : }
407 :
408 : /* Logic for src vaddr translation is similar to above excluding any writable checks. */
409 285 : ulong src_region = FD_VADDR_TO_REGION( src_vaddr_begin );
410 285 : uchar src_is_input_mem_region = ( src_region==4UL );
411 285 : ulong src_offset = src_vaddr_begin & FD_VM_OFFSET_MASK;
412 285 : ulong src_region_idx = 0UL;
413 285 : ulong src_bytes_rem_in_cur_region;
414 285 : uchar * src_haddr;
415 285 : if( src_is_input_mem_region ) {
416 144 : FD_VM_MEM_HADDR_AND_REGION_IDX_FROM_INPUT_REGION_CHECKED( vm, src_offset, src_region_idx, src_haddr );
417 144 : if( FD_UNLIKELY( reverse ) ) {
418 6 : src_bytes_rem_in_cur_region = fd_ulong_sat_sub( src_offset + 1UL, vm->input_mem_regions[ src_region_idx ].vaddr_offset );
419 138 : } else {
420 138 : src_bytes_rem_in_cur_region = fd_ulong_sat_sub( vm->input_mem_regions[ src_region_idx ].region_sz, ( src_offset - vm->input_mem_regions[ src_region_idx ].vaddr_offset ) );
421 138 : }
422 144 : } else {
423 423 : src_haddr = (uchar*)FD_VM_MEM_HADDR_LD_NO_SZ_CHECK( vm, src_vaddr_begin, FD_VM_ALIGN_RUST_U8 );
424 :
425 141 : if( FD_UNLIKELY( reverse ) ) {
426 6 : src_bytes_rem_in_cur_region = fd_ulong_min( vm->region_ld_sz[ src_region ], src_offset + 1UL );
427 :
428 135 : } else {
429 135 : src_bytes_rem_in_cur_region = fd_ulong_sat_sub( vm->region_ld_sz[ src_region ], src_offset );
430 135 : }
431 423 : }
432 :
433 : /* Short circuit: if the number of copyable bytes stays within all memory regions,
434 : just memmove and return. This is a majority case in mainnet, devnet, and testnet.
435 : Someone would have to be very crafty and clever to construct a transaction that
436 : deploys and invokes a custom program that does not fall into this branch. */
437 285 : if( FD_LIKELY( sz<=dst_bytes_rem_in_cur_region && sz<=src_bytes_rem_in_cur_region ) ) {
438 78 : if( FD_UNLIKELY( reverse ) ) {
439 : /* In the reverse iteration case, the haddrs point to the end of the region here. Since the
440 : above checks guarantee that there are enough bytes left in the src and dst regions to do
441 : a direct memmove, we can just subtract (sz-1) from the haddrs, memmove, and return. */
442 3 : memmove( dst_haddr - sz + 1UL, src_haddr - sz + 1UL, sz );
443 75 : } else {
444 : /* In normal iteration, the haddrs correspond to the correct starting point for the memcpy,
445 : so no further translation has to be done. */
446 75 : memmove( dst_haddr, src_haddr, sz );
447 75 : }
448 78 : return FD_VM_SUCCESS;
449 78 : }
450 :
451 : /* Copy over the bytes from each region in chunks. */
452 510 : while( sz>0UL ) {
453 : /* End of region case */
454 498 : if( FD_UNLIKELY( dst_bytes_rem_in_cur_region==0UL ) ) {
455 : /* Only proceed if:
456 : - We are in the input memory region
457 : - There are remaining input memory regions to copy from (for both regular and reverse iteration orders)
458 : - The next input memory region is writable
459 : Fail otherwise. */
460 39 : if( FD_LIKELY( !reverse &&
461 39 : dst_is_input_mem_region &&
462 39 : dst_region_idx+1UL<vm->input_mem_regions_cnt &&
463 39 : vm->input_mem_regions[ dst_region_idx+1UL ].is_writable ) ) {
464 : /* In normal iteration, we move the haddr to the beginning of the next region. */
465 9 : dst_region_idx++;
466 9 : dst_haddr = (uchar*)vm->input_mem_regions[ dst_region_idx ].haddr;
467 30 : } else if( FD_LIKELY( reverse &&
468 30 : dst_region_idx>0UL &&
469 30 : vm->input_mem_regions[ dst_region_idx-1UL ].is_writable ) ) {
470 : /* Note that when reverse iterating, we set the haddr to the END of the PREVIOUS region. */
471 3 : dst_region_idx--;
472 3 : dst_haddr = (uchar*)vm->input_mem_regions[ dst_region_idx ].haddr + vm->input_mem_regions[ dst_region_idx ].region_sz - 1UL;
473 27 : } else {
474 27 : FD_VM_ERR_FOR_LOG_EBPF( vm, FD_VM_ERR_EBPF_ACCESS_VIOLATION );
475 27 : return FD_VM_SYSCALL_ERR_SEGFAULT;
476 27 : }
477 12 : dst_bytes_rem_in_cur_region = vm->input_mem_regions[ dst_region_idx ].region_sz;
478 12 : }
479 :
480 471 : if( FD_UNLIKELY( src_bytes_rem_in_cur_region==0UL ) ) {
481 : /* Same as above, except no writable checks. */
482 261 : if( FD_LIKELY( !reverse &&
483 261 : src_is_input_mem_region &&
484 261 : src_region_idx+1UL<vm->input_mem_regions_cnt ) ) {
485 78 : src_region_idx++;
486 78 : src_haddr = (uchar*)vm->input_mem_regions[ src_region_idx ].haddr;
487 183 : } else if( FD_LIKELY( reverse && src_region_idx>0UL ) ) {
488 15 : src_region_idx--;
489 15 : src_haddr = (uchar*)vm->input_mem_regions[ src_region_idx ].haddr + vm->input_mem_regions[ src_region_idx ].region_sz - 1UL;
490 168 : } else {
491 168 : FD_VM_ERR_FOR_LOG_EBPF( vm, FD_VM_ERR_EBPF_ACCESS_VIOLATION );
492 168 : return FD_VM_SYSCALL_ERR_SEGFAULT;
493 168 : }
494 93 : src_bytes_rem_in_cur_region = vm->input_mem_regions[ src_region_idx ].region_sz;
495 93 : }
496 :
497 : /* Number of bytes to operate on in this iteration is the min of:
498 : - number of bytes left to copy
499 : - bytes left in the current src region
500 : - bytes left in the current dst region */
501 303 : ulong num_bytes_to_copy = fd_ulong_min( sz, fd_ulong_min( src_bytes_rem_in_cur_region, dst_bytes_rem_in_cur_region ) );
502 303 : if( FD_UNLIKELY( reverse ) ) {
503 24 : memmove( dst_haddr - num_bytes_to_copy + 1UL, src_haddr - num_bytes_to_copy + 1UL, num_bytes_to_copy );
504 24 : dst_haddr -= num_bytes_to_copy;
505 24 : src_haddr -= num_bytes_to_copy;
506 279 : } else {
507 279 : memmove( dst_haddr, src_haddr, num_bytes_to_copy );
508 279 : dst_haddr += num_bytes_to_copy;
509 279 : src_haddr += num_bytes_to_copy;
510 279 : }
511 :
512 : /* Update size trackers */
513 303 : sz -= num_bytes_to_copy;
514 303 : src_bytes_rem_in_cur_region -= num_bytes_to_copy;
515 303 : dst_bytes_rem_in_cur_region -= num_bytes_to_copy;
516 303 : }
517 207 : }
518 :
519 5535 : return FD_VM_SUCCESS;
520 6051 : }
521 :
522 : /* https://github.com/anza-xyz/agave/blob/v2.0.8/programs/bpf_loader/src/syscalls/mem_ops.rs#L41 */
523 : int
524 : fd_vm_syscall_sol_memmove( /**/ void * _vm,
525 : /**/ ulong dst_vaddr,
526 : /**/ ulong src_vaddr,
527 : /**/ ulong sz,
528 : FD_PARAM_UNUSED ulong r4,
529 : FD_PARAM_UNUSED ulong r5,
530 222 : /**/ ulong * _ret ) {
531 222 : *_ret = 0;
532 222 : fd_vm_t * vm = (fd_vm_t *)_vm;
533 :
534 222 : FD_VM_CU_MEM_OP_UPDATE( vm, sz );
535 :
536 : /* No overlap check for memmove. */
537 0 : return fd_vm_memmove( vm, dst_vaddr, src_vaddr, sz );
538 222 : }
539 :
540 : /* https://github.com/anza-xyz/agave/blob/v2.0.8/programs/bpf_loader/src/syscalls/mem_ops.rs#L18 */
541 : int
542 : fd_vm_syscall_sol_memcpy( /**/ void * _vm,
543 : /**/ ulong dst_vaddr,
544 : /**/ ulong src_vaddr,
545 : /**/ ulong sz,
546 : FD_PARAM_UNUSED ulong r4,
547 : FD_PARAM_UNUSED ulong r5,
548 5904 : /**/ ulong * _ret ) {
549 5904 : *_ret = 0;
550 5904 : fd_vm_t * vm = (fd_vm_t *)_vm;
551 :
552 5904 : FD_VM_CU_MEM_OP_UPDATE( vm, sz );
553 :
554 : /* Exact same as memmove, except also check overlap.
555 : https://github.com/anza-xyz/agave/blob/master/programs/bpf_loader/src/syscalls/mem_ops.rs#L31 */
556 5901 : FD_VM_MEM_CHECK_NON_OVERLAPPING( vm, src_vaddr, sz, dst_vaddr, sz );
557 :
558 5829 : return fd_vm_memmove( vm, dst_vaddr, src_vaddr, sz );
559 5901 : }
560 :
561 : int
562 : fd_vm_syscall_sol_memcmp( /**/ void * _vm,
563 : /**/ ulong m0_vaddr,
564 : /**/ ulong m1_vaddr,
565 : /**/ ulong sz,
566 : /**/ ulong out_vaddr,
567 : FD_PARAM_UNUSED ulong r5,
568 582 : /**/ ulong * _ret ) {
569 582 : *_ret = 0;
570 582 : fd_vm_t * vm = (fd_vm_t *)_vm;
571 :
572 : /* https://github.com/anza-xyz/agave/blob/master/programs/bpf_loader/src/syscalls/mem_ops.rs#L59 */
573 :
574 582 : FD_VM_CU_MEM_OP_UPDATE( vm, sz );
575 :
576 : /* Note: though this behaves like a normal C-style memcmp, we can't
577 : use the compilers / libc memcmp directly because the specification
578 : doesn't provide strong enough guarantees about the return value (it
579 : only promises the sign). */
580 :
581 582 : if( !FD_FEATURE_ACTIVE( vm->instr_ctx->slot_ctx, bpf_account_data_direct_mapping ) ) {
582 1116 : uchar const * m0 = (uchar const *)FD_VM_MEM_SLICE_HADDR_LD( vm, m0_vaddr, FD_VM_ALIGN_RUST_U8, sz );
583 1116 : uchar const * m1 = (uchar const *)FD_VM_MEM_SLICE_HADDR_LD( vm, m1_vaddr, FD_VM_ALIGN_RUST_U8, sz );
584 :
585 : /* Silly that this doesn't use r0 to return ... slower, more edge
586 : case, different from libc style memcmp, harder to callers to use,
587 : etc ... probably too late to do anything about it now ... sigh */
588 :
589 558 : void * _out = FD_VM_MEM_HADDR_ST( vm, out_vaddr, FD_VM_ALIGN_RUST_I32, 4UL );
590 :
591 0 : int out = 0;
592 11760 : for( ulong i=0UL; i<sz; i++ ) {
593 11466 : int i0 = (int)m0[i];
594 11466 : int i1 = (int)m1[i];
595 11466 : if( i0!=i1 ) {
596 264 : out = i0 - i1;
597 264 : break;
598 264 : }
599 11466 : }
600 :
601 558 : fd_memcpy( _out, &out, 4UL ); /* Sigh ... see note above (and might be unaligned ... double sigh) */
602 :
603 558 : return FD_VM_SUCCESS;
604 1116 : } else {
605 : /* In the case that direct mapping is enabled, the behavior for memcmps
606 : differ significantly from the non-dm case. The key difference is that
607 : invalid loads will instantly lead to errors in the non-dm case. However,
608 : when direct mapping is enabled, we will first try to memcmp the largest
609 : size valid chunk first, and will exit successfully if a difference is
610 : found without aborting from the VM. A chunk is defined as the largest
611 : valid vaddr range in both memory regions that doesn't span multiple
612 : regions.
613 :
614 : Example:
615 : fd_vm_syscall_sol_memcmp( vm, m0_addr : 0x4000, m1_vaddr : 0x2000, 0x200, ... );
616 : m0's region: m0_addr 0x4000 -> 0x4000 + 0x50 (region sz 0x50)
617 : m1's region: m1_addr 0x2000 -> 0x2000 + 0x100 (region sz 0x100)
618 : sz: 0x200
619 :
620 : Case 1: 0x4000 -> 0x4050 does have the same bytes as 0x2000 -> 0x2050
621 : Case 2: 0x4000 -> 0x4050 does NOT have the same bytes as 0x2000 -> 0x2050
622 :
623 : Pre-DM:
624 : This will fail out before any bytes are compared because the memory
625 : translation is done first.
626 :
627 : Post-DM:
628 : For case 1, the memcmp will return an error and the VM will exit because
629 : the memcmp will eventually try to access 0x4051 which is invalid. First
630 : 0x50 bytes are compared, but the next chunk will lead to an invalid
631 : access.
632 :
633 : For case 2, the memcmp will first translate the first 0x50 bytes and will
634 : see that the bytes are not the same. This will lead to the syscall
635 : exiting out successfully without detecting the access violation.
636 :
637 : https://github.com/anza-xyz/agave/blob/v2.0.10/programs/bpf_loader/src/syscalls/mem_ops.rs#L213
638 : */
639 :
640 24 : void * _out = FD_VM_MEM_HADDR_ST( vm, out_vaddr, FD_VM_ALIGN_RUST_I32, 4UL );
641 0 : int out = 0;
642 :
643 : /* Lookup host address chunks. Try to do a standard memcpy if the regions
644 : do not cross memory regions. The translation logic is different if the
645 : the virtual address region is the input region vs. not. See the comment
646 : in fd_bpf_loader_serialization for more details on how the input
647 : region is different from other regions. The input data region will try
648 : to lookup the number of remaining bytes in the specific data region. If
649 : the memory access is not in the input data region, assume the bytes in
650 : the current region are bound by the size of the remaining bytes in the
651 : region. */
652 :
653 21 : ulong m0_region = FD_VADDR_TO_REGION( m0_vaddr );
654 21 : ulong m0_offset = m0_vaddr & FD_VM_OFFSET_MASK;
655 21 : ulong m0_region_idx = 0UL;
656 21 : ulong m0_bytes_in_cur_region = sz;
657 21 : uchar * m0_haddr = NULL;
658 21 : if( m0_region==4UL ) {
659 9 : m0_region_idx = fd_vm_get_input_mem_region_idx( vm, m0_offset );
660 9 : m0_haddr = (uchar*)(vm->input_mem_regions[ m0_region_idx ].haddr + m0_offset - vm->input_mem_regions[ m0_region_idx ].vaddr_offset);
661 9 : m0_bytes_in_cur_region = fd_ulong_min( sz, fd_ulong_sat_sub( vm->input_mem_regions[ m0_region_idx ].region_sz,
662 9 : ((ulong)m0_haddr - vm->input_mem_regions[ m0_region_idx ].haddr) ) );
663 12 : } else {
664 : /* We can safely load a slice of 1 byte here because we know that we will
665 : not ever read more than the number of bytes that are left in the
666 : region. */
667 12 : m0_bytes_in_cur_region = fd_ulong_min( sz, vm->region_ld_sz[ m0_region ] - m0_offset );
668 24 : m0_haddr = (uchar *)FD_VM_MEM_SLICE_HADDR_LD_SZ_UNCHECKED( vm, m0_vaddr, FD_VM_ALIGN_RUST_U8 );
669 24 : }
670 :
671 21 : ulong m1_region = FD_VADDR_TO_REGION( m1_vaddr );
672 21 : ulong m1_offset = m1_vaddr & FD_VM_OFFSET_MASK;
673 21 : ulong m1_region_idx = 0UL;
674 21 : ulong m1_bytes_in_cur_region = sz;
675 21 : uchar * m1_haddr = NULL;
676 21 : if( m1_region==4UL ) {
677 6 : m1_region_idx = fd_vm_get_input_mem_region_idx( vm, m1_offset );
678 6 : m1_haddr = (uchar*)(vm->input_mem_regions[ m1_region_idx ].haddr + m1_offset - vm->input_mem_regions[ m1_region_idx ].vaddr_offset);
679 6 : m1_bytes_in_cur_region = fd_ulong_min( sz, fd_ulong_sat_sub( vm->input_mem_regions[ m1_region_idx ].region_sz,
680 6 : ((ulong)m1_haddr - vm->input_mem_regions[ m1_region_idx ].haddr) ) );
681 15 : } else {
682 15 : m1_bytes_in_cur_region = fd_ulong_min( sz, vm->region_ld_sz[ m1_region ] - m1_offset );
683 30 : m1_haddr = (uchar *)FD_VM_MEM_SLICE_HADDR_LD_SZ_UNCHECKED( vm, m1_vaddr, FD_VM_ALIGN_RUST_U8 );
684 30 : }
685 :
686 : /* Case where the operation spans multiple regions. Copy over the bytes
687 : from each region while iterating to the next one. */
688 : /* TODO: An optimization would be to memcmp chunks at once */
689 21 : ulong m0_idx = 0UL;
690 21 : ulong m1_idx = 0UL;
691 8625 : for( ulong i=0UL; i<sz; i++ ) {
692 8622 : if( FD_UNLIKELY( !m0_bytes_in_cur_region ) ) {
693 : /* If the memory is not in the input region or it is the last input
694 : memory region, that means that if we don't exit now we will have
695 : an access violation. */
696 12 : if( FD_UNLIKELY( m0_region!=4UL || ++m0_region_idx>=vm->input_mem_regions_cnt ) ) {
697 6 : FD_VM_ERR_FOR_LOG_EBPF( vm, FD_VM_ERR_EBPF_ACCESS_VIOLATION );
698 6 : return FD_VM_SYSCALL_ERR_SEGFAULT;
699 6 : }
700 : /* Otherwise, query the next input region. */
701 6 : m0_haddr = (uchar*)vm->input_mem_regions[ m0_region_idx ].haddr;
702 6 : m0_idx = 0UL;
703 6 : m0_bytes_in_cur_region = vm->input_mem_regions[ m0_region_idx ].region_sz;
704 6 : }
705 8616 : if( FD_UNLIKELY( !m1_bytes_in_cur_region ) ) {
706 0 : if( FD_UNLIKELY( m1_region!=4UL || ++m1_region_idx>=vm->input_mem_regions_cnt ) ) {
707 0 : FD_VM_ERR_FOR_LOG_EBPF( vm, FD_VM_ERR_EBPF_ACCESS_VIOLATION );
708 0 : return FD_VM_SYSCALL_ERR_SEGFAULT;
709 0 : }
710 0 : m1_haddr = (uchar*)vm->input_mem_regions[ m1_region_idx ].haddr;
711 0 : m1_idx = 0UL;
712 0 : m1_bytes_in_cur_region = vm->input_mem_regions[ m1_region_idx ].region_sz;
713 0 : }
714 :
715 8616 : int i0 = (int)m0_haddr[ m0_idx ];
716 8616 : int i1 = (int)m1_haddr[ m1_idx ];
717 8616 : if( i0!=i1 ) {
718 12 : out = i0 - i1;
719 12 : break;
720 12 : }
721 :
722 8604 : m0_bytes_in_cur_region--;
723 8604 : m1_bytes_in_cur_region--;
724 8604 : m0_idx++;
725 8604 : m1_idx++;
726 8604 : }
727 15 : fd_memcpy( _out, &out, 4UL ); /* Sigh ... see note above (and might be unaligned ... double sigh) */
728 15 : return FD_VM_SUCCESS;
729 21 : }
730 582 : }
731 :
732 : int
733 : fd_vm_syscall_sol_memset( /**/ void * _vm,
734 : /**/ ulong dst_vaddr,
735 : /**/ ulong c,
736 : /**/ ulong sz,
737 : FD_PARAM_UNUSED ulong r4,
738 : FD_PARAM_UNUSED ulong r5,
739 264 : /**/ ulong * _ret ) {
740 264 : fd_vm_t * vm = (fd_vm_t *)_vm;
741 264 : *_ret = 0;
742 :
743 : /* https://github.com/anza-xyz/agave/blob/master/programs/bpf_loader/src/syscalls/mem_ops.rs#L115 */
744 :
745 264 : FD_VM_CU_MEM_OP_UPDATE( vm, sz );
746 :
747 264 : if( FD_UNLIKELY( !sz ) ) {
748 9 : return FD_VM_SUCCESS;
749 9 : }
750 :
751 255 : ulong region = FD_VADDR_TO_REGION( dst_vaddr );
752 255 : ulong offset = dst_vaddr & FD_VM_OFFSET_MASK;
753 255 : uchar * haddr;
754 :
755 255 : int b = (int)(c & 255UL);
756 :
757 255 : if( !vm->direct_mapping ) {
758 90 : haddr = FD_VM_MEM_HADDR_ST( vm, dst_vaddr, FD_VM_ALIGN_RUST_U8, sz );
759 0 : fd_memset( haddr, b, sz );
760 165 : } else if( region!=4UL ) {
761 : /* I acknowledge that this is not an ideal implementation, but Agave will
762 : memset as many bytes as possible until it reaches an unwritable section.
763 : This is purely just for fuzzing conformance, sigh... */
764 51 : haddr = (uchar*)FD_VM_MEM_HADDR_ST_FAST( vm, dst_vaddr );
765 51 : ulong bytes_in_cur_region = fd_ulong_sat_sub( vm->region_st_sz[ region ], offset );
766 51 : ulong bytes_to_set = fd_ulong_min( sz, bytes_in_cur_region );
767 51 : fd_memset( haddr, b, bytes_to_set );
768 51 : if( FD_UNLIKELY( bytes_to_set<sz ) ) {
769 45 : FD_VM_ERR_FOR_LOG_EBPF( vm, FD_VM_ERR_EBPF_ACCESS_VIOLATION );
770 45 : return FD_VM_SYSCALL_ERR_SEGFAULT;
771 45 : }
772 114 : } else {
773 : /* In this case, we are in the input region AND direct mapping is enabled.
774 : Get the haddr and input region and check if it's writable. */
775 114 : ulong region_idx;
776 114 : FD_VM_MEM_HADDR_AND_REGION_IDX_FROM_INPUT_REGION_CHECKED( vm, offset, region_idx, haddr );
777 0 : ulong offset_in_cur_region = offset - vm->input_mem_regions[ region_idx ].vaddr_offset;
778 114 : ulong bytes_in_cur_region = fd_ulong_sat_sub( vm->input_mem_regions[ region_idx ].region_sz, offset_in_cur_region );
779 :
780 : /* Memset goes into multiple regions. */
781 174 : while( sz>0UL ) {
782 : /* Check that current region is writable */
783 174 : if( FD_UNLIKELY( !vm->input_mem_regions[ region_idx ].is_writable ) ) {
784 96 : break;
785 96 : }
786 :
787 : /* Memset bytes */
788 78 : ulong num_bytes_to_set = fd_ulong_min( sz, bytes_in_cur_region );
789 78 : fd_memset( haddr, b, num_bytes_to_set );
790 78 : sz -= num_bytes_to_set;
791 :
792 : /* If no more regions left, break. */
793 78 : if( ++region_idx==vm->input_mem_regions_cnt ) {
794 18 : break;
795 18 : }
796 :
797 : /* Move haddr to next region. */
798 60 : haddr = (uchar*)vm->input_mem_regions[ region_idx ].haddr;
799 60 : bytes_in_cur_region = vm->input_mem_regions[ region_idx ].region_sz;
800 60 : }
801 :
802 : /* If we were not able to successfully set all the bytes, throw an error. */
803 114 : if( FD_UNLIKELY( sz>0 ) ) {
804 108 : FD_VM_ERR_FOR_LOG_EBPF( vm, FD_VM_ERR_EBPF_ACCESS_VIOLATION );
805 108 : return FD_VM_SYSCALL_ERR_SEGFAULT;
806 108 : }
807 114 : }
808 99 : return FD_VM_SUCCESS;
809 255 : }
|