Line data Source code
1 : #ifndef HEADER_fd_src_flamenco_runtime_fd_runtime_h
2 : #define HEADER_fd_src_flamenco_runtime_fd_runtime_h
3 :
4 : #include "../fd_flamenco_base.h"
5 : #include "fd_runtime_err.h"
6 : #include "fd_runtime_init.h"
7 : #include "fd_rocksdb.h"
8 : #include "fd_acc_mgr.h"
9 : #include "../features/fd_features.h"
10 : #include "fd_rent_lists.h"
11 : #include "../../ballet/poh/fd_poh.h"
12 : #include "../leaders/fd_leaders.h"
13 : #include "context/fd_exec_epoch_ctx.h"
14 : #include "context/fd_exec_slot_ctx.h"
15 : #include "context/fd_capture_ctx.h"
16 : #include "context/fd_exec_txn_ctx.h"
17 : #include "info/fd_block_info.h"
18 : #include "info/fd_instr_info.h"
19 : #include "../gossip/fd_gossip.h"
20 : #include "../repair/fd_repair.h"
21 : #include "../../ballet/pack/fd_microblock.h"
22 :
23 : /* Various constant values used by the runtime. */
24 :
25 6864 : #define MICRO_LAMPORTS_PER_LAMPORT (1000000UL)
26 :
27 : #define DEFAULT_HASHES_PER_TICK (12500)
28 0 : #define UPDATED_HASHES_PER_TICK2 (17500)
29 0 : #define UPDATED_HASHES_PER_TICK3 (27500)
30 0 : #define UPDATED_HASHES_PER_TICK4 (47500)
31 0 : #define UPDATED_HASHES_PER_TICK5 (57500)
32 0 : #define UPDATED_HASHES_PER_TICK6 (62500)
33 :
34 : #define FD_RUNTIME_TRACE_NONE (0)
35 : #define FD_RUNTIME_TRACE_SAVE (1)
36 : #define FD_RUNTIME_TRACE_REPLAY (2)
37 :
38 0 : #define FD_RUNTIME_NUM_ROOT_BLOCKS (32UL)
39 : /* Feature gate tests. Both compare _slot_ctx->slot_bank.slot against the value stored for _feature_name in _slot_ctx->epoch_ctx->features (presumably the feature's activation slot): ACTIVE is slot>=value, JUST_ACTIVATED is slot==value. _slot_ctx is parenthesized so any pointer expression may be passed; it is evaluated twice, so it should be free of side effects. */
40 5961648 : #define FD_FEATURE_ACTIVE(_slot_ctx, _feature_name) ((_slot_ctx)->slot_bank.slot >= (_slot_ctx)->epoch_ctx->features. _feature_name)
41 0 : #define FD_FEATURE_JUST_ACTIVATED(_slot_ctx, _feature_name) ((_slot_ctx)->slot_bank.slot == (_slot_ctx)->epoch_ctx->features. _feature_name)
42 :
43 12741 : #define FD_BLOCKHASH_QUEUE_MAX_ENTRIES (300UL)
44 2623155 : #define FD_RECENT_BLOCKHASHES_MAX_ENTRIES (150UL)
45 :
46 828 : #define FD_RENT_EXEMPT_RENT_EPOCH (ULONG_MAX)
47 :
48 12741 : #define SECONDS_PER_YEAR ((double)(365.242199 * 24.0 * 60.0 * 60.0))
49 :
50 : /* TODO: increase this to default once we have enough memory to support a 95G status cache. */
51 0 : #define MAX_CACHE_TXNS_PER_SLOT (FD_TXNCACHE_DEFAULT_MAX_TRANSACTIONS_PER_SLOT / 8)
52 :
53 : /* This is the reasonably tight upper bound for the number of writable
54 : accounts in a slot. This is because a block has a limit of 48 million
55 : compute units. Each writable account lock costs 300 CUs. That means there
56 : can be up to 48M/300 writable accounts in a block. */
57 0 : #define FD_WRITABLE_ACCS_IN_SLOT (160000UL)
58 :
59 : struct fd_execute_txn_task_info { /* Per-transaction task record; fd_runtime_prepare_txns_start and fd_runtime_pre_execute_check take a pointer to it. */
60 : fd_spad_t * * spads; /* pointer to spad pointer(s); presumably the spads available to the executor -- TODO confirm */
61 : fd_exec_txn_ctx_t * txn_ctx; /* transaction execution context associated with this task */
62 : fd_txn_p_t * txn; /* the transaction this task refers to */
63 : int exec_res; /* presumably the result code of executing txn -- TODO confirm against the executor */
64 : };
65 : typedef struct fd_execute_txn_task_info fd_execute_txn_task_info_t;
66 :
67 : struct fd_raw_block_txn_iter { /* Iterator state used by the fd_raw_block_txn_iter_{init,done,next,ele} functions, which pass and return it by value. */
68 : fd_block_entry_batch_t const * curr_batch; /* entry batch currently being walked */
69 : uchar const * orig_data; /* raw block data, as handed to fd_raw_block_txn_iter_init */
70 : ulong remaining_batches; /* presumably batches left to visit -- TODO confirm whether the current one is included */
71 : ulong remaining_microblocks; /* presumably microblocks left in the current batch -- TODO confirm */
72 : ulong remaining_txns; /* presumably transactions left in the current microblock -- TODO confirm */
73 : ulong curr_offset; /* presumably a byte offset into orig_data -- TODO confirm */
74 :
75 : ulong curr_txn_sz; /* presumably the serialized size of the current transaction -- TODO confirm */
76 : };
77 :
78 : typedef struct fd_raw_block_txn_iter fd_raw_block_txn_iter_t;
79 :
80 : /* The below logic is used to size out the memory footprint generated by the
81 : runtime during transaction execution. */
82 :
83 : /* The prevailing layout we have in the runtime is the meta followed by
84 : the account's data. This struct encodes that layout and asserts that
85 : the alignment requirements of the constituents are satisfied. */
86 : // TODO: Use this struct at allocation sites so it's clear we use this layout
87 : struct __attribute__((packed)) fd_account_rec { /* packed: the compiler inserts no padding between meta and data */
88 : fd_account_meta_t meta; /* account metadata, at offset 0 */
89 : uchar data[]; /* flexible array member: the account's data bytes follow meta directly */
90 : };
91 : typedef struct fd_account_rec fd_account_rec_t;
92 999903 : #define FD_ACCOUNT_REC_ALIGN (8UL) /* alignment used for a whole record */
93 : #define FD_ACCOUNT_REC_DATA_ALIGN (8UL) /* alignment expected of the data region */
94 : FD_STATIC_ASSERT( FD_ACCOUNT_REC_ALIGN>=FD_ACCOUNT_META_ALIGN, account_rec_meta_align ); /* record alignment must cover the meta's alignment */
95 : FD_STATIC_ASSERT( FD_ACCOUNT_REC_ALIGN>=FD_ACCOUNT_REC_DATA_ALIGN, account_rec_data_align ); /* ... and the data region's alignment */
96 : FD_STATIC_ASSERT( (offsetof(fd_account_rec_t, meta)%FD_ACCOUNT_META_ALIGN)==0, account_rec_meta_offset ); /* meta sits at a suitably aligned offset */
97 : FD_STATIC_ASSERT( (offsetof(fd_account_rec_t, data)%FD_ACCOUNT_REC_DATA_ALIGN)==0, account_rec_data_offset ); /* data starts at a suitably aligned offset, i.e. the meta's size is a multiple of the data alignment */
98 :
99 418839 : #define MAX_PERMITTED_DATA_INCREASE (10240UL) // 10KB
100 826443 : #define FD_BPF_ALIGN_OF_U128 (8UL )
101 : FD_STATIC_ASSERT( FD_BPF_ALIGN_OF_U128==FD_ACCOUNT_REC_DATA_ALIGN, input_data_align );
102 409680 : #define FD_RUNTIME_INPUT_REGION_ALLOC_ALIGN_UP (16UL)
103 :
104 : /******** These macros bound out memory footprint ********/
105 :
106 : /* The tight upper bound on borrowed account footprint over the
107 : execution of a single transaction. */
108 408870 : #define FD_RUNTIME_BORROWED_ACCOUNT_FOOTPRINT (MAX_TX_ACCOUNT_LOCKS * fd_ulong_align_up( FD_ACC_TOT_SZ_MAX, FD_ACCOUNT_REC_ALIGN ))
109 :
110 : /* The tight-ish upper bound on input region footprint over the
111 : execution of a single transaction. See input serialization code for
112 : reference: fd_bpf_loader_serialization.c
113 :
114 : This bound is based off of the transaction MTU. We consider the
115 : question of what kind of transaction one would construct to
116 : maximally bloat the input region.
117 : The worst case scenario is when every nested instruction references
118 : all unique accounts in the transaction. A transaction can lock a max
119 : of MAX_TX_ACCOUNT_LOCKS accounts. Then all remaining input account
120 : references are going to be duplicates, which cost 1 byte to specify
121 : offset in payload, and which cost 8 bytes during serialization. Then
122 : there would be 0 bytes of instruction data, because they exist byte
123 : for byte in the raw payload, which is not a worthwhile bloat factor.
124 : */
125 : #define FD_RUNTIME_INPUT_REGION_UNIQUE_ACCOUNT_FOOTPRINT(direct_mapping) /* Upper bound on the bytes one unique (non-duplicate) account adds to the input region. direct_mapping is parenthesized so any expression may be passed. */ \
126 408870 : (1UL /* dup byte */ + \
127 408870 : sizeof(uchar) /* is_signer */ + \
128 408870 : sizeof(uchar) /* is_writable */ + \
129 408870 : sizeof(uchar) /* executable */ + \
130 408870 : sizeof(uint) /* original_data_len */ + \
131 408870 : sizeof(fd_pubkey_t) /* key */ + \
132 408870 : sizeof(fd_pubkey_t) /* owner */ + \
133 408870 : sizeof(ulong) /* lamports */ + \
134 408870 : sizeof(ulong) /* data len */ + \
135 408870 : ((direct_mapping) ? FD_BPF_ALIGN_OF_U128 : fd_ulong_align_up( FD_ACC_SZ_MAX, FD_BPF_ALIGN_OF_U128 )) /* account data; only FD_BPF_ALIGN_OF_U128 bytes are counted when direct_mapping is nonzero */ + \
136 408870 : MAX_PERMITTED_DATA_INCREASE /* room for the permitted data growth */ + \
137 408870 : sizeof(ulong)) /* rent_epoch */
138 : /* Upper bound on the input region of a single instruction that references account_lock_limit unique accounts and carries no instruction data. Macro arguments are parenthesized so any expression may be passed. */
139 : #define FD_RUNTIME_INPUT_REGION_INSN_FOOTPRINT(account_lock_limit, direct_mapping) \
140 408870 : (fd_ulong_align_up( (sizeof(ulong) /* acct_cnt */ + \
141 408870 : (account_lock_limit)*FD_RUNTIME_INPUT_REGION_UNIQUE_ACCOUNT_FOOTPRINT((direct_mapping)) + \
142 408870 : sizeof(ulong) /* instr data len */ + \
143 408870 : /* No instr data */ \
144 408870 : sizeof(fd_pubkey_t)), /* program id */ \
145 408870 : FD_RUNTIME_INPUT_REGION_ALLOC_ALIGN_UP ) + FD_BPF_ALIGN_OF_U128)
146 : /* Upper bound on the input region over a whole transaction: one instruction footprint per level of the instruction stack, plus 8 bytes for each duplicate account reference that can still fit in the transaction payload. Macro arguments are parenthesized so any expression may be passed. */
147 : #define FD_RUNTIME_INPUT_REGION_TXN_FOOTPRINT(account_lock_limit, direct_mapping) \
148 408870 : ((FD_MAX_INSTRUCTION_STACK_DEPTH*FD_RUNTIME_INPUT_REGION_INSN_FOOTPRINT((account_lock_limit), (direct_mapping))) + \
149 408870 : ((FD_TXN_MTU-FD_TXN_MIN_SERIALIZED_SZ-(account_lock_limit))*8UL)) /* We can have roughly this much duplicate offsets */
150 :
151 : /* Bincode valloc footprint over the execution of a single transaction.
152 : As well as other footprint specific to each native program type.
153 :
154 : N.B. We know that bincode valloc footprint is bounded, because
155 : whenever we alloc something, we advance our pointer into the binary
156 : buffer, so eventually we are gonna reach the end of the buffer.
157 : This buffer is usually backed by and ultimately bounded in size by
158 : either accounts data or the transaction MTU.
159 :
160 : That being said, it's not obvious what the tight upper bound would
161 : be for allocations across all possible execution paths of all native
162 : programs, including possible CPIs from native programs. The
163 : footprint estimate here is based on a manual review of our native
164 : program implementation. Note that even if the possible paths remain
165 : steady at the Solana protocol level, the footprint is subject to
166 : change when we change our implementation.
167 :
168 : ### Native programs
169 : ALUT (migrated to BPF)
170 : Loader
171 : - rodata for bpf program relocation and validation
172 : Compute budget (0 allocations)
173 : Config (migrated to BPF)
174 : Precompile (0 allocations)
175 : Stake
176 : - The instruction with the largest footprint is deactivate_delinquent
177 : - During instruction decode, no allocations
178 : - During execution, this is (vote account get_state() + vote convert_to_current()) times 2, once for delinquent_vote_account, and once for reference_vote_account
179 : System
180 : - system_program_instruction_decode seed
181 : Vote
182 : - The instruction with the largest footprint is compact vote state update
183 : - During instruction decode, this is 9*lockouts_len bytes, MTU bounded
184 : - During execution, this is vote account get_state() + vote convert_to_current() + 12*lockouts_len bytes + lockouts_len ulong + deq_fd_landed_vote_t_alloc(lockouts_len)
185 : Zk Elgamal (0 allocations)
186 :
187 : The largest footprint is hence deactivate_delinquent, in which the
188 : two get_state() calls dominate the footprint. In particular, the
189 : authorized_voters treaps bloat 40 bytes (epoch+pubkey) in a vote
190 : account to 72 bytes (sizeof(fd_vote_authorized_voter_t)) in memory.
191 : */
192 408870 : #define FD_RUNTIME_BINCODE_AND_NATIVE_FOOTPRINT (2UL*FD_ACC_SZ_MAX*72UL/40UL)
193 :
194 : /* Misc other footprint. */
195 408870 : #define FD_RUNTIME_SYSCALL_TABLE_FOOTPRINT (FD_MAX_INSTRUCTION_STACK_DEPTH*fd_ulong_align_up( fd_sbpf_syscalls_footprint(), fd_sbpf_syscalls_align() ))
196 :
197 : #ifdef FD_DEBUG_SBPF_TRACES
198 : #define FD_RUNTIME_VM_TRACE_EVENT_MAX (1UL<<30)
199 : #define FD_RUNTIME_VM_TRACE_EVENT_DATA_MAX (2048UL)
200 : #define FD_RUNTIME_VM_TRACE_FOOTPRINT (FD_MAX_INSTRUCTION_STACK_DEPTH*fd_ulong_align_up( fd_vm_trace_footprint( FD_RUNTIME_VM_TRACE_EVENT_MAX, FD_RUNTIME_VM_TRACE_EVENT_DATA_MAX ), fd_vm_trace_align() ))
201 : #else
202 408870 : #define FD_RUNTIME_VM_TRACE_FOOTPRINT (0UL)
203 : #endif
204 :
205 408870 : #define FD_RUNTIME_MISC_FOOTPRINT (FD_RUNTIME_SYSCALL_TABLE_FOOTPRINT+FD_RUNTIME_VM_TRACE_FOOTPRINT)
206 :
207 : /* Now finally, we bound out the footprint of transaction execution: the sum of the borrowed account, input region, bincode/native program and misc bounds. */
208 : #define FD_RUNTIME_TRANSACTION_EXECUTION_FOOTPRINT(account_lock_limit, direct_mapping) \
209 408870 : (FD_RUNTIME_BORROWED_ACCOUNT_FOOTPRINT + \
210 408870 : FD_RUNTIME_INPUT_REGION_TXN_FOOTPRINT(account_lock_limit, direct_mapping) + \
211 408870 : FD_RUNTIME_BINCODE_AND_NATIVE_FOOTPRINT + \
212 408870 : FD_RUNTIME_MISC_FOOTPRINT)
213 :
214 : /* Convenience macros for common use cases. Both currently expand with an
215 : account lock limit of 64 and direct_mapping set to 0.
216 : TODO: If account lock limits are increased to 128, these macros will need to be updated. */
217 408870 : #define FD_RUNTIME_TRANSACTION_EXECUTION_FOOTPRINT_FUZZ FD_RUNTIME_TRANSACTION_EXECUTION_FOOTPRINT(64UL, 0)
218 : #define FD_RUNTIME_TRANSACTION_EXECUTION_FOOTPRINT_DEFAULT FD_RUNTIME_TRANSACTION_EXECUTION_FOOTPRINT(64UL, 0)
219 :
220 : /* Helpers for runtime spad frame management. */
221 : struct fd_runtime_spad_verify_handle_private { /* State captured by FD_RUNTIME_TXN_SPAD_FRAME_BEGIN and handed to fd_runtime_spad_private_frame_end. */
222 : fd_spad_t * spad; /* spad on which the frame is pushed at begin and popped at end */
223 : fd_exec_txn_ctx_t * txn_ctx; /* read at frame end for instr_stack_sz, the spad to verify, and the txn signature used in the error log */
224 : };
225 : typedef struct fd_runtime_spad_verify_handle_private fd_runtime_spad_verify_handle_private_t;
226 :
227 : static inline void
228 68853 : fd_runtime_spad_private_frame_end( fd_runtime_spad_verify_handle_private_t * _spad_handle ) {
229 : /* Cleanup hook installed by FD_RUNTIME_TXN_SPAD_FRAME_BEGIN: when the
230 : instruction stack is near its maximum depth it verifies the spad, logs
231 : an error naming the transaction if verification fails, and then pops
232 : the spad frame. fd_spad_verify() returns 0 if everything looks good,
233 : and non-zero otherwise. Since the fast spad alloc API doesn't check
234 : for or indicate an OOM situation and is going to happily permit an OOB
235 : alloc, we need some way of detecting that. Moreover, we would also like
236 : to detect unbalanced frame push/pop or usage of more frames than allowed.
237 : While surrounding the spad with guard regions will help detect the
238 : former, it won't necessarily catch the latter.
239 : On compliant transactions, fd_spad_verify() isn't all that
240 : expensive. Nonetheless, we invoke fd_spad_verify() only at the
241 : peak of memory usage, and not gratuitously everywhere. One peak
242 : would be right before we do the most deeply nested spad frame pop.
243 : However, we do pops through compiler-inserted cleanup functions
244 : that take only a single pointer, so we define this helper function
245 : to access the needed context info. The end result is that we do
246 : super fast spad calls everywhere in the runtime, and every now and
247 : then we invoke verify to check things. */
248 : /* -1UL because spad pop is called after instr stack pop. NOTE(review): the verify below targets txn_ctx->spad while the pop targets the handle's own spad; presumably both are the same spad -- confirm at the call sites. */
249 68853 : if( FD_UNLIKELY( _spad_handle->txn_ctx->instr_stack_sz>=FD_MAX_INSTRUCTION_STACK_DEPTH-1UL && fd_spad_verify( _spad_handle->txn_ctx->spad ) ) ) {
250 0 : uchar const * txn_signature = (uchar const *)fd_txn_get_signatures( _spad_handle->txn_ctx->txn_descriptor, _spad_handle->txn_ctx->_txn_raw->raw ); /* signature bytes located inside the raw transaction */
251 0 : FD_BASE58_ENCODE_64_BYTES( txn_signature, sig ); /* makes sig available for the log line below; presumably the base58 text of the first 64 signature bytes */
252 0 : FD_LOG_ERR(( "spad corrupted or overflown on transaction %s", sig ));
253 0 : }
254 68853 : fd_spad_pop( _spad_handle->spad ); /* pop the frame pushed by FD_RUNTIME_TXN_SPAD_FRAME_BEGIN */
255 68853 : }
256 : /* Scoped spad frame. Usage: FD_RUNTIME_TXN_SPAD_FRAME_BEGIN( spad, txn_ctx ) { ... } FD_RUNTIME_TXN_SPAD_FRAME_END; The frame is pushed on entry; fd_runtime_spad_private_frame_end runs via the cleanup attribute when _spad_handle goes out of scope and pops it. */
257 68853 : #define FD_RUNTIME_TXN_SPAD_FRAME_BEGIN(_spad, _txn_ctx) do { \
258 68853 : fd_runtime_spad_verify_handle_private_t _spad_handle __attribute__((cleanup(fd_runtime_spad_private_frame_end))) = /* cleanup: called with &_spad_handle at scope exit */ \
259 68853 : (fd_runtime_spad_verify_handle_private_t) { .spad = _spad, .txn_ctx = _txn_ctx }; \
260 68853 : fd_spad_push( _spad_handle.spad ); \
261 68853 : do
262 : /* FRAME_END closes the inner do (the user's block) and then the outer do opened by FRAME_BEGIN. */
263 68853 : #define FD_RUNTIME_TXN_SPAD_FRAME_END while(0); } while(0)
264 :
265 : FD_PROTOTYPES_BEGIN
266 :
267 : /* Runtime Helpers ************************************************************/
268 :
269 : void
270 : fd_runtime_update_leaders( fd_exec_slot_ctx_t * slot_ctx, ulong slot );
271 :
272 : int
273 : fd_runtime_sysvar_cache_load( fd_exec_slot_ctx_t * slot_ctx );
274 :
275 : /* TODO: Invoked by fd_executor: layering violation. Rent logic is deprecated
276 : and will be torn out entirely very soon. */
277 : ulong
278 : fd_runtime_collect_rent_from_account( fd_exec_slot_ctx_t const * slot_ctx,
279 : fd_account_meta_t * acc,
280 : fd_pubkey_t const * key,
281 : ulong epoch );
282 :
283 : /* Block Level Execution Prep/Finalize ****************************************/
284 :
285 : int
286 : fd_runtime_block_execute_prepare( fd_exec_slot_ctx_t * slot_ctx );
287 :
288 : int
289 : fd_runtime_block_execute_finalize_tpool( fd_exec_slot_ctx_t * slot_ctx,
290 : fd_capture_ctx_t * capture_ctx,
291 : fd_block_info_t const * block_info,
292 : fd_tpool_t * tpool );
293 :
294 : /* Transaction Level Execution Management *************************************/
295 :
296 : /* fd_runtime_prepare_txns_start and fd_runtime_pre_execute_check are only
297 : publicly exposed for the fuzzing harnesses. These functions are responsible
298 : for various transaction sanitization checks. */
299 :
300 : int
301 : fd_runtime_prepare_txns_start( fd_exec_slot_ctx_t * slot_ctx,
302 : fd_execute_txn_task_info_t * task_info,
303 : fd_txn_p_t * txns,
304 : ulong txn_cnt );
305 :
306 : void
307 : fd_runtime_pre_execute_check( fd_execute_txn_task_info_t * task_info );
308 :
309 : /* fd_runtime_process_txns is responsible for end-to-end preparing, executing,
310 : and finalizing a list of transactions. It will execute all of the
311 : transactions on a single core. */
312 : int
313 : fd_runtime_process_txns( fd_exec_slot_ctx_t * slot_ctx,
314 : fd_spad_t * spad,
315 : fd_capture_ctx_t * capture_ctx,
316 : fd_txn_p_t * txns,
317 : ulong txn_cnt );
318 :
319 :
320 : /* fd_runtime_process_txns_in_waves_tpool is responsible for end-to-end
321 : preparing, executing and finalizing a list of transactions. It will schedule
322 : out a set of transactions to maximize parallelism. */
323 : int
324 : fd_runtime_process_txns_in_waves_tpool( fd_exec_slot_ctx_t * slot_ctx,
325 : fd_capture_ctx_t * capture_ctx,
326 : fd_txn_p_t * txns,
327 : ulong txn_cnt,
328 : fd_tpool_t * tpool,
329 : fd_spad_t * * spads,
330 : ulong spads_cnt );
331 :
332 : /* fd_runtime_process_txns and fd_runtime_process_txns_in_waves_tpool are
333 : both entrypoints for executing transactions. Currently, the former is used
334 : in the leader pipeline as conflict-free microblocks are streamed in from the
335 : pack tile. The latter is used for replaying non-leader blocks. Currently the
336 : entire block must be received to start replaying. This allows us to schedule
337 : out all of the transactions. Eventually, transactions will be executed in
338 : a streamed fashion. */
339 :
340 : /* Epoch Boundary *************************************************************/
341 :
342 : uint
343 : fd_runtime_is_epoch_boundary( fd_epoch_bank_t * epoch_bank,
344 : ulong curr_slot,
345 : ulong prev_slot );
346 :
347 : void
348 : fd_process_new_epoch( fd_exec_slot_ctx_t * slot_ctx,
349 : ulong parent_epoch );
350 :
351 : /* Debugging Tools ************************************************************/
352 :
353 : void
354 : fd_runtime_checkpt( fd_capture_ctx_t * capture_ctx,
355 : fd_exec_slot_ctx_t * slot_ctx,
356 : ulong slot );
357 :
358 : /* TODO: This logic is very old and likely needs to be reworked to work with the
359 : snapshot service. It is not being removed as the logic can probably be reused
360 : for snapshot minimization. */
361 :
362 : void
363 : fd_runtime_collect_rent_accounts_prune( ulong slot,
364 : fd_exec_slot_ctx_t * slot_ctx,
365 : fd_capture_ctx_t * capture_ctx );
366 :
367 : /* Block Parsing **************************************************************/
368 :
369 : /* Live Replay APIs */
370 :
371 : fd_raw_block_txn_iter_t
372 : fd_raw_block_txn_iter_init( uchar const * orig_data,
373 : fd_block_entry_batch_t const * batches,
374 : ulong batch_cnt );
375 :
376 : ulong
377 : fd_raw_block_txn_iter_done( fd_raw_block_txn_iter_t iter );
378 :
379 : fd_raw_block_txn_iter_t
380 : fd_raw_block_txn_iter_next( fd_raw_block_txn_iter_t iter );
381 :
382 : void
383 : fd_raw_block_txn_iter_ele( fd_raw_block_txn_iter_t iter, fd_txn_p_t * out_txn );
384 :
385 : /* Offline Replay *************************************************************/
386 :
387 : int
388 : fd_runtime_block_eval_tpool( fd_exec_slot_ctx_t * slot_ctx,
389 : fd_capture_ctx_t * capture_ctx,
390 : fd_tpool_t * tpool,
391 : ulong scheduler,
392 : ulong * txn_cnt,
393 : fd_spad_t * * spads,
394 : ulong spads_cnt );
395 :
396 : /* Genesis ********************************************************************/
397 :
398 : void
399 : fd_runtime_read_genesis( fd_exec_slot_ctx_t * slot_ctx,
400 : char const * genesis_filepath,
401 : uchar is_snapshot,
402 : fd_capture_ctx_t * capture_ctx,
403 : fd_tpool_t * tpool );
404 :
405 : FD_PROTOTYPES_END
406 :
407 : #endif /* HEADER_fd_src_flamenco_runtime_fd_runtime_h */
|