Line data Source code
1 : #ifndef HEADER_fd_src_discof_replay_fd_sched_h 2 : #define HEADER_fd_src_discof_replay_fd_sched_h 3 : 4 : #include "fd_rdisp.h" 5 : #include "../../disco/store/fd_store.h" /* for fd_store_fec_t */ 6 : #include "../../disco/pack/fd_microblock.h" /* for fd_txn_p_t */ 7 : 8 : #include "../../funk/fd_funk_base.h" /* for ALUTs */ 9 : #include "../../util/spad/fd_spad.h" /* for ALUTs */ 10 : 11 : /* fd_sched wraps all the smarts and mechanical chores around scheduling 12 : transactions for replay execution. It is built on top of the 13 : dispatcher fd_rdisp. The dispatcher is responsible for high 14 : performance lane-based scheduling of transactions. On top of that, 15 : we add fork-aware management of lanes, and policies regarding which 16 : lanes to prioritize for execution. 17 : 18 : Conceptually, transactions in a block form a DAG. We would like to 19 : make our way through a block with a sufficient degree of parallelism, 20 : such that the execution time of the critical path of the DAG is the 21 : limiting factor. The dispatcher does a good job of surfacing the 22 : critical path of the DAG on the fly. Blocks are tracked by the 23 : dispatcher either as a block staged on a lane, or as an unstaged 24 : block. When a block is staged, it will enjoy the most intelligent 25 : online scheduling that the dispatcher has to offer. Lanes have to 26 : consist of linear chains of blocks down a fork. So to map a fork 27 : tree to lanes, we will need multiple lanes. Ideally, every branch in 28 : the fork tree sits on some lane. However, memory footprint limits us 29 : to a small number of lanes. 30 : 31 : This module implements a state machine for ensuring that blocks enter 32 : into and exit out of lanes in an orderly fashion. The public APIs of 33 : this module are invoked to drive state transitions on a small number 34 : of events, such as new transactions arriving, or transactions 35 : completing, or a block being aborted/abandoned. 
We also implement 36 : policies for deciding which blocks get staged onto lanes, or evicted 37 : from lanes, as well as which lanes to prioritize for execution. 38 : 39 : 40 : The general order in which calls happen under the normal case is: 41 : 42 : fd_sched_fec_ingest()* ... fd_sched_txn_next_ready()* ... fd_sched_txn_done()* ... 43 : more ingest, more ready, more done ... 44 : ... 45 : fd_sched_txn_next_ready() indicates that the last transaction in the block is being scheduled 46 : fd_sched_txn_done()* 47 : fd_sched_block_is_done() 48 : end-of-block processing in caller 49 : fd_sched_txn_next_ready() starts returning transactions from the next block 50 : more ingest, more ready, more done ... 51 : ... */ 52 : 53 : struct fd_sched; 54 : typedef struct fd_sched fd_sched_t; 55 : 56 : typedef fd_eslot_t fd_sched_block_id_t; 57 : 58 : struct fd_sched_alut_ctx { 59 : fd_funk_t * funk; 60 : fd_funk_txn_t * funk_txn; 61 : ulong els; /* effective lookup slot */ 62 : fd_spad_t * runtime_spad; 63 : }; 64 : typedef struct fd_sched_alut_ctx fd_sched_alut_ctx_t; 65 : 66 : struct fd_sched_fec { 67 : fd_sched_block_id_t block_id; 68 : fd_sched_block_id_t parent_block_id; 69 : fd_store_fec_t * fec; 70 : uint shred_cnt; 71 : uint is_last_in_batch:1; /* set if this is the last FEC set in the batch; relevant because the 72 : parser should ignore trailing bytes at the end of a batch */ 73 : uint is_last_in_block:1; /* set if this is the last FEC set in the block */ 74 : 75 : fd_sched_alut_ctx_t alut_ctx[ 1 ]; 76 : }; 77 : typedef struct fd_sched_fec fd_sched_fec_t; 78 : 79 0 : #define FD_SCHED_TXN_ID_NULL (ULONG_MAX) 80 0 : #define FD_SCHED_TXN_ID_BLOCK_START (ULONG_MAX-1UL) 81 0 : #define FD_SCHED_TXN_ID_BLOCK_END (ULONG_MAX-2UL) 82 : 83 : struct fd_sched_txn_ready { 84 : fd_sched_block_id_t block_id; 85 : fd_sched_block_id_t parent_block_id; 86 : ulong txn_id; 87 : uint block_end:1; /* set for the final sentinel in the block; relevant because the 88 : caller might need to do 
end-of-block processing; special txn_id */ 89 : uint block_start:1; /* set for the first sentinel in the block; relevant because the 90 : caller might need to do start-of-block processing; special txn_id */ 91 : }; 92 : typedef struct fd_sched_txn_ready fd_sched_txn_ready_t; 93 : 94 : FD_PROTOTYPES_BEGIN 95 : 96 : /* fd_sched_{align,footprint} return the required alignment and 97 : footprint in bytes for a region of memory to be used as a scheduler. 98 : */ 99 : ulong fd_sched_align ( void ); 100 : ulong fd_sched_footprint( void ); 101 : 102 : void * 103 : fd_sched_new( void * mem ); 104 : 105 : fd_sched_t * 106 : fd_sched_join( void * mem ); 107 : 108 : /* Add the data in the FEC set to the scheduler. If is_last_in_block is 1, 109 : then this is the last FEC set in the block. Transactions may span 110 : FEC set boundaries. The scheduler is responsible for incrementally 111 : parsing transactions from concatenated FEC set data. Assumes that 112 : FEC sets are delivered in replay order. That is, forks form a 113 : partial ordering over FEC sets: in-order per fork, but arbitrary 114 : ordering across forks. The fork tree is implied by the stream of 115 : parent-child relationships delivered in FEC sets. Also assumes that 116 : there is enough space in the scheduler to ingest the FEC set. The 117 : caller should generally call fd_sched_fec_can_ingest() first. */ 118 : void 119 : fd_sched_fec_ingest( fd_sched_t * sched, fd_sched_fec_t * fec ); 120 : 121 : /* Check if there is enough space in the scheduler to ingest the data in 122 : the FEC set. Returns 1 if there is, 0 otherwise. This is a cheap 123 : and conservative check. */ 124 : int 125 : fd_sched_fec_can_ingest( fd_sched_t * sched, fd_sched_fec_t * fec ); 126 : 127 : /* Check if there is enough space in the scheduler to ingest a worst 128 : case FEC set. Returns 1 if there is, 0 otherwise. This is a cheap 129 : and conservative check, and has less precision than 130 : fd_sched_fec_can_ingest(). 
*/ 131 : int 132 : fd_sched_can_ingest( fd_sched_t * sched ); 133 : 134 : /* Obtain a transaction eligible for execution. This implies that all 135 : prior transactions with w-r or w-w conflicts have completed. 136 : Information regarding the scheduled transaction is written to the out 137 : pointer. Returns 1 on success, 0 on failure. Failures are generally 138 : transient and non-fatal, and are simply an indication that no 139 : transaction is ready for execution yet. When in-flight transactions 140 : retire or when more FEC sets are ingested, more transactions may 141 : become ready for execution. 142 : 143 : Transactions on the same fork will be returned in a way that 144 : maintains the serial fiction. That is, reordering can happen, but 145 : only within the constraint that transactions appear to be ready in 146 : the order in which they occur in the block. Transactions from 147 : different forks may interleave, and the caller should be prepared to 148 : switch execution context in response to interleavings. The scheduler 149 : will barrier on block boundaries, in the sense that transactions from 150 : a subsequent block will not be returned for execution until all 151 : transactions from the previous block have completed. This gives the 152 : caller a chance to perform end-of-block processing before 153 : transactions from a subsequent block start executing. In general, 154 : the caller should check if the last transaction in the current block 155 : is done, and if so, do end-of-block processing before calling this 156 : function to start the next block. */ 157 : ulong 158 : fd_sched_txn_next_ready( fd_sched_t * sched, fd_sched_txn_ready_t * out_txn ); 159 : 160 : /* Mark a transaction as complete. This means that the effects of this 161 : transaction's execution are now visible on any core that could 162 : execute a subsequent transaction. */ 163 : void 164 : fd_sched_txn_done( fd_sched_t * sched, ulong txn_id ); 165 : 166 : /* Abandon a block. 
This means that we are no longer interested in 167 : executing the block. This also implies that any block which chains 168 : off of the provided block shall be abandoned. This is mainly used 169 : when a block is aborted because we decided that it would be a 170 : dead/invalid block, and so there's no point in spending resources 171 : executing it. The scheduler will no longer return transactions from 172 : abandoned blocks for execution. This should only be invoked on an 173 : actively replayed block, and should only be invoked once on it. */ 174 : void 175 : fd_sched_block_abandon( fd_sched_t * sched, fd_sched_block_id_t * block_id ); 176 : 177 : /* Returns 1 if the block is done, 0 otherwise. A block is done if all 178 : transactions in the block have completed. Caller may begin 179 : end-of-block processing when the block is done. Assumes that the 180 : block has not been published away. */ 181 : int 182 : fd_sched_block_is_done( fd_sched_t * sched, fd_sched_block_id_t * block_id ); 183 : 184 : /* Add a block as immediately done to the scheduler. This is useful for 185 : installing the snapshot slot, or for informing the scheduler of a 186 : packed leader block. Parent block should be NULL for the snapshot 187 : slot, and otherwise a block that hasn't been published away. */ 188 : void 189 : fd_sched_block_add_done( fd_sched_t * sched, fd_sched_block_id_t * block_id, fd_sched_block_id_t * parent_block_id ); 190 : 191 : /* Publish new root, pruning all blocks across forks that do not descend 192 : from the new root. Assumes the new root is in the fork tree and 193 : connected to the current root. Also assumes that there are no more 194 : in-flight transactions from the soon-to-be-pruned blocks. This 195 : should be called after root_notify() and the caller is responsible 196 : for figuring out the new root to safely publish to. 
*/ 197 : void 198 : fd_sched_root_publish( fd_sched_t * sched, fd_sched_block_id_t * root ); 199 : 200 : /* Notify the scheduler of a new root. This has the effect of calling 201 : abandon() on all minority forks that do not descend from the new 202 : root. Shortly after a call to this function, in-flight transactions 203 : from these abandoned blocks should retire from the execution 204 : pipeline, and the new root will be safe for publishing. */ 205 : void 206 : fd_sched_root_notify( fd_sched_t * sched, fd_sched_block_id_t * root ); 207 : 208 : fd_txn_p_t * 209 : fd_sched_get_txn( fd_sched_t * sched, ulong txn_id ); 210 : 211 : fd_hash_t * 212 : fd_sched_get_poh( fd_sched_t * sched, fd_sched_block_id_t * block_id ); 213 : 214 : uint 215 : fd_sched_get_shred_cnt( fd_sched_t * sched, fd_sched_block_id_t * block_id ); 216 : 217 : void * 218 : fd_sched_leave( fd_sched_t * sched ); 219 : 220 : void * 221 : fd_sched_delete( void * mem ); 222 : 223 : FD_PROTOTYPES_END 224 : 225 : #endif /* HEADER_fd_src_discof_replay_fd_sched_h */