#ifndef HEADER_fd_src_discof_replay_fd_sched_h
#define HEADER_fd_src_discof_replay_fd_sched_h

#include "fd_rdisp.h"
#include "../../disco/store/fd_store.h"     /* for fd_store_fec_t */
#include "../../disco/pack/fd_microblock.h" /* for fd_txn_p_t */

#include "../../flamenco/accdb/fd_accdb_user.h"
#include "../../util/spad/fd_spad.h"        /* for ALUTs */

/* fd_sched wraps all the smarts and mechanical chores around scheduling
   transactions for replay execution. It is built on top of the
   dispatcher fd_rdisp. The dispatcher is responsible for high
   performance lane-based scheduling of transactions. On top of that,
   we add fork-aware management of lanes, and policies regarding which
   lanes to prioritize for execution.

   Conceptually, transactions in a block form a DAG. We would like to
   make our way through a block with a sufficient degree of parallelism,
   such that the execution time of the critical path of the DAG is the
   limiting factor. The dispatcher does a good job of surfacing the
   critical path of the DAG on the fly. Blocks are tracked by the
   dispatcher either as a block staged on a lane, or as an unstaged
   block. When a block is staged, it will enjoy the most intelligent
   online scheduling that the dispatcher has to offer. Lanes have to
   consist of linear chains of blocks down a fork. So to map a fork
   tree to lanes, we will need multiple lanes. Ideally, every branch in
   the fork tree sits on some lane. However, memory footprint limits us
   to a small number of lanes.

   This module implements a state machine for ensuring that blocks enter
   into and exit out of lanes in an orderly fashion. The public APIs of
   this module are invoked to drive state transitions on a small number
   of events, such as new transactions arriving, transactions
   completing, or a block being aborted/abandoned. We also implement
   policies for deciding which blocks get staged onto lanes, or evicted
   from lanes, as well as which lanes to prioritize for execution.


   The general order in which calls happen under the normal case is:

   fd_sched_fec_ingest()* ... fd_sched_task_next_ready()* ... fd_sched_task_done()* ...
   more ingest, more ready, more done ...
   ...
   fd_sched_task_next_ready() indicates that the last transaction in the block is being scheduled
   fd_sched_task_done()*
   caller observes that the block is done
   end-of-block processing in caller
   fd_sched_task_next_ready() starts returning transactions from the next block
   more ingest, more ready, more done ...
   ... */
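
/* For illustration only: a minimal sketch of how a caller might drive
   the loop above. store_has_fec(), fill_fec_descriptor(), pop_next_fec(),
   handle_task(), and mark_block_dead() are hypothetical stand-ins for
   the caller's own plumbing, not part of this API.

     fd_sched_t * sched = ...; // joined scheduler
     for(;;) {
       // Ingest any FEC sets that have arrived, if there is room.
       while( store_has_fec() ) {
         fd_sched_fec_t fec[1];
         fill_fec_descriptor( fec ); // populated by the caller from the store
         if( FD_UNLIKELY( !fd_sched_fec_can_ingest( sched, fec ) ) ) break;
         if( FD_UNLIKELY( !fd_sched_fec_ingest( sched, fec ) ) ) mark_block_dead( fec->bank_idx );
         pop_next_fec();
       }

       // Drain ready tasks and hand them out.
       fd_sched_task_t task[1];
       while( fd_sched_task_next_ready( sched, task ) ) {
         handle_task( task ); // decode task->task_type; exec/sigverify go to exec tiles
       }

       // On completion notifications from exec tiles:
       //   fd_sched_task_done( sched, task_type, txn_idx, exec_idx );
     }
*/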

struct fd_sched;
typedef struct fd_sched fd_sched_t;


struct fd_sched_alut_ctx {
  fd_accdb_user_t   accdb[1];
  fd_funk_txn_xid_t xid[1];
  ulong             els; /* Effective lookup slot. */
};
typedef struct fd_sched_alut_ctx fd_sched_alut_ctx_t;

struct fd_sched_fec {
  ulong            bank_idx;        /* Index of the block. Assumed to be in [0, block_cnt_max). Caller
                                       is responsible for ensuring that bank idx is in bounds and unique
                                       across equivocated blocks. */
  ulong            parent_bank_idx; /* Index of the parent block. Assumed to be in [0, block_cnt_max).
                                       Caller is responsible for ensuring that parent bank idx is in
                                       bounds and unique across equivocated blocks. */
  ulong            slot;            /* Slot number of the block. */
  ulong            parent_slot;     /* Slot number of the parent block. */
  fd_store_fec_t * fec;             /* FEC set data. */
  uint             shred_cnt;       /* Number of shreds in the FEC set. */
  uint is_last_in_batch:1;  /* Set if this is the last FEC set in the batch; relevant because the
                               parser should ignore trailing bytes at the end of a batch. */
  uint is_last_in_block:1;  /* Set if this is the last FEC set in the block. */
  uint is_first_in_block:1; /* Set if this is the first FEC set in the block. */

  fd_sched_alut_ctx_t alut_ctx[ 1 ];
};
typedef struct fd_sched_fec fd_sched_fec_t;
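
/* For illustration only: a rough sketch of how a caller might populate
   an fd_sched_fec_t before handing it to fd_sched_fec_ingest(). The
   store_fec pointer and the index/slot/count values are hypothetical;
   in practice they come from the store and from the caller's bank/fork
   bookkeeping.

     fd_sched_fec_t fec[1];
     fec->bank_idx          = 3UL;        // in [0, block_cnt_max), unique across equivocated blocks
     fec->parent_bank_idx   = 1UL;        // likewise for the parent block
     fec->slot              = 1001UL;
     fec->parent_slot       = 1000UL;
     fec->fec               = store_fec;  // fd_store_fec_t * obtained from the store
     fec->shred_cnt         = 32U;
     fec->is_last_in_batch  = 1U;
     fec->is_last_in_block  = 0U;
     fec->is_first_in_block = 0U;
     // fec->alut_ctx also needs a valid accdb join, xid, and effective
     // lookup slot so the scheduler can resolve address lookup tables.
*/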

/* The scheduler may return one of the following types of tasks for the
   replay tile.

   e - passed down to exec tiles.
   i - replay completes the task immediately.
   q - replay may either do it immediately or queue the task up. */
#define FD_SCHED_TT_NULL          (0UL)
#define FD_SCHED_TT_BLOCK_START   (1UL) /* (i) Start-of-block processing. */
#define FD_SCHED_TT_BLOCK_END     (2UL) /* (q) End-of-block processing. */
#define FD_SCHED_TT_TXN_EXEC      (3UL) /* (e) Transaction execution. */
#define FD_SCHED_TT_TXN_SIGVERIFY (4UL) /* (e) Transaction sigverify. */
#define FD_SCHED_TT_LTHASH        (5UL) /* (e) Account lthash. */
#define FD_SCHED_TT_POH_VERIFY    (6UL) /* (e) PoH hash verification. */

struct fd_sched_block_start {
  ulong bank_idx;        /* Same as in fd_sched_fec_t. */
  ulong parent_bank_idx; /* Same as in fd_sched_fec_t. */
  ulong slot;            /* Slot number of the block. */
};
typedef struct fd_sched_block_start fd_sched_block_start_t;

struct fd_sched_block_end {
  ulong bank_idx;
};
typedef struct fd_sched_block_end fd_sched_block_end_t;

struct fd_sched_txn_exec {
  ulong bank_idx;
  ulong slot;
  ulong txn_idx;
  ulong exec_idx;
};
typedef struct fd_sched_txn_exec fd_sched_txn_exec_t;

struct fd_sched_txn_sigverify {
  ulong bank_idx;
  ulong txn_idx;
  ulong exec_idx;
};
typedef struct fd_sched_txn_sigverify fd_sched_txn_sigverify_t;

struct fd_sched_task {
  ulong task_type; /* Set to one of the task types defined above. */
  union {
    fd_sched_block_start_t   block_start[ 1 ];
    fd_sched_block_end_t     block_end[ 1 ];
    fd_sched_txn_exec_t      txn_exec[ 1 ];
    fd_sched_txn_sigverify_t txn_sigverify[ 1 ];
  };
};
typedef struct fd_sched_task fd_sched_task_t;
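
/* For illustration only: a sketch of how a caller might decode a task
   returned by fd_sched_task_next_ready(), assuming it forwards exec and
   sigverify tasks to exec tiles and handles block start/end itself.
   handle_*() and forward_*() are hypothetical caller-side helpers.

     fd_sched_task_t task[1];
     if( fd_sched_task_next_ready( sched, task ) ) {
       switch( task->task_type ) {
         case FD_SCHED_TT_BLOCK_START:   handle_block_start( task->block_start );   break;
         case FD_SCHED_TT_BLOCK_END:     handle_block_end( task->block_end );       break;
         case FD_SCHED_TT_TXN_EXEC:      forward_txn_exec( task->txn_exec );        break;
         case FD_SCHED_TT_TXN_SIGVERIFY: forward_sigverify( task->txn_sigverify );  break;
         default:                        break;
       }
     }
*/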

FD_PROTOTYPES_BEGIN

/* fd_sched_{align,footprint} return the required alignment and
   footprint in bytes for a region of memory to be used as a scheduler.
   block_cnt_max is the maximum number of blocks that will be tracked by
   the scheduler. */
ulong fd_sched_align    ( void );
ulong fd_sched_footprint( ulong block_cnt_max );

void *
fd_sched_new( void * mem, ulong block_cnt_max, ulong exec_cnt );

fd_sched_t *
fd_sched_join( void * mem, ulong block_cnt_max );
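
/* For illustration only: a minimal setup sketch. Allocating out of a
   joined workspace wksp via fd_wksp_alloc_laddr() is one plausible way
   to obtain suitably aligned memory; the block_cnt_max and exec_cnt
   values are hypothetical.

     ulong  block_cnt_max = 64UL;
     ulong  exec_cnt      = 4UL; // number of exec tiles fed by the scheduler
     void * mem = fd_wksp_alloc_laddr( wksp, fd_sched_align(), fd_sched_footprint( block_cnt_max ), 1UL );
     fd_sched_t * sched = fd_sched_join( fd_sched_new( mem, block_cnt_max, exec_cnt ), block_cnt_max );
*/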

/* Add the data in the FEC set to the scheduler. If is_last_in_block is
   set, then this is the last FEC set in the block. Transactions may
   span FEC set boundaries. The scheduler is responsible for
   incrementally parsing transactions from concatenated FEC set data.
   Assumes that FEC sets are delivered in replay order. That is, forks
   form a partial ordering over FEC sets: in-order per fork, but
   arbitrary ordering across forks. The fork tree is implied by the
   stream of parent-child relationships delivered in FEC sets. Also
   assumes that there is enough space in the scheduler to ingest the FEC
   set. The caller should generally call fd_sched_fec_can_ingest()
   first.

   Returns 1 on success, 0 if the block is bad and should be marked
   dead. */
FD_WARN_UNUSED int
fd_sched_fec_ingest( fd_sched_t * sched, fd_sched_fec_t * fec );

/* Check if there is enough space in the scheduler to ingest the data in
   the FEC set. Returns 1 if there is, 0 otherwise. This is a cheap
   and conservative check. */
int
fd_sched_fec_can_ingest( fd_sched_t * sched, fd_sched_fec_t * fec );

/* Check if there is enough space in the scheduler to ingest fec_cnt
   worst-case FEC sets. Returns 1 if there is, 0 otherwise. This is a
   cheap and conservative check, and has less precision than
   fd_sched_fec_can_ingest(). */
int
fd_sched_can_ingest( fd_sched_t * sched, ulong fec_cnt );
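
/* For illustration only: fd_sched_can_ingest() can be used to apply
   backpressure before FEC sets are pulled out of the store, e.g. when
   the caller wants to reserve room for a batch of fec_cnt worst-case
   FEC sets up front. The fec_cnt value here is hypothetical.

     ulong fec_cnt = 8UL;
     if( fd_sched_can_ingest( sched, fec_cnt ) ) {
       // Safe to pull up to fec_cnt FEC sets from the store and call
       // fd_sched_fec_ingest() on each of them.
     }
*/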

/* Obtain a transaction eligible for execution. This implies that all
   prior transactions with w-r or w-w conflicts have completed.
   Information regarding the scheduled transaction is written to the out
   pointer. Returns 1 on success, 0 on failure. Failures are generally
   transient and non-fatal, and are simply an indication that no
   transaction is ready for execution yet. When in-flight transactions
   retire or when more FEC sets are ingested, more transactions may
   become ready for execution.

   Transactions on the same fork will be returned in a way that
   maintains the serial fiction. That is, reordering can happen, but
   only within the constraint that transactions appear to be ready in
   the order in which they occur in the block. Transactions from
   different forks may interleave, and the caller should be prepared to
   switch execution context in response to interleavings. The scheduler
   will barrier on block boundaries, in the sense that transactions from
   a subsequent block will not be returned for execution until all
   transactions from the previous block have completed. This gives the
   caller a chance to perform end-of-block processing before
   transactions from a subsequent block start executing. In general,
   the caller should check if the last transaction in the current block
   is done, and if so, do end-of-block processing before calling this
   function to start the next block.

   In addition to returning transactions for execution, this function
   may also return a sigverify task. Sigverify can be completed
   asynchronously, outside the critical path of transaction execution,
   as long as every transaction in a block passes sigverify before we
   commit the block. The scheduler prioritizes actual execution of
   transactions over sigverify, and in general sigverify tasks are only
   returned when no real transaction can be dispatched. In other words,
   the scheduler tries to exploit idle cycles in the exec tiles during
   periods when critical path progression offers little parallelism. */
ulong
fd_sched_task_next_ready( fd_sched_t * sched, fd_sched_task_t * out );

/* Mark a task as complete. For transaction execution, this means that
   the effects of the execution are now visible on any core that could
   execute a subsequent transaction. */
void
fd_sched_task_done( fd_sched_t * sched, ulong task_type, ulong txn_idx, ulong exec_idx );

/* Abandon a block. This means that we are no longer interested in
   executing the block. This also implies that any block which chains
   off of the provided block shall be abandoned. This is mainly used
   when a block is aborted because we decided that it would be a
   dead/invalid block, and so there's no point in spending resources
   executing it. The scheduler will no longer return transactions from
   abandoned blocks for execution. This should only be invoked on an
   actively replayed block, and should only be invoked once on it. */
void
fd_sched_block_abandon( fd_sched_t * sched, ulong bank_idx );

/* Add a block to the scheduler and mark it as already done. This is
   useful for installing the snapshot slot, or for informing the
   scheduler of a packed leader block. parent_bank_idx should be
   ULONG_MAX for the snapshot slot, and otherwise the index of a block
   that hasn't been pruned. */
void
fd_sched_block_add_done( fd_sched_t * sched, ulong bank_idx, ulong parent_bank_idx );
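
/* For illustration only: installing the snapshot slot as an
   already-done block, per the comment above. snapshot_bank_idx is a
   hypothetical index chosen by the caller.

     fd_sched_block_add_done( sched, snapshot_bank_idx, ULONG_MAX );
*/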

/* Advance the root, pruning all blocks across forks that do not descend
   from the new root. Assumes the new root is in the fork tree and
   connected to the current root. Also assumes that there are no more
   in-flight transactions from the soon-to-be-pruned blocks. This
   should be called after fd_sched_root_notify(), and the caller is
   responsible for figuring out the new root to safely prune to. */
void
fd_sched_advance_root( fd_sched_t * sched, ulong root_idx );

/* Notify the scheduler of a new root. This has the effect of calling
   fd_sched_block_abandon() on all minority forks that do not descend
   from the new root. Shortly after a call to this function, in-flight
   transactions from these abandoned blocks should retire from the
   execution pipeline, and the new root will be safe for pruning. */
void
fd_sched_root_notify( fd_sched_t * sched, ulong root_idx );
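
/* For illustration only: the expected ordering of the two root calls.
   The "wait" step is a hypothetical caller-side responsibility; the
   caller decides when it is safe to prune and to which root.

     fd_sched_root_notify( sched, new_root_idx );  // abandons minority forks
     // ... later, once in-flight transactions from abandoned blocks have
     // retired and the caller has determined a root that is safe to
     // prune to ...
     fd_sched_advance_root( sched, safe_root_idx );
*/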

fd_txn_p_t *
fd_sched_get_txn( fd_sched_t * sched, ulong txn_idx );

fd_hash_t *
fd_sched_get_poh( fd_sched_t * sched, ulong bank_idx );

uint
fd_sched_get_shred_cnt( fd_sched_t * sched, ulong bank_idx );

void *
fd_sched_leave( fd_sched_t * sched );

void *
fd_sched_delete( void * mem );

FD_PROTOTYPES_END

#endif /* HEADER_fd_src_discof_replay_fd_sched_h */