#ifndef HEADER_fd_src_discof_replay_fd_sched_h
#define HEADER_fd_src_discof_replay_fd_sched_h

#include "fd_rdisp.h"
#include "../../disco/store/fd_store.h"     /* for fd_store_fec_t */
#include "../../disco/pack/fd_microblock.h" /* for fd_txn_p_t */

#include "../../funk/fd_funk_base.h" /* for ALUTs */
#include "../../util/spad/fd_spad.h" /* for ALUTs */

/* fd_sched wraps all the smarts and mechanical chores around scheduling
   transactions for replay execution. It is built on top of the
   dispatcher fd_rdisp. The dispatcher is responsible for
   high-performance, lane-based scheduling of transactions. On top of
   that, we add fork-aware management of lanes, and policies regarding
   which lanes to prioritize for execution.

   Conceptually, transactions in a block form a DAG. We would like to
   make our way through a block with a sufficient degree of parallelism,
   such that the execution time of the critical path of the DAG is the
   limiting factor. The dispatcher does a good job of surfacing the
   critical path of the DAG on the fly. Blocks are tracked by the
   dispatcher either as a block staged on a lane, or as an unstaged
   block. When a block is staged, it will enjoy the most intelligent
   online scheduling that the dispatcher has to offer. Lanes have to
   consist of linear chains of blocks down a fork. So to map a fork
   tree to lanes, we will need multiple lanes. Ideally, every branch in
   the fork tree sits on some lane. However, memory footprint limits us
   to a small number of lanes.

   This module implements a state machine for ensuring that blocks enter
   into and exit out of lanes in an orderly fashion. The public APIs of
   this module are invoked to drive state transitions on a small number
   of events, such as new transactions arriving, transactions
   completing, or a block being aborted/abandoned. We also implement
   policies for deciding which blocks get staged onto lanes or evicted
   from lanes, as well as which lanes to prioritize for execution.


   The general order in which calls happen under the normal case is:

   fd_sched_fec_ingest()* ... fd_sched_task_next_ready()* ... fd_sched_task_done()* ...
   more ingest, more ready, more done ...
   ...
   fd_sched_task_next_ready() indicates that the last transaction in the block is being scheduled
   fd_sched_task_done()*
   fd_sched_block_is_done()
   end-of-block processing in caller
   fd_sched_task_next_ready() starts returning transactions from the next block
   more ingest, more ready, more done ...
   ... */
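
/* A sketch of that loop, with replay's other event handling elided.
   This is illustrative only: have_new_fec(), next_fec(),
   exec_dispatch() and exec_poll_done() are hypothetical stand-ins for
   however the caller receives FEC sets, hands tasks to exec tiles, and
   learns of their completion.

     fd_sched_task_t task[ 1 ];
     for(;;) {
       if( fd_sched_can_ingest( sched ) && have_new_fec() ) {
         if( FD_UNLIKELY( !fd_sched_fec_ingest( sched, next_fec() ) ) ) {
           ... mark the block dead ...
         }
       }
       if( fd_sched_task_next_ready( sched, task ) ) exec_dispatch( task );
       ulong task_type, txn_idx, exec_idx;
       while( exec_poll_done( &task_type, &txn_idx, &exec_idx ) ) {
         fd_sched_task_done( sched, task_type, txn_idx, exec_idx );
       }
     }
*/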

struct fd_sched;
typedef struct fd_sched fd_sched_t;


struct fd_sched_alut_ctx {
  fd_funk_t *       funk;
  fd_funk_txn_xid_t xid[1];
  ulong             els; /* Effective lookup slot. */
  fd_spad_t *       runtime_spad;
};
typedef struct fd_sched_alut_ctx fd_sched_alut_ctx_t;

struct fd_sched_fec {
  ulong            bank_idx;        /* Index of the block. Assumed to be in [0, block_cnt_max). Caller
                                       is responsible for ensuring that bank idx is in bounds and unique
                                       across equivocated blocks. */
  ulong            parent_bank_idx; /* Index of the parent block. Assumed to be in [0, block_cnt_max).
                                       Caller is responsible for ensuring that parent bank idx is in
                                       bounds and unique across equivocated blocks. */
  ulong            slot;            /* Slot number of the block. */
  ulong            parent_slot;     /* Slot number of the parent block. */
  fd_store_fec_t * fec;             /* FEC set data. */
  uint             shred_cnt;       /* Number of shreds in the FEC set. */
  uint is_last_in_batch:1;  /* Set if this is the last FEC set in the batch; relevant because the
                               parser should ignore trailing bytes at the end of a batch. */
  uint is_last_in_block:1;  /* Set if this is the last FEC set in the block. */
  uint is_first_in_block:1; /* Set if this is the first FEC set in the block. */

  fd_sched_alut_ctx_t alut_ctx[ 1 ];
};
typedef struct fd_sched_fec fd_sched_fec_t;
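
/* For example, a caller might populate fd_sched_fec_t roughly as
   follows before handing it to fd_sched_fec_ingest(). This is an
   illustrative sketch only; the bank indices, slot numbers, store_fec
   pointer, shred count and flags are assumed to come from the caller's
   own FEC delivery bookkeeping.

     fd_sched_fec_t sfec[ 1 ];
     sfec->bank_idx          = child_bank_idx;
     sfec->parent_bank_idx   = parent_bank_idx;
     sfec->slot              = slot;
     sfec->parent_slot       = parent_slot;
     sfec->fec               = store_fec;
     sfec->shred_cnt         = shred_cnt;
     sfec->is_last_in_batch  = !!batch_complete;
     sfec->is_last_in_block  = !!block_complete;
     sfec->is_first_in_block = !!block_start;
     ... also fill in sfec->alut_ctx (funk, xid, els, runtime_spad) so
     the scheduler can resolve address lookup tables ...
     if( FD_LIKELY( fd_sched_fec_can_ingest( sched, sfec ) ) ) {
       if( FD_UNLIKELY( !fd_sched_fec_ingest( sched, sfec ) ) ) {
         ... mark the block dead ...
       }
     }
*/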

/* The scheduler may return one of the following types of tasks for the
   replay tile.

   e - passed down to exec tiles.
   i - replay completes the task immediately.
   q - replay may either do it immediately or queue the task up. */
#define FD_SCHED_TT_NULL          (0UL)
#define FD_SCHED_TT_BLOCK_START   (1UL) /* (i) Start-of-block processing. */
#define FD_SCHED_TT_BLOCK_END     (2UL) /* (q) End-of-block processing. */
#define FD_SCHED_TT_TXN_EXEC      (3UL) /* (e) Transaction execution. */
#define FD_SCHED_TT_TXN_SIGVERIFY (4UL) /* (e) Transaction sigverify. */
#define FD_SCHED_TT_LTHASH        (5UL) /* (e) Account lthash. */
#define FD_SCHED_TT_POH_VERIFY    (6UL) /* (e) PoH hash verification. */

struct fd_sched_block_start {
  ulong bank_idx;        /* Same as in fd_sched_fec_t. */
  ulong parent_bank_idx; /* Same as in fd_sched_fec_t. */
  ulong slot;            /* Slot number of the block. */
};
typedef struct fd_sched_block_start fd_sched_block_start_t;

struct fd_sched_block_end {
  ulong bank_idx;
};
typedef struct fd_sched_block_end fd_sched_block_end_t;

struct fd_sched_txn_exec {
  ulong bank_idx;
  ulong slot;
  ulong txn_idx;
  ulong exec_idx;
};
typedef struct fd_sched_txn_exec fd_sched_txn_exec_t;

struct fd_sched_txn_sigverify {
  ulong bank_idx;
  ulong txn_idx;
  ulong exec_idx;
};
typedef struct fd_sched_txn_sigverify fd_sched_txn_sigverify_t;

struct fd_sched_task {
  ulong task_type; /* Set to one of the task types defined above. */
  union {
    fd_sched_block_start_t   block_start[ 1 ];
    fd_sched_block_end_t     block_end[ 1 ];
    fd_sched_txn_exec_t      txn_exec[ 1 ];
    fd_sched_txn_sigverify_t txn_sigverify[ 1 ];
  };
};
typedef struct fd_sched_task fd_sched_task_t;
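
/* For example, a replay loop might dispatch a returned task like so.
   This is an illustrative sketch: the handler names are hypothetical
   stand-ins for the caller's own plumbing, BLOCK_START is handled
   inline (i), BLOCK_END is handled inline or queued (q), and the
   exec/sigverify tasks are sent to exec tiles (e). Task types not
   shown are elided for brevity.

     fd_sched_task_t task[ 1 ];
     if( fd_sched_task_next_ready( sched, task ) ) {
       switch( task->task_type ) {
         case FD_SCHED_TT_BLOCK_START:   replay_block_start( task->block_start );  break;
         case FD_SCHED_TT_BLOCK_END:     replay_block_end( task->block_end );      break;
         case FD_SCHED_TT_TXN_EXEC:      send_to_exec( task->txn_exec );           break;
         case FD_SCHED_TT_TXN_SIGVERIFY: send_to_sigverify( task->txn_sigverify ); break;
         default:                        break;
       }
     }
*/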

FD_PROTOTYPES_BEGIN

/* fd_sched_{align,footprint} return the required alignment and
   footprint in bytes for a region of memory to be used as a scheduler.
   block_cnt_max is the maximum number of blocks that will be tracked by
   the scheduler. */
ulong fd_sched_align    ( void );
ulong fd_sched_footprint( ulong block_cnt_max );

void *
fd_sched_new( void * mem, ulong block_cnt_max, ulong exec_cnt );

fd_sched_t *
fd_sched_join( void * mem, ulong block_cnt_max );
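
/* A minimal construction/teardown sketch, assuming mem_alloc() and
   mem_free() are hypothetical stand-ins for however the caller obtains
   and releases a memory region with fd_sched_align() alignment and
   fd_sched_footprint() size (e.g. a workspace allocation):

     void * mem = mem_alloc( fd_sched_align(), fd_sched_footprint( block_cnt_max ) );
     fd_sched_t * sched = fd_sched_join( fd_sched_new( mem, block_cnt_max, exec_cnt ),
                                         block_cnt_max );
     ...
     mem_free( fd_sched_delete( fd_sched_leave( sched ) ) );
*/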

/* Add the data in the FEC set to the scheduler. If is_last_in_block is
   set, then this is the last FEC set in the block. Transactions may
   span FEC set boundaries. The scheduler is responsible for
   incrementally parsing transactions from concatenated FEC set data.
   Assumes that FEC sets are delivered in replay order. That is, forks
   form a partial ordering over FEC sets: in-order per fork, but
   arbitrary ordering across forks. The fork tree is implied by the
   stream of parent-child relationships delivered in FEC sets. Also
   assumes that there is enough space in the scheduler to ingest the FEC
   set. The caller should generally call fd_sched_fec_can_ingest()
   first.

   Returns 1 on success, 0 if the block is bad and should be marked
   dead. */
FD_WARN_UNUSED int
fd_sched_fec_ingest( fd_sched_t * sched, fd_sched_fec_t * fec );

/* Check if there is enough space in the scheduler to ingest the data in
   the FEC set. Returns 1 if there is, 0 otherwise. This is a cheap
   and conservative check. */
int
fd_sched_fec_can_ingest( fd_sched_t * sched, fd_sched_fec_t * fec );

/* Check if there is enough space in the scheduler to ingest a worst
   case FEC set. Returns 1 if there is, 0 otherwise. This is a cheap
   and conservative check, and has less precision than
   fd_sched_fec_can_ingest(). */
int
fd_sched_can_ingest( fd_sched_t * sched );
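
/* For example, a caller might use this as a backpressure check before
   even pulling the next FEC set off its delivery queue (poll_next_fec()
   is a hypothetical stand-in for the caller's own delivery mechanism):

     if( fd_sched_can_ingest( sched ) ) {
       fd_sched_fec_t * sfec = poll_next_fec();
       if( sfec ) {
         if( FD_UNLIKELY( !fd_sched_fec_ingest( sched, sfec ) ) ) {
           ... mark the block dead ...
         }
       }
     }
*/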

/* Obtain a transaction eligible for execution. This implies that all
   prior transactions with w-r or w-w conflicts have completed.
   Information regarding the scheduled transaction is written to the out
   pointer. Returns 1 on success, 0 on failure. Failures are generally
   transient and non-fatal, and are simply an indication that no
   transaction is ready for execution yet. When in-flight transactions
   retire or when more FEC sets are ingested, more transactions may
   become ready for execution.

   Transactions on the same fork will be returned in a way that
   maintains the serial fiction. That is, reordering can happen, but
   only within the constraint that transactions appear to be ready in
   the order in which they occur in the block. Transactions from
   different forks may interleave, and the caller should be prepared to
   switch execution context in response to interleavings. The scheduler
   will barrier on block boundaries, in the sense that transactions from
   a subsequent block will not be returned for execution until all
   transactions from the previous block have completed. This gives the
   caller a chance to perform end-of-block processing before
   transactions from a subsequent block start executing. In general,
   the caller should check if the last transaction in the current block
   is done, and if so, do end-of-block processing before calling this
   function to start the next block.

   In addition to returning transactions for execution, this function
   may also return a sigverify task. Sigverify can be completed
   asynchronously outside the critical path of transaction execution, as
   long as every transaction in a block passes sigverify before we
   commit the block. The scheduler prioritizes actual execution of
   transactions over sigverify, and in general sigverify tasks are only
   returned when no real transaction can be dispatched. In other words,
   the scheduler tries to exploit idle cycles in the exec tiles during
   periods when critical path progression offers little parallelism. */
ulong
fd_sched_task_next_ready( fd_sched_t * sched, fd_sched_task_t * out );

/* Mark a task as complete. For transaction execution, this means that
   the effects of the execution are now visible on any core that could
   execute a subsequent transaction. */
void
fd_sched_task_done( fd_sched_t * sched, ulong task_type, ulong txn_idx, ulong exec_idx );

/* Abandon a block. This means that we are no longer interested in
   executing the block. This also implies that any block which chains
   off of the provided block shall be abandoned. This is mainly used
   when a block is aborted because we decided that it would be a
   dead/invalid block, and so there's no point in spending resources
   executing it. The scheduler will no longer return transactions from
   abandoned blocks for execution. This should only be invoked on an
   actively replayed block, and should only be invoked once on it. */
void
fd_sched_block_abandon( fd_sched_t * sched, ulong bank_idx );

/* Add a block as immediately done to the scheduler. This is useful for
   installing the snapshot slot, or for informing the scheduler of a
   packed leader block. parent_bank_idx should be ULONG_MAX for the
   snapshot slot, and otherwise a block that hasn't been pruned. */
void
fd_sched_block_add_done( fd_sched_t * sched, ulong bank_idx, ulong parent_bank_idx );
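
/* For example, installing the snapshot slot at boot might look like the
   following (snapshot_bank_idx is whatever bank index the caller
   assigned to the snapshot slot):

     fd_sched_block_add_done( sched, snapshot_bank_idx, ULONG_MAX );
*/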

/* Advance the root, pruning all blocks across forks that do not descend
   from the new root. Assumes the new root is in the fork tree and
   connected to the current root. Also assumes that there are no more
   in-flight transactions from the soon-to-be-pruned blocks. This
   should be called after fd_sched_root_notify() and the caller is
   responsible for figuring out the new root to safely prune to. */
void
fd_sched_advance_root( fd_sched_t * sched, ulong root_idx );

/* Notify the scheduler of a new root. This has the effect of calling
   fd_sched_block_abandon() on all minority forks that do not descend
   from the new root. Shortly after a call to this function, in-flight
   transactions from these abandoned blocks should retire from the
   execution pipeline, and the new root will be safe for pruning. */
void
fd_sched_root_notify( fd_sched_t * sched, ulong root_idx );
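
/* A typical rooting sequence might look like the following sketch.
   This is illustrative only: in_flight_cnt() is a hypothetical way for
   the caller to track tasks still outstanding in the execution
   pipeline, and the same index is passed to both calls purely for
   simplicity.

     fd_sched_root_notify( sched, new_root_idx );
     while( in_flight_cnt() ) {
       ... drain completions, calling fd_sched_task_done() ...
     }
     fd_sched_advance_root( sched, new_root_idx );
*/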

fd_txn_p_t *
fd_sched_get_txn( fd_sched_t * sched, ulong txn_idx );

fd_hash_t *
fd_sched_get_poh( fd_sched_t * sched, ulong bank_idx );

uint
fd_sched_get_shred_cnt( fd_sched_t * sched, ulong bank_idx );

void *
fd_sched_leave( fd_sched_t * sched );

void *
fd_sched_delete( void * mem );

FD_PROTOTYPES_END

#endif /* HEADER_fd_src_discof_replay_fd_sched_h */