Line data Source code
1 : #include "fd_rdisp.h"
2 : #include <math.h> /* for the EMA */
3 :
4 : /* The conflict graph that this file builds is not a general DAG, but
5 : the union of several special account-conflict graphs. Each
6 : account-conflict graph has special structure:
7 :              ---> 3 --
8 :            /           \
9 :           /             v
10 :  1 ---> 2 -----> 4 --> 6 ----> 7
11 :          \             ^
12 :           \           /
13 :            ----> 5 --
14 :
15 : That is, the graph is almost a line, but may have fan-out and fan-in
16 : regions. The tricky part about representing a graph like this
17 : without dynamic memory allocation is that nodes may have arbitrary
18 : in-degree and out-degree. Thus, we use a pretty standard trick and
19 : use sibling pointers, denoted below with dotted lines. Each node
20 : maintains at most one successor pointer and at most one sibling
21 : pointer. Although not shown below, the sibling pointers are
22 : circularly linked, so 5's sibling is 3. Additionally, though not
23 : shown below, the child of the last node (7) stores information about
24 : what account address all this is for, which facilitates deleting the
25 : last node in the graph.
26 :
27 :
28 :              --> 3 --
29 :            /     ^   \
30 :           /      :    \
31 :          /       V     v
32 :  1 ---> 2        4 --> 6 ----> 7
33 :                  ^     ^
34 :                  :    /
35 :                  V   /
36 :                  5 --
37 :
38 : The normal edge 2->3 along with the sibling edge 3..>4 implies a
39 : normal edge 2->4. That then transitively implies an edge 2->5.
40 :
41 : We want each node to maintain a count of its in-degree so that we
42 : know when it can be executed. The implied edges also count for the
43 : in-degree. In this example, node 1 has in-degree 0, node 6 has
44 : in-degree 3, and the rest have in-degree 1.
45 :
46 : Maintaining each account-conflict graph is relatively easy given the
47 : operations we want to support. Only the details about sibling edges
48 : are worth mentioning. For example, when deleting node 2, we
49 : decrement the in-degree count for its successor, and then follow the
50 : sibling pointers, decrementing all the in-degree counts as we go to
51 : mark the deletion of the implied edges. Sibling edges also need to
52 : be doubly linked, so that e.g. nodes 3 and 5 can be re-linked in O(1)
53 : if node 4 is deleted. They're also circularly linked for
54 : convenience.
55 :
56 : When building the graph, we maintain a map of account to the last
57 : node that references it, whether that was a read or write, and
58 : whether there are any writers to that account in the graph right now.
59 : If the new node reads from an account that was last read, the new
60 : node becomes a sibling of the last read, with in-degree increased if
61 : there are any writers. Otherwise, it becomes a successor of the node
62 : that last referenced the account. */
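
To make the sibling bookkeeping concrete, here is a minimal, self-contained sketch with illustrative names (toy_node_t, toy_complete; these do not appear in this file, which packs everything into edge_t values instead of pointers) of the successor/sibling representation and of the in-degree decrements performed when a node completes:

   typedef struct toy_node toy_node_t;
   struct toy_node {
     toy_node_t * child;     /* successor in this account's DAG, NULL if last   */
     toy_node_t * sib_next;  /* circular, doubly linked ring of sibling readers */
     toy_node_t * sib_prev;
     unsigned     in_degree; /* counts implied edges too                        */
   };

   /* When node n completes, its successor and every sibling of that
      successor lose one (possibly implied) incoming edge; any node whose
      in_degree reaches 0 becomes eligible for dispatch. */
   static void
   toy_complete( toy_node_t * n ) {
     toy_node_t * c = n->child;
     if( !c ) return;
     c->in_degree--;
     for( toy_node_t * s=c->sib_next; s!=c; s=s->sib_next ) s->in_degree--;
   }

In the first graph above, completing node 2 drops the in-degrees of nodes 3, 4, and 5 from 1 to 0, making all three dispatchable at once.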
63 :
64 : /* For a task like this with lots of graph traversal and pointer
65 : chasing, performance is typically limited by memory latency. That
66 : means that the more that can fit in cache, the better the
67 : performance. This implementation uses a lot of bit-packing to
68 : improve cache footprint. */
69 :
70 : /* The following structs are all very local to this compilation unit,
71 : and so they don't have globally namespaced type names (e.g.
72 : fd_rdisp_edge_t). */
73 :
74 : /* Everything is set up to allow this to be 128, but we can save the
75 : space until it's necessary. */
76 777003 : #define MAX_ACCT_PER_TXN 64UL
77 :
78 : /* edge_t: Fields typed edge_t represent an edge in one of the parallel
79 : account-conflict DAGs. Each transaction stores a list of all its
80 : outgoing edges. The type is actually a union of bitfields, but C
81 : bitfields are gross, so we just do it manually with macros. If the
82 : high bit is set, that means the transaction storing this edge_t value
83 : is the last in this specific DAG, and the lower 31 bits of the
84 : value are an index in the map_pool for the account pubkey for this
85 : DAG. See the comments about the hidden edge outgoing from node 7 in
86 : the DAG at the top of this file for an example.
87 : If the high bit is not set, then the next 23 bits store the
88 : destination of the edge, represented by its index in the pool of
89 : transactions. Then the lowest 8 bits store the account index within
90 : that transaction of the edge that is part of the same
91 : account-specific DAG as this edge. Because the max depth is 2^23-1,
92 : and each transaction can reference 128 accounts, the maximum number
93 : of account references fits in 30 bits.
94 : The proper type would be something like
95 : typedef union {
96 : struct {
97 : uint is_last:1;
98 : uint map_pool_idx:31;
99 : } last;
100 : struct {
101 : uint is_last:1;
102 : uint txn_idx:23;
103 : uint acct_idx:8;
104 : } e;
105 : } edge_t;
106 :
107 : */
108 : typedef uint edge_t;
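
As a sketch only (these helpers do not exist in the file, which manipulates the bits inline, e.g. in FOLLOW_EDGE and add_edges below), the packing described above amounts to:

   /* Illustrative encode/decode helpers for the edge_t layout.  Assumed
      preconditions: map_pool_idx < 2^31, txn_idx < 2^23, acct_idx < 2^8. */
   static inline edge_t edge_make_last( uint map_pool_idx )           { return 0x80000000U | map_pool_idx; }
   static inline edge_t edge_make     ( uint txn_idx, uint acct_idx ) { return (txn_idx<<8) | acct_idx;    }
   static inline uint   edge_txn_idx  ( edge_t e ) { return (e>>8) & 0x7FFFFFU;   }
   static inline uint   edge_acct_idx ( edge_t e ) { return  e     & 0xFFU;       }
   static inline uint   edge_map_idx  ( edge_t e ) { return  e     & 0x7FFFFFFFU; } /* only meaningful if the high bit is set */

The real code builds these values inline, e.g. (1U<<31) | (uint)idx for a last edge and ((ele - disp->pool)<<8) | acct_idx for a normal edge in add_edges.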
109 :
110 :
111 : /* fd_rdisp_txn is the representation of a transaction as a node in the
112 : DAG. */
113 : struct fd_rdisp_txn {
114 : /* in_degree: The total number of edges summed across all account DAGs
115 : with this node as their destination. In the worst case, all the
116 : other transactions in the pool read from each of the max number of
117 : accounts that this transaction writes to, so there are
118 : MAX_ACCT_PER_TXN*depth edges that come into this node, which fits in
119 : about 30 bits, so we have some room for special values. If
120 : in_degree is one of the following values, then
121 : the transaction is: */
122 234453531 : #define IN_DEGREE_FREE (UINT_MAX )
123 57500892 : #define IN_DEGREE_UNSTAGED (UINT_MAX-1U)/* unstaged, not dispatched */
124 1166490 : #define IN_DEGREE_DISPATCHED (UINT_MAX-2U)/* staged, dispatched */
125 57500874 : #define IN_DEGREE_UNSTAGED_DISPATCHED (UINT_MAX-3U)/* unstaged, dispatched */
126 57500859 : #define IN_DEGREE_ZOMBIE (UINT_MAX-4U)/* zombie */
127 : /* a transaction that is staged and dispatched must have an
128 : in_degree of 0. in_degree isn't a meaningful concept for unstaged
129 : transactions. */
130 : uint in_degree;
131 :
132 : /* score: integer part stores how many transactions in the block must
133 : have completed before this transaction can be scheduled. This is
134 : useful for transactions marked as serializing. The fractional part
135 : gives some measure of how urgent the transaction is, where lower is
136 : more urgent. This means we can't have more transactions in a block
137 : marked as serializing than the first integer that a float cannot
138 : represent. That value is about 16M, which is much higher than the
139 : maximum number of transactions in a block, so this is not a
140 : problem. In the very rare case that there are more than just a
141 : few serializing transactions, we will lose a bit of precision for
142 : the fractional portion, which makes total sense; if there's not
143 : much room for parallel execution, having the optimal parallel
144 : execution is not very important. */
145 : float score;
146 :
147 : /* edge_cnt_etc:
148 : 0xFFFF0000 (16 bits) for linear block number,
149 : 0x0000C000 (2 bits) for concurrency lane,
150 : 0x00003F80 (7 bits) for r_cnt
151 : 0x0000007F (7 bits) for w_cnt */
152 : union {
153 : uint edge_cnt_etc;
154 : /* edge_cnt_etc is only used when the transaction is STAGED and
155 : PENDING, READY, or DISPATCHED. If UNSTAGED or FREE, the next
156 : pointer is also here. It can't be UNSTAGED and FREE at the same
157 : time, so there's no conflict with storage there either. */
158 : uint unstaged_next;
159 : uint free_next;
160 : /* If ZOMBIE, pointer back to block is here. No storage conflict
161 : because ZOMBIE is an exclusive state. */
162 : uint block_idx;
163 : };
164 :
165 :
166 : /* When a transaction writes to an account, it only creates one
167 : link. When a transaction reads from an account, we need the full
168 : doubly linked list with its siblings, so it creates 3 edges
169 : (child, next, prev). All edges from writable accounts come first,
170 : and we keep track of how many there are. In the worst case, all
171 : accounts are reads, so we size it appropriately. */
172 : edge_t edges[3UL*MAX_ACCT_PER_TXN]; /* addressed [0, w_cnt+3*r_cnt) */
173 : };
174 : typedef struct fd_rdisp_txn fd_rdisp_txn_t;
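
For reference, a sketch (illustrative helpers only; the file reads these fields with inline masks, as in FOLLOW_EDGE and fd_rdisp_complete_txn below) of how the edge_cnt_etc packing decodes:

   static inline uint txn_w_cnt   ( fd_rdisp_txn_t const * t ) { return  t->edge_cnt_etc      & 0x7FU; }
   static inline uint txn_r_cnt   ( fd_rdisp_txn_t const * t ) { return (t->edge_cnt_etc>> 7) & 0x7FU; }
   static inline uint txn_lane    ( fd_rdisp_txn_t const * t ) { return (t->edge_cnt_etc>>14) & 0x3U;  }
   static inline uint txn_lblk_num( fd_rdisp_txn_t const * t ) { return  t->edge_cnt_etc>>16;          }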
175 :
176 : #define EDGE_IS_LAST(x) ((x)&0x80000000U)
177 :
178 : /* Two more definitions:
179 : An edge index is an array position within the edges array. An
180 : account index is a position within a transaction's account addresses,
181 : reordered so that the writable ones come first. Since writable
182 : accounts use one position in the edges array per account address,
183 : these often coincide. */
184 :
185 :
186 : /* FOLLOW_EDGE and FOLLOW_EDGE_TXN are helper macros for dealing with
187 : edges. Given an edge_t x and a transaction pool base, FOLLOW_EDGE_TXN
188 : returns a pointer to the transaction that the edge points to; FOLLOW_EDGE
189 : returns a pointer to the (first) edge_t within that transaction that
190 : is part of the same DAG as this edge. FOLLOW_EDGE and
191 : FOLLOW_EDGE_TXN must not be called if EDGE_IS_LAST is non-zero.
192 :
193 : Then the edge index, i.e. the position in the edges array of the
194 : (first) edge for an account index is:
195 : acct_idx if acct_idx<w_cnt
196 : w_cnt + 3*(acct_idx-w_cnt) else.
197 : Simplifying gives
198 : acct_idx + 2*signed_max( 0, acct_idx-w_cnt ).
199 : In doing this calculation, we also basically get for free whether the
200 : edge is for a writable account or a readonly account, so we return
201 : that as well via the w parameter. If the child transaction only reads
202 : the account address for this DAG, then the next and prev pointers can
203 : be accessed using the returned value +1 and +2, respectively. */
204 176064747 : #define FOLLOW_EDGE(base, x, w) (__extension__({ \
205 176064747 : uint __e = (x); \
206 176064747 : fd_rdisp_txn_t * __txn = ((base)+(__e>>8)); \
207 176064747 : uint __wcnt = __txn->edge_cnt_etc & 0x7FU; \
208 176064747 : uint __idx = (__e & 0xFFU); \
209 176064747 : (w) = __idx<__wcnt; \
210 176064747 : (void)(w); /* not robust... */ \
211 176064747 : __txn->edges + __idx + 2*fd_int_max( 0, (int)(__idx)-(int)(__wcnt) ); }))
212 153735396 : #define FOLLOW_EDGE_TXN(base, x) ( (base)+((x)>>8) )
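
A worked example of the index formula (illustrative only; the real computation happens inline inside FOLLOW_EDGE):

   /* Standalone copy of the edge-index formula from the comment above. */
   static inline ulong
   edge_index_for_acct( ulong acct_idx, ulong w_cnt ) {
     return acct_idx + 2UL*(ulong)fd_int_max( 0, (int)acct_idx-(int)w_cnt );
   }
   /* With w_cnt==3: acct_idx 0,1,2 map to edge indices 0,1,2 (one slot per
      writable account); acct_idx 3 maps to 3 (child at 3, next at 4, prev
      at 5); acct_idx 4 maps to 6; acct_idx 5 maps to 9. */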
213 :
214 : /* The pool and slist are almost the same, but they are used
215 : differently, so keep them as different structures for now. */
216 :
217 : #define POOL_NAME pool
218 27 : #define POOL_T fd_rdisp_txn_t
219 : #define POOL_IDX_T uint
220 2530839 : #define POOL_NEXT free_next
221 : #define POOL_SENTINEL 1
222 : #include "../../util/tmpl/fd_pool.c"
223 :
224 : #define SLIST_NAME unstaged_txn_ll
225 : #define SLIST_ELE_T fd_rdisp_txn_t
226 18 : #define SLIST_IDX_T uint
227 72 : #define SLIST_NEXT unstaged_next
228 : #include "../../util/tmpl/fd_slist.c"
229 :
230 : #define DLIST_IDX_T uint
231 6 : #define DLIST_PREV edges[0]
232 6 : #define DLIST_NEXT edges[1]
233 : #define DLIST_NAME zombie_dlist
234 : #define DLIST_ELE_T fd_rdisp_txn_t
235 : #include "../../util/tmpl/fd_dlist.c"
236 :
237 :
238 : /* ACCT_INFO_FLAG: It's a bit unfortunate that we have to maintain these
239 : flags, but basically we need to be able to distinguish the case where
240 : there are only readers so that we don't increment in_degree when
241 : adding a new fd_rdisp_txn. If we have any writers, the only way to
242 : transition into a state where there are only readers is to complete
243 : the last writer. We know we are in this case when the completed
244 : node's child doesn't have a child, and the completed node's child is
245 : a reader, as indicated by the LAST_REF_WAS_WRITE bit.
246 : LAST_REF_WAS_WRITE also has the advantage of being easy to
247 : maintain. */
248 18094905 : #define ACCT_INFO_FLAG_LAST_REF_WAS_WRITE(lane) (((uchar)1)<<(2*(lane)))
249 17671827 : #define ACCT_INFO_FLAG_ANY_WRITERS( lane) (((uchar)2)<<(2*(lane)))
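
A quick worked example of the per-lane packing (added for illustration; the values follow directly from the macros above):

   /* Lane 0 uses flag bits 0-1, lane 1 bits 2-3, lane 2 bits 4-5, lane 3
      bits 6-7, so e.g. ACCT_INFO_FLAG_LAST_REF_WAS_WRITE(2)==0x10 and
      ACCT_INFO_FLAG_ANY_WRITERS(2)==0x20.  All four lanes fit in the 8-bit
      flags field of acct_info_t below. */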
250 :
251 : /* acct_info_t is a node in a map_chain that contains the metadata for a
252 : single account address's conflict graph DAG. In particular, it
253 : contains the information needed to know where to insert a node that
254 : reads from or writes to the account. The objects of this type follow
255 : this state machine:
256 :
257 : FREE -----> ACTIVE ----> CACHED
258 :                 ^            |
259 :                 |-------------
260 : When FREE, it is in free_acct_dlist only. When ACTIVE, it is in
261 : acct_map only. When CACHED, it is in both free_acct_dlist and
262 : cached_acct_map.
263 : */
264 : struct acct_info {
265 : /* key, next, and prev are the map_chain fields. Used in the ACTIVE
266 : and CACHED states. next and prev set to 0 when in the FREE state.
267 : Element 0 is a sentinel and isn't inserted into the free_acct_dlist,
268 : so this is unambiguous. */
269 : fd_acct_addr_t key;
270 : uint next;
271 : uint prev;
272 :
273 : union {
274 : struct {
275 : /* This is effectively a pointer to the last node in the DAG for
276 : this pubkey, one for each staging lane.
277 : EDGE_IS_LAST( *FOLLOW_EDGE( base, last_reference[i], w ) ) is non-zero.
278 : */
279 : edge_t last_reference[4];
280 :
281 : }; /* When in the ACTIVE state */
282 : struct {
283 : uint free_ll_next;
284 : uint free_ll_prev;
285 : /* 8 bytes of padding here */
286 : }; /* When not in the ACTIVE state, used by the free_acct_dlist */
287 : };
288 : /* flags: a combination of ACCT_INFO_FLAG_* bitfields above. Used
289 : when ACTIVE. */
290 : uint flags:8;
291 :
292 :
293 : /* We want to dispatch the READY transactions in an order that
294 : maximizes parallelism, but we also want to be able to start
295 : dispatching transactions decently well before we have the full
296 : conflict graph. We can do that because we know that contentious
297 : accounts tend to stay contentious and uncontentious accounts tend
298 : to stay uncontentious.
299 :
300 : To accomplish this, we maintain a special EMA. Let x_i be 1 if
301 : transaction i references it and 0 if not. If we squint and assume
302 : the transactions are independent and all drawn from some
303 : distribution (which is not true, but that's why we squint), an EMA
304 : of x_i estimates the probability that the next transaction
305 : references this account. How we use this value is detailed later.
306 :
307 : We can't update this value for every pubkey for each transaction,
308 : so we maintain it in a lazy way, by applying updates only when we
309 : need to read the value, which also happens to be every time we want
310 : to add a 1 value to the EMA. We then just need to maintain the
311 : last index i at which x_i was updated and the current value. We
312 : only have 24 bits for the index, which means that we can't maintain
313 : it in a fork-aware way, which doesn't seem like a problem. Also,
314 : it's possible it can overflow, and that can result in an incorrect
315 : value, but that means the account is only referenced roughly once
316 : every 2^24 transactions, which is also fine.
317 :
318 : last_ref is in the domain of global_insert_cnt. ema_refs is
319 : in [0, 1]. Both fields are used in ACTIVE and CACHED. Maintaining
320 : these fields is actually the main reason CACHED exists. */
321 : uint last_ref:24;
322 : float ema_refs;
323 : };
324 : typedef struct acct_info acct_info_t;
325 :
326 : FD_STATIC_ASSERT( sizeof(acct_info_t)==64UL, acct_info_t );
327 :
328 : /* For the acct_map and the free_acct_map */
329 : #define MAP_NAME acct_map
330 3726312 : #define MAP_ELE_T acct_info_t
331 375691212 : #define MAP_IDX_T uint
332 : #define MAP_KEY_T fd_acct_addr_t
333 714941709 : #define MAP_KEY_HASH(k,s) fd_hash( (s), (k)->b, 32UL )
334 360889470 : #define MAP_KEY_EQ(k0,k1) (!memcmp( (k0)->b, (k1)->b, 32UL ))
335 : #define MAP_OPTIMIZE_RANDOM_ACCESS_REMOVAL 1
336 : #include "../../util/tmpl/fd_map_chain.c"
337 :
338 :
339 : #define DLIST_IDX_T uint
340 16406382 : #define DLIST_PREV free_ll_prev
341 16406382 : #define DLIST_NEXT free_ll_next
342 : #define DLIST_NAME free_dlist
343 : #define DLIST_ELE_T acct_info_t
344 : #include "../../util/tmpl/fd_dlist.c"
345 :
346 :
347 : struct pending_prq_ele {
348 : /* lower score means should be scheduled sooner. Integer part has a
349 : special meaning as explained above. */
350 : float score;
351 :
352 : uint linear_block_number;
353 : uint txn_idx;
354 :
355 : /* It seems like we should be able to get this whole struct in 8
356 : bytes, but we're a few bits over.
357 :
358 : 23 bits for txn_idx
359 : 16 bits for linear_block_number
360 : Then the integer part of the score needs to be at least 18 or 19
361 : bits, but we can't use a custom floating point number. */
362 : };
363 :
364 : typedef struct pending_prq_ele pending_prq_ele_t;
365 :
366 :
367 : #define PRQ_NAME pending_prq
368 15249270 : #define PRQ_T pending_prq_ele_t
369 : #define PRQ_EXPLICIT_TIMEOUT 0
370 : /* returns 1 if x is strictly after y */
371 12665712 : #define PRQ_AFTER(x,y) (__extension__( { \
372 12665712 : int cmp0 = (int)(x).linear_block_number - (int)(y).linear_block_number; \
373 12665712 : fd_int_if( cmp0!=0, cmp0>0, (x).score>(y).score ); \
374 12665712 : }))
375 : #include "../../util/tmpl/fd_prq.c"
376 :
377 : /* fd_rdisp_blockinfo_t maintains a little metadata about transactions for each
378 : slot. */
379 : struct fd_rdisp_blockinfo {
380 : FD_RDISP_BLOCK_TAG_T block;
381 : uint linear_block_number;
382 :
383 : uint insert_ready:1;
384 : uint schedule_ready:1;
385 : uint staged:1;
386 : uint staging_lane:2; /* ignored if staged==0 */
387 : uint last_insert_was_serializing:1;
388 :
389 : uint inserted_cnt;
390 : uint dispatched_cnt;
391 : uint completed_cnt;
392 : uint last_serializing;
393 :
394 : uint map_chain_next;
395 : uint ll_next;
396 : unstaged_txn_ll_t ll[ 1 ]; /* used only when unstaged */
397 : zombie_dlist_t zombie_list[ 1 ];
398 : };
399 : typedef struct fd_rdisp_blockinfo fd_rdisp_blockinfo_t;
400 :
401 : #define POOL_NAME block_pool
402 27 : #define POOL_T fd_rdisp_blockinfo_t
403 : #define POOL_IDX_T uint
404 1007172 : #define POOL_NEXT ll_next
405 : #define POOL_SENTINEL 1
406 : #include "../../util/tmpl/fd_pool.c"
407 :
408 : #define MAP_NAME block_map
409 : #define MAP_ELE_T fd_rdisp_blockinfo_t
410 : #define MAP_KEY_T FD_RDISP_BLOCK_TAG_T
411 405246 : #define MAP_KEY block
412 10124793 : #define MAP_NEXT map_chain_next
413 12261066 : #define MAP_IDX_T uint
414 : #include "../../util/tmpl/fd_map_chain.c"
415 :
416 : #define SLIST_NAME block_slist
417 : #define SLIST_ELE_T fd_rdisp_blockinfo_t
418 405246 : #define SLIST_IDX_T uint
419 810495 : #define SLIST_NEXT ll_next
420 : #include "../../util/tmpl/fd_slist.c"
421 :
422 : struct fd_rdisp_unstaged {
423 : FD_RDISP_BLOCK_TAG_T block;
424 :
425 : uint writable_cnt;
426 : uint readonly_cnt;
427 : fd_acct_addr_t keys[MAX_ACCT_PER_TXN];
428 : };
429 : typedef struct fd_rdisp_unstaged fd_rdisp_unstaged_t;
430 :
431 : typedef struct {
432 : pending_prq_ele_t * pending;
433 : ulong linear_block_number;
434 : block_slist_t block_ll[1];
435 : ulong inserted_cnt;
436 : ulong dispatched_cnt;
437 : ulong completed_cnt;
438 : } per_lane_info_t;
439 :
440 : /* We maintain two maps from pubkeys to acct_info_t. The first one is
441 : the main acct_map, just called acct_map. All pubkeys in this map
442 : have >0 references in one of the staging lane DAGs. When an
443 : account goes to 0 references, it gets removed from the main map_chain
444 : and moved to the free map_chain, called free_acct_map. The free_acct_map
445 : exists to maintain the reference count EMA information lazily.
446 : Unless we need the acct_info_t for something in the DAG, we might as
447 : well maintain the EMA info.
448 :
449 : When we start up, all the acct_info_t structs are in the
450 : free_acct_dlist. Whenever something is added to the free_acct_map,
451 : it's also added to the tail of the free_acct_dlist. When we need an
452 : acct_info_t that's not in the free_acct_map, we pop the head of the
453 : free_acct_dlist. In general, the free_acct_dlist contains everything
454 : in the free_acct_map, potentially plus some elements that have never
455 : been used; all acct_info_t objects are in exactly one of the main
456 : acct_map and the free_acct_dlist (not free_acct_map). See
457 : acct_info_t for more information about this. */
458 :
459 : struct fd_rdisp {
460 : ulong depth;
461 : ulong block_depth;
462 :
463 : ulong global_insert_cnt;
464 : ulong unstaged_lblk_num;
465 :
466 : /* pool: an fd_pool, indexed [0, depth+1), with 0 being a sentinel */
467 : fd_rdisp_txn_t * pool;
468 : fd_rdisp_unstaged_t * unstaged; /* parallel to pool with additional info */
469 :
470 : block_map_t * blockmap; /* map chain */
471 : fd_rdisp_blockinfo_t * block_pool;
472 :
473 : int free_lanes; /* a bitmask */
474 : per_lane_info_t lanes[4];
475 :
476 : acct_map_t * acct_map;
477 : acct_map_t * free_acct_map;
478 : /* acct_pool is not an fd_pool, but is just a flat array, since we
479 : don't need to acquire and release from it because of the dlist. */
480 : acct_info_t * acct_pool;
481 : free_dlist_t free_acct_dlist[1];
482 : };
483 :
484 : typedef struct fd_rdisp fd_rdisp_t;
485 :
486 :
487 : #define ACCT_ITER_TO_PTR( iter ) (__extension__( { \
488 : ulong __idx = fd_txn_acct_iter_idx( iter ); \
489 : fd_ptr_if( __idx<fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_IMM ), accts, alt_adj )+__idx; \
490 : }))
491 :
492 :
493 117 : ulong fd_rdisp_align( void ) { return 128UL; }
494 :
495 : ulong
496 : fd_rdisp_footprint( ulong depth,
497 9 : ulong block_depth ) {
498 9 : if( FD_UNLIKELY( (depth>FD_RDISP_MAX_DEPTH) | (depth<2UL) |
499 9 : (block_depth>FD_RDISP_MAX_BLOCK_DEPTH) | (block_depth<4UL) ) ) return 0UL;
500 :
501 9 : ulong chain_cnt = block_map_chain_cnt_est( block_depth );
502 9 : ulong acct_depth = depth*MAX_ACCT_PER_TXN;
503 9 : ulong acct_chain_cnt = acct_map_chain_cnt_est( acct_depth );
504 :
505 9 : ulong l = FD_LAYOUT_INIT;
506 9 : l = FD_LAYOUT_APPEND( l, fd_rdisp_align(), sizeof(fd_rdisp_t) );
507 9 : l = FD_LAYOUT_APPEND( l, pool_align(), pool_footprint ( depth+1UL ) ); /* pool */
508 9 : l = FD_LAYOUT_APPEND( l, alignof(fd_rdisp_unstaged_t), sizeof(fd_rdisp_unstaged_t)*( depth+1UL ) ); /* unstaged */
509 9 : l = FD_LAYOUT_APPEND( l, block_map_align(), block_map_footprint ( chain_cnt ) ); /* blockmap */
510 9 : l = FD_LAYOUT_APPEND( l, block_pool_align(), block_pool_footprint ( block_depth+1UL ) ); /* block_pool */
511 9 : l = FD_LAYOUT_APPEND( l, pending_prq_align(), 4UL*pending_prq_footprint ( depth ) ); /* pending */
512 9 : l = FD_LAYOUT_APPEND( l, acct_map_align(), acct_map_footprint ( acct_chain_cnt ) ); /* acct_map */
513 9 : l = FD_LAYOUT_APPEND( l, acct_map_align(), acct_map_footprint ( acct_chain_cnt ) ); /* free_acct_map */
514 9 : l = FD_LAYOUT_APPEND( l, alignof(acct_info_t), (acct_depth+1UL)*sizeof(acct_info_t) ); /* acct_pool */
515 9 : return FD_LAYOUT_FINI( l, fd_rdisp_align() );
516 9 : }
517 :
518 : void *
519 : fd_rdisp_new( void * mem,
520 : ulong depth,
521 : ulong block_depth,
522 9 : ulong seed ) {
523 9 : if( FD_UNLIKELY( (depth>FD_RDISP_MAX_DEPTH) | (depth<2UL) |
524 9 : (block_depth>FD_RDISP_MAX_BLOCK_DEPTH) | (block_depth<4UL) ) ) return 0UL;
525 :
526 9 : ulong chain_cnt = block_map_chain_cnt_est( block_depth );
527 9 : ulong acct_depth = depth*MAX_ACCT_PER_TXN;
528 9 : ulong acct_chain_cnt = acct_map_chain_cnt_est( acct_depth );
529 :
530 9 : FD_SCRATCH_ALLOC_INIT( l, mem );
531 9 : fd_rdisp_t * disp = FD_SCRATCH_ALLOC_APPEND( l, fd_rdisp_align(), sizeof(fd_rdisp_t) );
532 9 : void * _pool = FD_SCRATCH_ALLOC_APPEND( l, pool_align(), pool_footprint ( depth+1UL ) );
533 9 : void * _unstaged = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_rdisp_unstaged_t), sizeof(fd_rdisp_unstaged_t)*( depth+1UL ) );
534 9 : void * _bmap = FD_SCRATCH_ALLOC_APPEND( l, block_map_align(), block_map_footprint ( chain_cnt ) );
535 9 : void * _bpool = FD_SCRATCH_ALLOC_APPEND( l, block_pool_align(), block_pool_footprint ( block_depth+1UL ) );
536 9 : uchar * _pending = FD_SCRATCH_ALLOC_APPEND( l, pending_prq_align(), 4UL*pending_prq_footprint ( depth ) );
537 9 : void * _acct_map = FD_SCRATCH_ALLOC_APPEND( l, acct_map_align(), acct_map_footprint ( acct_chain_cnt ) );
538 9 : void * _freea_map = FD_SCRATCH_ALLOC_APPEND( l, acct_map_align(), acct_map_footprint ( acct_chain_cnt ) );
539 9 : acct_info_t * apool = FD_SCRATCH_ALLOC_APPEND( l, alignof(acct_info_t), (acct_depth+1UL)*sizeof(acct_info_t) );
540 9 : FD_SCRATCH_ALLOC_FINI( l, fd_rdisp_align() );
541 :
542 9 : disp->depth = depth;
543 9 : disp->block_depth = block_depth;
544 9 : disp->global_insert_cnt = 0UL;
545 9 : disp->unstaged_lblk_num = 0UL;
546 :
547 9 : fd_rdisp_txn_t * temp_pool_join = pool_join( pool_new( _pool, depth+1UL ) );
548 197823 : for( ulong i=0UL; i<depth+1UL; i++ ) temp_pool_join[ i ].in_degree = IN_DEGREE_FREE;
549 9 : pool_leave( temp_pool_join );
550 :
551 9 : memset( _unstaged, '\0', sizeof(fd_rdisp_unstaged_t)*(depth+1UL) );
552 :
553 9 : block_map_new ( _bmap, chain_cnt, seed );
554 9 : block_pool_new( _bpool, block_depth+1UL );
555 :
556 9 : fd_rdisp_blockinfo_t * bpool_temp_join = block_pool_join( _bpool );
557 196683 : for( ulong i=0UL; i<block_depth+1UL; i++ ) zombie_dlist_new( bpool_temp_join[ i ].zombie_list );
558 9 : block_pool_leave( bpool_temp_join );
559 :
560 9 : disp->free_lanes = 0xF;
561 45 : for( ulong i=0UL; i<4UL; i++ ) {
562 36 : pending_prq_new( _pending, depth );
563 36 : _pending += pending_prq_footprint( depth );
564 :
565 36 : disp->lanes[i].linear_block_number = 0UL;
566 36 : disp->lanes[i].inserted_cnt = 0U;
567 36 : disp->lanes[i].dispatched_cnt = 0U;
568 36 : disp->lanes[i].completed_cnt = 0U;
569 36 : }
570 :
571 9 : acct_map_new( _acct_map, acct_chain_cnt, fd_ulong_hash( seed+1UL ) );
572 9 : acct_map_new( _freea_map, acct_chain_cnt, fd_ulong_hash( seed+2UL ) );
573 :
574 9 : free_dlist_t * temp_join = free_dlist_join( free_dlist_new( disp->free_acct_dlist ) );
575 12659529 : for( ulong i=1UL; i<acct_depth+1UL; i++ ) {
576 12659520 : apool[ i ].next = apool[ i ].prev = 0U;
577 12659520 : free_dlist_idx_push_tail( disp->free_acct_dlist, i, apool );
578 12659520 : }
579 9 : free_dlist_leave( temp_join );
580 :
581 9 : return disp;
582 9 : }
583 :
584 : fd_rdisp_t *
585 9 : fd_rdisp_join( void * mem ) {
586 9 : fd_rdisp_t * disp = (fd_rdisp_t *)mem;
587 :
588 9 : ulong depth = disp->depth;
589 9 : ulong block_depth = disp->block_depth;
590 9 : ulong chain_cnt = block_map_chain_cnt_est( block_depth );
591 9 : ulong acct_depth = depth*MAX_ACCT_PER_TXN;
592 9 : ulong acct_chain_cnt = acct_map_chain_cnt_est( acct_depth );
593 :
594 9 : FD_SCRATCH_ALLOC_INIT( l, mem );
595 9 : /* */ FD_SCRATCH_ALLOC_APPEND( l, fd_rdisp_align(), sizeof(fd_rdisp_t) );
596 9 : void * _pool = FD_SCRATCH_ALLOC_APPEND( l, pool_align(), pool_footprint ( depth+1UL ) );
597 9 : void * _unstaged = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_rdisp_unstaged_t), sizeof(fd_rdisp_unstaged_t)*( depth+1UL ) );
598 9 : void * _bmap = FD_SCRATCH_ALLOC_APPEND( l, block_map_align(), block_map_footprint ( chain_cnt ) );
599 9 : void * _bpool = FD_SCRATCH_ALLOC_APPEND( l, block_pool_align(), block_pool_footprint ( block_depth+1UL ) );
600 9 : uchar * _pending = FD_SCRATCH_ALLOC_APPEND( l, pending_prq_align(), 4UL*pending_prq_footprint ( depth ) );
601 9 : void * _acct_map = FD_SCRATCH_ALLOC_APPEND( l, acct_map_align(), acct_map_footprint ( acct_chain_cnt ) );
602 9 : void * _freea_map = FD_SCRATCH_ALLOC_APPEND( l, acct_map_align(), acct_map_footprint ( acct_chain_cnt ) );
603 9 : acct_info_t * apool = FD_SCRATCH_ALLOC_APPEND( l, alignof(acct_info_t), (acct_depth+1UL)*sizeof(acct_info_t) );
604 9 : FD_SCRATCH_ALLOC_FINI( l, fd_rdisp_align() );
605 :
606 9 : disp->pool = pool_join( _pool );
607 9 : disp->unstaged = (fd_rdisp_unstaged_t *)_unstaged;
608 9 : disp->blockmap = block_map_join( _bmap );
609 9 : disp->block_pool = block_pool_join( _bpool );
610 :
611 196683 : for( ulong i=0UL; i<block_depth+1UL; i++ ) zombie_dlist_join( disp->block_pool[ i ].zombie_list );
612 :
613 45 : for( ulong i=0UL; i<4UL; i++ ) {
614 36 : disp->lanes[i].pending = pending_prq_join( _pending );
615 36 : _pending += pending_prq_footprint( depth );
616 36 : }
617 :
618 9 : disp->acct_map = acct_map_join( _acct_map );
619 9 : disp->free_acct_map = acct_map_join( _freea_map );
620 9 : disp->acct_pool = apool;
621 9 : free_dlist_join( disp->free_acct_dlist );
622 :
623 9 : return disp;
624 9 : }
625 :
626 : static inline void
627 : free_lane( fd_rdisp_t * disp,
628 12027 : ulong staging_lane ) {
629 12027 : disp->free_lanes |= 1<<staging_lane;
630 12027 : per_lane_info_t * l = disp->lanes+staging_lane;
631 12027 : FD_TEST( pending_prq_cnt( l->pending )==0UL );
632 12027 : l->linear_block_number = 0UL;
633 12027 : l->inserted_cnt = 0UL;
634 12027 : l->dispatched_cnt = 0UL;
635 12027 : l->completed_cnt = 0UL;
636 12027 : block_slist_delete( block_slist_leave( l->block_ll ) );
637 12027 : }
638 :
639 : static inline void
640 : alloc_lane( fd_rdisp_t * disp,
641 12030 : ulong staging_lane ) {
642 12030 : disp->free_lanes &= ~(1<<staging_lane);
643 12030 : block_slist_join( block_slist_new( disp->lanes[staging_lane].block_ll ) );
644 12030 : }
645 :
646 : ulong
647 : fd_rdisp_suggest_staging_lane( fd_rdisp_t const * disp,
648 : FD_RDISP_BLOCK_TAG_T parent_block,
649 0 : int duplicate ) {
650 :
651 : /* 1. If it's a duplicate, suggest FD_RDISP_UNSTAGED */
652 0 : if( FD_UNLIKELY( duplicate ) ) return FD_RDISP_UNSTAGED;
653 :
654 : /* 2. If parent is the last block in any existing staging lane, suggest
655 : that lane */
656 0 : fd_rdisp_blockinfo_t const * block_pool = disp->block_pool;
657 0 : fd_rdisp_blockinfo_t const * block = block_map_ele_query_const( disp->blockmap, &parent_block, NULL, block_pool );
658 0 : if( FD_LIKELY( block && block->insert_ready && block->staged ) ) return block->staging_lane;
659 :
660 : /* 3. If there is at least one free lane, suggest a free lane */
661 0 : if( FD_LIKELY( disp->free_lanes!=0 ) ) return (ulong)fd_uint_find_lsb( (uint)disp->free_lanes );
662 :
663 : /* 4. Else, suggest FD_RDISP_UNSTAGED */
664 0 : return FD_RDISP_UNSTAGED;
665 0 : }
666 :
667 : int
668 : fd_rdisp_add_block( fd_rdisp_t * disp,
669 : FD_RDISP_BLOCK_TAG_T new_block,
670 405252 : ulong staging_lane ) {
671 405252 : fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
672 :
673 405252 : if( FD_UNLIKELY( !block_pool_free( block_pool ) ) ) return -1;
674 405252 : if( FD_UNLIKELY( ULONG_MAX!=block_map_idx_query_const( disp->blockmap, &new_block, ULONG_MAX, block_pool ) ) ) return -1;
675 405246 : fd_rdisp_blockinfo_t * block = block_pool_ele_acquire( block_pool );
676 405246 : block->block = new_block;
677 405246 : block_map_ele_insert( disp->blockmap, block, block_pool );
678 :
679 405246 : block->insert_ready = 1;
680 405246 : block->staged = staging_lane!=FD_RDISP_UNSTAGED;
681 405246 : block->staging_lane = (uint)(staging_lane & 0x3UL);
682 405246 : block->last_insert_was_serializing = 0U;
683 :
684 405246 : block->inserted_cnt = 0U;
685 405246 : block->dispatched_cnt = 0U;
686 405246 : block->completed_cnt = 0U;
687 405246 : block->last_serializing = 0U;
688 :
689 405246 : if( FD_UNLIKELY( staging_lane==FD_RDISP_UNSTAGED ) ) {
690 3 : block->schedule_ready = 1;
691 3 : block->linear_block_number = (uint)disp->unstaged_lblk_num++;
692 3 : unstaged_txn_ll_join( unstaged_txn_ll_new( block->ll ) );
693 405243 : } else {
694 405243 : block->linear_block_number = (uint)disp->lanes[staging_lane].linear_block_number++;
695 405243 : block->schedule_ready = (uint)(1 & (disp->free_lanes >> staging_lane));
696 :
697 405243 : if( FD_LIKELY( disp->free_lanes & (1<<staging_lane) ) ) alloc_lane( disp, staging_lane );
698 :
699 405243 : block_slist_t * sl = disp->lanes[staging_lane].block_ll;
700 405243 : if( FD_LIKELY( !block_slist_is_empty( sl, block_pool ) ) ) block_slist_ele_peek_tail( sl, block_pool )->insert_ready = 0;
701 405243 : block_slist_ele_push_tail( sl, block, block_pool );
702 405243 : }
703 405246 : return 0;
704 405252 : }
705 :
706 :
707 :
708 : int
709 : fd_rdisp_remove_block( fd_rdisp_t * disp,
710 405243 : FD_RDISP_BLOCK_TAG_T block_tag ) {
711 405243 : fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
712 :
713 405243 : fd_rdisp_blockinfo_t * block = block_map_ele_query( disp->blockmap, &block_tag, NULL, block_pool );
714 405243 : if( FD_UNLIKELY( block==NULL ) ) return -1;
715 :
716 405240 : FD_TEST( block->schedule_ready );
717 405240 : FD_TEST( block->completed_cnt==block->inserted_cnt );
718 405240 : FD_TEST( zombie_dlist_is_empty( block->zombie_list, disp->pool ) );
719 :
720 405240 : if( FD_LIKELY( block->staged ) ) {
721 405240 : ulong staging_lane = (ulong)block->staging_lane;
722 405240 : block_slist_t * sl = disp->lanes[staging_lane].block_ll;
723 :
724 405240 : FD_TEST( block==block_slist_ele_peek_head( sl, block_pool ) );
725 405240 : block_slist_idx_pop_head( sl, block_pool );
726 405240 : if( FD_LIKELY( !block_slist_is_empty( sl, block_pool ) ) ) block_slist_ele_peek_head( sl, block_pool )->schedule_ready = 1;
727 12024 : else free_lane( disp, staging_lane );
728 405240 : } else {
729 0 : unstaged_txn_ll_delete( unstaged_txn_ll_leave( block->ll ) );
730 0 : }
731 405240 : block_pool_idx_release( block_pool, block_map_idx_remove( disp->blockmap, &block_tag, ULONG_MAX, block_pool ) );
732 :
733 405240 : return 0;
734 405240 : }
735 :
736 :
737 : int
738 : fd_rdisp_abandon_block( fd_rdisp_t * disp,
739 6 : FD_RDISP_BLOCK_TAG_T block_tag ) {
740 6 : fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
741 :
742 6 : fd_rdisp_blockinfo_t * block = block_map_ele_query( disp->blockmap, &block_tag, NULL, disp->block_pool );
743 6 : if( FD_UNLIKELY( block==NULL ) ) return -1;
744 :
745 3 : FD_TEST( block->schedule_ready );
746 3 : FD_TEST( block->dispatched_cnt==block->completed_cnt ); /* TODO: remove this when it can call complete properly */
747 12 : while( block->completed_cnt<block->inserted_cnt ) {
748 : /* because there is nothing DISPATCHED, there has to be something
749 : READY */
750 9 : ulong txn = fd_rdisp_get_next_ready( disp, block_tag );
751 9 : FD_TEST( txn );
752 9 : fd_rdisp_complete_txn( disp, txn, 1 );
753 9 : }
754 3 : while( !zombie_dlist_is_empty( block->zombie_list, disp->pool ) ) {
755 0 : fd_rdisp_complete_txn( disp, zombie_dlist_idx_pop_head( block->zombie_list, disp->pool ), 1 );
756 0 : }
757 :
758 3 : if( FD_LIKELY( block->staged ) ) {
759 3 : ulong staging_lane = (ulong)block->staging_lane;
760 3 : block_slist_t * sl = disp->lanes[staging_lane].block_ll;
761 :
762 3 : FD_TEST( block==block_slist_ele_peek_head( sl, block_pool ) );
763 3 : block_slist_idx_pop_head( sl, block_pool );
764 3 : if( FD_LIKELY( !block_slist_is_empty( sl, block_pool ) ) ) block_slist_ele_peek_head( sl, block_pool )->schedule_ready = 1;
765 0 : else free_lane( disp, staging_lane );
766 3 : } else {
767 0 : unstaged_txn_ll_delete( unstaged_txn_ll_leave( block->ll ) );
768 0 : }
769 3 : block_pool_idx_release( disp->block_pool, block_map_idx_remove( disp->blockmap, &block_tag, ULONG_MAX, disp->block_pool ) );
770 :
771 3 : return 0;
772 3 : }
773 :
774 : static void
775 : add_edges( fd_rdisp_t * disp,
776 : fd_rdisp_txn_t * ele,
777 : fd_acct_addr_t const * addr,
778 : ulong addr_cnt,
779 : uint staging_lane,
780 : int writable,
781 : int update_score );
782 : /* updates in_degree, edge_cnt_etc */
783 :
784 : int
785 : fd_rdisp_promote_block( fd_rdisp_t * disp,
786 : FD_RDISP_BLOCK_TAG_T block_tag,
787 6 : ulong staging_lane ) {
788 6 : fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
789 6 : per_lane_info_t * lane = disp->lanes + staging_lane;
790 6 : block_slist_t * sl = lane->block_ll;
791 :
792 6 : fd_rdisp_blockinfo_t * block = block_map_ele_query( disp->blockmap, &block_tag, NULL, block_pool );
793 6 : if( FD_UNLIKELY( block==NULL ) ) return -1;
794 6 : if( FD_UNLIKELY( block->staged ) ) return -1;
795 :
796 6 : block->staged = 1;
797 6 : block->staging_lane = (uint)(staging_lane & 0x3);
798 6 : block->insert_ready = 1;
799 6 : block->schedule_ready = (uint)(1 & (disp->free_lanes >> staging_lane));
800 :
801 6 : if( FD_LIKELY( disp->free_lanes & (1<<staging_lane) ) ) alloc_lane( disp, staging_lane );
802 :
803 6 : if( FD_LIKELY( !block_slist_is_empty( sl, block_pool ) ) ) block_slist_ele_peek_tail( sl, block_pool )->insert_ready = 0;
804 6 : block_slist_ele_push_tail( sl, block, block_pool );
805 6 : uint linear_block_number = (uint)lane->linear_block_number++;
806 6 : block->linear_block_number = linear_block_number;
807 :
808 6 : unstaged_txn_ll_iter_t next;
809 6 : for( unstaged_txn_ll_iter_t iter = unstaged_txn_ll_iter_init( block->ll, disp->pool );
810 24 : !unstaged_txn_ll_iter_done( iter, block->ll, disp->pool );
811 18 : iter = next ) {
812 18 : next = unstaged_txn_ll_iter_next( iter, block->ll, disp->pool );
813 :
814 18 : fd_rdisp_txn_t * ele = unstaged_txn_ll_iter_ele( iter, block->ll, disp->pool );
815 18 : fd_rdisp_unstaged_t * uns = disp->unstaged + unstaged_txn_ll_iter_idx( iter, block->ll, disp->pool );
816 18 : FD_TEST( ele->in_degree==IN_DEGREE_UNSTAGED );
817 :
818 18 : ele->in_degree = 0U;
819 18 : ele->edge_cnt_etc = 0U;
820 :
821 18 : add_edges( disp, ele, uns->keys, uns->writable_cnt, (uint)staging_lane, 1, 0 );
822 18 : add_edges( disp, ele, uns->keys+uns->writable_cnt, uns->readonly_cnt, (uint)staging_lane, 0, 0 );
823 :
824 18 : ele->edge_cnt_etc |= (uint)staging_lane<<14;
825 18 : ele->edge_cnt_etc |= linear_block_number<<16;
826 :
827 18 : if( FD_UNLIKELY( ele->in_degree==0U ) ) {
828 3 : pending_prq_ele_t temp[1] = {{ .score = ele->score, .linear_block_number = linear_block_number, .txn_idx = (uint)(ele-disp->pool)}};
829 3 : pending_prq_insert( lane->pending, temp );
830 3 : }
831 18 : }
832 6 : unstaged_txn_ll_delete( unstaged_txn_ll_leave( block->ll ) );
833 :
834 6 : lane->inserted_cnt += block->inserted_cnt;
835 6 : lane->dispatched_cnt += block->dispatched_cnt;
836 6 : lane->completed_cnt += block->completed_cnt;
837 :
838 6 : return 0;
839 6 : }
840 :
841 : int
842 : fd_rdisp_demote_block( fd_rdisp_t * disp,
843 3 : FD_RDISP_BLOCK_TAG_T block_tag ) {
844 3 : fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
845 :
846 3 : fd_rdisp_blockinfo_t * block = block_map_ele_query( disp->blockmap, &block_tag, NULL, block_pool );
847 3 : if( FD_UNLIKELY( block==NULL ) ) return -1;
848 3 : if( FD_UNLIKELY( !block->staged ) ) return -1;
849 3 : if( FD_UNLIKELY( !block->schedule_ready ) ) return -1;
850 3 : if( FD_UNLIKELY( block->completed_cnt!=block->inserted_cnt ) ) FD_LOG_ERR(( "demote_block called with non-empty block" ));
851 3 : ulong staging_lane = block->staging_lane;
852 3 : block->staged = 0;
853 :
854 3 : per_lane_info_t * lane = disp->lanes + staging_lane;
855 3 : block_slist_t * sl = lane->block_ll;
856 :
857 3 : lane->inserted_cnt -= block->inserted_cnt;
858 3 : lane->dispatched_cnt -= block->dispatched_cnt;
859 3 : lane->completed_cnt -= block->completed_cnt;
860 :
861 3 : block->linear_block_number = (uint)disp->unstaged_lblk_num++;
862 :
863 : /* staged and schedule_ready means it must be the head of the staging lane */
864 3 : FD_TEST( block_slist_ele_peek_head( sl, block_pool )==block );
865 3 : block_slist_idx_pop_head( sl, block_pool );
866 :
867 3 : unstaged_txn_ll_join( unstaged_txn_ll_new( block->ll ) );
868 :
869 3 : if( FD_LIKELY( !block_slist_is_empty( sl, block_pool ) ) ) block_slist_ele_peek_head( sl, block_pool )->schedule_ready = 1;
870 3 : else free_lane( disp, staging_lane );
871 3 : return 0;
872 3 : }
873 :
874 : int
875 : fd_rdisp_rekey_block( fd_rdisp_t * disp,
876 : FD_RDISP_BLOCK_TAG_T new_tag,
877 0 : FD_RDISP_BLOCK_TAG_T old_tag ) {
878 0 : fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
879 :
880 0 : if( FD_UNLIKELY( NULL!= block_map_ele_query_const( disp->blockmap, &new_tag, NULL, block_pool ) ) ) return -1;
881 0 : fd_rdisp_blockinfo_t * block = block_map_ele_query ( disp->blockmap, &old_tag, NULL, block_pool );
882 0 : if( FD_UNLIKELY( NULL== block ) ) return -1;
883 :
884 0 : block->block = new_tag;
885 0 : block_map_ele_insert( disp->blockmap, block, block_pool );
886 0 : return 0;
887 0 : }
888 :
889 :
890 : /* "Registers" a reference to the account in info at transaction
891 : global_insert_cnt. Returns the value of the EMA, which is an
892 : estimate of the probability that the next transaction also references
893 : the account. This value does not matter for correctness, which is
894 : why floating point arithmetic is okay. */
895 : static inline float
896 : update_ema( acct_info_t * info,
897 9186144 : ulong global_insert_cnt ) {
898 : #if FD_RDISP_DISABLE_EMA
899 : (void)info;
900 : (void)global_insert_cnt;
901 : return 0.0f;
902 : #else
903 18372288 : #define ALPHA 0.005f
904 : /* The normal EMA update equation is
905 : e_i = (alpha) * x_i + (1-alpha)*e_{i-1},
906 : where alpha is a constant in (0,1). Let L be the last reference of
907 : the account, and G be the current global_insert_cnt value. We know
908 : that e_L = ema_refs, and that for i in [L+1, G), x_i = 0.
909 : That means
910 : e_{G-1} = (1-alpha)^(G-L-1) * ema_refs
911 : e_G = alpha + (1-alpha)^(G-L) * ema_refs
912 : */
913 : /* last_ref only captures the low 24 bits, so we guess it's the highest
914 : value for them that would still make it less than
915 : global_insert_cnt. Turns out, we can calculate that with just an
916 : AND. */
917 9186144 : ulong last_ref = (ulong)info->last_ref;
918 9186144 : ulong delta = (global_insert_cnt - last_ref) & 0xFFFFFFUL;
919 9186144 : float ema_refs = ALPHA + powf( 1.0f-ALPHA, (float)delta ) * info->ema_refs;
920 :
921 9186144 : info->ema_refs = ema_refs;
922 9186144 : info->last_ref = (uint)(global_insert_cnt & 0xFFFFFFUL);
923 9186144 : #undef ALPHA
924 :
925 9186144 : return ema_refs;
926 9186144 : #endif
927 9186144 : }
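
A worked example of the catch-up step (example numbers only, not from the source): if an account was last referenced 100 insertions ago with ema_refs==0.8f, then delta==100 and the lazily applied update gives

   0.005f + powf( 0.995f, 100.0f )*0.8f  ~=  0.005f + 0.606f*0.8f  ~=  0.49f

i.e. the old estimate decays toward zero while the account sits idle, and the reference being recorded now contributes the usual ALPHA term.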
928 :
929 : static void
930 : add_edges( fd_rdisp_t * disp,
931 : fd_rdisp_txn_t * ele,
932 : fd_acct_addr_t const * addrs,
933 : ulong addr_cnt,
934 : uint lane,
935 : int writable,
936 6998862 : int update_score ) {
937 :
938 6998862 : ulong acct_idx = (ele->edge_cnt_etc & 0x7FU) + ((ele->edge_cnt_etc>>7) & 0x7FU);
939 6998862 : ulong edge_idx = (ele->edge_cnt_etc & 0x7FU) + 3UL*((ele->edge_cnt_etc>>7) & 0x7FU);
940 :
941 16184931 : for( ulong i=0UL; i<addr_cnt; i++ ) {
942 9186069 : fd_acct_addr_t const * addr = addrs+i;
943 9186069 : acct_info_t * ai = NULL;
944 :
945 : /* Step 1: lookup the pubkey */
946 9186069 : ulong idx = acct_map_idx_query( disp->acct_map, addr, ULONG_MAX, disp->acct_pool );
947 9186069 : if( FD_UNLIKELY( idx==ULONG_MAX ) ) {
948 1873431 : idx = acct_map_idx_query( disp->free_acct_map, addr, ULONG_MAX, disp->acct_pool );
949 1873431 : if( FD_UNLIKELY( idx==ULONG_MAX ) ) {
950 : /* The acct pool is sized so that the list cannot be empty at this
951 : point. However, the element at the head might be in the free
952 : map with a different pubkey. */
953 197907 : idx = free_dlist_idx_peek_head( disp->free_acct_dlist, disp->acct_pool );
954 197907 : ai = disp->acct_pool+idx;
955 :
956 : /* CACHED -> FREE transition */
957 197907 : if( FD_LIKELY( ai->next!=0U ) ) {
958 177357 : acct_map_idx_remove_fast( disp->free_acct_map, idx, disp->acct_pool );
959 177357 : }
960 :
961 : /* FREE -> ACTIVE transition */
962 197907 : ai->key = *addr;
963 197907 : ai->flags = 0U;
964 197907 : ai->last_ref = 0U;
965 197907 : ai->ema_refs = 0.0f;
966 1675524 : } else {
967 : /* CACHED -> ACTIVE transition */
968 1675524 : ai = disp->acct_pool+idx;
969 1675524 : ai->flags = 0U; /* FIXME: unnecessary */
970 1675524 : acct_map_idx_remove_fast( disp->free_acct_map, idx, disp->acct_pool );
971 1675524 : }
972 : /* In either case, at this point, the element is not in any map
973 : but is in free_acct_dlist. It has the right key; last_ref and
974 : ema_refs are valid; flags is 0. */
975 1873431 : free_dlist_idx_remove( disp->free_acct_dlist, idx, disp->acct_pool );
976 1873431 : memset( ai->last_reference, '\0', sizeof(ai->last_reference) );
977 1873431 : acct_map_idx_insert( disp->acct_map, idx, disp->acct_pool );
978 1873431 : }
979 9186069 : ai = disp->acct_pool+idx;
980 : /* At this point, in all cases, the acct_info is now in the ACTIVE
981 : state. It's in acct_map, not in free_acct_map, and not in
982 : free_acct_dlist. */
983 :
984 : /* Assume that transactions are drawn randomly from some large
985 : distribution of potential transactions. We want to estimate the
986 : expected value of the probability that this transaction conflicts
987 : with the next transaction that is sampled. Two transactions
988 : conflict if they conflict on any account, and in general, they
989 : conflict if they both reference the same account, unless both
990 : this transaction and the next one only read it. We don't have
991 : read/write info, so the best guess we have is that the next
992 : transaction does the same thing to the account that this one
993 : does. That means we only really care about the accounts that
994 : this transaction writes to. Label those a_1, a_2, ..., a_i, and
995 : suppose the probability that the next transaction references a_i
996 : is p_i (determined using the EMA). Now then, assuming accounts
997 : are independent (which is false, but whatever), then the
998 : probability that the next transaction does not conflict with this
999 : transaction is:
1000 : (1-p_1) * (1-p_2) * ... * (1-p_i).
1001 :
1002 : Since a lower value in the pending prq means we'll schedule it
1003 : earlier, we'll use the probability of non-conflict as the
1004 : fractional part of the score. */
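      /* Worked example with made-up numbers: if this transaction writes two
         accounts whose EMAs give p_1=0.5 and p_2=0.1, the updates below
         multiply score by (1-0.5) and then by (1-0.1), i.e. by 0.45 overall,
         so a transaction touching hot accounts ends up with a lower (more
         urgent) fractional score than one touching only cold accounts. */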
1005 9186069 : float score_change = 1.0f - update_ema( ai, disp->global_insert_cnt );
1006 9186069 : ele->score *= fd_float_if( writable & update_score, score_change, 1.0f );
1007 :
1008 : /* Step 2: add edge. There are 4 cases depending on whether this is
1009 : a writer or not and whether the previous reference was a writer
1010 : or not. */
1011 9186069 : int _ignore;
1012 :
1013 9186069 : edge_t ref_to_pa = ai->last_reference[ lane ];
1014 9186069 : edge_t ref_to_me = (uint)((ulong)((ele - disp->pool)<<8) | acct_idx);
1015 9186069 : edge_t * pa = FOLLOW_EDGE( disp->pool, ai->last_reference[ lane ], _ignore );
1016 9186069 : edge_t * me = ele->edges + edge_idx;
1017 :
1018 : /* In the case that this is the first txn in the DAG, pa will point
1019 : to edges[0] of the sentinel element, pool[0]. We don't care
1020 : about what is stored there, so just set it up as a dummy element
1021 : to make the rest of the code work properly in this case too. If
1022 : this is a read, we want me->sibling to end up as ref_to_me in case 4;
1023 : if this is a write, we want to set the sibling to 0 in case 2, but we need
1024 : to make sure pb==pa. */
1025 9186069 : disp->pool->edges[0] = (1U<<31) | (uint)idx;
1026 9186069 : disp->pool->edges[1] = fd_uint_if( writable, 0U, ref_to_me );
1027 9186069 : disp->pool->edges[2] = fd_uint_if( writable, 0U, ref_to_me );
1028 :
1029 9186069 : int flags = ai->flags;
1030 :
1031 9186069 : if( writable ) { /* also should be known at compile time */
1032 3588582 : if( flags & ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ) ) { /* unclear prob */
1033 : /* Case 1: w-w. The parent is the special last pointer. Point
1034 : the parent to me, and set me to the last pointer. */
1035 1171821 : *me = *pa;
1036 1171821 : *pa = ref_to_me;
1037 2416761 : } else {
1038 : /* Case 2: r-w. This is the tricky case because there could be
1039 : multiple readers. We need to set all the last readers' child
1040 : pointers to me. */
1041 2416761 : *me = *pa;
1042 2416761 : *pa = ref_to_me;
1043 2416761 : edge_t * pb = FOLLOW_EDGE( disp->pool, pa[1], _ignore );
1044 : /* Intentionally skip the first in_degree increment, because it
1045 : will be done later */
1046 2810415 : while( pb!=pa ) {
1047 393654 : *pb = ref_to_me;
1048 393654 : ele->in_degree++;
1049 393654 : pb = FOLLOW_EDGE( disp->pool, pb[1], _ignore );
1050 393654 : }
1051 2416761 : flags |= ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ) | ACCT_INFO_FLAG_ANY_WRITERS( lane );
1052 2416761 : }
1053 5597487 : } else {
1054 5597487 : if( flags & ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ) ) { /* unclear prob */
1055 : /* Case 3: w-r. Similar to case 1, but need to initialize my
1056 : next and prev sibling pointers too. */
1057 423078 : *me = *pa;
1058 423078 : *pa = ref_to_me;
1059 423078 : me[1] = ref_to_me; /* next */
1060 423078 : me[2] = ref_to_me; /* prev */
1061 423078 : flags &= ~(int)ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ); /* clear bit */
1062 5174409 : } else {
1063 : /* Case 4: r-r. Add myself as a sibling instead of a child */
1064 5174409 : *me = *pa;
1065 5174409 : FOLLOW_EDGE( disp->pool, pa[1], _ignore )[2] = ref_to_me; /* prev->next->prev = me */
1066 5174409 : me[1] = pa[1]; /* me->next = prev->next */
1067 5174409 : me[2] = ref_to_pa; /* me->prev = prev */
1068 5174409 : pa[1] = ref_to_me; /* prev->next = me */
1069 5174409 : }
1070 5597487 : }
1071 :
1072 : /* Step 3: Update the final values */
1073 : /* In general, we want to increment the in_degree unless this
1074 : transaction is the first to reference this account. The
1075 : exception is that if this account has only readers, including
1076 : this transaction, we don't want to increment the in_degree
1077 : either. At this point, we can tell if that is the case based on
1078 : ANY_WRITERS. */
1079 9186069 : ele->in_degree += (uint)((ai->last_reference[ lane ]!=0U) & !!(flags & ACCT_INFO_FLAG_ANY_WRITERS( lane )));
1080 9186069 : ai->last_reference[ lane ] = ref_to_me;
1081 9186069 : ai->flags = (uchar)flags;
1082 9186069 : edge_idx += fd_uint_if( writable, 1U, 3U );
1083 9186069 : acct_idx++;
1084 9186069 : }
1085 6998862 : ele->edge_cnt_etc += (uint)addr_cnt<<fd_int_if( writable, 0, 7 ); /* Can't overflow by construction */
1086 6998862 : }
1087 :
1088 :
1089 : /* should be called with all writable accounts first */
1090 : static void
1091 : add_unstaged_edges( fd_rdisp_t * disp,
1092 : fd_rdisp_txn_t * ele,
1093 : fd_rdisp_unstaged_t * unstaged,
1094 : fd_acct_addr_t const * addr,
1095 : ulong addr_cnt,
1096 : int writable,
1097 216 : int update_score ) {
1098 216 : ulong base_idx = unstaged->writable_cnt+unstaged->readonly_cnt;
1099 216 : FD_TEST( !writable || unstaged->readonly_cnt==0U );
1100 366 : for( ulong i=0UL; i<addr_cnt; i++ ) {
1101 150 : unstaged->keys[ base_idx+i ] = addr[i];
1102 150 : if( FD_LIKELY( update_score ) ) {
1103 150 : ulong idx = acct_map_idx_query( disp->acct_map, addr+i, ULONG_MAX, disp->acct_pool );
1104 150 : if( FD_UNLIKELY( idx==ULONG_MAX ) ) idx = acct_map_idx_query( disp->free_acct_map, addr+i, ULONG_MAX, disp->acct_pool );
1105 : /* since these are unstaged, we don't bother moving accounts
1106 : around */
1107 150 : float score_change = 1.0f;
1108 150 : if( FD_LIKELY( idx!=ULONG_MAX ) ) score_change = 1.0f - update_ema( disp->acct_pool+idx, disp->global_insert_cnt );
1109 150 : ele->score *= fd_float_if( writable & update_score, score_change, 1.0f );
1110 150 : }
1111 150 : }
1112 216 : *(fd_ptr_if( writable, &(unstaged->writable_cnt), &(unstaged->readonly_cnt) ) ) += (uint)addr_cnt;
1113 216 : }
1114 :
1115 : ulong
1116 : fd_rdisp_add_txn( fd_rdisp_t * disp,
1117 : FD_RDISP_BLOCK_TAG_T insert_block,
1118 : fd_txn_t const * txn,
1119 : uchar const * payload,
1120 : fd_acct_addr_t const * alts,
1121 1166511 : int serializing ) {
1122 :
1123 1166511 : fd_rdisp_blockinfo_t * block = block_map_ele_query( disp->blockmap, &insert_block, NULL, disp->block_pool );
1124 1166511 : if( FD_UNLIKELY( !block || !block->insert_ready ) ) return 0UL;
1125 1166508 : if( FD_UNLIKELY( !pool_free( disp->pool ) ) ) return 0UL;
1126 :
1127 1166508 : ulong idx = pool_idx_acquire( disp->pool );
1128 1166508 : fd_rdisp_txn_t * rtxn = disp->pool + idx;
1129 1166508 : if( FD_UNLIKELY( rtxn->in_degree!=IN_DEGREE_FREE ) ) FD_LOG_CRIT(( "pool[%lu].in_degree==%u but free", idx, rtxn->in_degree ));
1130 :
1131 1166508 : fd_acct_addr_t const * imm_addrs = fd_txn_get_acct_addrs( txn, payload );
1132 :
1133 1166508 : if( FD_UNLIKELY( !block->staged ) ) {
1134 36 : rtxn->in_degree = IN_DEGREE_UNSTAGED;
1135 36 : rtxn->score = 0.999f;
1136 :
1137 36 : fd_rdisp_unstaged_t * unstaged = disp->unstaged + idx;
1138 36 : unstaged->block = insert_block;
1139 36 : unstaged->writable_cnt = 0U;
1140 36 : unstaged->readonly_cnt = 0U;
1141 :
1142 36 : add_unstaged_edges( disp, rtxn, unstaged, imm_addrs,
1143 36 : fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_SIGNER ), 1, 1 );
1144 36 : add_unstaged_edges( disp, rtxn, unstaged, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_SIGNER ),
1145 36 : fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_NONSIGNER_IMM ), 1, 1 );
1146 36 : if( FD_LIKELY( alts ) )
1147 36 : add_unstaged_edges( disp, rtxn, unstaged, alts,
1148 36 : fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_ALT ), 1, 1 );
1149 36 : add_unstaged_edges( disp, rtxn, unstaged, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_SIGNER ),
1150 36 : fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_SIGNER ), 0, 1 );
1151 36 : add_unstaged_edges( disp, rtxn, unstaged, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_SIGNER | FD_TXN_ACCT_CAT_WRITABLE_NONSIGNER_IMM ),
1152 36 : fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_NONSIGNER_IMM ), 0, 1 );
1153 36 : if( FD_LIKELY( alts ) )
1154 36 : add_unstaged_edges( disp, rtxn, unstaged, alts +fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_ALT ),
1155 36 : fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_ALT ), 0, 1 );
1156 :
1157 36 : unstaged_txn_ll_ele_push_tail( block->ll, rtxn, disp->pool );
1158 1166472 : } else {
1159 1166472 : uint lane = block->staging_lane;
1160 :
1161 1166472 : rtxn->in_degree = 0U;
1162 1166472 : rtxn->score = 0.999f;
1163 1166472 : rtxn->edge_cnt_etc = (block->linear_block_number<<16) | (lane<<14);
1164 :
1165 1166472 : add_edges( disp, rtxn, imm_addrs,
1166 1166472 : fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_SIGNER ), lane, 1, 1 );
1167 1166472 : add_edges( disp, rtxn, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_SIGNER ),
1168 1166472 : fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_NONSIGNER_IMM ), lane, 1, 1 );
1169 1166472 : if( FD_LIKELY( alts ) )
1170 1166469 : add_edges( disp, rtxn, alts,
1171 1166469 : fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_ALT ), lane, 1, 1 );
1172 1166472 : add_edges( disp, rtxn, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_SIGNER ),
1173 1166472 : fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_SIGNER ), lane, 0, 1 );
1174 1166472 : add_edges( disp, rtxn, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_SIGNER | FD_TXN_ACCT_CAT_WRITABLE_NONSIGNER_IMM ),
1175 1166472 : fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_NONSIGNER_IMM ), lane, 0, 1 );
1176 1166472 : if( FD_LIKELY( alts ) )
1177 1166469 : add_edges( disp, rtxn, alts +fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_ALT ),
1178 1166469 : fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_ALT ), lane, 0, 1 );
1179 1166472 : }
1180 :
1181 1166508 : if( FD_UNLIKELY( serializing | block->last_insert_was_serializing ) ) {
1182 6 : block->last_serializing = block->inserted_cnt;
1183 6 : }
1184 1166508 : block->last_insert_was_serializing = (uint)!!serializing;
1185 1166508 : rtxn->score += (float)block->last_serializing;
1186 :
1187 1166508 : block->inserted_cnt++;
1188 1166508 : disp->global_insert_cnt++;
1189 :
1190 1166508 : if( FD_LIKELY( (block->staged) & (rtxn->in_degree==0U) ) ) {
1191 575862 : pending_prq_ele_t temp[1] = {{ .score = rtxn->score, .linear_block_number = block->linear_block_number, .txn_idx = (uint)idx }};
1192 575862 : pending_prq_insert( disp->lanes[ block->staging_lane ].pending, temp );
1193 575862 : }
1194 :
1195 1166508 : return idx;
1196 1166508 : }
1197 :
1198 : ulong
1199 : fd_rdisp_get_next_ready( fd_rdisp_t * disp,
1200 2023440 : FD_RDISP_BLOCK_TAG_T schedule_block ) {
1201 2023440 : fd_rdisp_blockinfo_t * block = block_map_ele_query( disp->blockmap, &schedule_block, NULL, disp->block_pool );
1202 2023440 : if( FD_UNLIKELY( !block || !block->schedule_ready ) ) return 0UL;
1203 :
1204 2023425 : ulong idx;
1205 2023425 : if( FD_LIKELY( block->staged ) ) {
1206 2023398 : ulong staging_lane = block->staging_lane;
1207 2023398 : per_lane_info_t * l = disp->lanes + staging_lane;
1208 :
1209 2023398 : if( FD_UNLIKELY( !pending_prq_cnt( l->pending ) ) ) return 0UL;
1210 1166496 : if( FD_UNLIKELY( l->pending->linear_block_number != block->linear_block_number ) ) return 0UL;
1211 : /* e.g. when completed_cnt==0, we can accept any score below 1.0 */
1212 1166490 : if( FD_UNLIKELY( l->pending->score>=(float)(block->completed_cnt+1U) ) ) return 0UL;
1213 1166490 : idx = l->pending->txn_idx;
1214 1166490 : pending_prq_remove_min( l->pending );
1215 1166490 : disp->pool[ idx ].in_degree = IN_DEGREE_DISPATCHED;
1216 1166490 : } else {
1217 27 : if( FD_UNLIKELY( block->dispatched_cnt!=block->completed_cnt ) ) return 0UL;
1218 24 : if( FD_UNLIKELY( unstaged_txn_ll_is_empty( block->ll, disp->pool ) ) ) return 0UL;
1219 18 : idx = unstaged_txn_ll_idx_peek_head( block->ll, disp->pool );
1220 18 : disp->pool[ idx ].in_degree = IN_DEGREE_UNSTAGED_DISPATCHED;
1221 18 : }
1222 1166508 : block->dispatched_cnt++;
1223 :
1224 1166508 : return idx;
1225 2023425 : }
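                : /* Illustrative only (not part of this file): a minimal,
                :    single-threaded dispatch loop over one schedulable block.  Here
                :    tag is the FD_RDISP_BLOCK_TAG_T identifying the block being
                :    drained, and execute_txn() is a hypothetical stand-in for
                :    whatever actually runs the transaction behind the returned pool
                :    index.
                :
                :      ulong idx;
                :      while( (idx = fd_rdisp_get_next_ready( disp, tag ))!=0UL ) {
                :        execute_txn( idx );
                :        fd_rdisp_complete_txn( disp, idx, 1 );
                :      }
                :
                :    In a real pipeline dispatch and completion are typically
                :    decoupled, with several transactions in flight at once;
                :    completing a transaction is what makes its successors in the
                :    conflict graph become ready.  0UL is the nothing-ready sentinel
                :    (pool index 0 is never a live transaction). */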
1226 :
1227 : void
1228 : fd_rdisp_complete_txn( fd_rdisp_t * disp,
1229 : ulong txn_idx,
1230 1166511 : int reclaim ) {
1231 :
1232 1166511 : fd_rdisp_txn_t * rtxn = disp->pool + txn_idx;
1233 1166511 : fd_rdisp_blockinfo_t * block = NULL;
1234 :
1235 1166511 : if( FD_UNLIKELY( rtxn->in_degree==IN_DEGREE_UNSTAGED_DISPATCHED ) ) {
1236 : /* Unstaged */
1237 18 : block = block_map_ele_query( disp->blockmap, &disp->unstaged[ txn_idx ].block, NULL, disp->block_pool );
1238 18 : FD_TEST( rtxn==unstaged_txn_ll_ele_peek_head( block->ll, disp->pool ) );
1239 18 : unstaged_txn_ll_ele_pop_head( block->ll, disp->pool );
1240 18 : block->completed_cnt++;
1241 1166493 : } else if( FD_LIKELY( rtxn->in_degree==IN_DEGREE_DISPATCHED ) ) {
1242 : /* Staged */
1243 1166490 : ulong w_cnt = (rtxn->edge_cnt_etc ) & 0x7FU;
1244 1166490 : ulong r_cnt = (rtxn->edge_cnt_etc>> 7) & 0x7FU;
1245 1166490 : ulong lane = (rtxn->edge_cnt_etc>>14) & 0x3U;
1246 1166490 : uint tail_linear_block_num = (uint)(disp->lanes[lane].linear_block_number);
1247 1166490 : ulong edge_idx = 0UL;
1248 10352559 : for( ulong i=0UL; i<w_cnt+r_cnt; i++ ) {
1249 9186069 : edge_t const * e = rtxn->edges+edge_idx;
1250 9186069 : edge_t const e0 = *e;
1251 9186069 : edge_t ref_to_me = (uint)((txn_idx<<8) | i);
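                :       /* ref_to_me is a packed back-reference to this edge: txn_idx
                :          in the high bits and the account slot i in the low 8 bits.
                :          Since pool index 0 is never used for a live transaction,
                :          real references are always >=0x100 (cf. the FD_TEST in
                :          fd_rdisp_verify). */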
1252 :
1253 : /* To help with explanations, consider the following DAG:
1254 :
1255 : --> B --\ --> E --\
1256 : / V / V
1257 : A D (G)
1258 : \ ^ \ ^
1259 : --> C --/ --> F --/ */
1260 :
1261 9186069 : if( FD_UNLIKELY( EDGE_IS_LAST( e0 ) ) ) {
1262 6682734 : ulong acct_idx = e0 & 0x7FFFFFFFU;
1263 6682734 : acct_info_t * ai = disp->acct_pool + acct_idx;
1264 : /* If this is a writer, e.g. node G above, we know it's the last
1265 : one, so we can clear last_reference.
1266            :              If this is a reader (e.g. nodes E or F above, supposing node G
1267            :              didn't exist), we need to check whether it's the last one
1268 : (me==me->next). If so, we can clear last_reference. If not,
1269 : we need to delete this node from the linked list */
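                :          /* In terms of the code below: edge_idx<w_cnt means this
                :             edge is one of our write edges (the writer case above),
                :             while e[1]==ref_to_me means this reader's next-sibling
                :             pointer points back at itself, i.e. it is the only
                :             reader left in the ring. */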
1270 6682734 : if( edge_idx<w_cnt || e[1]==ref_to_me ) {
1271 4103505 : ai->last_reference[ lane ] = 0U;
1272 4103505 : ai->flags = (uchar)(ai->flags & (~(ACCT_INFO_FLAG_ANY_WRITERS( lane ) | ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ))));
1273 4103505 : } else {
1274 2579229 : int _ignore;
1275 2579229 : FOLLOW_EDGE( disp->pool, e[1], _ignore )[2] = e[2]; /* me->next->prev = me->prev */
1276 2579229 : FOLLOW_EDGE( disp->pool, e[2], _ignore )[1] = e[1]; /* me->prev->next = me->next */
1277 2579229 : ai->last_reference[ lane ]= fd_uint_if( ai->last_reference[ lane ]==ref_to_me, e[1], ai->last_reference[ lane ] );
1278 2579229 : }
1279 :
1280 : /* Potentially transition from ACTIVE -> CACHED */
1281 6682734 : if( FD_UNLIKELY( (ai->last_reference[ 0 ]==0U)&(ai->last_reference[ 1 ]==0U)&
1282 6682734 : (ai->last_reference[ 2 ]==0U)&(ai->last_reference[ 3 ]==0U) ) ) {
1283 1873431 : ai->flags = 0;
1284 1873431 : acct_map_idx_remove_fast( disp->acct_map, acct_idx, disp->acct_pool );
1285 1873431 : acct_map_idx_insert ( disp->free_acct_map, acct_idx, disp->acct_pool );
1286 1873431 : free_dlist_idx_push_tail( disp->free_acct_dlist, acct_idx, disp->acct_pool );
1287 1873431 : }
1288 6682734 : } else {
1289 2503335 : int child_is_writer;
1290 :
1291 2503335 : edge_t next_e = e0;
1292 2503335 : edge_t const * child_edge;
1293 2713623 : while( 1 ) {
1294 : /* This loop first traverses the me->child link, and then
1295 : traverses any sibling links. For example, in the case that
1296 : we're completing node D above, the first child_txn is E,
1297 : and the second child_txn is F. */
1298 2713623 : /* */ child_edge = FOLLOW_EDGE( disp->pool, next_e, child_is_writer );
1299 2713623 : fd_rdisp_txn_t * child_txn = FOLLOW_EDGE_TXN( disp->pool, next_e );
1300 :
1301 : /* Sanity test */
1302 2713623 : FD_TEST( child_txn->in_degree>0U );
1303 2713623 : FD_TEST( child_txn->in_degree<IN_DEGREE_DISPATCHED );
1304 :
1305 2713623 : if( FD_UNLIKELY( 0U==(--(child_txn->in_degree)) ) ) {
1306            :           /* We need an operation something like
1307            :              fd_frag_meta_ts_decomp. child_txn stores only the low 16
1308            :              bits of its linear block number, while
1309            :              tail_linear_block_num is the full 32-bit number of a block
1310            :              less than block_depth later. Since block_depth<2^16, we
1311            :              can resolve this unambiguously: copy the high 16 bits
1312            :              from tail_linear_block_num, unless that would make
1313            :              linear_block_num larger than tail_linear_block_num, in
1314            :              which case subtract 2^16. */
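                :           /* Worked example: if the tail block's number is 0x30005
                :              and the child's stored low bits are 0xFFF0, then
                :              0xFFF0>0x0005, so the result is
                :              (0x30000|0xFFF0)-0x10000 = 0x2FFF0.  If the stored
                :              bits were 0x0002 instead, the result would simply be
                :              0x30002. */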
1315 590625 : uint low_16_bits = child_txn->edge_cnt_etc>>16;
1316 590625 : uint linear_block_num = ((tail_linear_block_num & ~0xFFFFU) | low_16_bits) - (uint)((low_16_bits>(tail_linear_block_num&0xFFFFU))<<16);
1317 590625 : pending_prq_ele_t temp[1] = {{ .score = child_txn->score,
1318 590625 : .linear_block_number = linear_block_num,
1319 590625 : .txn_idx = (uint)(child_txn-disp->pool) }};
1320 590625 : pending_prq_insert( disp->lanes[ lane ].pending, temp );
1321 590625 : }
1322 2713623 : if( child_is_writer || child_edge[1]==e0 ) break;
1323 210288 : next_e = child_edge[1];
1324 210288 : }
1325 : /* In the case that the completed transaction is a reader, say B
1326 : or C above, it seems like we should need to remove it from
1327 : the doubly linked list, but we actually don't. The times
1328 : that we need to read the sibling pointers are:
1329 : 1. Completing the writer before a reader (e.g. completing A)
1330 : 2. Completing a reader that's the last in the DAG (e.g.
1331 : completing E/F if G didn't exist)
1332 : 3. Adding another reader to the same set of readers (e.g. if
1333 : G were added as a reader instead of a writer)
1334 : 4. Adding a writer after a set of readers (e.g. adding G).
1335 :
1336 : Supposing that the completed transaction is a reader, since
1337 : we checked EDGE_IS_LAST( e0 ), we know that there is at least
1338 : one writer that follows this reader, e.g. D above.
1339 :
1340            :            And so none of these cases can apply to this group of readers:
1341 : 1. Completing B or C implies that A has already completed,
1342 : so it can't complete again.
1343 : 2. We know that we're not in that case because we checked
1344 : EDGE_IS_LAST( e0 ), and if we're not last now, we cannot
1345 : become last later, because the growth happens in the
1346 : other direction.
1347 : 3. Similarly, because we checked EDGE_IS_LAST, any future
1348 : additions of readers won't be to this group of readers.
1349 : 4. Similarly, we know that there already is a writer that
1350 : follows this group of readers, so a later writer would
1351 : not read this set of readers.
1352 :
1353 : So then, we don't need to deal with the sibling edges. The
1354 : fact that we only need to do it in one case almost calls into
1355 : question whether we need to maintain the whole circular
1356 : system in the first place, and whether we could get away with
1357 : a reference count or something instead, but it's important in
1358 : one critical case: suppose we add a bunch of readers, then
1359 : some of them complete, and then we add a writer (case 4
1360 : above). We need to be able to enumerate the nodes in the DAG
1361 : that have not yet completed, and we need to be able to remove
1362 : them from that set in O(1). Those two requirements don't
1363 : leave us with many alternatives besides a doubly linked list. */
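                :         /* Illustrative only (not from this file): the O(1) unlink
                :            that the argument above relies on, written with plain
                :            next/prev indices instead of packed edge_t values:
                :
                :              typedef struct { uint next; uint prev; } ring_node_t;
                :
                :              static inline void
                :              ring_unlink( ring_node_t * n, uint i ) {
                :                n[ n[ i ].next ].prev = n[ i ].prev;
                :                n[ n[ i ].prev ].next = n[ i ].next;
                :              }
                :
                :            This is the same pattern as the FOLLOW_EDGE(...)[1]/[2]
                :            updates in the EDGE_IS_LAST branch above (the
                :            single-element case is handled separately there, by
                :            clearing last_reference).  It is what lets a reader that
                :            completes while it is still the last reference drop out
                :            of the ring in O(1), so that a writer added later (case
                :            4) only links to, and counts in-degree from, the readers
                :            still outstanding. */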
1364 :
1365 2503335 : if( FD_UNLIKELY( EDGE_IS_LAST( *child_edge ) ) ) {
1366 : /* For example, either:
1367            :            1. completing D if G didn't exist
1368 : 2. completing E or F with G in the DAG
1369 :
1370 : After completing this transaction, there's either 0 writers
1371            :            left (case 1) or 1 writer left (case 2) in the DAG, and if
1372 : there is one, it is the last reference.
1373 :
1374            :            Either way, we want to set ANY_WRITERS to the value of
1375            :            LAST_REF_WAS_WRITE (the <<1 below works because
                :            ANY_WRITERS( lane ) is the bit just above
                :            LAST_REF_WAS_WRITE( lane )). */
1376 1965492 : acct_info_t * ai = disp->acct_pool + (*child_edge & 0x7FFFFFFFU);
1377 1965492 : ulong flags = ai->flags;
1378 1965492 : flags &= ~(ulong)ACCT_INFO_FLAG_ANY_WRITERS( lane );
1379 1965492 : flags |= (flags & (ulong)ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ))<<1;
1380 1965492 : ai->flags = (uchar)flags;
1381 1965492 : }
1382 2503335 : }
1383 9186069 : edge_idx += fd_ulong_if( i<w_cnt, 1UL, 3UL );
1384 9186069 : }
1385 1166490 : block = block_slist_ele_peek_head( disp->lanes[ lane ].block_ll, disp->block_pool );
1386 1166490 : block->completed_cnt++;
1387 1166490 : } else if( FD_LIKELY( rtxn->in_degree==IN_DEGREE_ZOMBIE ) ) {
1388 3 : FD_TEST( reclaim );
1389 3 : block = block_pool_ele( disp->block_pool, rtxn->block_idx );
1390 3 : zombie_dlist_ele_remove( block->zombie_list, rtxn, disp->pool );
1391 : /* Fall through to the pool release branch below. */
1392 3 : } else {
1393 0 : FD_LOG_CRIT(( "completed un-dispatched transaction %lu", txn_idx ));
1394 0 : }
1395 1166511 : if( reclaim ) {
1396 : /* For testing purposes, to make sure we don't read a completed
1397 : transaction, we can clobber the memory. */
1398 : /* memset( disp->pool+txn_idx, '\xCC', sizeof(fd_rdisp_txn_t) ); */
1399 1166508 : rtxn->in_degree = IN_DEGREE_FREE;
1400 1166508 : pool_idx_release( disp->pool, txn_idx );
1401 1166508 : } else {
1402 3 : rtxn->in_degree = IN_DEGREE_ZOMBIE;
1403 3 : zombie_dlist_ele_push_tail( block->zombie_list, rtxn, disp->pool );
1404 3 : rtxn->block_idx = (uint)block_pool_idx( disp->block_pool, block );
1405 3 : }
1406 1166511 : }
1407 :
1408 :
1409 : ulong
1410 : fd_rdisp_staging_lane_info( fd_rdisp_t const * disp,
1411 36 : fd_rdisp_staging_lane_info_t out_sched[ static 4 ] ) {
1412 180 : for( ulong i=0UL; i<4UL; i++ ) {
1413 144 : if( !(disp->free_lanes & (1<<i) ) ) {
1414 51 : block_slist_t const * sl = disp->lanes[ i ].block_ll;
1415 51 : out_sched[ i ].insert_ready_block = block_slist_ele_peek_tail( sl, disp->block_pool )->block;
1416 51 : out_sched[ i ].schedule_ready_block = block_slist_ele_peek_head( sl, disp->block_pool )->block;
1417 51 : }
1418 144 : }
1419 36 : return 0xFUL & ~(ulong)disp->free_lanes;
1420 36 : }
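                : /* Illustrative only (not part of this file): typical use of the
                :    lane mask returned above.  Bit i of the return value is set iff
                :    staging lane i is occupied, and only those entries of out_sched
                :    are written.  handle_lane() is a hypothetical consumer; the
                :    field names suggest schedule_ready_block is the block currently
                :    being scheduled from on that lane and insert_ready_block is the
                :    block new insertions go into.
                :
                :      fd_rdisp_staging_lane_info_t info[ 4 ];
                :      ulong in_use = fd_rdisp_staging_lane_info( disp, info );
                :      for( ulong i=0UL; i<4UL; i++ )
                :        if( in_use & (1UL<<i) )
                :          handle_lane( i, info[ i ].schedule_ready_block,
                :                          info[ i ].insert_ready_block );
                : */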
1421 :
1422 : void
1423 : fd_rdisp_verify( fd_rdisp_t const * disp,
1424 776976 : uint * scratch ) {
1425 776976 : ulong acct_depth = disp->depth*MAX_ACCT_PER_TXN;
1426 776976 : ulong block_depth = disp->block_depth;
1427 776976 : FD_TEST( 0==acct_map_verify ( disp->acct_map, acct_depth+1UL, disp->acct_pool ) );
1428 776976 : FD_TEST( 0==acct_map_verify ( disp->free_acct_map, acct_depth+1UL, disp->acct_pool ) );
1429 776976 : FD_TEST( 0==block_map_verify( disp->blockmap, block_depth+1UL, disp->block_pool ) );
1430 :
1431            :   /* Check that all the in-degree counts are correct */
1432 776976 : memset( scratch, '\0', sizeof(uint)*(disp->depth+1UL) );
1433 776976 : scratch[ 0 ] = UINT_MAX;
1434 233866176 : for( ulong j=1UL; j<disp->depth+1UL; j++ ) {
1435 233089200 : fd_rdisp_txn_t const * rtxn = disp->pool+j;
1436 233089200 : if( rtxn->in_degree==IN_DEGREE_FREE ) { scratch[ j ]=UINT_MAX; continue; }
1437 :
1438 57500856 : if( (rtxn->in_degree==IN_DEGREE_UNSTAGED_DISPATCHED) |
1439 57500856 : (rtxn->in_degree==IN_DEGREE_UNSTAGED) |
1440 57500856 : (rtxn->in_degree==IN_DEGREE_ZOMBIE) ) continue;
1441 :
1442 57500829 : ulong w_cnt = (rtxn->edge_cnt_etc ) & 0x7FU;
1443 57500829 : ulong r_cnt = (rtxn->edge_cnt_etc>> 7) & 0x7FU;
1444 57500829 : ulong edge_idx = 0UL;
1445 :
1446 721543131 : for( ulong i=0UL; i<w_cnt+r_cnt; i++ ) {
1447 664042302 : edge_t const * e = rtxn->edges+edge_idx;
1448 664042302 : edge_t const e0 = *e;
1449 :
1450 664042302 : edge_idx += fd_ulong_if( i<w_cnt, 1UL, 3UL );
1451 :
1452 664042302 : if( FD_UNLIKELY( EDGE_IS_LAST( e0 ) ) ) continue;
1453 :
1454 144900756 : edge_t next_e = e0;
1455 144900756 : edge_t const * child_edge;
1456 144900756 : edge_t last_e = 0U;
1457 151021773 : while( 1 ) {
1458 151021773 : int child_is_writer;
1459 : /* This loop first traverses the me->child link, and then
1460 : traverses any sibling links. For example, in the case that
1461            :            we're examining node D above, the first child_txn is E,
1462 : and the second child_txn is F. */
1463 151021773 : /* */ child_edge = FOLLOW_EDGE( disp->pool, next_e, child_is_writer );
1464 151021773 : fd_rdisp_txn_t * child_txn = FOLLOW_EDGE_TXN( disp->pool, next_e );
1465 151021773 : scratch[ child_txn - disp->pool ]++;
1466 151021773 : if( child_is_writer || child_edge[1]==e0 ) break;
1467 6121017 : if( last_e!=0U ) FD_TEST( child_edge[2]==last_e );
1468 6121017 : last_e = next_e;
1469 6121017 : next_e = child_edge[1];
1470 6121017 : FD_TEST( next_e>=0x100U );
1471 6121017 : }
1472 144900756 : }
1473 57500829 : }
1474 233866176 : for( ulong i=1UL; i<disp->depth+1UL; i++ ) {
1475 233089200 : FD_TEST( scratch[ i ]==UINT_MAX ||
1476 233089200 : disp->pool[ i ].in_degree==IN_DEGREE_DISPATCHED ||
1477 233089200 : disp->pool[ i ].in_degree==IN_DEGREE_UNSTAGED ||
1478 233089200 : disp->pool[ i ].in_degree==IN_DEGREE_UNSTAGED_DISPATCHED ||
1479 233089200 : disp->pool[ i ].in_degree==IN_DEGREE_ZOMBIE ||
1480 233089200 : disp->pool[ i ].in_degree==scratch[ i ] );
1481 233089200 : }
1482 776976 : }
1483 :
1484 9 : void * fd_rdisp_leave ( fd_rdisp_t * disp ) { return disp; }
1485 9 : void * fd_rdisp_delete( void * mem ) { return mem; }
|