Line data Source code
1 : #include "fd_rdisp.h"
2 : #include <math.h> /* for the EMA */
3 :
4 : /* The conflict graph that this file builds is not a general DAG, but
5 : the union of several special account-conflict graphs. Each
6 : account-conflict graph has special structure:
7 : ---> 3 --
8 : / \
9 : / v
10 : 1 ---> 2 -----> 4 --> 6 ----> 7
11 : \ ^
12 : \ /
13 : ----> 5 --
14 :
15 : That is, the graph is almost a line, but may have fan-out and fan-in
16 : regions. The tricky part about representing a graph like this
17 : without dynamic memory allocation is that nodes may have arbitrary
18 : in-degree and out-degree. Thus, we use a pretty standard trick and
19 : use sibling pointers, denoted below with dotted lines. Each node
20 : maintains at most one successor pointer and at most one sibling
21 : pointer. Although not shown below, the sibling pointers are
22 : circularly linked, so 5's sibling is 3. Additionally, though not
23 : shown below, the child of the last node (7) stores information about
24 : what account address all this is for, which facilitates deleting the
25 : last node in the graph.
26 :
27 :
28 : --> 3 --
29 : / ^ \
30 : / : \
31 : / V v
32 : 1 ---> 2 4 --> 6 ----> 7
33 : ^ ^
34 : : /
35 : V /
36 : 5 --
37 :
38 : The normal edge 2->3 along with the sibling edge 3..>4 implies a
39 : normal edge 2->4. That then transitively implies an edge 2->5.
40 :
41 : We want each node to maintain a count of its in-degree so that we
42 : know when it can be executed. The implied edges also count for the
43 : in-degree. In this example, node 1 has in-degree 0, node 6 has
44 : in-degree 3, and the rest have in-degree 1.
45 :
46 : Maintaining each account-conflict graph is relatively easy given the
47 : operations we want to support. Only the details about sibling edges
48 : are worth mentioning. For example, when deleting node 2, we
49 : decrement the in-degree count for its successor, and then follow the
50 : sibling pointers, decrementing all the in-degree counts as we go to
51 : mark the deletion of the implied edges. Sibling edges also need to
52 : be doubly linked, so that e.g. nodes 3 and 5 can be re-linked in O(1)
53 : if node 4 is deleted. They're also circularly linked as well for
54 : convenience.
55 :
56 : When building the graph, we maintain a map of account to the last
57 : node that references it, whether that was a read or write, and
58 : whether there are any writers to that account in the graph right now.
59 : If the new node reads from an account that was last read, the new
60 : node becomes a sibling of the last read, with in-degree increased if
61 : there are any writers. Otherwise, it becomes a successor of the node
62 : that last referenced the account. */
63 :
64 : /* For a task like this with lots of graph traversal and pointer
65 : chasing, performance is typically limited by memory latency. That
66 : means that the more that can fit in cache, the better the
67 : performance. This implementation uses a lot of bit-packing to
68 : improve cache footprint. */
69 :
70 : /* The following structs are all very local to this compilation unit,
71 : and so they don't have globally acceptable type names (e.g.
72 : fd_rdisp_edge_t). */
73 :
74 155541 : #define MAX_ACCT_PER_TXN 128UL
75 :
76 : /* edge_t: Fields typed edge_t represent an edge in one of the parallel
77 : account-conflict DAGs. Each transaction stores a list of all its
78 : outgoing edges. The type is actually a union of bitfield, but C
79 : bitfields are gross, so we just do it manually with macros. If the
80 : high bit is set, that means the transaction storing this edge_t value
81 : is the last in this specific DAG, and the lower 31 bits of the
82 : value are an index in the map_pool for the account pubkey for this
83 : DAG. See the comments about the hidden edge outgoing from node 7 in
84 : the DAG at the top of this file for an example.
85 : If the high bit is not set, then the next 23 bits store the
86 : destination of the edge, represented by its index in the pool of
87 : transactions. Then the lowest 8 bits store the account index within
88 : that transaction of the edge that is part of the same
89 : account-specific DAG as this edge. Because the max depth is 2^23-1,
90 : and each transaction can reference 128 accounts, the max accounts
91 : that can be referenced fits in 30 bits.
92 : The proper type would be something like
93 : typedef union {
94 : struct {
95 : uint is_last:1;
96 : uint map_pool_idx:31;
97 : } last;
98 : struct {
99 : uint is_last:1;
100 : uint txn_idx:23;
101 : uint acct_idx:8;
102 : } e;
103 : } edge_t;
104 :
105 : */
106 : typedef uint edge_t;
107 :
108 :
109 : /* fd_rdisp_txn is the representation of a transaction as a node in the
110 : DAG. */
111 : struct fd_rdisp_txn {
112 : /* in_degree: The total number of edges summed across all account DAGs
113 : with this node as their destination. In the worst case, all the
114 : other transactions in the pool read from each of the max number of
115 : accounts that this transaction writes to, so there are
116 : MAX_ACCT_PER_TXN*depth edges that come into this node, which fits in
117 : about 30 bits, so we have some room for special values. If
118 : in_degree is one of the following values, then
119 : the transaction is: */
120 47385540 : #define IN_DEGREE_FREE (UINT_MAX )
121 11538690 : #define IN_DEGREE_UNSTAGED (UINT_MAX-1U)/* unstaged, not dispatched */
122 552831 : #define IN_DEGREE_DISPATCHED (UINT_MAX-2U)/* staged, dispatched */
123 11538372 : #define IN_DEGREE_UNSTAGED_DISPATCHED (UINT_MAX-3U)/* unstaged, dispatched */
124 11538057 : #define IN_DEGREE_ZOMBIE (UINT_MAX-4U)/* zombie */
125 : /* a transaction that is staged and dispatched is must have an
126 : in_degree of 0. in_degree isn't a meaningful concept for unstaged
127 : transactions. */
128 : uint in_degree;
129 :
130 : /* score: integer part stores how many transactions in the block must
131 : have completed before this transaction can be scheduled. This is
132 : useful for transactions marked as serializing. The fractional part
133 : gives some measure of how urgent the transaction is, where lower is
134 : more urgent. This means we can't have more transactions in a block
135 : marked as serializing than the first integer that a float cannot
136 : represent. That value is about 16M, which is much higher than the
137 : maximum number of transactions in a block, so this is not a
138 : problem. If in the very rare case that there are more than just a
139 : few serializing transactions, we will lose a bit of precision for
140 : the fractional portion, which makes total sense; if there's not
141 : much room for parallel execution, having the optimal parallel
142 : execution is not very important. */
143 : float score;
144 :
145 : /* edge_cnt_etc:
146 : 0xFFFF0000 (16 bits) for linear block number,
147 : 0x0000C000 (2 bits) for concurrency lane,
148 : 0x00003F80 (7 bits) for r_cnt
149 : 0x0000007F (7 bits) for w_cnt_1. Unfortunately, transactions can
150 : have up to 128 writable accounts, but they have at least 1, so by
151 : storing w_cnt_1 = w_cnt - 1, we can still store it in 7 bits. */
152 : union {
153 : uint edge_cnt_etc;
154 : /* edge_cnt_etc is only used when the transaction is STAGED and
155 : PENDING, READY, or DISPATCHED. If UNSTAGED or FREE, the next
156 : pointer is also here. It can't be UNSTAGED and FREE at the same
157 : time, so there's no conflict with storage there either. */
158 : uint unstaged_next;
159 : uint free_next;
160 : /* If ZOMBIE, pointer back to block is here. No storage conflict
161 : because ZOMBIE is an exclusive state. */
162 : uint block_idx;
163 : };
164 :
165 :
166 : /* When a transaction writes to an account, it only creates one
167 : link. When a transaction reads from an account, we need the full
168 : doubly linked list with its siblings, so it creates 3 edges
169 : (child, next, prev). All edges from writable accounts come first,
170 : and we keep track of how many there are. In the worst case, all
171 : accounts are reads, so we size it appropriately. */
172 : edge_t edges[3UL*MAX_ACCT_PER_TXN]; /* addressed [0, w_cnt_1+1+3*r_cnt) */
173 : };
174 : typedef struct fd_rdisp_txn fd_rdisp_txn_t;
175 :
176 : #define EDGE_IS_LAST(x) ((x)&0x80000000U)
177 :
178 : /* Two more definitions:
179 : An edge index is an array position within the edges array. An
180 : account index is a position within a transaction's account addresses,
181 : reordered so that the writable ones come first. Since writable
182 : accounts use one position in the edges array per account address,
183 : these often coincide. */
184 :
185 :
186 : /* FOLLOW_EDGE and FOLLOW_EDGE_TXN are helper macros for dealing with
187 : edges. Given an edge_t x, and transaction pool base, FOLLOW_EDGE_TXN
188 : returns a pointer to transaction that the edge points to; FOLLOW_EDGE
189 : returns a pointer to the (first) edge_t within that transaction that
190 : is part of the same DAG as this edge. FOLLOW_EDGE and
191 : FOLLOW_EDGE_TXN must not be called if EDGE_IS_LAST is non-zero.
192 :
193 : Then the edge index, i.e. the position in the edges array of the
194 : (first) edge for an account index is:
195 : acct_idx if acct_idx<=w_cnt_1
196 : w_cnt_1+1 + 3*(acct_idx-w_cnt_1-1) else.
197 : Simplifying gives
198 : acct_idx + 2*signed_max( 0, acct_idx-w_cnt_1-1 ).
199 : In doing this calculation, we also basically get for free whether the
200 : edge is for a writable account or a readonly account, so we return
201 : that as well via the w parameter. If the child transaction only reads
202 : the account address for this DAG, then the next and prev pointers can
203 : be accessed using the returned value +1 and +2, respectively. */
204 38179194 : #define FOLLOW_EDGE(base, x, w) (__extension__({ \
205 38179194 : uint __e = (x); \
206 38179194 : fd_rdisp_txn_t * __txn = ((base)+(__e>>8)); \
207 38179194 : uint __w_cnt_1 = __txn->edge_cnt_etc & 0x7FU; \
208 38179194 : uint __idx = (__e & 0xFFU); \
209 38179194 : (w) = __idx<=__w_cnt_1; \
210 38179194 : (void)(w); /* not robust... */ \
211 38179194 : __txn->edges + __idx + 2*fd_int_max( 0, (int)(__idx)-(int)(__w_cnt_1)-1 ); }))
212 30971187 : #define FOLLOW_EDGE_TXN(base, x) ( (base)+((x)>>8) )
213 :
214 : /* The pool and slist are almost the same, but they are used
215 : differently, so keep them as different structures for now. */
216 :
217 : #define POOL_NAME pool
218 63 : #define POOL_T fd_rdisp_txn_t
219 : #define POOL_IDX_T uint
220 1310289 : #define POOL_NEXT free_next
221 : #define POOL_SENTINEL 1
222 : #include "../../util/tmpl/fd_pool.c"
223 :
224 : #define SLIST_NAME unstaged_txn_ll
225 : #define SLIST_ELE_T fd_rdisp_txn_t
226 318 : #define SLIST_IDX_T uint
227 1272 : #define SLIST_NEXT unstaged_next
228 : #include "../../util/tmpl/fd_slist.c"
229 :
230 : #define DLIST_IDX_T uint
231 6 : #define DLIST_PREV edges[0]
232 6 : #define DLIST_NEXT edges[1]
233 : #define DLIST_NAME zombie_dlist
234 : #define DLIST_ELE_T fd_rdisp_txn_t
235 : #include "../../util/tmpl/fd_dlist.c"
236 :
237 :
238 : /* ACCT_INFO_FLAG: It's a bit unfortunate that we have to maintain these
239 : flags, but basically we need to be able to distinguish the case where
240 : there are only readers so that we don't increment in_degree when
241 : adding a new fd_rdisp_txn. If we have any writers, the only way to
242 : transition into a state where there are only readers is to complete
243 : the last writer. We know we are in this case when the completed
244 : node's child doesn't have a child, and the completed node's child is
245 : a reader, as indicated by the LAST_REF_WAS_WRITE bit.
246 : LAST_REF_WAS_WRITE also has the advantage of being easy to
247 : maintain. */
248 6068064 : #define ACCT_INFO_FLAG_LAST_REF_WAS_WRITE(lane) (((uchar)1)<<(2*(lane)))
249 5969337 : #define ACCT_INFO_FLAG_ANY_WRITERS( lane) (((uchar)2)<<(2*(lane)))
250 :
251 : /* acct_info_t is a node in a map_chain that contains the metadata for a
252 : single account address's conflict graph DAG. In particular, it
253 : contains the information needed to know where to insert a node that
254 : reads from or writes to the account. The objects of this type follow
255 : this state machine:
256 :
257 : FREE -----> ACTIVE ----> CACHED
258 : ^ |
259 : |-------------
260 : When FREE, it is in free_acct_dlist only. When ACTIVE, it is in
261 : acct_map only. When CACHED, it is in both free_acct_dlist and
262 : cached_acct_map.
263 : */
264 : struct acct_info {
265 : /* key, next, and prev are the map_chain fields. Used in the ACTIVE
266 : and CACHED states. next and prev set to 0 when in the FREE state.
267 : Element 0 is a sentinel and isn't inserted to the free_acct_dlist,
268 : so this is unambiguous. */
269 : fd_acct_addr_t key;
270 : uint next;
271 : uint prev;
272 :
273 : union {
274 : struct {
275 : /* This is effectively a pointer to the last node in the DAG for
276 : this pubkey, one for each staging lane.
277 : EDGE_IS_LAST(FOLLOW_EDGE(base, last_reference[i])) is non-zero.
278 : */
279 : edge_t last_reference[4];
280 :
281 : }; /* When in the ACTIVE state */
282 : struct {
283 : uint free_ll_next;
284 : uint free_ll_prev;
285 : /* 8 bytes of padding here */
286 : }; /* When not in the ACTIVE state, used by the free_acct_dlist */
287 : };
288 : /* flags: a combination of ACCT_INFO_FLAG_* bitfields above. Used
289 : when ACTIVE. */
290 : uint flags:8;
291 :
292 :
293 : /* We want to dispatch the READY transactions in an order that
294 : maximizes parallelism, but we also want to be able to start
295 : dispatching transactions decently well before we have the full
296 : conflict graph. We can do that because we know that contentious
297 : accounts tend to stay contentious and uncontentious accounts tend
298 : to stay uncontentious.
299 :
300 : To accomplish this, we maintain a special EMA. Let x_i be 1 if
301 : transaction i references it and 0 if not. If we squint and assume
302 : the transactions are independent and all drawn from some
303 : distribution (which is not true, but that's why we squint), an EMA
304 : of x_i estimates the probability that the next transaction
305 : references this account. How we use this value is detailed later.
306 :
307 : We can't update this value for every pubkey for each transaction,
308 : so we maintain it in a lazy way, by applying updates only when we
309 : need to read the value, which also happens to be every time we want
310 : to add a 1 value to the EMA. We then just need to maintain the
311 : last index i at which x_i was updated and the current value. We
312 : only have 24 bits for the index, which means that we can't maintain
313 : it in a fork-aware way, which doesn't seem like a problem. Also,
314 : it's possible it can overflow, and that can result in an incorrect
315 : value, but that means the account is only referenced ~ 1/2^24
316 : transactions, which is also fine.
317 :
318 : last_ref is in the domain of global_inserted_txn_cnt. ema_refs is
319 : in [0, 1]. Both fields are used in ACTIVE and CACHED. Maintaining
320 : these fields is actually the main reason CACHED exists. */
321 : uint last_ref:24;
322 : float ema_refs;
323 : };
324 : typedef struct acct_info acct_info_t;
325 :
326 : FD_STATIC_ASSERT( sizeof(acct_info_t)==64UL, acct_info_t );
327 :
328 : /* For the acct_map and the free_acct_map */
329 : #define MAP_NAME acct_map
330 1729506 : #define MAP_ELE_T acct_info_t
331 119902212 : #define MAP_IDX_T uint
332 : #define MAP_KEY_T fd_acct_addr_t
333 226145622 : #define MAP_KEY_HASH(k,s) fd_hash( (s), (k)->b, 32UL )
334 113022618 : #define MAP_KEY_EQ(k0,k1) (!memcmp( (k0)->b, (k1)->b, 32UL ))
335 : #define MAP_OPTIMIZE_RANDOM_ACCESS_REMOVAL 1
336 : #include "../../util/tmpl/fd_map_chain.c"
337 :
338 :
339 : #define DLIST_IDX_T uint
340 28268700 : #define DLIST_PREV free_ll_prev
341 28268700 : #define DLIST_NEXT free_ll_next
342 : #define DLIST_NAME free_dlist
343 : #define DLIST_ELE_T acct_info_t
344 : #include "../../util/tmpl/fd_dlist.c"
345 :
346 :
347 : struct pending_prq_ele {
348 : /* lower score means should be scheduled sooner. Integer part has a
349 : special meaning as explained above. */
350 : float score;
351 :
352 : uint linear_block_number;
353 : uint txn_idx;
354 :
355 : /* It seems like we should be able to get this whole struct in 8
356 : bytes, but we're a few bits over.
357 :
358 : 23 bits for txn_idx
359 : 16 bit for linear_block_number
360 : Then the integer part of the score needs to be at least 18 or 19
361 : bits, but we can't use a custom floating point number. */
362 : };
363 :
364 : typedef struct pending_prq_ele pending_prq_ele_t;
365 :
366 :
367 : #define PRQ_NAME pending_prq
368 13535220 : #define PRQ_T pending_prq_ele_t
369 : #define PRQ_EXPLICIT_TIMEOUT 0
370 : /* returns 1 if x is strictly after y */
371 12069504 : #define PRQ_AFTER(x,y) (__extension__( { \
372 12069504 : int cmp0 = (int)(x).linear_block_number - (int)(y).linear_block_number; \
373 12069504 : fd_int_if( cmp0!=0, cmp0>0, (x).score>(y).score ); \
374 12069504 : }))
375 : #include "../../util/tmpl/fd_prq.c"
376 :
377 : /* fd_rdisp_blockinfo_t maintains a little metadata about transactions for each
378 : slot. */
379 : struct fd_rdisp_blockinfo {
380 : FD_RDISP_BLOCK_TAG_T block;
381 : uint linear_block_number;
382 :
383 : uint insert_ready:1;
384 : uint schedule_ready:1;
385 : uint staged:1;
386 : uint staging_lane:2; /* ignored if staged==0 */
387 : uint last_insert_was_serializing:1;
388 :
389 : uint inserted_cnt;
390 : uint dispatched_cnt;
391 : uint completed_cnt;
392 : uint last_serializing;
393 :
394 : uint map_chain_next;
395 : uint ll_next;
396 : unstaged_txn_ll_t ll[ 1 ]; /* used only when unstaged */
397 : zombie_dlist_t zombie_list[ 1 ];
398 : };
399 : typedef struct fd_rdisp_blockinfo fd_rdisp_blockinfo_t;
400 :
401 : #define POOL_NAME block_pool
402 63 : #define POOL_T fd_rdisp_blockinfo_t
403 : #define POOL_IDX_T uint
404 988128 : #define POOL_NEXT ll_next
405 : #define POOL_SENTINEL 1
406 : #include "../../util/tmpl/fd_pool.c"
407 :
408 : #define MAP_NAME block_map
409 : #define MAP_ELE_T fd_rdisp_blockinfo_t
410 : #define MAP_KEY_T FD_RDISP_BLOCK_TAG_T
411 395694 : #define MAP_KEY block
412 4048962 : #define MAP_NEXT map_chain_next
413 5308065 : #define MAP_IDX_T uint
414 : #include "../../util/tmpl/fd_map_chain.c"
415 :
416 : #define SLIST_NAME block_slist
417 : #define SLIST_ELE_T fd_rdisp_blockinfo_t
418 395679 : #define SLIST_IDX_T uint
419 791361 : #define SLIST_NEXT ll_next
420 : #include "../../util/tmpl/fd_slist.c"
421 :
422 : struct fd_rdisp_unstaged {
423 : FD_RDISP_BLOCK_TAG_T block;
424 :
425 : uint writable_cnt;
426 : uint readonly_cnt;
427 : fd_acct_addr_t keys[MAX_ACCT_PER_TXN];
428 : };
429 : typedef struct fd_rdisp_unstaged fd_rdisp_unstaged_t;
430 :
431 : typedef struct {
432 : pending_prq_ele_t * pending;
433 : ulong linear_block_number;
434 : block_slist_t block_ll[1];
435 : ulong inserted_cnt;
436 : ulong dispatched_cnt;
437 : ulong completed_cnt;
438 : } per_lane_info_t;
439 :
440 : /* We maintain two maps from pubkeys to acct_info_t. The first one is
441 : the main acct_map, just called acct_map. All pubkeys in this map
442 : have >0 references in the one of the staging lane DAGs. When an
443 : account goes to 0 references, it gets removed from main map_chain and
444 : moved to free map_chain, called free_acct_map. The free_acct_map
445 : exists to maintain the reference count EMA information lazily.
446 : Unless we need the acct_info_t for something in the DAG, we might as
447 : well maintain the EMA info.
448 :
449 : When we start up, all the acct_info_t structs are in the
450 : free_acct_dlist. Whenever something is added to the free_acct_map,
451 : it's also added to the tail of the free_acct_dlist. When we need an
452 : acct_info_t that's not in the free_acct_map, we pop the head of the
453 : free_acct_dlist. In general, the free_acct_dlist contains everything
454 : in the free_acct_map, potentially plus some elements that have never
455 : been used; all acct_info_t objects are in exactly one of the main
456 : acct_map and the free_acct_dlist (not free_acct_map). See
457 : acct_info_t for more information about this. */
458 :
459 : struct fd_rdisp {
460 : ulong depth;
461 : ulong block_depth;
462 :
463 : ulong global_insert_cnt;
464 : ulong unstaged_lblk_num;
465 :
466 : /* pool: an fd_pool, indexed [0, depth+1), with 0 being a sentinel */
467 : fd_rdisp_txn_t * pool;
468 : fd_rdisp_unstaged_t * unstaged; /* parallel to pool with additional info */
469 :
470 : block_map_t * blockmap; /* map chain */
471 : fd_rdisp_blockinfo_t * block_pool;
472 :
473 : int free_lanes; /* a bitmask */
474 : per_lane_info_t lanes[4];
475 :
476 : acct_map_t * acct_map;
477 : acct_map_t * free_acct_map;
478 : /* acct_pool is not an fd_pool, but is just a flat array, since we
479 : don't need to acquire and release from it because of the dlist. */
480 : acct_info_t * acct_pool;
481 : free_dlist_t free_acct_dlist[1];
482 : };
483 :
484 : typedef struct fd_rdisp fd_rdisp_t;
485 :
486 :
487 : #define ACCT_ITER_TO_PTR( iter ) (__extension__( { \
488 : ulong __idx = fd_txn_acct_iter_idx( iter ); \
489 : fd_ptr_if( __idx<fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_IMM ), accts, alt_adj )+__idx; \
490 : }))
491 :
492 :
493 507 : ulong fd_rdisp_align( void ) { return 128UL; }
494 :
495 : ulong
496 : fd_rdisp_footprint( ulong depth,
497 45 : ulong block_depth ) {
498 45 : if( FD_UNLIKELY( (depth>FD_RDISP_MAX_DEPTH) | (depth<2UL) |
499 45 : (block_depth>FD_RDISP_MAX_BLOCK_DEPTH) | (block_depth<4UL) ) ) return 0UL;
500 :
501 45 : ulong chain_cnt = block_map_chain_cnt_est( block_depth );
502 45 : ulong acct_depth = depth*MAX_ACCT_PER_TXN;
503 45 : ulong acct_chain_cnt = acct_map_chain_cnt_est( acct_depth );
504 :
505 45 : ulong l = FD_LAYOUT_INIT;
506 45 : l = FD_LAYOUT_APPEND( l, fd_rdisp_align(), sizeof(fd_rdisp_t) );
507 45 : l = FD_LAYOUT_APPEND( l, pool_align(), pool_footprint ( depth+1UL ) ); /* pool */
508 45 : l = FD_LAYOUT_APPEND( l, alignof(fd_rdisp_unstaged_t), sizeof(fd_rdisp_unstaged_t)*( depth+1UL ) ); /* unstaged */
509 45 : l = FD_LAYOUT_APPEND( l, block_map_align(), block_map_footprint ( chain_cnt ) ); /* blockmap */
510 45 : l = FD_LAYOUT_APPEND( l, block_pool_align(), block_pool_footprint ( block_depth+1UL ) ); /* block_pool */
511 45 : l = FD_LAYOUT_APPEND( l, pending_prq_align(), 4UL*pending_prq_footprint ( depth ) ); /* pending */
512 45 : l = FD_LAYOUT_APPEND( l, acct_map_align(), acct_map_footprint ( acct_chain_cnt ) ); /* acct_map */
513 45 : l = FD_LAYOUT_APPEND( l, acct_map_align(), acct_map_footprint ( acct_chain_cnt ) ); /* free_acct_map */
514 45 : l = FD_LAYOUT_APPEND( l, alignof(acct_info_t), (acct_depth+1UL)*sizeof(acct_info_t) ); /* acct_pool */
515 45 : return FD_LAYOUT_FINI( l, fd_rdisp_align() );
516 45 : }
517 :
518 : void *
519 : fd_rdisp_new( void * mem,
520 : ulong depth,
521 : ulong block_depth,
522 21 : ulong seed ) {
523 21 : if( FD_UNLIKELY( (depth>FD_RDISP_MAX_DEPTH) | (depth<2UL) |
524 21 : (block_depth>FD_RDISP_MAX_BLOCK_DEPTH) | (block_depth<4UL) ) ) return NULL;
525 :
526 21 : ulong chain_cnt = block_map_chain_cnt_est( block_depth );
527 21 : ulong acct_depth = depth*MAX_ACCT_PER_TXN;
528 21 : ulong acct_chain_cnt = acct_map_chain_cnt_est( acct_depth );
529 :
530 21 : FD_SCRATCH_ALLOC_INIT( l, mem );
531 21 : fd_rdisp_t * disp = FD_SCRATCH_ALLOC_APPEND( l, fd_rdisp_align(), sizeof(fd_rdisp_t) );
532 21 : void * _pool = FD_SCRATCH_ALLOC_APPEND( l, pool_align(), pool_footprint ( depth+1UL ) );
533 21 : void * _unstaged = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_rdisp_unstaged_t), sizeof(fd_rdisp_unstaged_t)*( depth+1UL ) );
534 21 : void * _bmap = FD_SCRATCH_ALLOC_APPEND( l, block_map_align(), block_map_footprint ( chain_cnt ) );
535 21 : void * _bpool = FD_SCRATCH_ALLOC_APPEND( l, block_pool_align(), block_pool_footprint ( block_depth+1UL ) );
536 21 : uchar * _pending = FD_SCRATCH_ALLOC_APPEND( l, pending_prq_align(), 4UL*pending_prq_footprint ( depth ) );
537 21 : void * _acct_map = FD_SCRATCH_ALLOC_APPEND( l, acct_map_align(), acct_map_footprint ( acct_chain_cnt ) );
538 21 : void * _freea_map = FD_SCRATCH_ALLOC_APPEND( l, acct_map_align(), acct_map_footprint ( acct_chain_cnt ) );
539 21 : acct_info_t * apool = FD_SCRATCH_ALLOC_APPEND( l, alignof(acct_info_t), (acct_depth+1UL)*sizeof(acct_info_t) );
540 21 : FD_SCRATCH_ALLOC_FINI( l, fd_rdisp_align() );
541 :
542 21 : disp->depth = depth;
543 21 : disp->block_depth = block_depth;
544 21 : disp->global_insert_cnt = 0UL;
545 21 : disp->unstaged_lblk_num = 0UL;
546 :
547 21 : fd_rdisp_txn_t * temp_pool_join = pool_join( pool_new( _pool, depth+1UL ) );
548 203991 : for( ulong i=0UL; i<depth+1UL; i++ ) temp_pool_join[ i ].in_degree = IN_DEGREE_FREE;
549 21 : pool_leave( temp_pool_join );
550 :
551 21 : memset( _unstaged, '\0', sizeof(fd_rdisp_unstaged_t)*(depth+1UL) );
552 :
553 21 : block_map_new ( _bmap, chain_cnt, seed );
554 21 : block_pool_new( _bpool, block_depth+1UL );
555 :
556 21 : fd_rdisp_blockinfo_t * bpool_temp_join = block_pool_join( _bpool );
557 196767 : for( ulong i=0UL; i<block_depth+1UL; i++ ) zombie_dlist_new( bpool_temp_join[ i ].zombie_list );
558 21 : block_pool_leave( bpool_temp_join );
559 :
560 21 : disp->free_lanes = 0xF;
561 105 : for( ulong i=0UL; i<4UL; i++ ) {
562 84 : pending_prq_new( _pending, depth );
563 84 : _pending += pending_prq_footprint( depth );
564 :
565 84 : disp->lanes[i].linear_block_number = 0UL;
566 84 : disp->lanes[i].inserted_cnt = 0U;
567 84 : disp->lanes[i].dispatched_cnt = 0U;
568 84 : disp->lanes[i].completed_cnt = 0U;
569 84 : }
570 :
571 21 : acct_map_new( _acct_map, acct_chain_cnt, fd_ulong_hash( seed+1UL ) );
572 21 : acct_map_new( _freea_map, acct_chain_cnt, fd_ulong_hash( seed+2UL ) );
573 :
574 21 : free_dlist_t * temp_join = free_dlist_join( free_dlist_new( disp->free_acct_dlist ) );
575 26105493 : for( ulong i=1UL; i<acct_depth+1UL; i++ ) {
576 26105472 : apool[ i ].next = apool[ i ].prev = 0U;
577 26105472 : free_dlist_idx_push_tail( disp->free_acct_dlist, i, apool );
578 26105472 : }
579 21 : free_dlist_leave( temp_join );
580 :
581 21 : return disp;
582 21 : }
583 :
584 : fd_rdisp_t *
585 21 : fd_rdisp_join( void * mem ) {
586 21 : fd_rdisp_t * disp = (fd_rdisp_t *)mem;
587 :
588 21 : ulong depth = disp->depth;
589 21 : ulong block_depth = disp->block_depth;
590 21 : ulong chain_cnt = block_map_chain_cnt_est( block_depth );
591 21 : ulong acct_depth = depth*MAX_ACCT_PER_TXN;
592 21 : ulong acct_chain_cnt = acct_map_chain_cnt_est( acct_depth );
593 :
594 21 : FD_SCRATCH_ALLOC_INIT( l, mem );
595 21 : /* */ FD_SCRATCH_ALLOC_APPEND( l, fd_rdisp_align(), sizeof(fd_rdisp_t) );
596 21 : void * _pool = FD_SCRATCH_ALLOC_APPEND( l, pool_align(), pool_footprint ( depth+1UL ) );
597 21 : void * _unstaged = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_rdisp_unstaged_t), sizeof(fd_rdisp_unstaged_t)*( depth+1UL ) );
598 21 : void * _bmap = FD_SCRATCH_ALLOC_APPEND( l, block_map_align(), block_map_footprint ( chain_cnt ) );
599 21 : void * _bpool = FD_SCRATCH_ALLOC_APPEND( l, block_pool_align(), block_pool_footprint ( block_depth+1UL ) );
600 21 : uchar * _pending = FD_SCRATCH_ALLOC_APPEND( l, pending_prq_align(), 4UL*pending_prq_footprint ( depth ) );
601 21 : void * _acct_map = FD_SCRATCH_ALLOC_APPEND( l, acct_map_align(), acct_map_footprint ( acct_chain_cnt ) );
602 21 : void * _freea_map = FD_SCRATCH_ALLOC_APPEND( l, acct_map_align(), acct_map_footprint ( acct_chain_cnt ) );
603 21 : acct_info_t * apool = FD_SCRATCH_ALLOC_APPEND( l, alignof(acct_info_t), (acct_depth+1UL)*sizeof(acct_info_t) );
604 21 : FD_SCRATCH_ALLOC_FINI( l, fd_rdisp_align() );
605 :
606 21 : disp->pool = pool_join( _pool );
607 21 : disp->unstaged = (fd_rdisp_unstaged_t *)_unstaged;
608 21 : disp->blockmap = block_map_join( _bmap );
609 21 : disp->block_pool = block_pool_join( _bpool );
610 :
611 196767 : for( ulong i=0UL; i<block_depth+1UL; i++ ) zombie_dlist_join( disp->block_pool[ i ].zombie_list );
612 :
613 105 : for( ulong i=0UL; i<4UL; i++ ) {
614 84 : disp->lanes[i].pending = pending_prq_join( _pending );
615 84 : _pending += pending_prq_footprint( depth );
616 84 : }
617 :
618 21 : disp->acct_map = acct_map_join( _acct_map );
619 21 : disp->free_acct_map = acct_map_join( _freea_map );
620 21 : disp->acct_pool = apool;
621 21 : free_dlist_join( disp->free_acct_dlist );
622 :
623 21 : return disp;
624 21 : }
625 :
626 : static inline void
627 : free_lane( fd_rdisp_t * disp,
628 2460 : ulong staging_lane ) {
629 2460 : disp->free_lanes |= 1<<staging_lane;
630 2460 : per_lane_info_t * l = disp->lanes+staging_lane;
631 2460 : FD_TEST( pending_prq_cnt( l->pending )==0UL );
632 2460 : l->linear_block_number = 0UL;
633 2460 : l->inserted_cnt = 0UL;
634 2460 : l->dispatched_cnt = 0UL;
635 2460 : l->completed_cnt = 0UL;
636 2460 : block_slist_delete( block_slist_leave( l->block_ll ) );
637 2460 : }
638 :
639 : static inline void
640 : alloc_lane( fd_rdisp_t * disp,
641 2463 : ulong staging_lane ) {
642 2463 : disp->free_lanes &= ~(1<<staging_lane);
643 2463 : block_slist_join( block_slist_new( disp->lanes[staging_lane].block_ll ) );
644 2463 : }
645 :
646 : ulong
647 : fd_rdisp_suggest_staging_lane( fd_rdisp_t const * disp,
648 : FD_RDISP_BLOCK_TAG_T parent_block,
649 0 : int duplicate ) {
650 :
651 : /* 1. If it's a duplicate, suggest FD_RDISP_UNSTAGED */
652 0 : if( FD_UNLIKELY( duplicate ) ) return FD_RDISP_UNSTAGED;
653 :
654 : /* 2. If parent is the last block in any existing staging lane, suggest
655 : that lane */
656 0 : fd_rdisp_blockinfo_t const * block_pool = disp->block_pool;
657 0 : fd_rdisp_blockinfo_t const * block = block_map_ele_query_const( disp->blockmap, &parent_block, NULL, block_pool );
658 0 : if( FD_LIKELY( block && block->insert_ready && block->staged ) ) return block->staging_lane;
659 :
660 : /* 3. If there is at least one free lane, suggest a free lane */
661 0 : if( FD_LIKELY( disp->free_lanes!=0 ) ) return (ulong)fd_uint_find_lsb( (uint)disp->free_lanes );
662 :
663 : /* 4. Else, suggest FD_RDISP_UNSTAGED */
664 0 : return FD_RDISP_UNSTAGED;
665 0 : }
666 :
667 : int
668 : fd_rdisp_add_block( fd_rdisp_t * disp,
669 : FD_RDISP_BLOCK_TAG_T new_block,
670 395694 : ulong staging_lane ) {
671 395694 : fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
672 :
673 395694 : if( FD_UNLIKELY( !block_pool_free( block_pool ) ) ) return -1;
674 395694 : if( FD_UNLIKELY( ULONG_MAX!=block_map_idx_query_const( disp->blockmap, &new_block, ULONG_MAX, block_pool ) ) ) return -1;
675 395688 : fd_rdisp_blockinfo_t * block = block_pool_ele_acquire( block_pool );
676 395688 : block->block = new_block;
677 395688 : block_map_ele_insert( disp->blockmap, block, block_pool );
678 :
679 395688 : block->insert_ready = 1;
680 395688 : block->staged = staging_lane!=FD_RDISP_UNSTAGED;
681 395688 : block->staging_lane = (uint)(staging_lane & 0x3UL);
682 395688 : block->last_insert_was_serializing = 0U;
683 :
684 395688 : block->inserted_cnt = 0U;
685 395688 : block->dispatched_cnt = 0U;
686 395688 : block->completed_cnt = 0U;
687 395688 : block->last_serializing = 0U;
688 :
689 395688 : if( FD_UNLIKELY( staging_lane==FD_RDISP_UNSTAGED ) ) {
690 18 : block->schedule_ready = 1;
691 18 : block->linear_block_number = (uint)disp->unstaged_lblk_num++;
692 18 : unstaged_txn_ll_join( unstaged_txn_ll_new( block->ll ) );
693 395670 : } else {
694 395670 : block->linear_block_number = (uint)disp->lanes[staging_lane].linear_block_number++;
695 395670 : block->schedule_ready = (uint)(1 & (disp->free_lanes >> staging_lane));
696 :
697 395670 : if( FD_LIKELY( disp->free_lanes & (1<<staging_lane) ) ) alloc_lane( disp, staging_lane );
698 :
699 395670 : block_slist_t * sl = disp->lanes[staging_lane].block_ll;
700 395670 : if( FD_LIKELY( !block_slist_is_empty( sl, block_pool ) ) ) block_slist_ele_peek_tail( sl, block_pool )->insert_ready = 0;
701 395670 : block_slist_ele_push_tail( sl, block, block_pool );
702 395670 : }
703 395688 : return 0;
704 395694 : }
705 :
706 :
707 :
708 : int
709 : fd_rdisp_remove_block( fd_rdisp_t * disp,
710 395670 : FD_RDISP_BLOCK_TAG_T block_tag ) {
711 395670 : fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
712 :
713 395670 : fd_rdisp_blockinfo_t * block = block_map_ele_query( disp->blockmap, &block_tag, NULL, block_pool );
714 395670 : if( FD_UNLIKELY( block==NULL ) ) return -1;
715 :
716 395661 : FD_TEST( block->schedule_ready );
717 395661 : FD_TEST( block->completed_cnt==block->inserted_cnt );
718 395661 : FD_TEST( zombie_dlist_is_empty( block->zombie_list, disp->pool ) );
719 :
720 395661 : if( FD_LIKELY( block->staged ) ) {
721 395652 : ulong staging_lane = (ulong)block->staging_lane;
722 395652 : block_slist_t * sl = disp->lanes[staging_lane].block_ll;
723 :
724 395652 : FD_TEST( block==block_slist_ele_peek_head( sl, block_pool ) );
725 395652 : block_slist_idx_pop_head( sl, block_pool );
726 395652 : if( FD_LIKELY( !block_slist_is_empty( sl, block_pool ) ) ) block_slist_ele_peek_head( sl, block_pool )->schedule_ready = 1;
727 2436 : else free_lane( disp, staging_lane );
728 395652 : } else {
729 9 : unstaged_txn_ll_delete( unstaged_txn_ll_leave( block->ll ) );
730 9 : }
731 395661 : block_pool_idx_release( block_pool, block_map_idx_remove( disp->blockmap, &block_tag, ULONG_MAX, block_pool ) );
732 :
733 395661 : return 0;
734 395661 : }
735 :
736 :
737 : int
738 : fd_rdisp_abandon_block( fd_rdisp_t * disp,
739 15 : FD_RDISP_BLOCK_TAG_T block_tag ) {
740 15 : fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
741 :
742 15 : fd_rdisp_blockinfo_t * block = block_map_ele_query( disp->blockmap, &block_tag, NULL, disp->block_pool );
743 15 : if( FD_UNLIKELY( block==NULL ) ) return -1;
744 :
745 12 : FD_TEST( block->schedule_ready );
746 12 : FD_TEST( block->dispatched_cnt==block->completed_cnt ); /* TODO: remove this when it can call complete properly */
747 21 : while( block->completed_cnt<block->inserted_cnt ) {
748 : /* because there is nothing DISPATCHED, there has to be something
749 : READY */
750 9 : ulong txn = fd_rdisp_get_next_ready( disp, block_tag );
751 9 : FD_TEST( txn );
752 9 : fd_rdisp_complete_txn( disp, txn, 1 );
753 9 : }
754 12 : while( !zombie_dlist_is_empty( block->zombie_list, disp->pool ) ) {
755 0 : fd_rdisp_complete_txn( disp, zombie_dlist_idx_pop_head( block->zombie_list, disp->pool ), 1 );
756 0 : }
757 :
758 12 : if( FD_LIKELY( block->staged ) ) {
759 12 : ulong staging_lane = (ulong)block->staging_lane;
760 12 : block_slist_t * sl = disp->lanes[staging_lane].block_ll;
761 :
762 12 : FD_TEST( block==block_slist_ele_peek_head( sl, block_pool ) );
763 12 : block_slist_idx_pop_head( sl, block_pool );
764 12 : if( FD_LIKELY( !block_slist_is_empty( sl, block_pool ) ) ) block_slist_ele_peek_head( sl, block_pool )->schedule_ready = 1;
765 9 : else free_lane( disp, staging_lane );
766 12 : } else {
767 0 : unstaged_txn_ll_delete( unstaged_txn_ll_leave( block->ll ) );
768 0 : }
769 12 : block_pool_idx_release( disp->block_pool, block_map_idx_remove( disp->blockmap, &block_tag, ULONG_MAX, disp->block_pool ) );
770 :
771 12 : return 0;
772 12 : }
773 :
774 : static void
775 : add_edges( fd_rdisp_t * disp,
776 : fd_rdisp_txn_t * ele,
777 : fd_acct_addr_t const * addr,
778 : ulong addr_cnt,
779 : uint staging_lane,
780 : int writable,
781 : int update_score );
782 : /* updates in_degree, edge_cnt_etc */
783 :
784 : int
785 : fd_rdisp_promote_block( fd_rdisp_t * disp,
786 : FD_RDISP_BLOCK_TAG_T block_tag,
787 12 : ulong staging_lane ) {
788 12 : fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
789 12 : per_lane_info_t * lane = disp->lanes + staging_lane;
790 12 : block_slist_t * sl = lane->block_ll;
791 :
792 12 : fd_rdisp_blockinfo_t * block = block_map_ele_query( disp->blockmap, &block_tag, NULL, block_pool );
793 12 : if( FD_UNLIKELY( block==NULL ) ) return -1;
794 12 : if( FD_UNLIKELY( block->staged ) ) return -1;
795 :
796 12 : block->staged = 1;
797 12 : block->staging_lane = (uint)(staging_lane & 0x3);
798 12 : block->insert_ready = 1;
799 12 : block->schedule_ready = (uint)(1 & (disp->free_lanes >> staging_lane));
800 :
801 12 : if( FD_LIKELY( disp->free_lanes & (1<<staging_lane) ) ) alloc_lane( disp, staging_lane );
802 :
803 12 : if( FD_LIKELY( !block_slist_is_empty( sl, block_pool ) ) ) block_slist_ele_peek_tail( sl, block_pool )->insert_ready = 0;
804 12 : block_slist_ele_push_tail( sl, block, block_pool );
805 12 : uint linear_block_number = (uint)lane->linear_block_number++;
806 12 : block->linear_block_number = linear_block_number;
807 :
808 12 : unstaged_txn_ll_iter_t next;
809 12 : for( unstaged_txn_ll_iter_t iter = unstaged_txn_ll_iter_init( block->ll, disp->pool );
810 330 : !unstaged_txn_ll_iter_done( iter, block->ll, disp->pool );
811 318 : iter = next ) {
812 318 : next = unstaged_txn_ll_iter_next( iter, block->ll, disp->pool );
813 :
814 318 : fd_rdisp_txn_t * ele = unstaged_txn_ll_iter_ele( iter, block->ll, disp->pool );
815 318 : fd_rdisp_unstaged_t * uns = disp->unstaged + unstaged_txn_ll_iter_idx( iter, block->ll, disp->pool );
816 318 : FD_TEST( ele->in_degree==IN_DEGREE_UNSTAGED );
817 :
818 318 : ele->in_degree = 0U;
819 318 : ele->edge_cnt_etc = (uint)(-1); /* set w_cnt_1 to -1 */
820 :
821 318 : add_edges( disp, ele, uns->keys, uns->writable_cnt, (uint)staging_lane, 1, 0 );
822 318 : add_edges( disp, ele, uns->keys+uns->writable_cnt, uns->readonly_cnt, (uint)staging_lane, 0, 0 );
823 :
824 318 : ele->edge_cnt_etc &= 0x3FFFU;
825 318 : ele->edge_cnt_etc |= (uint)staging_lane<<14;
826 318 : ele->edge_cnt_etc |= linear_block_number<<16;
827 :
828 318 : if( FD_UNLIKELY( ele->in_degree==0U ) ) {
829 6 : pending_prq_ele_t temp[1] = {{ .score = ele->score, .linear_block_number = linear_block_number, .txn_idx = (uint)(ele-disp->pool)}};
830 6 : pending_prq_insert( lane->pending, temp );
831 6 : }
832 318 : }
833 12 : unstaged_txn_ll_delete( unstaged_txn_ll_leave( block->ll ) );
834 :
835 12 : lane->inserted_cnt += block->inserted_cnt;
836 12 : lane->dispatched_cnt += block->dispatched_cnt;
837 12 : lane->completed_cnt += block->completed_cnt;
838 :
839 12 : return 0;
840 12 : }
841 :
842 : int
843 : fd_rdisp_demote_block( fd_rdisp_t * disp,
844 15 : FD_RDISP_BLOCK_TAG_T block_tag ) {
845 15 : fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
846 :
847 15 : fd_rdisp_blockinfo_t * block = block_map_ele_query( disp->blockmap, &block_tag, NULL, block_pool );
848 15 : if( FD_UNLIKELY( block==NULL ) ) return -1;
849 15 : if( FD_UNLIKELY( !block->staged ) ) return -1;
850 15 : if( FD_UNLIKELY( !block->schedule_ready ) ) return -1;
851 15 : if( FD_UNLIKELY( block->completed_cnt!=block->inserted_cnt ) ) FD_LOG_ERR(( "demote_block called with non-empty block" ));
852 15 : ulong staging_lane = block->staging_lane;
853 15 : block->staged = 0;
854 :
855 15 : per_lane_info_t * lane = disp->lanes + staging_lane;
856 15 : block_slist_t * sl = lane->block_ll;
857 :
858 15 : lane->inserted_cnt -= block->inserted_cnt;
859 15 : lane->dispatched_cnt -= block->dispatched_cnt;
860 15 : lane->completed_cnt -= block->completed_cnt;
861 :
862 15 : block->linear_block_number = (uint)disp->unstaged_lblk_num++;
863 :
864 : /* staged and schedule_ready means it must be the head of the staging lane */
865 15 : FD_TEST( block_slist_ele_peek_head( sl, block_pool )==block );
866 15 : block_slist_idx_pop_head( sl, block_pool );
867 :
868 15 : unstaged_txn_ll_join( unstaged_txn_ll_new( block->ll ) );
869 :
870 15 : if( FD_LIKELY( !block_slist_is_empty( sl, block_pool ) ) ) block_slist_ele_peek_head( sl, block_pool )->schedule_ready = 1;
871 15 : else free_lane( disp, staging_lane );
872 15 : return 0;
873 15 : }
874 :
875 : int
876 : fd_rdisp_rekey_block( fd_rdisp_t * disp,
877 : FD_RDISP_BLOCK_TAG_T new_tag,
878 6 : FD_RDISP_BLOCK_TAG_T old_tag ) {
879 6 : fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
880 :
881 6 : if( FD_UNLIKELY( NULL!= block_map_ele_query_const( disp->blockmap, &new_tag, NULL, block_pool ) ) ) return -1;
882 6 : fd_rdisp_blockinfo_t * block = block_map_ele_query ( disp->blockmap, &old_tag, NULL, block_pool );
883 6 : if( FD_UNLIKELY( NULL== block ) ) return -1;
884 :
885 6 : block_map_idx_remove( disp->blockmap, &old_tag, ULONG_MAX, block_pool );
886 6 : block->block = new_tag;
887 6 : block_map_ele_insert( disp->blockmap, block, block_pool );
888 6 : return 0;
889 6 : }
890 :
891 :
892 : /* "Registers" a reference to the account in info at transaction
893 : global_insert_cnt. Returns the value of the EMA, which is an
894 : estimate of the probability that the next transaction also references
895 : the account. This value does not matter for correctness, which is
896 : why floating point arithmetic is okay. */
897 : static inline float
898 : update_ema( acct_info_t * info,
899 2960016 : ulong global_insert_cnt ) {
900 : #if FD_RDISP_DISABLE_EMA
901 : (void)info;
902 : (void)global_insert_cnt;
903 : return 0.0f;
904 : #else
905 5920032 : #define ALPHA 0.005f
906 : /* The normal EMA update equation is
907 : e_i = (alpha) * x_i + (1-alpha)*e_{i-1},
908 : where alpha is a constant in (0,1). Let L be the last reference of
909 : the account, and G be the current global_insert_cnt value. We know
910 : that e_L = ema_refs, and that for i in [L+1, G), x_i = 0.
911 : That means
912 : e_{G-1} = (1-alpha)^(G-L-1) * ema_refs
913 : e_G = alpha + (1-alpha)^(G-L) * ema_refs
914 : */
915 : /* last_ref only captures the low 24 bits, so we guess its the highest
916 : value for them that would still make it less than
917 : global_insert_cnt. Turns out, we can calculate that with just an
918 : AND. We need to make sure that in the rare case it is actually
919 : 2^24 away, we don't use a delta of 0. We also want to make sure
920 : that even with inexact float math, we never get to 1.0, so we bias
921 : the exponent up slightly. */
922 2960016 : ulong last_ref = (ulong)info->last_ref;
923 2960016 : ulong delta = fd_ulong_max( (global_insert_cnt - last_ref) & 0xFFFFFFUL, 1UL );
924 2960016 : float ema_refs = ALPHA + powf( 1.0f-ALPHA, 0.05f+(float)delta ) * info->ema_refs;
925 :
926 2960016 : info->ema_refs = ema_refs;
927 2960016 : info->last_ref = (uint)(global_insert_cnt & 0xFFFFFFUL);
928 2960016 : #undef ALPHA
929 :
930 2960016 : return ema_refs;
931 2960016 : #endif
932 2960016 : }
933 :
934 : static void
935 : add_edges( fd_rdisp_t * disp,
936 : fd_rdisp_txn_t * ele,
937 : fd_acct_addr_t const * addrs,
938 : ulong addr_cnt,
939 : uint lane,
940 : int writable,
941 3315708 : int update_score ) {
942 : /* When we first start, the low 14 bits are 0x3FFF, so that gives us
943 : w_cnt==0, r_cnt==0, as desired. Then otherwise, the low 7 bits are
944 : w_cnt_1, so adding 1 gives w_cnt. If we've actually added 128
945 : writable accounts, then r_cnt will be incorrect, but then we
946 : actually can't add any more readonly accounts, so the function call
947 : will be a no-op. */
948 :
949 3315708 : ulong w_cnt = (ele->edge_cnt_etc + 1U) & 0x7FU;
950 3315708 : ulong r_cnt = ((ele->edge_cnt_etc + 1U)>>7) & 0x7FU;
951 3315708 : ulong acct_idx = w_cnt + r_cnt;
952 3315708 : ulong edge_idx = w_cnt + 3UL*r_cnt;
953 :
954 6237978 : for( ulong i=0UL; i<addr_cnt; i++ ) {
955 2922270 : fd_acct_addr_t const * addr = addrs+i;
956 2922270 : acct_info_t * ai = NULL;
957 :
958 : /* Step 1: lookup the pubkey */
959 2922270 : ulong idx = acct_map_idx_query( disp->acct_map, addr, ULONG_MAX, disp->acct_pool );
960 2922270 : if( FD_UNLIKELY( idx==ULONG_MAX ) ) {
961 1081614 : idx = acct_map_idx_query( disp->free_acct_map, addr, ULONG_MAX, disp->acct_pool );
962 1081614 : if( FD_UNLIKELY( idx==ULONG_MAX ) ) {
963 : /* The acct pool is sized so that the list cannot be empty at this
964 : point. However, the element at the head might be the free
965 : map with a different pubkey. */
966 597945 : idx = free_dlist_idx_peek_head( disp->free_acct_dlist, disp->acct_pool );
967 597945 : ai = disp->acct_pool+idx;
968 :
969 : /* CACHED -> FREE transition */
970 597945 : if( FD_LIKELY( ai->next!=0U ) ) {
971 164223 : acct_map_idx_remove_fast( disp->free_acct_map, idx, disp->acct_pool );
972 164223 : }
973 :
974 : /* FREE -> ACTIVE transition */
975 597945 : ai->key = *addr;
976 597945 : ai->flags = 0U;
977 597945 : ai->last_ref = 0U;
978 597945 : ai->ema_refs = 0.0f;
979 597945 : } else {
980 : /* CACHED -> ACTIVE transition */
981 483669 : ai = disp->acct_pool+idx;
982 483669 : ai->flags = 0U; /* FIXME: unnecessary */
983 483669 : acct_map_idx_remove_fast( disp->free_acct_map, idx, disp->acct_pool );
984 483669 : }
985 : /* In either case, at this point, the element is not in any map
986 : but is in free_acct_dlist. It has the right key. last_ref, and
987 : ema_refs are valid. flags is 0. */
988 1081614 : free_dlist_idx_remove( disp->free_acct_dlist, idx, disp->acct_pool );
989 1081614 : memset( ai->last_reference, '\0', sizeof(ai->last_reference) );
990 1081614 : acct_map_idx_insert( disp->acct_map, idx, disp->acct_pool );
991 1081614 : }
992 2922270 : ai = disp->acct_pool+idx;
993 : /* At this point, in all cases, the acct_info is now in the ACTIVE
994 : state. It's in acct_map, not in free_acct_map, and not in
995 : free_acct_dlist. */
996 :
997 : /* Assume that transactions are drawn randomly from some large
998 : distribution of potential transactions. We want to estimate the
999 : expected value of the probability that this transaction conflicts
1000 : with the next transaction that is sampled. Two transactions
1001 : conflict if they conflict on any account, and in general, they
1002 : conflict if they both reference the same account, unless both
1003 : this transaction and the next one only read it. We don't have
1004 : read/write info, so the best guess we have is that the next
1005 : transaction does the same thing to the account that this one
1006 : does. That means we only really care about the accounts that
1007 : this transaction writes to. Label those a_1, a_2, ..., a_i, and
1008 : suppose the probability that the next transaction references a_i
1009 : is p_i (determined using the EMA). Now then, assuming accounts
1010 : are independent (which is false, but whatever), then the
1011 : probability that the next transaction does not conflict with this
1012 : account is:
1013 : (1-p_1) * (1-p_2) * (1 - p_i).
1014 :
1015 : Since for the treap, a lower value means we'll schedule it
1016 : earlier, we'll use the probability of non-conflict as the
1017 : fractional part of the score. */
1018 2922270 : if( FD_LIKELY( update_score ) ) {
1019 2883777 : float score_change = 1.0f - update_ema( ai, disp->global_insert_cnt );
1020 2883777 : ele->score *= fd_float_if( writable, score_change, 1.0f );
1021 2883777 : }
1022 :
1023 : /* Step 2: add edge. There are 4 cases depending on whether this is
1024 : a writer or not and whether the previous reference was a writer
1025 : or not. */
1026 2922270 : int _ignore;
1027 :
1028 2922270 : edge_t ref_to_pa = ai->last_reference[ lane ];
1029 2922270 : edge_t ref_to_me = (uint)((ulong)((ele - disp->pool)<<8) | acct_idx);
1030 2922270 : edge_t * pa = FOLLOW_EDGE( disp->pool, ai->last_reference[ lane ], _ignore );
1031 2922270 : edge_t * me = ele->edges + edge_idx;
1032 :
1033 : /* In the case that this is the first txn in the DAG, pa will point
1034 : to edges[0] of the sentinel element, pool[0]. We don't care
1035 : about what is stored there, so just set it up as a dummy element
1036 : to make the rest of the code work properly in this case too. If
1037 : this is a read, we want me->sibli to ref_to_me in case 4; if this
1038 : is a write, we want to set me->sibli to 0 in case 2, but we need
1039 : to make sure pb==pa. */
1040 2922270 : disp->pool->edges[0] = (1U<<31) | (uint)idx;
1041 2922270 : disp->pool->edges[1] = fd_uint_if( writable, 0U, ref_to_me );
1042 2922270 : disp->pool->edges[2] = fd_uint_if( writable, 0U, ref_to_me );
1043 :
1044 2922270 : int flags = ai->flags;
1045 :
1046 2922270 : if( writable ) { /* also should be known at compile time */
1047 1399407 : if( flags & ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ) ) { /* unclear prob */
1048 : /* Case 1: w-w. The parent is the special last pointer. Point
1049 : the parent to me, and set me to the last pointer. */
1050 271902 : *me = *pa;
1051 271902 : *pa = ref_to_me;
1052 1127505 : } else {
1053 : /* Case 2: r-w. This is the tricky case because there could be
1054 : multiple readers. We need to set all the last readers' child
1055 : pointers to me. */
1056 1127505 : *me = *pa;
1057 1127505 : *pa = ref_to_me;
1058 1127505 : edge_t * pb = FOLLOW_EDGE( disp->pool, pa[1], _ignore );
1059 : /* Intentionally skip the first in_degree increment, because it
1060 : will be done later */
1061 1210065 : while( pb!=pa ) {
1062 82560 : *pb = ref_to_me;
1063 82560 : ele->in_degree++;
1064 82560 : pb = FOLLOW_EDGE( disp->pool, pb[1], _ignore );
1065 82560 : }
1066 1127505 : flags |= ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ) | ACCT_INFO_FLAG_ANY_WRITERS( lane );
1067 1127505 : }
1068 1522863 : } else {
1069 1522863 : if( flags & ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ) ) { /* unclear prob */
1070 : /* Case 3: w-r. Similar to case 1, but need to initialize my
1071 : next and prev sibling pointers too. */
1072 98727 : *me = *pa;
1073 98727 : *pa = ref_to_me;
1074 98727 : me[1] = ref_to_me; /* next */
1075 98727 : me[2] = ref_to_me; /* prev */
1076 98727 : flags &= ~(int)ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ); /* clear bit */
1077 1424136 : } else {
1078 : /* Case 4: r-r. Add myself as a sibling instead of a child */
1079 1424136 : *me = *pa;
1080 1424136 : FOLLOW_EDGE( disp->pool, pa[1], _ignore )[2] = ref_to_me; /* prev->next->prev = me */
1081 1424136 : me[1] = pa[1]; /* me->next = prev->next */
1082 1424136 : me[2] = ref_to_pa; /* me->prev = prev */
1083 1424136 : pa[1] = ref_to_me; /* prev->next = me */
1084 1424136 : }
1085 1522863 : }
1086 :
1087 : /* Step 3: Update the final values */
1088 : /* In general, we want to increment the in_degree unless this
1089 : transaction is the first to reference this account. The
1090 : exception is that if this account has only readers, including
1091 : this transaction, we don't want to increment the in_degree
1092 : either. At this point, we can tell if that is the case based on
1093 : ANY_WRITERS. */
1094 2922270 : ele->in_degree += (uint)((ai->last_reference[ lane ]!=0U) & !!(flags & ACCT_INFO_FLAG_ANY_WRITERS( lane )));
1095 2922270 : ai->last_reference[ lane ] = ref_to_me;
1096 2922270 : ai->flags = (uchar)flags;
1097 2922270 : edge_idx += fd_uint_if( writable, 1U, 3U );
1098 2922270 : acct_idx++;
1099 2922270 : }
1100 : /* Can't overflow by construction, except for the intentional overflow on the first time. */
1101 3315708 : ele->edge_cnt_etc += (uint)addr_cnt<<fd_int_if( writable, 0, 7 );
1102 3315708 : }
1103 :
1104 :
1105 : /* should be called with all writable accounts first */
1106 : static void
1107 : add_unstaged_edges( fd_rdisp_t * disp,
1108 : fd_rdisp_txn_t * ele,
1109 : fd_rdisp_unstaged_t * unstaged,
1110 : fd_acct_addr_t const * addr,
1111 : ulong addr_cnt,
1112 : int writable,
1113 3816 : int update_score ) {
1114 3816 : ulong base_idx = unstaged->writable_cnt+unstaged->readonly_cnt;
1115 3816 : FD_TEST( !writable || unstaged->readonly_cnt==0U );
1116 80802 : for( ulong i=0UL; i<addr_cnt; i++ ) {
1117 76986 : unstaged->keys[ base_idx+i ] = addr[i];
1118 76986 : if( FD_LIKELY( update_score ) ) {
1119 76986 : ulong idx = acct_map_idx_query( disp->acct_map, addr+i, ULONG_MAX, disp->acct_pool );
1120 76986 : if( FD_UNLIKELY( idx==ULONG_MAX ) ) idx = acct_map_idx_query( disp->free_acct_map, addr+i, ULONG_MAX, disp->acct_pool );
1121 : /* since these are unstaged, we don't bother moving accounts
1122 : around */
1123 76986 : float score_change = 1.0f;
1124 76986 : if( FD_LIKELY( idx!=ULONG_MAX ) ) score_change = 1.0f - update_ema( disp->acct_pool+idx, disp->global_insert_cnt );
1125 76986 : ele->score *= fd_float_if( writable & update_score, score_change, 1.0f );
1126 76986 : }
1127 76986 : }
1128 3816 : *(fd_ptr_if( writable, &(unstaged->writable_cnt), &(unstaged->readonly_cnt) ) ) += (uint)addr_cnt;
1129 3816 : }
1130 :
1131 : ulong
1132 : fd_rdisp_add_txn( fd_rdisp_t * disp,
1133 : FD_RDISP_BLOCK_TAG_T insert_block,
1134 : fd_txn_t const * txn,
1135 : uchar const * payload,
1136 : fd_acct_addr_t const * alts,
1137 553152 : int serializing ) {
1138 :
1139 553152 : fd_rdisp_blockinfo_t * block = block_map_ele_query( disp->blockmap, &insert_block, NULL, disp->block_pool );
1140 553152 : if( FD_UNLIKELY( !block || !block->insert_ready ) ) return 0UL;
1141 553149 : if( FD_UNLIKELY( !pool_free( disp->pool ) ) ) return 0UL;
1142 :
1143 553149 : ulong idx = pool_idx_acquire( disp->pool );
1144 553149 : fd_rdisp_txn_t * rtxn = disp->pool + idx;
1145 553149 : if( FD_UNLIKELY( rtxn->in_degree!=IN_DEGREE_FREE ) ) FD_LOG_CRIT(( "pool[%lu].in_degree==%u but free", idx, rtxn->in_degree ));
1146 :
1147 553149 : fd_acct_addr_t const * imm_addrs = fd_txn_get_acct_addrs( txn, payload );
1148 :
1149 553149 : if( FD_UNLIKELY( !block->staged ) ) {
1150 636 : rtxn->in_degree = IN_DEGREE_UNSTAGED;
1151 636 : rtxn->score = 0.999f;
1152 :
1153 636 : fd_rdisp_unstaged_t * unstaged = disp->unstaged + idx;
1154 636 : unstaged->block = insert_block;
1155 636 : unstaged->writable_cnt = 0U;
1156 636 : unstaged->readonly_cnt = 0U;
1157 :
1158 636 : add_unstaged_edges( disp, rtxn, unstaged, imm_addrs,
1159 636 : fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_SIGNER ), 1, 1 );
1160 636 : add_unstaged_edges( disp, rtxn, unstaged, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_SIGNER ),
1161 636 : fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_NONSIGNER_IMM ), 1, 1 );
1162 636 : if( FD_LIKELY( alts ) )
1163 636 : add_unstaged_edges( disp, rtxn, unstaged, alts,
1164 636 : fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_ALT ), 1, 1 );
1165 636 : add_unstaged_edges( disp, rtxn, unstaged, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_SIGNER ),
1166 636 : fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_SIGNER ), 0, 1 );
1167 636 : add_unstaged_edges( disp, rtxn, unstaged, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_SIGNER | FD_TXN_ACCT_CAT_WRITABLE_NONSIGNER_IMM ),
1168 636 : fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_NONSIGNER_IMM ), 0, 1 );
1169 636 : if( FD_LIKELY( alts ) )
1170 636 : add_unstaged_edges( disp, rtxn, unstaged, alts +fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_ALT ),
1171 636 : fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_ALT ), 0, 1 );
1172 :
1173 636 : unstaged_txn_ll_ele_push_tail( block->ll, rtxn, disp->pool );
1174 552513 : } else {
1175 552513 : uint lane = block->staging_lane;
1176 :
1177 552513 : rtxn->in_degree = 0U;
1178 552513 : rtxn->score = 0.999f;
1179 : /* There's not a good way to initialize w_cnt_1 to -1, which is what
1180 : it should be at this point, but this is close. We must be sure
1181 : to add at least one writer (which we are assured of because we
1182 : know the transaction passed fd_txn_parse) to correct this value. */
1183 552513 : rtxn->edge_cnt_etc = ((block->linear_block_number<<16) | (lane<<14)) - 1U;
1184 :
1185 552513 : add_edges( disp, rtxn, imm_addrs,
1186 552513 : fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_SIGNER ), lane, 1, 1 );
1187 552513 : add_edges( disp, rtxn, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_SIGNER ),
1188 552513 : fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_NONSIGNER_IMM ), lane, 1, 1 );
1189 552513 : if( FD_LIKELY( alts ) )
1190 552510 : add_edges( disp, rtxn, alts,
1191 552510 : fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_ALT ), lane, 1, 1 );
1192 552513 : add_edges( disp, rtxn, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_SIGNER ),
1193 552513 : fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_SIGNER ), lane, 0, 1 );
1194 552513 : add_edges( disp, rtxn, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_SIGNER | FD_TXN_ACCT_CAT_WRITABLE_NONSIGNER_IMM ),
1195 552513 : fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_NONSIGNER_IMM ), lane, 0, 1 );
1196 552513 : if( FD_LIKELY( alts ) )
1197 552510 : add_edges( disp, rtxn, alts +fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_ALT ),
1198 552510 : fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_ALT ), lane, 0, 1 );
1199 552513 : }
1200 :
1201 553149 : if( FD_UNLIKELY( serializing | block->last_insert_was_serializing ) ) {
1202 6 : block->last_serializing = block->inserted_cnt;
1203 6 : }
1204 553149 : block->last_insert_was_serializing = (uint)!!serializing;
1205 553149 : rtxn->score += (float)block->last_serializing;
1206 :
1207 553149 : block->inserted_cnt++;
1208 553149 : disp->global_insert_cnt++;
1209 :
1210 553149 : if( FD_LIKELY( (block->staged) & (rtxn->in_degree==0U) ) ) {
1211 434178 : pending_prq_ele_t temp[1] = {{ .score = rtxn->score, .linear_block_number = block->linear_block_number, .txn_idx = (uint)idx }};
1212 434178 : pending_prq_insert( disp->lanes[ block->staging_lane ].pending, temp );
1213 434178 : }
1214 :
1215 553149 : return idx;
1216 553149 : }
1217 :
1218 : ulong
1219 : fd_rdisp_get_next_ready( fd_rdisp_t * disp,
1220 725691 : FD_RDISP_BLOCK_TAG_T schedule_block ) {
1221 725691 : fd_rdisp_blockinfo_t * block = block_map_ele_query( disp->blockmap, &schedule_block, NULL, disp->block_pool );
1222 725691 : if( FD_UNLIKELY( !block || !block->schedule_ready ) ) return 0UL;
1223 :
1224 725676 : ulong idx;
1225 725676 : if( FD_LIKELY( block->staged ) ) {
1226 725049 : ulong staging_lane = block->staging_lane;
1227 725049 : per_lane_info_t * l = disp->lanes + staging_lane;
1228 :
1229 725049 : if( FD_UNLIKELY( !pending_prq_cnt( l->pending ) ) ) return 0UL;
1230 552837 : if( FD_UNLIKELY( l->pending->linear_block_number != block->linear_block_number ) ) return 0UL;
1231 : /* e.g. when completed_cnt==0, we can accept any score below 1.0 */
1232 552831 : if( FD_UNLIKELY( l->pending->score>=(float)(block->completed_cnt+1U) ) ) return 0UL;
1233 552831 : idx = l->pending->txn_idx;
1234 552831 : pending_prq_remove_min( l->pending );
1235 552831 : disp->pool[ idx ].in_degree = IN_DEGREE_DISPATCHED;
1236 552831 : } else {
1237 627 : if( FD_UNLIKELY( block->dispatched_cnt!=block->completed_cnt ) ) return 0UL;
1238 324 : if( FD_UNLIKELY( unstaged_txn_ll_is_empty( block->ll, disp->pool ) ) ) return 0UL;
1239 318 : idx = unstaged_txn_ll_idx_peek_head( block->ll, disp->pool );
1240 318 : disp->pool[ idx ].in_degree = IN_DEGREE_UNSTAGED_DISPATCHED;
1241 318 : }
1242 553149 : block->dispatched_cnt++;
1243 :
1244 553149 : return idx;
1245 725676 : }
1246 :
1247 : void
1248 : fd_rdisp_complete_txn( fd_rdisp_t * disp,
1249 : ulong txn_idx,
1250 553152 : int reclaim ) {
1251 :
1252 553152 : fd_rdisp_txn_t * rtxn = disp->pool + txn_idx;
1253 553152 : fd_rdisp_blockinfo_t * block = NULL;
1254 :
1255 553152 : if( FD_UNLIKELY( rtxn->in_degree==IN_DEGREE_UNSTAGED_DISPATCHED ) ) {
1256 : /* Unstaged */
1257 318 : block = block_map_ele_query( disp->blockmap, &disp->unstaged[ txn_idx ].block, NULL, disp->block_pool );
1258 318 : FD_TEST( rtxn==unstaged_txn_ll_ele_peek_head( block->ll, disp->pool ) );
1259 318 : unstaged_txn_ll_ele_pop_head( block->ll, disp->pool );
1260 318 : block->completed_cnt++;
1261 552834 : } else if( FD_LIKELY( rtxn->in_degree==IN_DEGREE_DISPATCHED ) ) {
1262 : /* Staged */
1263 552831 : ulong w_cnt_1 = (rtxn->edge_cnt_etc ) & 0x7FU;
1264 552831 : ulong r_cnt = (rtxn->edge_cnt_etc>> 7) & 0x7FU;
1265 552831 : ulong lane = (rtxn->edge_cnt_etc>>14) & 0x3U;
1266 552831 : uint tail_linear_block_num = (uint)(disp->lanes[lane].linear_block_number);
1267 552831 : ulong edge_idx = 0UL;
1268 3475101 : for( ulong i=0UL; i<=w_cnt_1+r_cnt; i++ ) {
1269 2922270 : edge_t const * e = rtxn->edges+edge_idx;
1270 2922270 : edge_t const e0 = *e;
1271 2922270 : edge_t ref_to_me = (uint)((txn_idx<<8) | i);
1272 :
1273 : /* To help with explanations, consider the following DAG:
1274 :
1275 : --> B --\ --> E --\
1276 : / V / V
1277 : A D (G)
1278 : \ ^ \ ^
1279 : --> C --/ --> F --/ */
1280 :
1281 2922270 : if( FD_UNLIKELY( EDGE_IS_LAST( e0 ) ) ) {
1282 2351685 : ulong acct_idx = e0 & 0x7FFFFFFFU;
1283 2351685 : acct_info_t * ai = disp->acct_pool + acct_idx;
1284 : /* If this is a writer, e.g. node G above, we know it's the last
1285 : one, so we can clear last_reference.
1286 : If this is a reader, e.g. node E and F above, if node G
1287 : didn't exist, we need to check if it's the last one
1288 : (me==me->next). If so, we can clear last_reference. If not,
1289 : we need to delete this node from the linked list */
1290 2351685 : if( edge_idx<=w_cnt_1 || e[1]==ref_to_me ) {
1291 1525917 : ai->last_reference[ lane ] = 0U;
1292 1525917 : ai->flags = (uchar)(ai->flags & (~(ACCT_INFO_FLAG_ANY_WRITERS( lane ) | ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ))));
1293 1525917 : } else {
1294 825768 : int _ignore;
1295 825768 : FOLLOW_EDGE( disp->pool, e[1], _ignore )[2] = e[2]; /* me->next->prev = me->prev */
1296 825768 : FOLLOW_EDGE( disp->pool, e[2], _ignore )[1] = e[1]; /* me->prev->next = me->next */
1297 825768 : ai->last_reference[ lane ]= fd_uint_if( ai->last_reference[ lane ]==ref_to_me, e[1], ai->last_reference[ lane ] );
1298 825768 : }
1299 :
1300 : /* Potentially transition from ACTIVE -> CACHED */
1301 2351685 : if( FD_UNLIKELY( (ai->last_reference[ 0 ]==0U)&(ai->last_reference[ 1 ]==0U)&
1302 2351685 : (ai->last_reference[ 2 ]==0U)&(ai->last_reference[ 3 ]==0U) ) ) {
1303 1081614 : ai->flags = 0;
1304 1081614 : acct_map_idx_remove_fast( disp->acct_map, acct_idx, disp->acct_pool );
1305 1081614 : acct_map_idx_insert ( disp->free_acct_map, acct_idx, disp->acct_pool );
1306 1081614 : free_dlist_idx_push_tail( disp->free_acct_dlist, acct_idx, disp->acct_pool );
1307 1081614 : }
1308 2351685 : } else {
1309 570585 : int child_is_writer;
1310 :
1311 570585 : edge_t next_e = e0;
1312 570585 : edge_t const * child_edge;
1313 616599 : while( 1 ) {
1314 : /* This loop first traverses the me->child link, and then
1315 : traverses any sibling links. For example, in the case that
1316 : we're completing node D above, the first child_txn is E,
1317 : and the second child_txn is F. */
1318 616599 : /* */ child_edge = FOLLOW_EDGE( disp->pool, next_e, child_is_writer );
1319 616599 : fd_rdisp_txn_t * child_txn = FOLLOW_EDGE_TXN( disp->pool, next_e );
1320 :
1321 : /* Sanity test */
1322 616599 : FD_TEST( child_txn->in_degree>0U );
1323 616599 : FD_TEST( child_txn->in_degree<IN_DEGREE_DISPATCHED );
1324 :
1325 616599 : if( FD_UNLIKELY( 0U==(--(child_txn->in_degree)) ) ) {
1326 : /* We need an operation something like
1327 : fd_frag_meta_ts_decomp. child_txn has the low 16 bits,
1328 : and tail_linear_block_num has the full 32 bits, except
1329 : for tail_linear_block_num refers to a block < block_depth
1330 : later. Since block_depth<2^16, that means we can resolve
1331 : this unambiguously. Basically, we copy the high 16 bits
1332 : frorm tail_linear_block_num unless that would make
1333 : linear_block_num larger than tail_linear_block_num, in
1334 : which case, we subtract 2^16. */
1335 118647 : uint low_16_bits = child_txn->edge_cnt_etc>>16;
1336 118647 : uint linear_block_num = ((tail_linear_block_num & ~0xFFFFU) | low_16_bits) - (uint)((low_16_bits>(tail_linear_block_num&0xFFFFU))<<16);
1337 118647 : pending_prq_ele_t temp[1] = {{ .score = child_txn->score,
1338 118647 : .linear_block_number = linear_block_num,
1339 118647 : .txn_idx = (uint)(child_txn-disp->pool) }};
1340 118647 : pending_prq_insert( disp->lanes[ lane ].pending, temp );
1341 118647 : }
1342 616599 : if( child_is_writer || child_edge[1]==e0 ) break;
1343 46014 : next_e = child_edge[1];
1344 46014 : }
1345 : /* In the case that the completed transaction is a reader, say B
1346 : or C above, it seems like we should need to remove it from
1347 : the doubly linked list, but we actually don't. The times
1348 : that we need to read the sibling pointers are:
1349 : 1. Completing the writer before a reader (e.g. completing A)
1350 : 2. Completing a reader that's the last in the DAG (e.g.
1351 : completing E/F if G didn't exist)
1352 : 3. Adding another reader to the same set of readers (e.g. if
1353 : G were added as a reader instead of a writer)
1354 : 4. Adding a writer after a set of readers (e.g. adding G).
1355 :
1356 : Supposing that the completed transaction is a reader, since
1357 : we checked EDGE_IS_LAST( e0 ), we know that there is at least
1358 : one writer that follows this reader, e.g. D above.
1359 :
1360 : And so none of these reason can apply to this group of readers:
1361 : 1. Completing B or C implies that A has already completed,
1362 : so it can't complete again.
1363 : 2. We know that we're not in that case because we checked
1364 : EDGE_IS_LAST( e0 ), and if we're not last now, we cannot
1365 : become last later, because the growth happens in the
1366 : other direction.
1367 : 3. Similarly, because we checked EDGE_IS_LAST, any future
1368 : additions of readers won't be to this group of readers.
1369 : 4. Similarly, we know that there already is a writer that
1370 : follows this group of readers, so a later writer would
1371 : not read this set of readers.
1372 :
1373 : So then, we don't need to deal with the sibling edges. The
1374 : fact that we only need to do it in one case almost calls into
1375 : question whether we need to maintain the whole circular
1376 : system in the first place, and whether we could get away with
1377 : a reference count or something instead, but it's important in
1378 : one critical case: suppose we add a bunch of readers, then
1379 : some of them complete, and then we add a writer (case 4
1380 : above). We need to be able to enumerate the nodes in the DAG
1381 : that have not yet completed, and we need to be able to remove
1382 : them from that set in O(1). Those two requirements don't
1383 : leave us with many alternatives besides a doubly linked list. */
1384 :
1385 570585 : if( FD_UNLIKELY( EDGE_IS_LAST( *child_edge ) ) ) {
1386 : /* For example, either:
1387 : 1. completing D when if G didn't exist
1388 : 2. completing E or F with G in the DAG
1389 :
1390 : After completing this transaction, there's either 0 writers
1391 : left (case 1) or 1 writer left (case 2) in the DAG. and if
1392 : there is one, it is the last reference.
1393 :
1394 : Either way, we want to set ANY_WRITERS to
1395 : LAST_REF_WAS_WRITE. */
1396 393645 : acct_info_t * ai = disp->acct_pool + (*child_edge & 0x7FFFFFFFU);
1397 393645 : ulong flags = ai->flags;
1398 393645 : flags &= ~(ulong)ACCT_INFO_FLAG_ANY_WRITERS( lane );
1399 393645 : flags |= (flags & (ulong)ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ))<<1;
1400 393645 : ai->flags = (uchar)flags;
1401 393645 : }
1402 570585 : }
1403 2922270 : edge_idx += fd_ulong_if( i<=w_cnt_1, 1UL, 3UL );
1404 2922270 : }
1405 552831 : block = block_slist_ele_peek_head( disp->lanes[ lane ].block_ll, disp->block_pool );
1406 552831 : block->completed_cnt++;
1407 552831 : } else if( FD_LIKELY( rtxn->in_degree==IN_DEGREE_ZOMBIE ) ) {
1408 3 : FD_TEST( reclaim );
1409 3 : block = block_pool_ele( disp->block_pool, rtxn->block_idx );
1410 3 : zombie_dlist_ele_remove( block->zombie_list, rtxn, disp->pool );
1411 : /* Fall through to the pool release branch below. */
1412 3 : } else {
1413 0 : FD_LOG_CRIT(( "completed un-dispatched transaction %lu", txn_idx ));
1414 0 : }
1415 553152 : if( reclaim ) {
1416 : /* For testing purposes, to make sure we don't read a completed
1417 : transaction, we can clobber the memory. */
1418 : /* memset( disp->pool+txn_idx, '\xCC', sizeof(fd_rdisp_txn_t) ); */
1419 553149 : rtxn->in_degree = IN_DEGREE_FREE;
1420 553149 : pool_idx_release( disp->pool, txn_idx );
1421 553149 : } else {
1422 3 : rtxn->in_degree = IN_DEGREE_ZOMBIE;
1423 3 : zombie_dlist_ele_push_tail( block->zombie_list, rtxn, disp->pool );
1424 3 : rtxn->block_idx = (uint)block_pool_idx( disp->block_pool, block );
1425 3 : }
1426 553152 : }
1427 :
1428 :
1429 : ulong
1430 : fd_rdisp_staging_lane_info( fd_rdisp_t const * disp,
1431 45 : fd_rdisp_staging_lane_info_t out_sched[ static 4 ] ) {
1432 225 : for( ulong i=0UL; i<4UL; i++ ) {
1433 180 : if( !(disp->free_lanes & (1<<i) ) ) {
1434 66 : block_slist_t const * sl = disp->lanes[ i ].block_ll;
1435 66 : out_sched[ i ].insert_ready_block = block_slist_ele_peek_tail( sl, disp->block_pool )->block;
1436 66 : out_sched[ i ].schedule_ready_block = block_slist_ele_peek_head( sl, disp->block_pool )->block;
1437 66 : }
1438 180 : }
1439 45 : return 0xFUL & ~(ulong)disp->free_lanes;
1440 45 : }
1441 :
1442 : void
1443 : fd_rdisp_verify( fd_rdisp_t const * disp,
1444 155454 : uint * scratch ) {
1445 155454 : ulong acct_depth = disp->depth*MAX_ACCT_PER_TXN;
1446 155454 : ulong block_depth = disp->block_depth;
1447 155454 : FD_TEST( 0==acct_map_verify ( disp->acct_map, acct_depth+1UL, disp->acct_pool ) );
1448 155454 : FD_TEST( 0==acct_map_verify ( disp->free_acct_map, acct_depth+1UL, disp->acct_pool ) );
1449 155454 : FD_TEST( 0==block_map_verify( disp->blockmap, block_depth+1UL, disp->block_pool ) );
1450 :
1451 : /* Check all the in degree counts are right */
1452 155454 : memset( scratch, '\0', sizeof(uint)*(disp->depth+1UL) );
1453 155454 : scratch[ 0 ] = UINT_MAX;
1454 46783854 : for( ulong j=1UL; j<disp->depth+1UL; j++ ) {
1455 46628400 : fd_rdisp_txn_t const * rtxn = disp->pool+j;
1456 46628400 : if( rtxn->in_degree==IN_DEGREE_FREE ) { scratch[ j ]=UINT_MAX; continue; }
1457 :
1458 11538054 : if( (rtxn->in_degree==IN_DEGREE_UNSTAGED_DISPATCHED) |
1459 11538054 : (rtxn->in_degree==IN_DEGREE_UNSTAGED) |
1460 11538054 : (rtxn->in_degree==IN_DEGREE_ZOMBIE) ) continue;
1461 :
1462 11537427 : ulong w_cnt_1 = (rtxn->edge_cnt_etc ) & 0x7FU;
1463 11537427 : ulong r_cnt = (rtxn->edge_cnt_etc>> 7) & 0x7FU;
1464 11537427 : ulong edge_idx = 0UL;
1465 :
1466 155631774 : for( ulong i=0UL; i<=w_cnt_1+r_cnt; i++ ) {
1467 144094347 : edge_t const * e = rtxn->edges+edge_idx;
1468 144094347 : edge_t const e0 = *e;
1469 :
1470 144094347 : edge_idx += fd_ulong_if( i<=w_cnt_1, 1UL, 3UL );
1471 :
1472 144094347 : if( FD_UNLIKELY( EDGE_IS_LAST( e0 ) ) ) continue;
1473 :
1474 29105160 : edge_t next_e = e0;
1475 29105160 : edge_t const * child_edge;
1476 29105160 : edge_t last_e = 0U;
1477 30354588 : while( 1 ) {
1478 30354588 : int child_is_writer;
1479 : /* This loop first traverses the me->child link, and then
1480 : traverses any sibling links. For example, in the case that
1481 : we're completing node D above, the first child_txn is E,
1482 : and the second child_txn is F. */
1483 30354588 : /* */ child_edge = FOLLOW_EDGE( disp->pool, next_e, child_is_writer );
1484 30354588 : fd_rdisp_txn_t * child_txn = FOLLOW_EDGE_TXN( disp->pool, next_e );
1485 30354588 : scratch[ child_txn - disp->pool ]++;
1486 30354588 : if( child_is_writer || child_edge[1]==e0 ) break;
1487 1249428 : if( last_e!=0U ) FD_TEST( child_edge[2]==last_e );
1488 1249428 : last_e = next_e;
1489 1249428 : next_e = child_edge[1];
1490 1249428 : FD_TEST( next_e>=0x100U );
1491 1249428 : }
1492 29105160 : }
1493 11537427 : }
1494 46783854 : for( ulong i=1UL; i<disp->depth+1UL; i++ ) {
1495 46628400 : FD_TEST( scratch[ i ]==UINT_MAX ||
1496 46628400 : disp->pool[ i ].in_degree==IN_DEGREE_DISPATCHED ||
1497 46628400 : disp->pool[ i ].in_degree==IN_DEGREE_UNSTAGED ||
1498 46628400 : disp->pool[ i ].in_degree==IN_DEGREE_UNSTAGED_DISPATCHED ||
1499 46628400 : disp->pool[ i ].in_degree==IN_DEGREE_ZOMBIE ||
1500 46628400 : disp->pool[ i ].in_degree==scratch[ i ] );
1501 46628400 : }
1502 155454 : }
1503 :
1504 9 : void * fd_rdisp_leave ( fd_rdisp_t * disp ) { return disp; }
1505 9 : void * fd_rdisp_delete( void * mem ) { return mem; }
|