LCOV - code coverage report
Current view: top level - discof/replay - fd_rdisp.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 619 645 96.0 %
Date: 2025-10-13 04:42:14 Functions: 21 23 91.3 %

          Line data    Source code
       1             : #include "fd_rdisp.h"
       2             : #include <math.h> /* for the EMA */
       3             : 
       4             : /* The conflict graph that this file builds is not a general DAG, but
       5             :    the union of several special account-conflict graphs.  Each
       6             :    account-conflict graph has special structure:
       7             :                            ---> 3 --
       8             :                           /          \
       9             :                          /            v
      10             :                1  ---> 2 -----> 4 --> 6 ----> 7
      11             :                         \             ^
      12             :                          \           /
      13             :                           ----> 5 --
      14             : 
      15             :    That is, the graph is almost a line, but may have fan-out and fan-in
      16             :    regions.  The tricky part about representing a graph like this
      17             :    without dynamic memory allocation is that nodes may have arbitrary
      18             :    in-degree and out-degree.  Thus, we use a pretty standard trick and
      19             :    use sibling pointers, denoted below with dotted lines.  Each node
      20             :    maintains at most one successor pointer and at most one sibling
      21             :    pointer.  Although not shown below, the sibling pointers are
      22             :    circularly linked, so 5's sibling is 3.  Additionally, though not
      23             :    shown below, the child of the last node (7) stores information about
      24             :    what account address all this is for, which facilitates deleting the
      25             :    last node in the graph.
      26             : 
      27             : 
      28             :                               --> 3 --
      29             :                              /    ^   \
      30             :                             /     :    \
      31             :                            /      V     v
      32             :                  1  ---> 2        4 --> 6 ----> 7
      33             :                                   ^     ^
      34             :                                   :    /
      35             :                                   V   /
      36             :                                   5 --
      37             : 
      38             :    The normal edge 2->3 along with the sibling edge 3..>4 implies a
      39             :    normal edge 2->4.  That then transitively implies an edge 2->5.
      40             : 
      41             :    We want each node to maintain a count of its in-degree so that we
      42             :    know when it can be executed.  The implied edges also count for the
      43             :    in-degree.  In this example, node 1 has in-degree 0, node 6 has
      44             :    in-degree 3, and the rest have in-degree 1.
      45             : 
      46             :    Maintaining each account-conflict graph is relatively easy given the
      47             :    operations we want to support.  Only the details about sibling edges
      48             :    are worth mentioning.  For example, when deleting node 2, we
      49             :    decrement the in-degree count for its successor, and then follow the
      50             :    sibling pointers, decrementing all the in-degree counts as we go to
      51             :    mark the deletion of the implied edges.  Sibling edges also need to
      52             :    be doubly linked, so that e.g. nodes 3 and 5 can be re-linked in O(1)
      53             :    if node 4 is deleted.  They're also circularly linked as well for
      54             :    convenience.
      55             : 
      56             :    When building the graph, we maintain a map of account to the last
      57             :    node that references it, whether that was a read or write, and
      58             :    whether there are any writers to that account in the graph right now.
      59             :    If the new node reads from an account that was last read, the new
      60             :    node becomes a sibling of the last read, with in-degree increased if
      61             :    there are any writers.  Otherwise, it becomes a successor of the node
      62             :    that last referenced the account. */
      63             : 
      64             : /* For a task like this with lots of graph traversal and pointer
      65             :    chasing, performance is typically limited by memory latency.  That
      66             :    means that the more that can fit in cache, the better the
      67             :    performance.  This implementation uses a lot of bit-packing to
      68             :    improve cache footprint. */
      69             : 
      70             : /* The following structs are all very local to this compilation unit,
      71             :    and so they don't have globally acceptable type names (e.g.
      72             :    fd_rdisp_edge_t). */
      73             : 
      74             : /* Everything is set up to allow this to be 128, but we can save the
      75             :    space until it's necessary. */
      76      777003 : #define MAX_ACCT_PER_TXN 64UL
      77             : 
      78             : /* edge_t: Fields typed edge_t represent an edge in one of the parallel
      79             :    account-conflict DAGs.  Each transaction stores a list of all its
      80             :    outgoing edges.  The type is actually a union of bitfield, but C
      81             :    bitfields are gross, so we just do it manually with macros.  If the
      82             :    high bit is set, that means the transaction storing this edge_t value
      83             :    is the last in this specific DAG, and the lower 31 bits of the
      84             :    value are an index in the map_pool for the account pubkey for this
      85             :    DAG.  See the comments about the hidden edge outgoing from node 7 in
      86             :    the DAG at the top of this file for an example.
      87             :    If the high bit is not set, then the next 23 bits store the
      88             :    destination of the edge, represented by its index in the pool of
      89             :    transactions.  Then the lowest 8 bits store the account index within
      90             :    that transaction of the edge that is part of the same
      91             :    account-specific DAG as this edge.  Because the max depth is 2^23-1,
      92             :    and each transaction can reference 128 accounts, the max accounts
      93             :    that can be referenced fits in 30 bits.
      94             :    The proper type would be something like
      95             :    typedef union {
      96             :      struct {
      97             :        uint is_last:1;
      98             :        uint map_pool_idx:31;
      99             :      } last;
     100             :      struct {
     101             :        uint is_last:1;
     102             :        uint txn_idx:23;
     103             :        uint acct_idx:8;
     104             :      } e;
     105             :    } edge_t;
     106             : 
     107             :    */
     108             : typedef uint edge_t;
     109             : 
     110             : 
     111             : /* fd_rdisp_txn is the representation of a transaction as a node in the
     112             :    DAG. */
     113             : struct fd_rdisp_txn {
     114             :   /* in_degree: The total number of edges summed across all account DAGs
     115             :      with this node as their destination.  In the worst case, all the
     116             :      other transactions in the pool read from each of the max number of
     117             :      accounts that this transaction writes to,  so there are
     118             :      MAX_ACCT_PER_TXN*depth edges that come into this node, which fits in
     119             :      about 30 bits, so we have some room for special values.  If
     120             :      in_degree is one of the following values, then
     121             :      the transaction is: */
     122   234453531 : #define IN_DEGREE_FREE                (UINT_MAX   )
     123    57500892 : #define IN_DEGREE_UNSTAGED            (UINT_MAX-1U)/* unstaged, not dispatched */
     124     1166490 : #define IN_DEGREE_DISPATCHED          (UINT_MAX-2U)/* staged,   dispatched */
     125    57500874 : #define IN_DEGREE_UNSTAGED_DISPATCHED (UINT_MAX-3U)/* unstaged, dispatched */
     126    57500859 : #define IN_DEGREE_ZOMBIE              (UINT_MAX-4U)/* zombie */
     127             :   /* a transaction that is staged and dispatched is must have an
     128             :      in_degree of 0.  in_degree isn't a meaningful concept for unstaged
     129             :      transactions. */
     130             :   uint    in_degree;
     131             : 
     132             :   /* score: integer part stores how many transactions in the block must
     133             :      have completed before this transaction can be scheduled.  This is
     134             :      useful for transactions marked as serializing.  The fractional part
     135             :      gives some measure of how urgent the transaction is, where lower is
     136             :      more urgent.  This means we can't have more transactions in a block
     137             :      marked as serializing than the first integer that a float cannot
     138             :      represent.  That value is about 16M, which is much higher than the
     139             :      maximum number of transactions in a block, so this is not a
     140             :      problem.  If in the very rare case that there are more than just a
     141             :      few serializing transactions, we will lose a bit of precision for
     142             :      the fractional portion, which makes total sense; if there's not
     143             :      much room for parallel execution, having the optimal parallel
     144             :      execution is not very important. */
     145             :   float   score;
     146             : 
     147             :   /* edge_cnt_etc:
     148             :      0xFFFF0000 (16 bits) for linear block number,
     149             :      0x0000C000 (2 bits) for concurrency lane,
     150             :      0x00003F80 (7 bits) for r_cnt
     151             :      0x0000007F (7 bits) for w_cnt */
     152             :   union {
     153             :     uint edge_cnt_etc;
     154             :     /* edge_cnt_etc is only used when the transaction is STAGED and
     155             :        PENDING, READY, or DISPATCHED.  If UNSTAGED or FREE, the next
     156             :        pointer is also here.  It can't be UNSTAGED and FREE at the same
     157             :        time, so there's no conflict with storage there either. */
     158             :     uint unstaged_next;
     159             :     uint free_next;
     160             :     /* If ZOMBIE, pointer back to block is here.  No storage conflict
     161             :        because ZOMBIE is an exclusive state. */
     162             :     uint block_idx;
     163             :   };
     164             : 
     165             : 
     166             :   /* When a transaction writes to an account, it only creates one
     167             :      link.  When a transaction reads from an account, we need the full
     168             :      doubly linked list with its siblings, so it creates 3 edges
     169             :      (child, next, prev).  All edges from writable accounts come first,
     170             :      and we keep track of how many there are.  In the worst case, all
     171             :      accounts are reads, so we size it appropriately. */
     172             :   edge_t edges[3UL*MAX_ACCT_PER_TXN]; /* addressed [0, w_cnt+3*r_cnt) */
     173             : };
     174             : typedef struct fd_rdisp_txn fd_rdisp_txn_t;
     175             : 
     176             : #define EDGE_IS_LAST(x) ((x)&0x80000000U)
     177             : 
     178             : /* Two more definitions:
     179             :    An edge index is an array position within the edges array.  An
     180             :    account index is a position within a transaction's account addresses,
     181             :    reordered so that the writable ones come first.  Since writable
     182             :    accounts use one position in the edges array per account address,
     183             :    these often coincide. */
     184             : 
     185             : 
     186             : /* FOLLOW_EDGE and FOLLOW_EDGE_TXN are helper macros for dealing with
     187             :    edges.  Given an edge_t x, and transaction pool base, FOLLOW_EDGE_TXN
     188             :    returns a pointer to transaction that the edge points to; FOLLOW_EDGE
     189             :    returns a pointer to the (first) edge_t within that transaction that
     190             :    is part of the same DAG as this edge.  FOLLOW_EDGE and
     191             :    FOLLOW_EDGE_TXN must not be called if EDGE_IS_LAST is non-zero.
     192             : 
     193             :    Then the edge index, i.e. the position in the edges array of the
     194             :    (first) edge for an account index is:
     195             :           acct_idx                      if acct_idx<w_cnt
     196             :           w_cnt + 3*(acct_idx-w_cnt)    else.
     197             :   Simplifying gives
     198             :          acct_idx + 2*signed_max( 0, acct_idx-w_cnt ).
     199             :   In doing this calculation, we also basically get for free whether the
     200             :   edge is for a writable account or a readonly account, so we return
     201             :   that as well via the w parameter.  If the child transaction only reads
     202             :   the account address for this DAG, then the next and prev pointers can
     203             :   be accessed using the returned value +1 and +2, respectively. */
     204   176064747 : #define FOLLOW_EDGE(base, x, w) (__extension__({                                 \
     205   176064747 :         uint __e = (x);                                                          \
     206   176064747 :         fd_rdisp_txn_t * __txn = ((base)+(__e>>8));                              \
     207   176064747 :         uint __wcnt = __txn->edge_cnt_etc & 0x7FU;                               \
     208   176064747 :         uint __idx  = (__e & 0xFFU);                                             \
     209   176064747 :         (w) = __idx<__wcnt;                                                      \
     210   176064747 :         (void)(w);  /* not robust... */                                          \
     211   176064747 :         __txn->edges + __idx + 2*fd_int_max( 0, (int)(__idx)-(int)(__wcnt) ); }))
     212   153735396 : #define FOLLOW_EDGE_TXN(base, x) ( (base)+((x)>>8) )
     213             : 
     214             : /* The pool and slist are almost the same, but they are used
     215             :    differently, so keep them as different structures for now. */
     216             : 
     217             : #define POOL_NAME     pool
     218          27 : #define POOL_T        fd_rdisp_txn_t
     219             : #define POOL_IDX_T    uint
     220     2530839 : #define POOL_NEXT     free_next
     221             : #define POOL_SENTINEL 1
     222             : #include "../../util/tmpl/fd_pool.c"
     223             : 
     224             : #define SLIST_NAME unstaged_txn_ll
     225             : #define SLIST_ELE_T fd_rdisp_txn_t
     226          18 : #define SLIST_IDX_T uint
     227          72 : #define SLIST_NEXT  unstaged_next
     228             : #include "../../util/tmpl/fd_slist.c"
     229             : 
     230             : #define DLIST_IDX_T uint
     231           6 : #define DLIST_PREV  edges[0]
     232           6 : #define DLIST_NEXT  edges[1]
     233             : #define DLIST_NAME  zombie_dlist
     234             : #define DLIST_ELE_T fd_rdisp_txn_t
     235             : #include "../../util/tmpl/fd_dlist.c"
     236             : 
     237             : 
     238             : /* ACCT_INFO_FLAG: It's a bit unfortunate that we have to maintain these
     239             :    flags, but basically we need to be able to distinguish the case where
     240             :    there are only readers so that we don't increment in_degree when
     241             :    adding a new fd_rdisp_txn.  If we have any writers, the only way to
     242             :    transition into a state where there are only readers is to complete
     243             :    the last writer.  We know we are in this case when the completed
     244             :    node's child doesn't have a child, and the completed node's child is
     245             :    a reader, as indicated by the LAST_REF_WAS_WRITE bit.
     246             :    LAST_REF_WAS_WRITE also has the advantage of being easy to
     247             :    maintain. */
     248    18094905 : #define ACCT_INFO_FLAG_LAST_REF_WAS_WRITE(lane) (((uchar)1)<<(2*(lane)))
     249    17671827 : #define ACCT_INFO_FLAG_ANY_WRITERS(       lane) (((uchar)2)<<(2*(lane)))
     250             : 
     251             : /* acct_info_t is a node in a map_chain that contains the metadata for a
     252             :    single account address's conflict graph DAG.  In particular, it
     253             :    contains the information needed to know where to insert a node that
     254             :    reads from or writes to the account.  The objects of this type follow
     255             :    this state machine:
     256             : 
     257             :                  FREE  -----> ACTIVE ----> CACHED
     258             :                                 ^            |
     259             :                                 |-------------
     260             :    When FREE, it is in free_acct_dlist only.  When ACTIVE, it is in
     261             :    acct_map only.  When CACHED, it is in both free_acct_dlist and
     262             :    cached_acct_map.
     263             : */
     264             : struct acct_info {
     265             :   /* key, next, and prev are the map_chain fields. Used in the ACTIVE
     266             :      and CACHED states.  next and prev set to 0 when in the FREE state.
     267             :      Element 0 is a sentinel and isn't inserted to the free_acct_dlist,
     268             :      so this is unambiguous. */
     269             :   fd_acct_addr_t key;
     270             :   uint next;
     271             :   uint prev;
     272             : 
     273             :   union {
     274             :     struct {
     275             :       /* This is effectively a pointer to the last node in the DAG for
     276             :          this pubkey, one for each staging lane.
     277             :          EDGE_IS_LAST(FOLLOW_EDGE(base, last_reference[i])) is non-zero.
     278             :          */
     279             :       edge_t last_reference[4];
     280             : 
     281             :     }; /* When in the ACTIVE state */
     282             :     struct {
     283             :       uint free_ll_next;
     284             :       uint free_ll_prev;
     285             :       /* 8 bytes of padding here */
     286             :     }; /* When not in the ACTIVE state, used by the free_acct_dlist */
     287             :   };
     288             :   /* flags: a combination of ACCT_INFO_FLAG_* bitfields above.  Used
     289             :      when ACTIVE. */
     290             :   uint flags:8;
     291             : 
     292             : 
     293             :   /* We want to dispatch the READY transactions in an order that
     294             :      maximizes parallelism, but we also want to be able to start
     295             :      dispatching transactions decently well before we have the full
     296             :      conflict graph.  We can do that because we know that contentious
     297             :      accounts tend to stay contentious and uncontentious accounts tend
     298             :      to stay uncontentious.
     299             : 
     300             :      To accomplish this, we maintain a special EMA.  Let x_i be 1 if
     301             :      transaction i references it and 0 if not.  If we squint and assume
     302             :      the transactions are independent and all drawn from some
     303             :      distribution (which is not true, but that's why we squint), an EMA
     304             :      of x_i estimates the probability that the next transaction
     305             :      references this account.  How we use this value is detailed later.
     306             : 
     307             :      We can't update this value for every pubkey for each transaction,
     308             :      so we maintain it in a lazy way, by applying updates only when we
     309             :      need to read the value, which also happens to be every time we want
     310             :      to add a 1 value to the EMA.  We then just need to maintain the
     311             :      last index i at which x_i was updated and the current value.  We
     312             :      only have 24 bits for the index, which means that we can't maintain
     313             :      it in a fork-aware way, which doesn't seem like a problem.  Also,
     314             :      it's possible it can overflow, and that can result in an incorrect
     315             :      value, but that means the account is only referenced ~ 1/2^24
     316             :      transactions, which is also fine.
     317             : 
     318             :      last_ref is in the domain of global_inserted_txn_cnt.  ema_refs is
     319             :      in [0, 1].  Both fields are used in ACTIVE and CACHED.  Maintaining
     320             :      these fields is actually the main reason CACHED exists. */
     321             :   uint   last_ref:24;
     322             :   float  ema_refs;
     323             : };
     324             : typedef struct acct_info acct_info_t;
     325             : 
     326             : FD_STATIC_ASSERT( sizeof(acct_info_t)==64UL, acct_info_t );
     327             : 
     328             : /* For the acct_map and the free_acct_map */
     329             : #define MAP_NAME          acct_map
     330     3726312 : #define MAP_ELE_T         acct_info_t
     331   375691212 : #define MAP_IDX_T         uint
     332             : #define MAP_KEY_T         fd_acct_addr_t
     333   714941709 : #define MAP_KEY_HASH(k,s) fd_hash( (s), (k)->b, 32UL )
     334   360889470 : #define MAP_KEY_EQ(k0,k1) (!memcmp( (k0)->b, (k1)->b, 32UL ))
     335             : #define MAP_OPTIMIZE_RANDOM_ACCESS_REMOVAL 1
     336             : #include "../../util/tmpl/fd_map_chain.c"
     337             : 
     338             : 
     339             : #define DLIST_IDX_T uint
     340    16406382 : #define DLIST_PREV  free_ll_prev
     341    16406382 : #define DLIST_NEXT  free_ll_next
     342             : #define DLIST_NAME  free_dlist
     343             : #define DLIST_ELE_T acct_info_t
     344             : #include "../../util/tmpl/fd_dlist.c"
     345             : 
     346             : 
     347             : struct pending_prq_ele {
     348             :   /* lower score means should be scheduled sooner.  Integer part has a
     349             :      special meaning as explained above. */
     350             :   float score;
     351             : 
     352             :   uint linear_block_number;
     353             :   uint txn_idx;
     354             : 
     355             :   /* It seems like we should be able to get this whole struct in 8
     356             :      bytes, but we're a few bits over.
     357             : 
     358             :      23 bits for txn_idx
     359             :      16 bit for linear_block_number
     360             :      Then the integer part of the score needs to be at least 18 or 19
     361             :      bits, but we can't use a custom floating point number. */
     362             : };
     363             : 
     364             : typedef struct pending_prq_ele pending_prq_ele_t;
     365             : 
     366             : 
     367             : #define PRQ_NAME pending_prq
     368    15249270 : #define PRQ_T    pending_prq_ele_t
     369             : #define PRQ_EXPLICIT_TIMEOUT 0
     370             : /* returns 1 if x is strictly after y */
     371    12665712 : #define PRQ_AFTER(x,y) (__extension__( {                                            \
     372    12665712 :             int cmp0 = (int)(x).linear_block_number - (int)(y).linear_block_number; \
     373    12665712 :             fd_int_if( cmp0!=0, cmp0>0, (x).score>(y).score );                      \
     374    12665712 :             }))
     375             : #include "../../util/tmpl/fd_prq.c"
     376             : 
     377             : /* fd_rdisp_blockinfo_t maintains a little metadata about transactions for each
     378             :    slot.  */
     379             : struct fd_rdisp_blockinfo {
     380             :   FD_RDISP_BLOCK_TAG_T block;
     381             :   uint  linear_block_number;
     382             : 
     383             :   uint  insert_ready:1;
     384             :   uint  schedule_ready:1;
     385             :   uint  staged:1;
     386             :   uint  staging_lane:2; /* ignored if staged==0 */
     387             :   uint  last_insert_was_serializing:1;
     388             : 
     389             :   uint inserted_cnt;
     390             :   uint dispatched_cnt;
     391             :   uint completed_cnt;
     392             :   uint last_serializing;
     393             : 
     394             :   uint map_chain_next;
     395             :   uint ll_next;
     396             :   unstaged_txn_ll_t ll[ 1 ]; /* used only when unstaged */
     397             :   zombie_dlist_t    zombie_list[ 1 ];
     398             : };
     399             : typedef struct fd_rdisp_blockinfo fd_rdisp_blockinfo_t;
     400             : 
     401             : #define POOL_NAME     block_pool
     402          27 : #define POOL_T        fd_rdisp_blockinfo_t
     403             : #define POOL_IDX_T    uint
     404     1007172 : #define POOL_NEXT     ll_next
     405             : #define POOL_SENTINEL 1
     406             : #include "../../util/tmpl/fd_pool.c"
     407             : 
     408             : #define MAP_NAME  block_map
     409             : #define MAP_ELE_T fd_rdisp_blockinfo_t
     410             : #define MAP_KEY_T FD_RDISP_BLOCK_TAG_T
     411      405246 : #define MAP_KEY   block
     412    10124793 : #define MAP_NEXT  map_chain_next
     413    12261066 : #define MAP_IDX_T uint
     414             : #include "../../util/tmpl/fd_map_chain.c"
     415             : 
     416             : #define SLIST_NAME  block_slist
     417             : #define SLIST_ELE_T fd_rdisp_blockinfo_t
     418      405246 : #define SLIST_IDX_T uint
     419      810495 : #define SLIST_NEXT  ll_next
     420             : #include "../../util/tmpl/fd_slist.c"
     421             : 
     422             : struct fd_rdisp_unstaged {
     423             :   FD_RDISP_BLOCK_TAG_T block;
     424             : 
     425             :   uint writable_cnt;
     426             :   uint readonly_cnt;
     427             :   fd_acct_addr_t keys[MAX_ACCT_PER_TXN];
     428             : };
     429             : typedef struct fd_rdisp_unstaged fd_rdisp_unstaged_t;
     430             : 
     431             : typedef struct {
     432             :   pending_prq_ele_t * pending;
     433             :   ulong               linear_block_number;
     434             :   block_slist_t       block_ll[1];
     435             :   ulong               inserted_cnt;
     436             :   ulong               dispatched_cnt;
     437             :   ulong               completed_cnt;
     438             : } per_lane_info_t;
     439             : 
     440             : /* We maintain two maps from pubkeys to acct_info_t.  The first one is
     441             :    the main acct_map, just called acct_map.  All pubkeys in this map
     442             :    have >0 references in the one of the staging lane DAGs.  When an
     443             :    account goes to 0 references, it gets removed from main map_chain and
     444             :    moved to free map_chain, called free_acct_map.  The free_acct_map
     445             :    exists to maintain the reference count EMA information lazily.
     446             :    Unless we need the acct_info_t for something in the DAG, we might as
     447             :    well maintain the EMA info.
     448             : 
     449             :    When we start up, all the acct_info_t structs are in the
     450             :    free_acct_dlist.  Whenever something is added to the free_acct_map,
     451             :    it's also added to the tail of the free_acct_dlist.  When we need an
     452             :    acct_info_t that's not in the free_acct_map, we pop the head of the
     453             :    free_acct_dlist.  In general, the free_acct_dlist contains everything
     454             :    in the free_acct_map, potentially plus some elements that have never
     455             :    been used; all acct_info_t objects are in exactly one of the main
     456             :    acct_map and the free_acct_dlist (not free_acct_map).  See
     457             :    acct_info_t for more information about this. */
     458             : 
     459             : struct fd_rdisp {
     460             :   ulong depth;
     461             :   ulong block_depth;
     462             : 
     463             :   ulong global_insert_cnt;
     464             :   ulong unstaged_lblk_num;
     465             : 
     466             :   /* pool: an fd_pool, indexed [0, depth+1), with 0 being a sentinel */
     467             :   fd_rdisp_txn_t       * pool;
     468             :   fd_rdisp_unstaged_t  * unstaged; /* parallel to pool with additional info */
     469             : 
     470             :   block_map_t          * blockmap; /* map chain */
     471             :   fd_rdisp_blockinfo_t * block_pool;
     472             : 
     473             :   int free_lanes; /* a bitmask */
     474             :   per_lane_info_t lanes[4];
     475             : 
     476             :   acct_map_t   * acct_map;
     477             :   acct_map_t   * free_acct_map;
     478             :   /* acct_pool is not an fd_pool, but is just a flat array, since we
     479             :      don't need to acquire and release from it because of the dlist. */
     480             :   acct_info_t  * acct_pool;
     481             :   free_dlist_t   free_acct_dlist[1];
     482             : };
     483             : 
     484             : typedef struct fd_rdisp fd_rdisp_t;
     485             : 
     486             : 
     487             :  #define ACCT_ITER_TO_PTR( iter ) (__extension__( {                                             \
     488             :        ulong __idx = fd_txn_acct_iter_idx( iter );                                              \
     489             :        fd_ptr_if( __idx<fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_IMM ), accts, alt_adj )+__idx; \
     490             :        }))
     491             : 
     492             : 
     493         117 : ulong fd_rdisp_align( void ) { return 128UL; }
     494             : 
     495             : ulong
     496             : fd_rdisp_footprint( ulong depth,
     497           9 :                     ulong block_depth ) {
     498           9 :   if( FD_UNLIKELY( (depth>FD_RDISP_MAX_DEPTH)             | (depth<2UL) |
     499           9 :                    (block_depth>FD_RDISP_MAX_BLOCK_DEPTH) | (block_depth<4UL) ) ) return 0UL;
     500             : 
     501           9 :   ulong chain_cnt      = block_map_chain_cnt_est( block_depth );
     502           9 :   ulong acct_depth     = depth*MAX_ACCT_PER_TXN;
     503           9 :   ulong acct_chain_cnt = acct_map_chain_cnt_est( acct_depth );
     504             : 
     505           9 :   ulong l = FD_LAYOUT_INIT;
     506           9 :   l = FD_LAYOUT_APPEND( l, fd_rdisp_align(),             sizeof(fd_rdisp_t)                              );
     507           9 :   l = FD_LAYOUT_APPEND( l, pool_align(),                 pool_footprint              ( depth+1UL       ) ); /* pool       */
     508           9 :   l = FD_LAYOUT_APPEND( l, alignof(fd_rdisp_unstaged_t), sizeof(fd_rdisp_unstaged_t)*( depth+1UL       ) ); /* unstaged   */
     509           9 :   l = FD_LAYOUT_APPEND( l, block_map_align(),            block_map_footprint         ( chain_cnt       ) ); /* blockmap   */
     510           9 :   l = FD_LAYOUT_APPEND( l, block_pool_align(),           block_pool_footprint        ( block_depth+1UL ) ); /* block_pool */
     511           9 :   l = FD_LAYOUT_APPEND( l, pending_prq_align(),          4UL*pending_prq_footprint   ( depth           ) ); /* pending    */
     512           9 :   l = FD_LAYOUT_APPEND( l, acct_map_align(),             acct_map_footprint          ( acct_chain_cnt  ) ); /* acct_map   */
     513           9 :   l = FD_LAYOUT_APPEND( l, acct_map_align(),             acct_map_footprint          ( acct_chain_cnt  ) ); /* free_acct_map */
     514           9 :   l = FD_LAYOUT_APPEND( l, alignof(acct_info_t),         (acct_depth+1UL)*sizeof(acct_info_t)            ); /* acct_pool  */
     515           9 :   return FD_LAYOUT_FINI( l, fd_rdisp_align() );
     516           9 : }
     517             : 
     518             : void *
     519             : fd_rdisp_new( void * mem,
     520             :               ulong  depth,
     521             :               ulong  block_depth,
     522           9 :               ulong  seed ) {
     523           9 :   if( FD_UNLIKELY( (depth>FD_RDISP_MAX_DEPTH)             | (depth<2UL) |
     524           9 :                    (block_depth>FD_RDISP_MAX_BLOCK_DEPTH) | (block_depth<4UL) ) ) return 0UL;
     525             : 
     526           9 :   ulong chain_cnt      = block_map_chain_cnt_est( block_depth );
     527           9 :   ulong acct_depth     = depth*MAX_ACCT_PER_TXN;
     528           9 :   ulong acct_chain_cnt = acct_map_chain_cnt_est( acct_depth );
     529             : 
     530           9 :   FD_SCRATCH_ALLOC_INIT( l, mem );
     531           9 :   fd_rdisp_t * disp   = FD_SCRATCH_ALLOC_APPEND( l, fd_rdisp_align(),             sizeof(fd_rdisp_t)                              );
     532           9 :   void  * _pool       = FD_SCRATCH_ALLOC_APPEND( l, pool_align(),                 pool_footprint              ( depth+1UL       ) );
     533           9 :   void  * _unstaged   = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_rdisp_unstaged_t), sizeof(fd_rdisp_unstaged_t)*( depth+1UL       ) );
     534           9 :   void  * _bmap       = FD_SCRATCH_ALLOC_APPEND( l, block_map_align(),            block_map_footprint         ( chain_cnt       ) );
     535           9 :   void  * _bpool      = FD_SCRATCH_ALLOC_APPEND( l, block_pool_align(),           block_pool_footprint        ( block_depth+1UL ) );
     536           9 :   uchar * _pending    = FD_SCRATCH_ALLOC_APPEND( l, pending_prq_align(),          4UL*pending_prq_footprint   ( depth           ) );
     537           9 :   void  * _acct_map   = FD_SCRATCH_ALLOC_APPEND( l, acct_map_align(),             acct_map_footprint          ( acct_chain_cnt  ) );
     538           9 :   void  * _freea_map  = FD_SCRATCH_ALLOC_APPEND( l, acct_map_align(),             acct_map_footprint          ( acct_chain_cnt  ) );
     539           9 :   acct_info_t * apool = FD_SCRATCH_ALLOC_APPEND( l, alignof(acct_info_t),         (acct_depth+1UL)*sizeof(acct_info_t)            );
     540           9 :   FD_SCRATCH_ALLOC_FINI( l, fd_rdisp_align() );
     541             : 
     542           9 :   disp->depth             = depth;
     543           9 :   disp->block_depth       = block_depth;
     544           9 :   disp->global_insert_cnt = 0UL;
     545           9 :   disp->unstaged_lblk_num = 0UL;
     546             : 
     547           9 :   fd_rdisp_txn_t * temp_pool_join = pool_join( pool_new( _pool, depth+1UL ) );
     548      197823 :   for( ulong i=0UL; i<depth+1UL; i++ ) temp_pool_join[ i ].in_degree = IN_DEGREE_FREE;
     549           9 :   pool_leave( temp_pool_join );
     550             : 
     551           9 :   memset( _unstaged, '\0', sizeof(fd_rdisp_unstaged_t)*(depth+1UL) );
     552             : 
     553           9 :   block_map_new ( _bmap,  chain_cnt, seed );
     554           9 :   block_pool_new( _bpool, block_depth+1UL );
     555             : 
     556           9 :   fd_rdisp_blockinfo_t * bpool_temp_join = block_pool_join( _bpool );
     557      196683 :   for( ulong i=0UL; i<block_depth+1UL; i++ ) zombie_dlist_new( bpool_temp_join[ i ].zombie_list );
     558           9 :   block_pool_leave( bpool_temp_join );
     559             : 
     560           9 :   disp->free_lanes = 0xF;
     561          45 :   for( ulong i=0UL; i<4UL; i++ ) {
     562          36 :     pending_prq_new( _pending, depth );
     563          36 :     _pending += pending_prq_footprint( depth );
     564             : 
     565          36 :     disp->lanes[i].linear_block_number = 0UL;
     566          36 :     disp->lanes[i].inserted_cnt        = 0U;
     567          36 :     disp->lanes[i].dispatched_cnt      = 0U;
     568          36 :     disp->lanes[i].completed_cnt       = 0U;
     569          36 :   }
     570             : 
     571           9 :   acct_map_new( _acct_map,  acct_chain_cnt, fd_ulong_hash( seed+1UL ) );
     572           9 :   acct_map_new( _freea_map, acct_chain_cnt, fd_ulong_hash( seed+2UL ) );
     573             : 
     574           9 :   free_dlist_t * temp_join = free_dlist_join( free_dlist_new( disp->free_acct_dlist ) );
     575    12659529 :   for( ulong i=1UL; i<acct_depth+1UL; i++ ) {
     576    12659520 :     apool[ i ].next = apool[ i ].prev = 0U;
     577    12659520 :     free_dlist_idx_push_tail( disp->free_acct_dlist, i, apool );
     578    12659520 :   }
     579           9 :   free_dlist_leave( temp_join );
     580             : 
     581           9 :   return disp;
     582           9 : }
     583             : 
     584             : fd_rdisp_t *
     585           9 : fd_rdisp_join( void * mem ) {
     586           9 :   fd_rdisp_t * disp = (fd_rdisp_t *)mem;
     587             : 
     588           9 :   ulong depth          = disp->depth;
     589           9 :   ulong block_depth    = disp->block_depth;
     590           9 :   ulong chain_cnt      = block_map_chain_cnt_est( block_depth );
     591           9 :   ulong acct_depth     = depth*MAX_ACCT_PER_TXN;
     592           9 :   ulong acct_chain_cnt = acct_map_chain_cnt_est( acct_depth );
     593             : 
     594           9 :   FD_SCRATCH_ALLOC_INIT( l, mem );
     595           9 :   /*                 */ FD_SCRATCH_ALLOC_APPEND( l, fd_rdisp_align(),             sizeof(fd_rdisp_t)                              );
     596           9 :   void  * _pool       = FD_SCRATCH_ALLOC_APPEND( l, pool_align(),                 pool_footprint              ( depth+1UL       ) );
     597           9 :   void  * _unstaged   = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_rdisp_unstaged_t), sizeof(fd_rdisp_unstaged_t)*( depth+1UL       ) );
     598           9 :   void  * _bmap       = FD_SCRATCH_ALLOC_APPEND( l, block_map_align(),            block_map_footprint         ( chain_cnt       ) );
     599           9 :   void  * _bpool      = FD_SCRATCH_ALLOC_APPEND( l, block_pool_align(),           block_pool_footprint        ( block_depth+1UL ) );
     600           9 :   uchar * _pending    = FD_SCRATCH_ALLOC_APPEND( l, pending_prq_align(),          4UL*pending_prq_footprint   ( depth           ) );
     601           9 :   void  * _acct_map   = FD_SCRATCH_ALLOC_APPEND( l, acct_map_align(),             acct_map_footprint          ( acct_chain_cnt  ) );
     602           9 :   void  * _freea_map  = FD_SCRATCH_ALLOC_APPEND( l, acct_map_align(),             acct_map_footprint          ( acct_chain_cnt  ) );
     603           9 :   acct_info_t * apool = FD_SCRATCH_ALLOC_APPEND( l, alignof(acct_info_t),         (acct_depth+1UL)*sizeof(acct_info_t)            );
     604           9 :   FD_SCRATCH_ALLOC_FINI( l, fd_rdisp_align() );
     605             : 
     606           9 :   disp->pool       = pool_join( _pool );
     607           9 :   disp->unstaged   = (fd_rdisp_unstaged_t *)_unstaged;
     608           9 :   disp->blockmap   = block_map_join( _bmap );
     609           9 :   disp->block_pool = block_pool_join( _bpool );
     610             : 
     611      196683 :   for( ulong i=0UL; i<block_depth+1UL; i++ ) zombie_dlist_join( disp->block_pool[ i ].zombie_list );
     612             : 
     613          45 :   for( ulong i=0UL; i<4UL; i++ ) {
     614          36 :     disp->lanes[i].pending = pending_prq_join( _pending );
     615          36 :     _pending += pending_prq_footprint( depth );
     616          36 :   }
     617             : 
     618           9 :   disp->acct_map      = acct_map_join( _acct_map );
     619           9 :   disp->free_acct_map = acct_map_join( _freea_map );
     620           9 :   disp->acct_pool     = apool;
     621           9 :   free_dlist_join( disp->free_acct_dlist );
     622             : 
     623           9 :   return disp;
     624           9 : }
     625             : 
     626             : static inline void
     627             : free_lane( fd_rdisp_t * disp,
     628       12027 :            ulong        staging_lane ) {
     629       12027 :   disp->free_lanes |= 1<<staging_lane;
     630       12027 :   per_lane_info_t * l = disp->lanes+staging_lane;
     631       12027 :   FD_TEST( pending_prq_cnt( l->pending )==0UL );
     632       12027 :   l->linear_block_number = 0UL;
     633       12027 :   l->inserted_cnt   = 0UL;
     634       12027 :   l->dispatched_cnt = 0UL;
     635       12027 :   l->completed_cnt  = 0UL;
     636       12027 :   block_slist_delete( block_slist_leave( l->block_ll ) );
     637       12027 : }
     638             : 
     639             : static inline void
     640             : alloc_lane( fd_rdisp_t * disp,
     641       12030 :             ulong        staging_lane ) {
     642       12030 :   disp->free_lanes &= ~(1<<staging_lane);
     643       12030 :   block_slist_join( block_slist_new( disp->lanes[staging_lane].block_ll ) );
     644       12030 : }
     645             : 
     646             : ulong
     647             : fd_rdisp_suggest_staging_lane( fd_rdisp_t const *   disp,
     648             :                                FD_RDISP_BLOCK_TAG_T parent_block,
     649           0 :                                int                  duplicate ) {
     650             : 
     651             :   /* 1. If it's a duplicate, suggest FD_RDISP_UNSTAGED */
     652           0 :   if( FD_UNLIKELY( duplicate ) ) return FD_RDISP_UNSTAGED;
     653             : 
     654             :   /* 2. If parent is the last block in any existing staging lane, suggest
     655             :         that lane */
     656           0 :   fd_rdisp_blockinfo_t const * block_pool = disp->block_pool;
     657           0 :   fd_rdisp_blockinfo_t const * block = block_map_ele_query_const( disp->blockmap, &parent_block, NULL, block_pool );
     658           0 :   if( FD_LIKELY( block && block->insert_ready && block->staged ) ) return block->staging_lane;
     659             : 
     660             :   /* 3. If there is at least one free lane, suggest a free lane */
     661           0 :   if( FD_LIKELY( disp->free_lanes!=0 ) ) return (ulong)fd_uint_find_lsb( (uint)disp->free_lanes );
     662             : 
     663             :   /* 4. Else, suggest FD_RDISP_UNSTAGED */
     664           0 :   return FD_RDISP_UNSTAGED;
     665           0 : }
     666             : 
     667             : int
     668             : fd_rdisp_add_block( fd_rdisp_t          * disp,
     669             :                    FD_RDISP_BLOCK_TAG_T   new_block,
     670      405252 :                    ulong                  staging_lane ) {
     671      405252 :   fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
     672             : 
     673      405252 :   if( FD_UNLIKELY( !block_pool_free( block_pool )                                                             ) ) return -1;
     674      405252 :   if( FD_UNLIKELY(  ULONG_MAX!=block_map_idx_query_const( disp->blockmap, &new_block, ULONG_MAX, block_pool ) ) ) return -1;
     675      405246 :   fd_rdisp_blockinfo_t * block = block_pool_ele_acquire( block_pool );
     676      405246 :   block->block = new_block;
     677      405246 :   block_map_ele_insert( disp->blockmap, block, block_pool );
     678             : 
     679      405246 :   block->insert_ready = 1;
     680      405246 :   block->staged       = staging_lane!=FD_RDISP_UNSTAGED;
     681      405246 :   block->staging_lane = (uint)(staging_lane & 0x3UL);
     682      405246 :   block->last_insert_was_serializing = 0U;
     683             : 
     684      405246 :   block->inserted_cnt     = 0U;
     685      405246 :   block->dispatched_cnt   = 0U;
     686      405246 :   block->completed_cnt    = 0U;
     687      405246 :   block->last_serializing = 0U;
     688             : 
     689      405246 :   if( FD_UNLIKELY( staging_lane==FD_RDISP_UNSTAGED ) ) {
     690           3 :     block->schedule_ready = 1;
     691           3 :     block->linear_block_number = (uint)disp->unstaged_lblk_num++;
     692           3 :     unstaged_txn_ll_join( unstaged_txn_ll_new( block->ll ) );
     693      405243 :   } else {
     694      405243 :     block->linear_block_number = (uint)disp->lanes[staging_lane].linear_block_number++;
     695      405243 :     block->schedule_ready      = (uint)(1 & (disp->free_lanes >> staging_lane));
     696             : 
     697      405243 :     if( FD_LIKELY( disp->free_lanes & (1<<staging_lane) ) ) alloc_lane( disp, staging_lane );
     698             : 
     699      405243 :     block_slist_t * sl = disp->lanes[staging_lane].block_ll;
     700      405243 :     if( FD_LIKELY( !block_slist_is_empty( sl, block_pool ) ) )  block_slist_ele_peek_tail( sl, block_pool )->insert_ready = 0;
     701      405243 :     block_slist_ele_push_tail( sl, block, block_pool );
     702      405243 :   }
     703      405246 :   return 0;
     704      405252 : }
     705             : 
     706             : 
     707             : 
     708             : int
     709             : fd_rdisp_remove_block( fd_rdisp_t          * disp,
     710      405243 :                        FD_RDISP_BLOCK_TAG_T  block_tag ) {
     711      405243 :   fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
     712             : 
     713      405243 :   fd_rdisp_blockinfo_t * block   = block_map_ele_query( disp->blockmap, &block_tag, NULL, block_pool );
     714      405243 :   if( FD_UNLIKELY( block==NULL ) ) return -1;
     715             : 
     716      405240 :   FD_TEST( block->schedule_ready );
     717      405240 :   FD_TEST( block->completed_cnt==block->inserted_cnt );
     718      405240 :   FD_TEST( zombie_dlist_is_empty( block->zombie_list, disp->pool ) );
     719             : 
     720      405240 :   if( FD_LIKELY( block->staged ) ) {
     721      405240 :     ulong staging_lane = (ulong)block->staging_lane;
     722      405240 :     block_slist_t * sl = disp->lanes[staging_lane].block_ll;
     723             : 
     724      405240 :     FD_TEST( block==block_slist_ele_peek_head( sl, block_pool ) );
     725      405240 :     block_slist_idx_pop_head( sl, block_pool );
     726      405240 :     if( FD_LIKELY( !block_slist_is_empty( sl, block_pool ) ) ) block_slist_ele_peek_head( sl, block_pool )->schedule_ready = 1;
     727       12024 :     else                                                       free_lane( disp, staging_lane );
     728      405240 :   } else {
     729           0 :     unstaged_txn_ll_delete( unstaged_txn_ll_leave( block->ll ) );
     730           0 :   }
     731      405240 :   block_pool_idx_release( block_pool, block_map_idx_remove( disp->blockmap, &block_tag, ULONG_MAX, block_pool ) );
     732             : 
     733      405240 :   return 0;
     734      405240 : }
     735             : 
     736             : 
     737             : int
     738             : fd_rdisp_abandon_block( fd_rdisp_t          * disp,
     739           6 :                         FD_RDISP_BLOCK_TAG_T  block_tag ) {
     740           6 :   fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
     741             : 
     742           6 :   fd_rdisp_blockinfo_t * block   = block_map_ele_query( disp->blockmap, &block_tag, NULL, disp->block_pool );
     743           6 :   if( FD_UNLIKELY( block==NULL ) ) return -1;
     744             : 
     745           3 :   FD_TEST( block->schedule_ready );
     746           3 :   FD_TEST( block->dispatched_cnt==block->completed_cnt ); /* TODO: remove this when it can call complete properly */
     747          12 :   while( block->completed_cnt<block->inserted_cnt ) {
     748             :     /* because there is nothing DISPATCHED, there has to be something
     749             :        READY */
     750           9 :     ulong txn = fd_rdisp_get_next_ready( disp, block_tag );
     751           9 :     FD_TEST( txn );
     752           9 :     fd_rdisp_complete_txn( disp, txn, 1 );
     753           9 :   }
     754           3 :   while( !zombie_dlist_is_empty( block->zombie_list, disp->pool ) ) {
     755           0 :     fd_rdisp_complete_txn( disp, zombie_dlist_idx_pop_head( block->zombie_list, disp->pool ), 1 );
     756           0 :   }
     757             : 
     758           3 :   if( FD_LIKELY( block->staged ) ) {
     759           3 :     ulong staging_lane = (ulong)block->staging_lane;
     760           3 :     block_slist_t * sl = disp->lanes[staging_lane].block_ll;
     761             : 
     762           3 :     FD_TEST( block==block_slist_ele_peek_head( sl, block_pool ) );
     763           3 :     block_slist_idx_pop_head( sl, block_pool );
     764           3 :     if( FD_LIKELY( !block_slist_is_empty( sl, block_pool ) ) ) block_slist_ele_peek_head( sl, block_pool )->schedule_ready = 1;
     765           0 :     else                                                       free_lane( disp, staging_lane );
     766           3 :   } else {
     767           0 :     unstaged_txn_ll_delete( unstaged_txn_ll_leave( block->ll ) );
     768           0 :   }
     769           3 :   block_pool_idx_release( disp->block_pool, block_map_idx_remove( disp->blockmap, &block_tag, ULONG_MAX, disp->block_pool ) );
     770             : 
     771           3 :   return 0;
     772           3 : }
     773             : 
     774             : static void
     775             : add_edges( fd_rdisp_t           * disp,
     776             :            fd_rdisp_txn_t       * ele,
     777             :            fd_acct_addr_t const * addr,
     778             :            ulong                  addr_cnt,
     779             :            uint                   staging_lane,
     780             :            int                    writable,
     781             :            int                    update_score );
     782             : /* updates in_degree, edge_cnt_etc */
     783             : 
     784             : int
     785             : fd_rdisp_promote_block( fd_rdisp_t *          disp,
     786             :                         FD_RDISP_BLOCK_TAG_T  block_tag,
     787           6 :                         ulong                 staging_lane ) {
     788           6 :   fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
     789           6 :   per_lane_info_t * lane = disp->lanes + staging_lane;
     790           6 :   block_slist_t * sl = lane->block_ll;
     791             : 
     792           6 :   fd_rdisp_blockinfo_t * block   = block_map_ele_query( disp->blockmap, &block_tag, NULL, block_pool );
     793           6 :   if( FD_UNLIKELY( block==NULL   ) ) return -1;
     794           6 :   if( FD_UNLIKELY( block->staged ) ) return -1;
     795             : 
     796           6 :   block->staged = 1;
     797           6 :   block->staging_lane = (uint)(staging_lane & 0x3);
     798           6 :   block->insert_ready = 1;
     799           6 :   block->schedule_ready = (uint)(1 & (disp->free_lanes >> staging_lane));
     800             : 
     801           6 :   if( FD_LIKELY( disp->free_lanes & (1<<staging_lane) ) ) alloc_lane( disp, staging_lane );
     802             : 
     803           6 :   if( FD_LIKELY( !block_slist_is_empty( sl, block_pool ) ) )  block_slist_ele_peek_tail( sl, block_pool )->insert_ready = 0;
     804           6 :   block_slist_ele_push_tail( sl, block, block_pool );
     805           6 :   uint linear_block_number = (uint)lane->linear_block_number++;
     806           6 :   block->linear_block_number = linear_block_number;
     807             : 
     808           6 :   unstaged_txn_ll_iter_t next;
     809           6 :   for( unstaged_txn_ll_iter_t iter = unstaged_txn_ll_iter_init( block->ll, disp->pool );
     810          24 :       !unstaged_txn_ll_iter_done( iter, block->ll, disp->pool );
     811          18 :       iter = next ) {
     812          18 :       next = unstaged_txn_ll_iter_next( iter, block->ll, disp->pool );
     813             : 
     814          18 :     fd_rdisp_txn_t      * ele = unstaged_txn_ll_iter_ele( iter, block->ll, disp->pool );
     815          18 :     fd_rdisp_unstaged_t * uns = disp->unstaged + unstaged_txn_ll_iter_idx( iter, block->ll, disp->pool );
     816          18 :     FD_TEST( ele->in_degree==IN_DEGREE_UNSTAGED );
     817             : 
     818          18 :     ele->in_degree    = 0U;
     819          18 :     ele->edge_cnt_etc = 0U;
     820             : 
     821          18 :     add_edges( disp, ele, uns->keys,                   uns->writable_cnt, (uint)staging_lane, 1, 0 );
     822          18 :     add_edges( disp, ele, uns->keys+uns->writable_cnt, uns->readonly_cnt, (uint)staging_lane, 0, 0 );
     823             : 
     824          18 :     ele->edge_cnt_etc |= (uint)staging_lane<<14;
     825          18 :     ele->edge_cnt_etc |= linear_block_number<<16;
     826             : 
     827          18 :     if( FD_UNLIKELY( ele->in_degree==0U ) ) {
     828           3 :       pending_prq_ele_t temp[1] = {{ .score = ele->score, .linear_block_number = linear_block_number, .txn_idx = (uint)(ele-disp->pool)}};
     829           3 :       pending_prq_insert( lane->pending, temp );
     830           3 :     }
     831          18 :   }
     832           6 :   unstaged_txn_ll_delete( unstaged_txn_ll_leave( block->ll ) );
     833             : 
     834           6 :   lane->inserted_cnt   += block->inserted_cnt;
     835           6 :   lane->dispatched_cnt += block->dispatched_cnt;
     836           6 :   lane->completed_cnt  += block->completed_cnt;
     837             : 
     838           6 :   return 0;
     839           6 : }
     840             : 
     841             : int
     842             : fd_rdisp_demote_block( fd_rdisp_t *          disp,
     843           3 :                        FD_RDISP_BLOCK_TAG_T  block_tag ) {
     844           3 :   fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
     845             : 
     846           3 :   fd_rdisp_blockinfo_t * block   = block_map_ele_query( disp->blockmap, &block_tag, NULL, block_pool );
     847           3 :   if( FD_UNLIKELY(  block==NULL           ) ) return -1;
     848           3 :   if( FD_UNLIKELY( !block->staged         ) ) return -1;
     849           3 :   if( FD_UNLIKELY( !block->schedule_ready ) ) return -1;
     850           3 :   if( FD_UNLIKELY(  block->completed_cnt!=block->inserted_cnt ) ) FD_LOG_ERR(( "demote_block called with non-empty block" ));
     851           3 :   ulong staging_lane = block->staging_lane;
     852           3 :   block->staged = 0;
     853             : 
     854           3 :   per_lane_info_t * lane = disp->lanes + staging_lane;
     855           3 :   block_slist_t * sl = lane->block_ll;
     856             : 
     857           3 :   lane->inserted_cnt   -= block->inserted_cnt;
     858           3 :   lane->dispatched_cnt -= block->dispatched_cnt;
     859           3 :   lane->completed_cnt  -= block->completed_cnt;
     860             : 
     861           3 :   block->linear_block_number = (uint)disp->unstaged_lblk_num++;
     862             : 
     863             :   /* staged and schedule_ready means it must be the head of the staging lane */
     864           3 :   FD_TEST( block_slist_ele_peek_head( sl, block_pool )==block );
     865           3 :   block_slist_idx_pop_head( sl, block_pool );
     866             : 
     867           3 :   unstaged_txn_ll_join( unstaged_txn_ll_new( block->ll ) );
     868             : 
     869           3 :   if( FD_LIKELY( !block_slist_is_empty( sl, block_pool ) ) ) block_slist_ele_peek_head( sl, block_pool )->schedule_ready = 1;
     870           3 :   else                                                       free_lane( disp, staging_lane );
     871           3 :   return 0;
     872           3 : }
     873             : 
     874             : int
     875             : fd_rdisp_rekey_block( fd_rdisp_t *           disp,
     876             :                       FD_RDISP_BLOCK_TAG_T   new_tag,
     877           0 :                       FD_RDISP_BLOCK_TAG_T   old_tag ) {
     878           0 :   fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
     879             : 
     880           0 :   if( FD_UNLIKELY(        NULL!= block_map_ele_query_const( disp->blockmap, &new_tag, NULL, block_pool ) ) ) return -1;
     881           0 :   fd_rdisp_blockinfo_t * block = block_map_ele_query      ( disp->blockmap, &old_tag, NULL, block_pool );
     882           0 :   if( FD_UNLIKELY(        NULL== block ) )                                                                   return -1;
     883             : 
     884           0 :   block->block = new_tag;
     885           0 :   block_map_ele_insert( disp->blockmap, block, block_pool );
     886           0 :   return 0;
     887           0 : }
     888             : 
     889             : 
     890             : /* "Registers" a reference to the account in info at transaction
     891             :    global_insert_cnt.  Returns the value of the EMA, which is an
     892             :    estimate of the probability that the next transaction also references
     893             :    the account.  This value does not matter for correctness, which is
     894             :    why floating point arithmetic is okay. */
     895             : static inline float
     896             : update_ema( acct_info_t * info,
     897     9186144 :             ulong         global_insert_cnt ) {
     898             : #if FD_RDISP_DISABLE_EMA
     899             :   (void)info;
     900             :   (void)global_insert_cnt;
     901             :   return 0.0f;
     902             : #else
     903    18372288 : #define ALPHA 0.005f
     904             :   /* The normal EMA update equation is
     905             :                 e_i = (alpha) * x_i + (1-alpha)*e_{i-1},
     906             :      where alpha is a constant in (0,1).  Let L be the last reference of
     907             :      the account, and G be the current global_insert_cnt value.  We know
     908             :      that e_L = ema_refs, and that for i in [L+1, G), x_i = 0.
     909             :      That means
     910             :                e_{G-1} =         (1-alpha)^(G-L-1) * ema_refs
     911             :                e_G     = alpha + (1-alpha)^(G-L)   * ema_refs
     912             :    */
     913             :   /* last_ref only captures the low 24 bits, so we guess its the highest
     914             :      value for them that would still make it less than
     915             :      global_insert_cnt.  Turns out, we can calculate that with just an
     916             :      AND. */
     917     9186144 :   ulong last_ref = (ulong)info->last_ref;
     918     9186144 :   ulong delta = (global_insert_cnt - last_ref) & 0xFFFFFFUL;
     919     9186144 :   float ema_refs = ALPHA + powf( 1.0f-ALPHA, (float)delta ) * info->ema_refs;
     920             : 
     921     9186144 :   info->ema_refs = ema_refs;
     922     9186144 :   info->last_ref = (uint)(global_insert_cnt & 0xFFFFFFUL);
     923     9186144 : #undef ALPHA
     924             : 
     925     9186144 :   return ema_refs;
     926     9186144 : #endif
     927     9186144 : }
     928             : 
     929             : static void
     930             : add_edges( fd_rdisp_t           * disp,
     931             :            fd_rdisp_txn_t       * ele,
     932             :            fd_acct_addr_t const * addrs,
     933             :            ulong                  addr_cnt,
     934             :            uint                   lane,
     935             :            int                    writable,
     936     6998862 :            int                    update_score ) {
     937             : 
     938     6998862 :   ulong acct_idx = (ele->edge_cnt_etc & 0x7FU) +     ((ele->edge_cnt_etc>>7) & 0x7FU);
     939     6998862 :   ulong edge_idx = (ele->edge_cnt_etc & 0x7FU) + 3UL*((ele->edge_cnt_etc>>7) & 0x7FU);
     940             : 
     941    16184931 :   for( ulong i=0UL; i<addr_cnt; i++ ) {
     942     9186069 :     fd_acct_addr_t const * addr = addrs+i;
     943     9186069 :     acct_info_t * ai = NULL;
     944             : 
     945             :     /* Step 1: lookup the pubkey */
     946     9186069 :     ulong idx = acct_map_idx_query( disp->acct_map, addr, ULONG_MAX, disp->acct_pool );
     947     9186069 :     if( FD_UNLIKELY( idx==ULONG_MAX ) ) {
     948     1873431 :       idx = acct_map_idx_query( disp->free_acct_map, addr, ULONG_MAX, disp->acct_pool );
     949     1873431 :       if( FD_UNLIKELY( idx==ULONG_MAX ) ) {
     950             :         /* The acct pool is sized so that the list cannot be empty at this
     951             :            point.  However, the element at the head might be the free
     952             :            map with a different pubkey. */
     953      197907 :         idx = free_dlist_idx_peek_head( disp->free_acct_dlist, disp->acct_pool );
     954      197907 :         ai = disp->acct_pool+idx;
     955             : 
     956             :         /* CACHED -> FREE transition */
     957      197907 :         if( FD_LIKELY( ai->next!=0U ) ) {
     958      177357 :           acct_map_idx_remove_fast( disp->free_acct_map, idx, disp->acct_pool );
     959      177357 :         }
     960             : 
     961             :         /* FREE -> ACTIVE transition */
     962      197907 :         ai->key      = *addr;
     963      197907 :         ai->flags    = 0U;
     964      197907 :         ai->last_ref = 0U;
     965      197907 :         ai->ema_refs = 0.0f;
     966     1675524 :       } else {
     967             :         /* CACHED -> ACTIVE transition */
     968     1675524 :         ai = disp->acct_pool+idx;
     969     1675524 :         ai->flags    = 0U; /* FIXME: unnecessary */
     970     1675524 :         acct_map_idx_remove_fast( disp->free_acct_map, idx, disp->acct_pool );
     971     1675524 :       }
     972             :       /* In either case, at this point, the element is not in any map
     973             :          but is in free_acct_dlist.  It has the right key. last_ref, and
     974             :          ema_refs are valid. flags is 0. */
     975     1873431 :       free_dlist_idx_remove( disp->free_acct_dlist, idx, disp->acct_pool );
     976     1873431 :       memset( ai->last_reference, '\0', sizeof(ai->last_reference) );
     977     1873431 :       acct_map_idx_insert( disp->acct_map, idx, disp->acct_pool );
     978     1873431 :     }
     979     9186069 :     ai = disp->acct_pool+idx;
     980             :     /* At this point, in all cases, the acct_info is now in the ACTIVE
     981             :        state.  It's in acct_map, not in free_acct_map, and not in
     982             :        free_acct_dlist. */
     983             : 
     984             :     /* Assume that transactions are drawn randomly from some large
     985             :        distribution of potential transactions.  We want to estimate the
     986             :        expected value of the probability that this transaction conflicts
     987             :        with the next transaction that is sampled.  Two transactions
     988             :        conflict if they conflict on any account, and in general, they
     989             :        conflict if they both reference the same account, unless both
     990             :        this transaction and the next one only read it.  We don't have
     991             :        read/write info, so the best guess we have is that the next
     992             :        transaction does the same thing to the account that this one
     993             :        does.  That means we only really care about the accounts that
     994             :        this transaction writes to.  Label those a_1, a_2, ..., a_i, and
     995             :        suppose the probability that the next transaction references a_i
     996             :        is p_i (determined using the EMA).  Now then, assuming accounts
     997             :        are independent (which is false, but whatever), then the
     998             :        probability that the next transaction does not conflict with this
     999             :        account is:
    1000             :                      (1-p_1) * (1-p_2) * (1 - p_i).
    1001             : 
    1002             :        Since for the treap, a lower value means we'll schedule it
    1003             :        earlier, we'll use the probability of non-conflict as the
    1004             :        fractional part of the score. */
    1005     9186069 :     float score_change = 1.0f - update_ema( ai, disp->global_insert_cnt );
    1006     9186069 :     ele->score *= fd_float_if( writable & update_score, score_change, 1.0f );
    1007             : 
    1008             :     /* Step 2: add edge. There are 4 cases depending on whether this is
    1009             :        a writer or not and whether the previous reference was a writer
    1010             :        or not. */
    1011     9186069 :     int      _ignore;
    1012             : 
    1013     9186069 :     edge_t   ref_to_pa = ai->last_reference[ lane ];
    1014     9186069 :     edge_t   ref_to_me = (uint)((ulong)((ele - disp->pool)<<8) | acct_idx);
    1015     9186069 :     edge_t * pa        = FOLLOW_EDGE( disp->pool, ai->last_reference[ lane ], _ignore );
    1016     9186069 :     edge_t * me        = ele->edges + edge_idx;
    1017             : 
    1018             :     /* In the case that this is the first txn in the DAG, pa will point
    1019             :        to edges[0] of the sentinel element, pool[0].  We don't care
    1020             :        about what is stored there, so just set it up as a dummy element
    1021             :        to make the rest of the code work properly in this case too.  If
    1022             :        this is a read, we want me->sibli to ref_to_me in case 4; if this
    1023             :        is a write, we want to set me->sibli to 0 in case 2, but we need
    1024             :        to make sure pb==pa. */
    1025     9186069 :     disp->pool->edges[0] = (1U<<31) | (uint)idx;
    1026     9186069 :     disp->pool->edges[1] = fd_uint_if( writable, 0U, ref_to_me );
    1027     9186069 :     disp->pool->edges[2] = fd_uint_if( writable, 0U, ref_to_me );
    1028             : 
    1029     9186069 :     int flags = ai->flags;
    1030             : 
    1031     9186069 :     if( writable ) { /* also should be known at compile time */
    1032     3588582 :       if( flags & ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ) ) { /* unclear prob */
    1033             :         /* Case 1: w-w. The parent is the special last pointer.  Point
    1034             :            the parent to me, and set me to the last pointer. */
    1035     1171821 :         *me = *pa;
    1036     1171821 :         *pa = ref_to_me;
    1037     2416761 :       } else {
    1038             :         /* Case 2: r-w. This is the tricky case because there could be
    1039             :            multiple readers.  We need to set all the last readers' child
    1040             :            pointers to me. */
    1041     2416761 :         *me = *pa;
    1042     2416761 :         *pa = ref_to_me;
    1043     2416761 :         edge_t * pb = FOLLOW_EDGE( disp->pool, pa[1], _ignore );
    1044             :         /* Intentionally skip the first in_degree increment, because it
    1045             :            will be done later */
    1046     2810415 :         while( pb!=pa ) {
    1047      393654 :           *pb = ref_to_me;
    1048      393654 :           ele->in_degree++;
    1049      393654 :           pb = FOLLOW_EDGE( disp->pool, pb[1], _ignore );
    1050      393654 :         }
    1051     2416761 :         flags |= ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ) | ACCT_INFO_FLAG_ANY_WRITERS( lane );
    1052     2416761 :       }
    1053     5597487 :     } else {
    1054     5597487 :       if( flags & ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ) ) { /* unclear prob */
    1055             :         /* Case 3: w-r. Similar to case 1, but need to initialize my
    1056             :            next and prev sibling pointers too. */
    1057      423078 :         *me = *pa;
    1058      423078 :         *pa = ref_to_me;
    1059      423078 :         me[1] = ref_to_me; /* next */
    1060      423078 :         me[2] = ref_to_me; /* prev */
    1061      423078 :         flags &= ~(int)ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ); /* clear bit */
    1062     5174409 :       } else {
    1063             :         /* Case 4: r-r. Add myself as a sibling instead of a child */
    1064     5174409 :         *me = *pa;
    1065     5174409 :         FOLLOW_EDGE( disp->pool, pa[1], _ignore )[2] = ref_to_me;  /* prev->next->prev = me   */
    1066     5174409 :         me[1] = pa[1];                                             /* me->next   = prev->next */
    1067     5174409 :         me[2] = ref_to_pa;                                         /* me->prev   = prev       */
    1068     5174409 :         pa[1] = ref_to_me;                                         /* prev->next = me         */
    1069     5174409 :       }
    1070     5597487 :     }
    1071             : 
    1072             :     /* Step 3: Update the final values */
    1073             :     /* In general, we want to increment the in_degree unless this
    1074             :        transaction is the first to reference this account.  The
    1075             :        exception is that if this account has only readers, including
    1076             :        this transaction, we don't want to increment the in_degree
    1077             :        either.  At this point, we can tell if that is the case based on
    1078             :        ANY_WRITERS.  */
    1079     9186069 :     ele->in_degree += (uint)((ai->last_reference[ lane ]!=0U) & !!(flags & ACCT_INFO_FLAG_ANY_WRITERS( lane )));
    1080     9186069 :     ai->last_reference[ lane ] = ref_to_me;
    1081     9186069 :     ai->flags                  = (uchar)flags;
    1082     9186069 :     edge_idx += fd_uint_if( writable, 1U, 3U );
    1083     9186069 :     acct_idx++;
    1084     9186069 :   }
    1085     6998862 :   ele->edge_cnt_etc += (uint)addr_cnt<<fd_int_if( writable, 0, 7 ); /* Can't overflow by construction */
    1086     6998862 : }
    1087             : 
    1088             : 
    1089             : /* should be called with all writable accounts first */
    1090             : static void
    1091             : add_unstaged_edges( fd_rdisp_t * disp,
    1092             :                     fd_rdisp_txn_t       * ele,
    1093             :                     fd_rdisp_unstaged_t  * unstaged,
    1094             :                     fd_acct_addr_t const * addr,
    1095             :                     ulong                  addr_cnt,
    1096             :                     int                    writable,
    1097         216 :                     int                    update_score ) {
    1098         216 :   ulong base_idx = unstaged->writable_cnt+unstaged->readonly_cnt;
    1099         216 :   FD_TEST( !writable || unstaged->readonly_cnt==0U );
    1100         366 :   for( ulong i=0UL; i<addr_cnt; i++ ) {
    1101         150 :     unstaged->keys[ base_idx+i ] = addr[i];
    1102         150 :     if( FD_LIKELY( update_score ) ) {
    1103         150 :       ulong idx = acct_map_idx_query( disp->acct_map, addr+i, ULONG_MAX, disp->acct_pool );
    1104         150 :       if( FD_UNLIKELY( idx==ULONG_MAX ) ) idx = acct_map_idx_query( disp->free_acct_map, addr+i, ULONG_MAX, disp->acct_pool );
    1105             :       /* since these are unstaged, we don't bother moving accounts
    1106             :          around */
    1107         150 :       float score_change = 1.0f;
    1108         150 :       if( FD_LIKELY( idx!=ULONG_MAX ) ) score_change = 1.0f - update_ema( disp->acct_pool+idx, disp->global_insert_cnt );
    1109         150 :       ele->score *= fd_float_if( writable & update_score, score_change, 1.0f );
    1110         150 :     }
    1111         150 :   }
    1112         216 :   *(fd_ptr_if( writable, &(unstaged->writable_cnt), &(unstaged->readonly_cnt) ) ) += (uint)addr_cnt;
    1113         216 : }
    1114             : 
    1115             : ulong
    1116             : fd_rdisp_add_txn( fd_rdisp_t          *  disp,
    1117             :                   FD_RDISP_BLOCK_TAG_T   insert_block,
    1118             :                   fd_txn_t const       * txn,
    1119             :                   uchar const          * payload,
    1120             :                   fd_acct_addr_t const * alts,
    1121     1166511 :                   int                    serializing ) {
    1122             : 
    1123     1166511 :   fd_rdisp_blockinfo_t * block   = block_map_ele_query( disp->blockmap, &insert_block, NULL, disp->block_pool );
    1124     1166511 :   if( FD_UNLIKELY( !block || !block->insert_ready ) ) return 0UL;
    1125     1166508 :   if( FD_UNLIKELY( !pool_free( disp->pool       ) ) ) return 0UL;
    1126             : 
    1127     1166508 :   ulong idx = pool_idx_acquire( disp->pool );
    1128     1166508 :   fd_rdisp_txn_t * rtxn = disp->pool + idx;
    1129     1166508 :   if( FD_UNLIKELY( rtxn->in_degree!=IN_DEGREE_FREE ) ) FD_LOG_CRIT(( "pool[%lu].in_degree==%u but free", idx, rtxn->in_degree ));
    1130             : 
    1131     1166508 :   fd_acct_addr_t const * imm_addrs = fd_txn_get_acct_addrs( txn, payload );
    1132             : 
    1133     1166508 :   if( FD_UNLIKELY( !block->staged ) ) {
    1134          36 :     rtxn->in_degree = IN_DEGREE_UNSTAGED;
    1135          36 :     rtxn->score     = 0.999f;
    1136             : 
    1137          36 :     fd_rdisp_unstaged_t * unstaged = disp->unstaged + idx;
    1138          36 :     unstaged->block = insert_block;
    1139          36 :     unstaged->writable_cnt = 0U;
    1140          36 :     unstaged->readonly_cnt = 0U;
    1141             : 
    1142          36 :     add_unstaged_edges( disp, rtxn, unstaged, imm_addrs,
    1143          36 :                                                         fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_SIGNER        ), 1, 1 );
    1144          36 :     add_unstaged_edges( disp, rtxn, unstaged, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_SIGNER ),
    1145          36 :                                                         fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_NONSIGNER_IMM ), 1, 1 );
    1146          36 :     if( FD_LIKELY( alts ) )
    1147          36 :       add_unstaged_edges( disp, rtxn, unstaged, alts,
    1148          36 :                                                         fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_ALT ),           1, 1 );
    1149          36 :     add_unstaged_edges( disp, rtxn, unstaged, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_SIGNER ),
    1150          36 :                                                         fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_SIGNER        ), 0, 1 );
    1151          36 :     add_unstaged_edges( disp, rtxn, unstaged, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_SIGNER | FD_TXN_ACCT_CAT_WRITABLE_NONSIGNER_IMM ),
    1152          36 :                                                         fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_NONSIGNER_IMM ), 0, 1 );
    1153          36 :     if( FD_LIKELY( alts ) )
    1154          36 :       add_unstaged_edges( disp, rtxn, unstaged, alts   +fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_ALT ),
    1155          36 :                                                         fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_ALT ),           0, 1 );
    1156             : 
    1157          36 :     unstaged_txn_ll_ele_push_tail( block->ll, rtxn, disp->pool );
    1158     1166472 :   } else {
    1159     1166472 :     uint lane = block->staging_lane;
    1160             : 
    1161     1166472 :     rtxn->in_degree    = 0U;
    1162     1166472 :     rtxn->score        = 0.999f;
    1163     1166472 :     rtxn->edge_cnt_etc = (block->linear_block_number<<16) | (lane<<14);
    1164             : 
    1165     1166472 :     add_edges( disp, rtxn, imm_addrs,
    1166     1166472 :                                      fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_SIGNER        ), lane, 1, 1 );
    1167     1166472 :     add_edges( disp, rtxn, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_SIGNER ),
    1168     1166472 :                                      fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_NONSIGNER_IMM ), lane, 1, 1 );
    1169     1166472 :     if( FD_LIKELY( alts ) )
    1170     1166469 :       add_edges( disp, rtxn, alts,
    1171     1166469 :                                      fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_ALT ),           lane, 1, 1 );
    1172     1166472 :     add_edges( disp, rtxn, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_SIGNER ),
    1173     1166472 :                                      fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_SIGNER        ), lane, 0, 1 );
    1174     1166472 :     add_edges( disp, rtxn, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_SIGNER | FD_TXN_ACCT_CAT_WRITABLE_NONSIGNER_IMM ),
    1175     1166472 :                                      fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_NONSIGNER_IMM ), lane, 0, 1 );
    1176     1166472 :     if( FD_LIKELY( alts ) )
    1177     1166469 :       add_edges( disp, rtxn, alts   +fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_ALT ),
    1178     1166469 :                                      fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_ALT ),           lane, 0, 1 );
    1179     1166472 :   }
    1180             : 
    1181     1166508 :   if( FD_UNLIKELY( serializing | block->last_insert_was_serializing ) ) {
    1182           6 :     block->last_serializing = block->inserted_cnt;
    1183           6 :   }
    1184     1166508 :   block->last_insert_was_serializing = (uint)!!serializing;
    1185     1166508 :   rtxn->score += (float)block->last_serializing;
    1186             : 
    1187     1166508 :   block->inserted_cnt++;
    1188     1166508 :   disp->global_insert_cnt++;
    1189             : 
    1190     1166508 :   if( FD_LIKELY( (block->staged) & (rtxn->in_degree==0U) ) ) {
    1191      575862 :     pending_prq_ele_t temp[1] = {{ .score = rtxn->score, .linear_block_number = block->linear_block_number, .txn_idx = (uint)idx }};
    1192      575862 :     pending_prq_insert( disp->lanes[ block->staging_lane ].pending, temp );
    1193      575862 :   }
    1194             : 
    1195     1166508 :   return idx;
    1196     1166508 : }
    1197             : 
    1198             : ulong
    1199             : fd_rdisp_get_next_ready( fd_rdisp_t           * disp,
    1200     2023440 :                          FD_RDISP_BLOCK_TAG_T   schedule_block ) {
    1201     2023440 :   fd_rdisp_blockinfo_t * block   = block_map_ele_query( disp->blockmap, &schedule_block, NULL, disp->block_pool );
    1202     2023440 :   if( FD_UNLIKELY( !block || !block->schedule_ready ) ) return 0UL;
    1203             : 
    1204     2023425 :   ulong idx;
    1205     2023425 :   if( FD_LIKELY( block->staged ) ) {
    1206     2023398 :     ulong staging_lane = block->staging_lane;
    1207     2023398 :     per_lane_info_t * l = disp->lanes + staging_lane;
    1208             : 
    1209     2023398 :     if( FD_UNLIKELY( !pending_prq_cnt( l->pending )                                ) ) return 0UL;
    1210     1166496 :     if( FD_UNLIKELY( l->pending->linear_block_number != block->linear_block_number ) ) return 0UL;
    1211             :     /* e.g. when completed_cnt==0, we can accept any score below 1.0 */
    1212     1166490 :     if( FD_UNLIKELY( l->pending->score>=(float)(block->completed_cnt+1U)           ) ) return 0UL;
    1213     1166490 :     idx = l->pending->txn_idx;
    1214     1166490 :     pending_prq_remove_min( l->pending );
    1215     1166490 :     disp->pool[ idx ].in_degree = IN_DEGREE_DISPATCHED;
    1216     1166490 :   } else {
    1217          27 :     if( FD_UNLIKELY( block->dispatched_cnt!=block->completed_cnt       ) ) return 0UL;
    1218          24 :     if( FD_UNLIKELY( unstaged_txn_ll_is_empty( block->ll, disp->pool ) ) ) return 0UL;
    1219          18 :     idx = unstaged_txn_ll_idx_peek_head( block->ll, disp->pool );
    1220          18 :     disp->pool[ idx ].in_degree = IN_DEGREE_UNSTAGED_DISPATCHED;
    1221          18 :   }
    1222     1166508 :   block->dispatched_cnt++;
    1223             : 
    1224     1166508 :   return idx;
    1225     2023425 : }
    1226             : 
    1227             : void
    1228             : fd_rdisp_complete_txn( fd_rdisp_t * disp,
    1229             :                        ulong        txn_idx,
    1230     1166511 :                        int          reclaim ) {
    1231             : 
    1232     1166511 :   fd_rdisp_txn_t * rtxn = disp->pool + txn_idx;
    1233     1166511 :   fd_rdisp_blockinfo_t * block = NULL;
    1234             : 
    1235     1166511 :   if( FD_UNLIKELY( rtxn->in_degree==IN_DEGREE_UNSTAGED_DISPATCHED ) ) {
    1236             :     /* Unstaged */
    1237          18 :     block = block_map_ele_query( disp->blockmap, &disp->unstaged[ txn_idx ].block, NULL, disp->block_pool );
    1238          18 :     FD_TEST( rtxn==unstaged_txn_ll_ele_peek_head( block->ll, disp->pool ) );
    1239          18 :     unstaged_txn_ll_ele_pop_head( block->ll, disp->pool );
    1240          18 :     block->completed_cnt++;
    1241     1166493 :   } else if( FD_LIKELY( rtxn->in_degree==IN_DEGREE_DISPATCHED ) ) {
    1242             :     /* Staged */
    1243     1166490 :     ulong w_cnt = (rtxn->edge_cnt_etc    ) & 0x7FU;
    1244     1166490 :     ulong r_cnt = (rtxn->edge_cnt_etc>> 7) & 0x7FU;
    1245     1166490 :     ulong lane  = (rtxn->edge_cnt_etc>>14) & 0x3U;
    1246     1166490 :     uint  tail_linear_block_num = (uint)(disp->lanes[lane].linear_block_number);
    1247     1166490 :     ulong edge_idx = 0UL;
    1248    10352559 :     for( ulong i=0UL; i<w_cnt+r_cnt; i++ ) {
    1249     9186069 :       edge_t const * e = rtxn->edges+edge_idx;
    1250     9186069 :       edge_t const  e0 = *e;
    1251     9186069 :       edge_t ref_to_me = (uint)((txn_idx<<8) | i);
    1252             : 
    1253             :       /* To help with explanations, consider the following DAG:
    1254             : 
    1255             :            --> B --\   --> E --\
    1256             :           /         V /         V
    1257             :          A           D         (G)
    1258             :           \         ^ \         ^
    1259             :            --> C --/   --> F --/     */
    1260             : 
    1261     9186069 :       if( FD_UNLIKELY( EDGE_IS_LAST( e0 ) ) ) {
    1262     6682734 :         ulong acct_idx = e0 & 0x7FFFFFFFU;
    1263     6682734 :         acct_info_t * ai = disp->acct_pool + acct_idx;
    1264             :         /* If this is a writer, e.g. node G above, we know it's the last
    1265             :            one, so we can clear last_reference.
    1266             :            If this is a reader, e.g. node E and F above, if node G
    1267             :            didn't exist, we need to check if it's the last one
    1268             :            (me==me->next).  If so, we can clear last_reference.  If not,
    1269             :            we need to delete this node from the linked list */
    1270     6682734 :         if( edge_idx<w_cnt || e[1]==ref_to_me ) {
    1271     4103505 :           ai->last_reference[ lane ] = 0U;
    1272     4103505 :           ai->flags = (uchar)(ai->flags & (~(ACCT_INFO_FLAG_ANY_WRITERS( lane ) | ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ))));
    1273     4103505 :         } else {
    1274     2579229 :           int _ignore;
    1275     2579229 :           FOLLOW_EDGE( disp->pool, e[1], _ignore )[2] = e[2];  /* me->next->prev = me->prev */
    1276     2579229 :           FOLLOW_EDGE( disp->pool, e[2], _ignore )[1] = e[1];  /* me->prev->next = me->next */
    1277     2579229 :           ai->last_reference[ lane ]= fd_uint_if( ai->last_reference[ lane ]==ref_to_me, e[1], ai->last_reference[ lane ] );
    1278     2579229 :         }
    1279             : 
    1280             :         /* Potentially transition from ACTIVE -> CACHED */
    1281     6682734 :         if( FD_UNLIKELY( (ai->last_reference[ 0 ]==0U)&(ai->last_reference[ 1 ]==0U)&
    1282     6682734 :                          (ai->last_reference[ 2 ]==0U)&(ai->last_reference[ 3 ]==0U) ) ) {
    1283     1873431 :           ai->flags = 0;
    1284     1873431 :           acct_map_idx_remove_fast( disp->acct_map,        acct_idx, disp->acct_pool );
    1285     1873431 :           acct_map_idx_insert     ( disp->free_acct_map,   acct_idx, disp->acct_pool );
    1286     1873431 :           free_dlist_idx_push_tail( disp->free_acct_dlist, acct_idx, disp->acct_pool );
    1287     1873431 :         }
    1288     6682734 :       } else {
    1289     2503335 :         int child_is_writer;
    1290             : 
    1291     2503335 :         edge_t next_e = e0;
    1292     2503335 :         edge_t const * child_edge;
    1293     2713623 :         while( 1 ) {
    1294             :           /* This loop first traverses the me->child link, and then
    1295             :              traverses any sibling links.  For example, in the case that
    1296             :              we're completing node D above, the first child_txn is E,
    1297             :              and the second child_txn is F. */
    1298     2713623 :           /*            */ child_edge = FOLLOW_EDGE(     disp->pool, next_e, child_is_writer );
    1299     2713623 :           fd_rdisp_txn_t * child_txn  = FOLLOW_EDGE_TXN( disp->pool, next_e                  );
    1300             : 
    1301             :           /* Sanity test */
    1302     2713623 :           FD_TEST( child_txn->in_degree>0U                   );
    1303     2713623 :           FD_TEST( child_txn->in_degree<IN_DEGREE_DISPATCHED );
    1304             : 
    1305     2713623 :           if( FD_UNLIKELY( 0U==(--(child_txn->in_degree)) ) ) {
    1306             :             /* We need an operation something like
    1307             :                fd_frag_meta_ts_decomp. child_txn has the low 16 bits,
    1308             :                and tail_linear_block_num has the full 32 bits, except
    1309             :                for tail_linear_block_num refers to a block < block_depth
    1310             :                later.  Since block_depth<2^16, that means we can resolve
    1311             :                this unambiguously.  Basically, we copy the high 16 bits
    1312             :                frorm tail_linear_block_num unless that would make
    1313             :                linear_block_num larger than tail_linear_block_num, in
    1314             :                which case, we subtract 2^16. */
    1315      590625 :             uint low_16_bits = child_txn->edge_cnt_etc>>16;
    1316      590625 :             uint linear_block_num = ((tail_linear_block_num & ~0xFFFFU) | low_16_bits) - (uint)((low_16_bits>(tail_linear_block_num&0xFFFFU))<<16);
    1317      590625 :             pending_prq_ele_t temp[1] = {{ .score               = child_txn->score,
    1318      590625 :                                            .linear_block_number = linear_block_num,
    1319      590625 :                                            .txn_idx             = (uint)(child_txn-disp->pool) }};
    1320      590625 :             pending_prq_insert( disp->lanes[ lane ].pending, temp );
    1321      590625 :           }
    1322     2713623 :           if( child_is_writer || child_edge[1]==e0 ) break;
    1323      210288 :           next_e = child_edge[1];
    1324      210288 :         }
    1325             :         /* In the case that the completed transaction is a reader, say B
    1326             :            or C above, it seems like we should need to remove it from
    1327             :            the doubly linked list, but we actually don't.  The times
    1328             :            that we need to read the sibling pointers are:
    1329             :             1. Completing the writer before a reader (e.g. completing A)
    1330             :             2. Completing a reader that's the last in the DAG (e.g.
    1331             :                completing E/F if G didn't exist)
    1332             :             3. Adding another reader to the same set of readers (e.g. if
    1333             :                G were added as a reader instead of a writer)
    1334             :             4. Adding a writer after a set of readers (e.g. adding G).
    1335             : 
    1336             :            Supposing that the completed transaction is a reader, since
    1337             :            we checked EDGE_IS_LAST( e0 ), we know that there is at least
    1338             :            one writer that follows this reader, e.g. D above.
    1339             : 
    1340             :            And so none of these reason can apply to this group of readers:
    1341             :             1. Completing B or C implies that A has already completed,
    1342             :                so it can't complete again.
    1343             :             2. We know that we're not in that case because we checked
    1344             :                EDGE_IS_LAST( e0 ), and if we're not last now, we cannot
    1345             :                become last later, because the growth happens in the
    1346             :                other direction.
    1347             :             3. Similarly, because we checked EDGE_IS_LAST, any future
    1348             :                additions of readers won't be to this group of readers.
    1349             :             4. Similarly, we know that there already is a writer that
    1350             :                follows this group of readers, so a later writer would
    1351             :                not read this set of readers.
    1352             : 
    1353             :            So then, we don't need to deal with the sibling edges.  The
    1354             :            fact that we only need to do it in one case almost calls into
    1355             :            question whether we need to maintain the whole circular
    1356             :            system in the first place, and whether we could get away with
    1357             :            a reference count or something instead, but it's important in
    1358             :            one critical case: suppose we add a bunch of readers, then
    1359             :            some of them complete, and then we add a writer (case 4
    1360             :            above).  We need to be able to enumerate the nodes in the DAG
    1361             :            that have not yet completed, and we need to be able to remove
    1362             :            them from that set in O(1).  Those two requirements don't
    1363             :            leave us with many alternatives besides a doubly linked list. */
    1364             : 
    1365     2503335 :         if( FD_UNLIKELY( EDGE_IS_LAST( *child_edge ) ) ) {
    1366             :           /* For example, either:
    1367             :              1. completing D when if G didn't exist
    1368             :              2. completing E or F with G in the DAG
    1369             : 
    1370             :              After completing this transaction, there's either 0 writers
    1371             :              left (case 1) or 1 writer left (case 2) in the DAG. and if
    1372             :              there is one, it is the last reference.
    1373             : 
    1374             :              Either way, we want to set ANY_WRITERS to
    1375             :              LAST_REF_WAS_WRITE. */
    1376     1965492 :           acct_info_t * ai = disp->acct_pool + (*child_edge & 0x7FFFFFFFU);
    1377     1965492 :           ulong flags = ai->flags;
    1378     1965492 :           flags &= ~(ulong)ACCT_INFO_FLAG_ANY_WRITERS( lane );
    1379     1965492 :           flags |= (flags & (ulong)ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ))<<1;
    1380     1965492 :           ai->flags = (uchar)flags;
    1381     1965492 :         }
    1382     2503335 :       }
    1383     9186069 :       edge_idx += fd_ulong_if( i<w_cnt, 1UL, 3UL );
    1384     9186069 :     }
    1385     1166490 :     block = block_slist_ele_peek_head( disp->lanes[ lane ].block_ll, disp->block_pool );
    1386     1166490 :     block->completed_cnt++;
    1387     1166490 :   } else if( FD_LIKELY( rtxn->in_degree==IN_DEGREE_ZOMBIE ) ) {
    1388           3 :     FD_TEST( reclaim );
    1389           3 :     block = block_pool_ele( disp->block_pool, rtxn->block_idx );
    1390           3 :     zombie_dlist_ele_remove( block->zombie_list, rtxn, disp->pool );
    1391             :     /* Fall through to the pool release branch below. */
    1392           3 :   } else {
    1393           0 :     FD_LOG_CRIT(( "completed un-dispatched transaction %lu", txn_idx ));
    1394           0 :   }
    1395     1166511 :   if( reclaim ) {
    1396             :     /* For testing purposes, to make sure we don't read a completed
    1397             :        transaction, we can clobber the memory. */
    1398             :     /* memset( disp->pool+txn_idx, '\xCC', sizeof(fd_rdisp_txn_t) ); */
    1399     1166508 :     rtxn->in_degree = IN_DEGREE_FREE;
    1400     1166508 :     pool_idx_release( disp->pool, txn_idx );
    1401     1166508 :   } else {
    1402           3 :     rtxn->in_degree = IN_DEGREE_ZOMBIE;
    1403           3 :     zombie_dlist_ele_push_tail( block->zombie_list, rtxn, disp->pool );
    1404           3 :     rtxn->block_idx = (uint)block_pool_idx( disp->block_pool, block );
    1405           3 :   }
    1406     1166511 : }
    1407             : 
    1408             : 
    1409             : ulong
    1410             : fd_rdisp_staging_lane_info( fd_rdisp_t           const * disp,
    1411          36 :                             fd_rdisp_staging_lane_info_t out_sched[ static 4 ] ) {
    1412         180 :   for( ulong i=0UL; i<4UL; i++ ) {
    1413         144 :     if( !(disp->free_lanes & (1<<i) ) ) {
    1414          51 :       block_slist_t const * sl = disp->lanes[ i ].block_ll;
    1415          51 :       out_sched[ i ].insert_ready_block   = block_slist_ele_peek_tail( sl, disp->block_pool )->block;
    1416          51 :       out_sched[ i ].schedule_ready_block = block_slist_ele_peek_head( sl, disp->block_pool )->block;
    1417          51 :     }
    1418         144 :   }
    1419          36 :   return 0xFUL & ~(ulong)disp->free_lanes;
    1420          36 : }
    1421             : 
    1422             : void
    1423             : fd_rdisp_verify( fd_rdisp_t const * disp,
    1424      776976 :                  uint             * scratch ) {
    1425      776976 :   ulong acct_depth  = disp->depth*MAX_ACCT_PER_TXN;
    1426      776976 :   ulong block_depth = disp->block_depth;
    1427      776976 :   FD_TEST( 0==acct_map_verify ( disp->acct_map,      acct_depth+1UL,  disp->acct_pool ) );
    1428      776976 :   FD_TEST( 0==acct_map_verify ( disp->free_acct_map, acct_depth+1UL,  disp->acct_pool ) );
    1429      776976 :   FD_TEST( 0==block_map_verify( disp->blockmap,     block_depth+1UL, disp->block_pool ) );
    1430             : 
    1431             :   /* Check all the in degree counts are right */
    1432      776976 :   memset( scratch, '\0', sizeof(uint)*(disp->depth+1UL) );
    1433      776976 :   scratch[ 0 ] = UINT_MAX;
    1434   233866176 :   for( ulong j=1UL; j<disp->depth+1UL; j++ ) {
    1435   233089200 :     fd_rdisp_txn_t const * rtxn = disp->pool+j;
    1436   233089200 :     if( rtxn->in_degree==IN_DEGREE_FREE ) { scratch[ j ]=UINT_MAX; continue; }
    1437             : 
    1438    57500856 :     if( (rtxn->in_degree==IN_DEGREE_UNSTAGED_DISPATCHED) |
    1439    57500856 :         (rtxn->in_degree==IN_DEGREE_UNSTAGED)            |
    1440    57500856 :         (rtxn->in_degree==IN_DEGREE_ZOMBIE) ) continue;
    1441             : 
    1442    57500829 :     ulong w_cnt = (rtxn->edge_cnt_etc    ) & 0x7FU;
    1443    57500829 :     ulong r_cnt = (rtxn->edge_cnt_etc>> 7) & 0x7FU;
    1444    57500829 :     ulong edge_idx = 0UL;
    1445             : 
    1446   721543131 :     for( ulong i=0UL; i<w_cnt+r_cnt; i++ ) {
    1447   664042302 :       edge_t const * e = rtxn->edges+edge_idx;
    1448   664042302 :       edge_t const  e0 = *e;
    1449             : 
    1450   664042302 :       edge_idx += fd_ulong_if( i<w_cnt, 1UL, 3UL );
    1451             : 
    1452   664042302 :       if( FD_UNLIKELY( EDGE_IS_LAST( e0 ) ) ) continue;
    1453             : 
    1454   144900756 :       edge_t next_e = e0;
    1455   144900756 :       edge_t const * child_edge;
    1456   144900756 :       edge_t last_e = 0U;
    1457   151021773 :       while( 1 ) {
    1458   151021773 :         int child_is_writer;
    1459             :         /* This loop first traverses the me->child link, and then
    1460             :            traverses any sibling links.  For example, in the case that
    1461             :            we're completing node D above, the first child_txn is E,
    1462             :            and the second child_txn is F. */
    1463   151021773 :         /*            */ child_edge = FOLLOW_EDGE(     disp->pool, next_e, child_is_writer );
    1464   151021773 :         fd_rdisp_txn_t * child_txn  = FOLLOW_EDGE_TXN( disp->pool, next_e                  );
    1465   151021773 :         scratch[ child_txn - disp->pool ]++;
    1466   151021773 :         if( child_is_writer || child_edge[1]==e0 ) break;
    1467     6121017 :         if( last_e!=0U ) FD_TEST( child_edge[2]==last_e );
    1468     6121017 :         last_e = next_e;
    1469     6121017 :         next_e = child_edge[1];
    1470     6121017 :         FD_TEST( next_e>=0x100U );
    1471     6121017 :       }
    1472   144900756 :     }
    1473    57500829 :   }
    1474   233866176 :   for( ulong i=1UL; i<disp->depth+1UL; i++ ) {
    1475   233089200 :     FD_TEST( scratch[ i ]==UINT_MAX ||
    1476   233089200 :              disp->pool[ i ].in_degree==IN_DEGREE_DISPATCHED ||
    1477   233089200 :              disp->pool[ i ].in_degree==IN_DEGREE_UNSTAGED ||
    1478   233089200 :              disp->pool[ i ].in_degree==IN_DEGREE_UNSTAGED_DISPATCHED ||
    1479   233089200 :              disp->pool[ i ].in_degree==IN_DEGREE_ZOMBIE ||
    1480   233089200 :              disp->pool[ i ].in_degree==scratch[ i ] );
    1481   233089200 :   }
    1482      776976 : }
    1483             : 
    1484           9 : void * fd_rdisp_leave ( fd_rdisp_t * disp ) { return disp; }
    1485           9 : void * fd_rdisp_delete( void * mem        ) { return  mem; }

Generated by: LCOV version 1.14