LCOV - code coverage report
Current view: top level - discof/replay - fd_rdisp.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 619 645 96.0 %
Date: 2025-11-29 04:46:19 Functions: 21 23 91.3 %

          Line data    Source code
       1             : #include "fd_rdisp.h"
       2             : #include <math.h> /* for the EMA */
       3             : 
       4             : /* The conflict graph that this file builds is not a general DAG, but
       5             :    the union of several special account-conflict graphs.  Each
       6             :    account-conflict graph has special structure:
       7             :                            ---> 3 --
       8             :                           /          \
       9             :                          /            v
      10             :                1  ---> 2 -----> 4 --> 6 ----> 7
      11             :                         \             ^
      12             :                          \           /
      13             :                           ----> 5 --
      14             : 
      15             :    That is, the graph is almost a line, but may have fan-out and fan-in
      16             :    regions.  The tricky part about representing a graph like this
      17             :    without dynamic memory allocation is that nodes may have arbitrary
      18             :    in-degree and out-degree.  Thus, we use a pretty standard trick and
      19             :    use sibling pointers, denoted below with dotted lines.  Each node
      20             :    maintains at most one successor pointer and at most one sibling
      21             :    pointer.  Although not shown below, the sibling pointers are
      22             :    circularly linked, so 5's sibling is 3.  Additionally, though not
      23             :    shown below, the child of the last node (7) stores information about
      24             :    what account address all this is for, which facilitates deleting the
      25             :    last node in the graph.
      26             : 
      27             : 
      28             :                               --> 3 --
      29             :                              /    ^   \
      30             :                             /     :    \
      31             :                            /      V     v
      32             :                  1  ---> 2        4 --> 6 ----> 7
      33             :                                   ^     ^
      34             :                                   :    /
      35             :                                   V   /
      36             :                                   5 --
      37             : 
      38             :    The normal edge 2->3 along with the sibling edge 3..>4 implies a
      39             :    normal edge 2->4.  That then transitively implies an edge 2->5.
      40             : 
      41             :    We want each node to maintain a count of its in-degree so that we
      42             :    know when it can be executed.  The implied edges also count for the
      43             :    in-degree.  In this example, node 1 has in-degree 0, node 6 has
      44             :    in-degree 3, and the rest have in-degree 1.
      45             : 
      46             :    Maintaining each account-conflict graph is relatively easy given the
      47             :    operations we want to support.  Only the details about sibling edges
      48             :    are worth mentioning.  For example, when deleting node 2, we
      49             :    decrement the in-degree count for its successor, and then follow the
      50             :    sibling pointers, decrementing all the in-degree counts as we go to
      51             :    mark the deletion of the implied edges.  Sibling edges also need to
      52             :    be doubly linked, so that e.g. nodes 3 and 5 can be re-linked in O(1)
      53             :    if node 4 is deleted.  They're also circularly linked as well for
      54             :    convenience.
      55             : 
      56             :    When building the graph, we maintain a map of account to the last
      57             :    node that references it, whether that was a read or write, and
      58             :    whether there are any writers to that account in the graph right now.
      59             :    If the new node reads from an account that was last read, the new
      60             :    node becomes a sibling of the last read, with in-degree increased if
      61             :    there are any writers.  Otherwise, it becomes a successor of the node
      62             :    that last referenced the account. */
      63             : 
      64             : /* For a task like this with lots of graph traversal and pointer
      65             :    chasing, performance is typically limited by memory latency.  That
      66             :    means that the more that can fit in cache, the better the
      67             :    performance.  This implementation uses a lot of bit-packing to
      68             :    improve cache footprint. */
      69             : 
      70             : /* The following structs are all very local to this compilation unit,
      71             :    and so they don't have globally acceptable type names (e.g.
      72             :    fd_rdisp_edge_t). */
      73             : 
      74      777003 : #define MAX_ACCT_PER_TXN 128UL
      75             : 
      76             : /* edge_t: Fields typed edge_t represent an edge in one of the parallel
      77             :    account-conflict DAGs.  Each transaction stores a list of all its
      78             :    outgoing edges.  The type is actually a union of bitfield, but C
      79             :    bitfields are gross, so we just do it manually with macros.  If the
      80             :    high bit is set, that means the transaction storing this edge_t value
      81             :    is the last in this specific DAG, and the lower 31 bits of the
      82             :    value are an index in the map_pool for the account pubkey for this
      83             :    DAG.  See the comments about the hidden edge outgoing from node 7 in
      84             :    the DAG at the top of this file for an example.
      85             :    If the high bit is not set, then the next 23 bits store the
      86             :    destination of the edge, represented by its index in the pool of
      87             :    transactions.  Then the lowest 8 bits store the account index within
      88             :    that transaction of the edge that is part of the same
      89             :    account-specific DAG as this edge.  Because the max depth is 2^23-1,
      90             :    and each transaction can reference 128 accounts, the max accounts
      91             :    that can be referenced fits in 30 bits.
      92             :    The proper type would be something like
      93             :    typedef union {
      94             :      struct {
      95             :        uint is_last:1;
      96             :        uint map_pool_idx:31;
      97             :      } last;
      98             :      struct {
      99             :        uint is_last:1;
     100             :        uint txn_idx:23;
     101             :        uint acct_idx:8;
     102             :      } e;
     103             :    } edge_t;
     104             : 
     105             :    */
     106             : typedef uint edge_t;
     107             : 
     108             : 
     109             : /* fd_rdisp_txn is the representation of a transaction as a node in the
     110             :    DAG. */
     111             : struct fd_rdisp_txn {
     112             :   /* in_degree: The total number of edges summed across all account DAGs
     113             :      with this node as their destination.  In the worst case, all the
     114             :      other transactions in the pool read from each of the max number of
     115             :      accounts that this transaction writes to,  so there are
     116             :      MAX_ACCT_PER_TXN*depth edges that come into this node, which fits in
     117             :      about 30 bits, so we have some room for special values.  If
     118             :      in_degree is one of the following values, then
     119             :      the transaction is: */
     120   234453531 : #define IN_DEGREE_FREE                (UINT_MAX   )
     121    57500892 : #define IN_DEGREE_UNSTAGED            (UINT_MAX-1U)/* unstaged, not dispatched */
     122     1166490 : #define IN_DEGREE_DISPATCHED          (UINT_MAX-2U)/* staged,   dispatched */
     123    57500874 : #define IN_DEGREE_UNSTAGED_DISPATCHED (UINT_MAX-3U)/* unstaged, dispatched */
     124    57500859 : #define IN_DEGREE_ZOMBIE              (UINT_MAX-4U)/* zombie */
     125             :   /* a transaction that is staged and dispatched is must have an
     126             :      in_degree of 0.  in_degree isn't a meaningful concept for unstaged
     127             :      transactions. */
     128             :   uint    in_degree;
     129             : 
     130             :   /* score: integer part stores how many transactions in the block must
     131             :      have completed before this transaction can be scheduled.  This is
     132             :      useful for transactions marked as serializing.  The fractional part
     133             :      gives some measure of how urgent the transaction is, where lower is
     134             :      more urgent.  This means we can't have more transactions in a block
     135             :      marked as serializing than the first integer that a float cannot
     136             :      represent.  That value is about 16M, which is much higher than the
     137             :      maximum number of transactions in a block, so this is not a
     138             :      problem.  If in the very rare case that there are more than just a
     139             :      few serializing transactions, we will lose a bit of precision for
     140             :      the fractional portion, which makes total sense; if there's not
     141             :      much room for parallel execution, having the optimal parallel
     142             :      execution is not very important. */
     143             :   float   score;
     144             : 
     145             :   /* edge_cnt_etc:
     146             :      0xFFFF0000 (16 bits) for linear block number,
     147             :      0x0000C000 (2 bits) for concurrency lane,
     148             :      0x00003F80 (7 bits) for r_cnt
     149             :      0x0000007F (7 bits) for w_cnt */
     150             :   union {
     151             :     uint edge_cnt_etc;
     152             :     /* edge_cnt_etc is only used when the transaction is STAGED and
     153             :        PENDING, READY, or DISPATCHED.  If UNSTAGED or FREE, the next
     154             :        pointer is also here.  It can't be UNSTAGED and FREE at the same
     155             :        time, so there's no conflict with storage there either. */
     156             :     uint unstaged_next;
     157             :     uint free_next;
     158             :     /* If ZOMBIE, pointer back to block is here.  No storage conflict
     159             :        because ZOMBIE is an exclusive state. */
     160             :     uint block_idx;
     161             :   };
     162             : 
     163             : 
     164             :   /* When a transaction writes to an account, it only creates one
     165             :      link.  When a transaction reads from an account, we need the full
     166             :      doubly linked list with its siblings, so it creates 3 edges
     167             :      (child, next, prev).  All edges from writable accounts come first,
     168             :      and we keep track of how many there are.  In the worst case, all
     169             :      accounts are reads, so we size it appropriately. */
     170             :   edge_t edges[3UL*MAX_ACCT_PER_TXN]; /* addressed [0, w_cnt+3*r_cnt) */
     171             : };
     172             : typedef struct fd_rdisp_txn fd_rdisp_txn_t;
     173             : 
     174             : #define EDGE_IS_LAST(x) ((x)&0x80000000U)
     175             : 
     176             : /* Two more definitions:
     177             :    An edge index is an array position within the edges array.  An
     178             :    account index is a position within a transaction's account addresses,
     179             :    reordered so that the writable ones come first.  Since writable
     180             :    accounts use one position in the edges array per account address,
     181             :    these often coincide. */
     182             : 
     183             : 
     184             : /* FOLLOW_EDGE and FOLLOW_EDGE_TXN are helper macros for dealing with
     185             :    edges.  Given an edge_t x, and transaction pool base, FOLLOW_EDGE_TXN
     186             :    returns a pointer to transaction that the edge points to; FOLLOW_EDGE
     187             :    returns a pointer to the (first) edge_t within that transaction that
     188             :    is part of the same DAG as this edge.  FOLLOW_EDGE and
     189             :    FOLLOW_EDGE_TXN must not be called if EDGE_IS_LAST is non-zero.
     190             : 
     191             :    Then the edge index, i.e. the position in the edges array of the
     192             :    (first) edge for an account index is:
     193             :           acct_idx                      if acct_idx<w_cnt
     194             :           w_cnt + 3*(acct_idx-w_cnt)    else.
     195             :   Simplifying gives
     196             :          acct_idx + 2*signed_max( 0, acct_idx-w_cnt ).
     197             :   In doing this calculation, we also basically get for free whether the
     198             :   edge is for a writable account or a readonly account, so we return
     199             :   that as well via the w parameter.  If the child transaction only reads
     200             :   the account address for this DAG, then the next and prev pointers can
     201             :   be accessed using the returned value +1 and +2, respectively. */
     202   176064747 : #define FOLLOW_EDGE(base, x, w) (__extension__({                                 \
     203   176064747 :         uint __e = (x);                                                          \
     204   176064747 :         fd_rdisp_txn_t * __txn = ((base)+(__e>>8));                              \
     205   176064747 :         uint __wcnt = __txn->edge_cnt_etc & 0x7FU;                               \
     206   176064747 :         uint __idx  = (__e & 0xFFU);                                             \
     207   176064747 :         (w) = __idx<__wcnt;                                                      \
     208   176064747 :         (void)(w);  /* not robust... */                                          \
     209   176064747 :         __txn->edges + __idx + 2*fd_int_max( 0, (int)(__idx)-(int)(__wcnt) ); }))
     210   153735396 : #define FOLLOW_EDGE_TXN(base, x) ( (base)+((x)>>8) )
     211             : 
     212             : /* The pool and slist are almost the same, but they are used
     213             :    differently, so keep them as different structures for now. */
     214             : 
     215             : #define POOL_NAME     pool
     216          27 : #define POOL_T        fd_rdisp_txn_t
     217             : #define POOL_IDX_T    uint
     218     2530839 : #define POOL_NEXT     free_next
     219             : #define POOL_SENTINEL 1
     220             : #include "../../util/tmpl/fd_pool.c"
     221             : 
     222             : #define SLIST_NAME unstaged_txn_ll
     223             : #define SLIST_ELE_T fd_rdisp_txn_t
     224          18 : #define SLIST_IDX_T uint
     225          72 : #define SLIST_NEXT  unstaged_next
     226             : #include "../../util/tmpl/fd_slist.c"
     227             : 
     228             : #define DLIST_IDX_T uint
     229           6 : #define DLIST_PREV  edges[0]
     230           6 : #define DLIST_NEXT  edges[1]
     231             : #define DLIST_NAME  zombie_dlist
     232             : #define DLIST_ELE_T fd_rdisp_txn_t
     233             : #include "../../util/tmpl/fd_dlist.c"
     234             : 
     235             : 
     236             : /* ACCT_INFO_FLAG: It's a bit unfortunate that we have to maintain these
     237             :    flags, but basically we need to be able to distinguish the case where
     238             :    there are only readers so that we don't increment in_degree when
     239             :    adding a new fd_rdisp_txn.  If we have any writers, the only way to
     240             :    transition into a state where there are only readers is to complete
     241             :    the last writer.  We know we are in this case when the completed
     242             :    node's child doesn't have a child, and the completed node's child is
     243             :    a reader, as indicated by the LAST_REF_WAS_WRITE bit.
     244             :    LAST_REF_WAS_WRITE also has the advantage of being easy to
     245             :    maintain. */
     246    18094905 : #define ACCT_INFO_FLAG_LAST_REF_WAS_WRITE(lane) (((uchar)1)<<(2*(lane)))
     247    17671827 : #define ACCT_INFO_FLAG_ANY_WRITERS(       lane) (((uchar)2)<<(2*(lane)))
     248             : 
     249             : /* acct_info_t is a node in a map_chain that contains the metadata for a
     250             :    single account address's conflict graph DAG.  In particular, it
     251             :    contains the information needed to know where to insert a node that
     252             :    reads from or writes to the account.  The objects of this type follow
     253             :    this state machine:
     254             : 
     255             :                  FREE  -----> ACTIVE ----> CACHED
     256             :                                 ^            |
     257             :                                 |-------------
     258             :    When FREE, it is in free_acct_dlist only.  When ACTIVE, it is in
     259             :    acct_map only.  When CACHED, it is in both free_acct_dlist and
     260             :    cached_acct_map.
     261             : */
     262             : struct acct_info {
     263             :   /* key, next, and prev are the map_chain fields. Used in the ACTIVE
     264             :      and CACHED states.  next and prev set to 0 when in the FREE state.
     265             :      Element 0 is a sentinel and isn't inserted to the free_acct_dlist,
     266             :      so this is unambiguous. */
     267             :   fd_acct_addr_t key;
     268             :   uint next;
     269             :   uint prev;
     270             : 
     271             :   union {
     272             :     struct {
     273             :       /* This is effectively a pointer to the last node in the DAG for
     274             :          this pubkey, one for each staging lane.
     275             :          EDGE_IS_LAST(FOLLOW_EDGE(base, last_reference[i])) is non-zero.
     276             :          */
     277             :       edge_t last_reference[4];
     278             : 
     279             :     }; /* When in the ACTIVE state */
     280             :     struct {
     281             :       uint free_ll_next;
     282             :       uint free_ll_prev;
     283             :       /* 8 bytes of padding here */
     284             :     }; /* When not in the ACTIVE state, used by the free_acct_dlist */
     285             :   };
     286             :   /* flags: a combination of ACCT_INFO_FLAG_* bitfields above.  Used
     287             :      when ACTIVE. */
     288             :   uint flags:8;
     289             : 
     290             : 
     291             :   /* We want to dispatch the READY transactions in an order that
     292             :      maximizes parallelism, but we also want to be able to start
     293             :      dispatching transactions decently well before we have the full
     294             :      conflict graph.  We can do that because we know that contentious
     295             :      accounts tend to stay contentious and uncontentious accounts tend
     296             :      to stay uncontentious.
     297             : 
     298             :      To accomplish this, we maintain a special EMA.  Let x_i be 1 if
     299             :      transaction i references it and 0 if not.  If we squint and assume
     300             :      the transactions are independent and all drawn from some
     301             :      distribution (which is not true, but that's why we squint), an EMA
     302             :      of x_i estimates the probability that the next transaction
     303             :      references this account.  How we use this value is detailed later.
     304             : 
     305             :      We can't update this value for every pubkey for each transaction,
     306             :      so we maintain it in a lazy way, by applying updates only when we
     307             :      need to read the value, which also happens to be every time we want
     308             :      to add a 1 value to the EMA.  We then just need to maintain the
     309             :      last index i at which x_i was updated and the current value.  We
     310             :      only have 24 bits for the index, which means that we can't maintain
     311             :      it in a fork-aware way, which doesn't seem like a problem.  Also,
     312             :      it's possible it can overflow, and that can result in an incorrect
     313             :      value, but that means the account is only referenced ~ 1/2^24
     314             :      transactions, which is also fine.
     315             : 
     316             :      last_ref is in the domain of global_inserted_txn_cnt.  ema_refs is
     317             :      in [0, 1].  Both fields are used in ACTIVE and CACHED.  Maintaining
     318             :      these fields is actually the main reason CACHED exists. */
     319             :   uint   last_ref:24;
     320             :   float  ema_refs;
     321             : };
     322             : typedef struct acct_info acct_info_t;
     323             : 
     324             : FD_STATIC_ASSERT( sizeof(acct_info_t)==64UL, acct_info_t );
     325             : 
     326             : /* For the acct_map and the free_acct_map */
     327             : #define MAP_NAME          acct_map
     328     3707112 : #define MAP_ELE_T         acct_info_t
     329   375691212 : #define MAP_IDX_T         uint
     330             : #define MAP_KEY_T         fd_acct_addr_t
     331   714923010 : #define MAP_KEY_HASH(k,s) fd_hash( (s), (k)->b, 32UL )
     332   360558198 : #define MAP_KEY_EQ(k0,k1) (!memcmp( (k0)->b, (k1)->b, 32UL ))
     333             : #define MAP_OPTIMIZE_RANDOM_ACCESS_REMOVAL 1
     334             : #include "../../util/tmpl/fd_map_chain.c"
     335             : 
     336             : 
     337             : #define DLIST_IDX_T uint
     338    29065902 : #define DLIST_PREV  free_ll_prev
     339    29065902 : #define DLIST_NEXT  free_ll_next
     340             : #define DLIST_NAME  free_dlist
     341             : #define DLIST_ELE_T acct_info_t
     342             : #include "../../util/tmpl/fd_dlist.c"
     343             : 
     344             : 
     345             : struct pending_prq_ele {
     346             :   /* lower score means should be scheduled sooner.  Integer part has a
     347             :      special meaning as explained above. */
     348             :   float score;
     349             : 
     350             :   uint linear_block_number;
     351             :   uint txn_idx;
     352             : 
     353             :   /* It seems like we should be able to get this whole struct in 8
     354             :      bytes, but we're a few bits over.
     355             : 
     356             :      23 bits for txn_idx
     357             :      16 bit for linear_block_number
     358             :      Then the integer part of the score needs to be at least 18 or 19
     359             :      bits, but we can't use a custom floating point number. */
     360             : };
     361             : 
     362             : typedef struct pending_prq_ele pending_prq_ele_t;
     363             : 
     364             : 
     365             : #define PRQ_NAME pending_prq
     366    15249270 : #define PRQ_T    pending_prq_ele_t
     367             : #define PRQ_EXPLICIT_TIMEOUT 0
     368             : /* returns 1 if x is strictly after y */
     369    12665712 : #define PRQ_AFTER(x,y) (__extension__( {                                            \
     370    12665712 :             int cmp0 = (int)(x).linear_block_number - (int)(y).linear_block_number; \
     371    12665712 :             fd_int_if( cmp0!=0, cmp0>0, (x).score>(y).score );                      \
     372    12665712 :             }))
     373             : #include "../../util/tmpl/fd_prq.c"
     374             : 
     375             : /* fd_rdisp_blockinfo_t maintains a little metadata about transactions for each
     376             :    slot.  */
     377             : struct fd_rdisp_blockinfo {
     378             :   FD_RDISP_BLOCK_TAG_T block;
     379             :   uint  linear_block_number;
     380             : 
     381             :   uint  insert_ready:1;
     382             :   uint  schedule_ready:1;
     383             :   uint  staged:1;
     384             :   uint  staging_lane:2; /* ignored if staged==0 */
     385             :   uint  last_insert_was_serializing:1;
     386             : 
     387             :   uint inserted_cnt;
     388             :   uint dispatched_cnt;
     389             :   uint completed_cnt;
     390             :   uint last_serializing;
     391             : 
     392             :   uint map_chain_next;
     393             :   uint ll_next;
     394             :   unstaged_txn_ll_t ll[ 1 ]; /* used only when unstaged */
     395             :   zombie_dlist_t    zombie_list[ 1 ];
     396             : };
     397             : typedef struct fd_rdisp_blockinfo fd_rdisp_blockinfo_t;
     398             : 
     399             : #define POOL_NAME     block_pool
     400          27 : #define POOL_T        fd_rdisp_blockinfo_t
     401             : #define POOL_IDX_T    uint
     402     1007172 : #define POOL_NEXT     ll_next
     403             : #define POOL_SENTINEL 1
     404             : #include "../../util/tmpl/fd_pool.c"
     405             : 
     406             : #define MAP_NAME  block_map
     407             : #define MAP_ELE_T fd_rdisp_blockinfo_t
     408             : #define MAP_KEY_T FD_RDISP_BLOCK_TAG_T
     409      405246 : #define MAP_KEY   block
     410    10124793 : #define MAP_NEXT  map_chain_next
     411    12261066 : #define MAP_IDX_T uint
     412             : #include "../../util/tmpl/fd_map_chain.c"
     413             : 
     414             : #define SLIST_NAME  block_slist
     415             : #define SLIST_ELE_T fd_rdisp_blockinfo_t
     416      405246 : #define SLIST_IDX_T uint
     417      810495 : #define SLIST_NEXT  ll_next
     418             : #include "../../util/tmpl/fd_slist.c"
     419             : 
     420             : struct fd_rdisp_unstaged {
     421             :   FD_RDISP_BLOCK_TAG_T block;
     422             : 
     423             :   uint writable_cnt;
     424             :   uint readonly_cnt;
     425             :   fd_acct_addr_t keys[MAX_ACCT_PER_TXN];
     426             : };
     427             : typedef struct fd_rdisp_unstaged fd_rdisp_unstaged_t;
     428             : 
     429             : typedef struct {
     430             :   pending_prq_ele_t * pending;
     431             :   ulong               linear_block_number;
     432             :   block_slist_t       block_ll[1];
     433             :   ulong               inserted_cnt;
     434             :   ulong               dispatched_cnt;
     435             :   ulong               completed_cnt;
     436             : } per_lane_info_t;
     437             : 
     438             : /* We maintain two maps from pubkeys to acct_info_t.  The first one is
     439             :    the main acct_map, just called acct_map.  All pubkeys in this map
     440             :    have >0 references in the one of the staging lane DAGs.  When an
     441             :    account goes to 0 references, it gets removed from main map_chain and
     442             :    moved to free map_chain, called free_acct_map.  The free_acct_map
     443             :    exists to maintain the reference count EMA information lazily.
     444             :    Unless we need the acct_info_t for something in the DAG, we might as
     445             :    well maintain the EMA info.
     446             : 
     447             :    When we start up, all the acct_info_t structs are in the
     448             :    free_acct_dlist.  Whenever something is added to the free_acct_map,
     449             :    it's also added to the tail of the free_acct_dlist.  When we need an
     450             :    acct_info_t that's not in the free_acct_map, we pop the head of the
     451             :    free_acct_dlist.  In general, the free_acct_dlist contains everything
     452             :    in the free_acct_map, potentially plus some elements that have never
     453             :    been used; all acct_info_t objects are in exactly one of the main
     454             :    acct_map and the free_acct_dlist (not free_acct_map).  See
     455             :    acct_info_t for more information about this. */
     456             : 
     457             : struct fd_rdisp {
     458             :   ulong depth;
     459             :   ulong block_depth;
     460             : 
     461             :   ulong global_insert_cnt;
     462             :   ulong unstaged_lblk_num;
     463             : 
     464             :   /* pool: an fd_pool, indexed [0, depth+1), with 0 being a sentinel */
     465             :   fd_rdisp_txn_t       * pool;
     466             :   fd_rdisp_unstaged_t  * unstaged; /* parallel to pool with additional info */
     467             : 
     468             :   block_map_t          * blockmap; /* map chain */
     469             :   fd_rdisp_blockinfo_t * block_pool;
     470             : 
     471             :   int free_lanes; /* a bitmask */
     472             :   per_lane_info_t lanes[4];
     473             : 
     474             :   acct_map_t   * acct_map;
     475             :   acct_map_t   * free_acct_map;
     476             :   /* acct_pool is not an fd_pool, but is just a flat array, since we
     477             :      don't need to acquire and release from it because of the dlist. */
     478             :   acct_info_t  * acct_pool;
     479             :   free_dlist_t   free_acct_dlist[1];
     480             : };
     481             : 
     482             : typedef struct fd_rdisp fd_rdisp_t;
     483             : 
     484             : 
     485             :  #define ACCT_ITER_TO_PTR( iter ) (__extension__( {                                             \
     486             :        ulong __idx = fd_txn_acct_iter_idx( iter );                                              \
     487             :        fd_ptr_if( __idx<fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_IMM ), accts, alt_adj )+__idx; \
     488             :        }))
     489             : 
     490             : 
     491          99 : ulong fd_rdisp_align( void ) { return 128UL; }
     492             : 
     493             : ulong
     494             : fd_rdisp_footprint( ulong depth,
     495           9 :                     ulong block_depth ) {
     496           9 :   if( FD_UNLIKELY( (depth>FD_RDISP_MAX_DEPTH)             | (depth<2UL) |
     497           9 :                    (block_depth>FD_RDISP_MAX_BLOCK_DEPTH) | (block_depth<4UL) ) ) return 0UL;
     498             : 
     499           9 :   ulong chain_cnt      = block_map_chain_cnt_est( block_depth );
     500           9 :   ulong acct_depth     = depth*MAX_ACCT_PER_TXN;
     501           9 :   ulong acct_chain_cnt = acct_map_chain_cnt_est( acct_depth );
     502             : 
     503           9 :   ulong l = FD_LAYOUT_INIT;
     504           9 :   l = FD_LAYOUT_APPEND( l, fd_rdisp_align(),             sizeof(fd_rdisp_t)                              );
     505           9 :   l = FD_LAYOUT_APPEND( l, pool_align(),                 pool_footprint              ( depth+1UL       ) ); /* pool       */
     506           9 :   l = FD_LAYOUT_APPEND( l, alignof(fd_rdisp_unstaged_t), sizeof(fd_rdisp_unstaged_t)*( depth+1UL       ) ); /* unstaged   */
     507           9 :   l = FD_LAYOUT_APPEND( l, block_map_align(),            block_map_footprint         ( chain_cnt       ) ); /* blockmap   */
     508           9 :   l = FD_LAYOUT_APPEND( l, block_pool_align(),           block_pool_footprint        ( block_depth+1UL ) ); /* block_pool */
     509           9 :   l = FD_LAYOUT_APPEND( l, pending_prq_align(),          4UL*pending_prq_footprint   ( depth           ) ); /* pending    */
     510           9 :   l = FD_LAYOUT_APPEND( l, acct_map_align(),             acct_map_footprint          ( acct_chain_cnt  ) ); /* acct_map   */
     511           9 :   l = FD_LAYOUT_APPEND( l, acct_map_align(),             acct_map_footprint          ( acct_chain_cnt  ) ); /* free_acct_map */
     512           9 :   l = FD_LAYOUT_APPEND( l, alignof(acct_info_t),         (acct_depth+1UL)*sizeof(acct_info_t)            ); /* acct_pool  */
     513           9 :   return FD_LAYOUT_FINI( l, fd_rdisp_align() );
     514           9 : }
     515             : 
     516             : void *
     517             : fd_rdisp_new( void * mem,
     518             :               ulong  depth,
     519             :               ulong  block_depth,
     520           9 :               ulong  seed ) {
     521           9 :   if( FD_UNLIKELY( (depth>FD_RDISP_MAX_DEPTH)             | (depth<2UL) |
     522           9 :                    (block_depth>FD_RDISP_MAX_BLOCK_DEPTH) | (block_depth<4UL) ) ) return 0UL;
     523             : 
     524           9 :   ulong chain_cnt      = block_map_chain_cnt_est( block_depth );
     525           9 :   ulong acct_depth     = depth*MAX_ACCT_PER_TXN;
     526           9 :   ulong acct_chain_cnt = acct_map_chain_cnt_est( acct_depth );
     527             : 
     528           9 :   FD_SCRATCH_ALLOC_INIT( l, mem );
     529           9 :   fd_rdisp_t * disp   = FD_SCRATCH_ALLOC_APPEND( l, fd_rdisp_align(),             sizeof(fd_rdisp_t)                              );
     530           9 :   void  * _pool       = FD_SCRATCH_ALLOC_APPEND( l, pool_align(),                 pool_footprint              ( depth+1UL       ) );
     531           9 :   void  * _unstaged   = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_rdisp_unstaged_t), sizeof(fd_rdisp_unstaged_t)*( depth+1UL       ) );
     532           9 :   void  * _bmap       = FD_SCRATCH_ALLOC_APPEND( l, block_map_align(),            block_map_footprint         ( chain_cnt       ) );
     533           9 :   void  * _bpool      = FD_SCRATCH_ALLOC_APPEND( l, block_pool_align(),           block_pool_footprint        ( block_depth+1UL ) );
     534           9 :   uchar * _pending    = FD_SCRATCH_ALLOC_APPEND( l, pending_prq_align(),          4UL*pending_prq_footprint   ( depth           ) );
     535           9 :   void  * _acct_map   = FD_SCRATCH_ALLOC_APPEND( l, acct_map_align(),             acct_map_footprint          ( acct_chain_cnt  ) );
     536           9 :   void  * _freea_map  = FD_SCRATCH_ALLOC_APPEND( l, acct_map_align(),             acct_map_footprint          ( acct_chain_cnt  ) );
     537           9 :   acct_info_t * apool = FD_SCRATCH_ALLOC_APPEND( l, alignof(acct_info_t),         (acct_depth+1UL)*sizeof(acct_info_t)            );
     538           9 :   FD_SCRATCH_ALLOC_FINI( l, fd_rdisp_align() );
     539             : 
     540           9 :   disp->depth             = depth;
     541           9 :   disp->block_depth       = block_depth;
     542           9 :   disp->global_insert_cnt = 0UL;
     543           9 :   disp->unstaged_lblk_num = 0UL;
     544             : 
     545           9 :   fd_rdisp_txn_t * temp_pool_join = pool_join( pool_new( _pool, depth+1UL ) );
     546      197823 :   for( ulong i=0UL; i<depth+1UL; i++ ) temp_pool_join[ i ].in_degree = IN_DEGREE_FREE;
     547           9 :   pool_leave( temp_pool_join );
     548             : 
     549           9 :   memset( _unstaged, '\0', sizeof(fd_rdisp_unstaged_t)*(depth+1UL) );
     550             : 
     551           9 :   block_map_new ( _bmap,  chain_cnt, seed );
     552           9 :   block_pool_new( _bpool, block_depth+1UL );
     553             : 
     554           9 :   fd_rdisp_blockinfo_t * bpool_temp_join = block_pool_join( _bpool );
     555      196683 :   for( ulong i=0UL; i<block_depth+1UL; i++ ) zombie_dlist_new( bpool_temp_join[ i ].zombie_list );
     556           9 :   block_pool_leave( bpool_temp_join );
     557             : 
     558           9 :   disp->free_lanes = 0xF;
     559          45 :   for( ulong i=0UL; i<4UL; i++ ) {
     560          36 :     pending_prq_new( _pending, depth );
     561          36 :     _pending += pending_prq_footprint( depth );
     562             : 
     563          36 :     disp->lanes[i].linear_block_number = 0UL;
     564          36 :     disp->lanes[i].inserted_cnt        = 0U;
     565          36 :     disp->lanes[i].dispatched_cnt      = 0U;
     566          36 :     disp->lanes[i].completed_cnt       = 0U;
     567          36 :   }
     568             : 
     569           9 :   acct_map_new( _acct_map,  acct_chain_cnt, fd_ulong_hash( seed+1UL ) );
     570           9 :   acct_map_new( _freea_map, acct_chain_cnt, fd_ulong_hash( seed+2UL ) );
     571             : 
     572           9 :   free_dlist_t * temp_join = free_dlist_join( free_dlist_new( disp->free_acct_dlist ) );
     573    25319049 :   for( ulong i=1UL; i<acct_depth+1UL; i++ ) {
     574    25319040 :     apool[ i ].next = apool[ i ].prev = 0U;
     575    25319040 :     free_dlist_idx_push_tail( disp->free_acct_dlist, i, apool );
     576    25319040 :   }
     577           9 :   free_dlist_leave( temp_join );
     578             : 
     579           9 :   return disp;
     580           9 : }
     581             : 
     582             : fd_rdisp_t *
     583           9 : fd_rdisp_join( void * mem ) {
     584           9 :   fd_rdisp_t * disp = (fd_rdisp_t *)mem;
     585             : 
     586           9 :   ulong depth          = disp->depth;
     587           9 :   ulong block_depth    = disp->block_depth;
     588           9 :   ulong chain_cnt      = block_map_chain_cnt_est( block_depth );
     589           9 :   ulong acct_depth     = depth*MAX_ACCT_PER_TXN;
     590           9 :   ulong acct_chain_cnt = acct_map_chain_cnt_est( acct_depth );
     591             : 
     592           9 :   FD_SCRATCH_ALLOC_INIT( l, mem );
     593           9 :   /*                 */ FD_SCRATCH_ALLOC_APPEND( l, fd_rdisp_align(),             sizeof(fd_rdisp_t)                              );
     594           9 :   void  * _pool       = FD_SCRATCH_ALLOC_APPEND( l, pool_align(),                 pool_footprint              ( depth+1UL       ) );
     595           9 :   void  * _unstaged   = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_rdisp_unstaged_t), sizeof(fd_rdisp_unstaged_t)*( depth+1UL       ) );
     596           9 :   void  * _bmap       = FD_SCRATCH_ALLOC_APPEND( l, block_map_align(),            block_map_footprint         ( chain_cnt       ) );
     597           9 :   void  * _bpool      = FD_SCRATCH_ALLOC_APPEND( l, block_pool_align(),           block_pool_footprint        ( block_depth+1UL ) );
     598           9 :   uchar * _pending    = FD_SCRATCH_ALLOC_APPEND( l, pending_prq_align(),          4UL*pending_prq_footprint   ( depth           ) );
     599           9 :   void  * _acct_map   = FD_SCRATCH_ALLOC_APPEND( l, acct_map_align(),             acct_map_footprint          ( acct_chain_cnt  ) );
     600           9 :   void  * _freea_map  = FD_SCRATCH_ALLOC_APPEND( l, acct_map_align(),             acct_map_footprint          ( acct_chain_cnt  ) );
     601           9 :   acct_info_t * apool = FD_SCRATCH_ALLOC_APPEND( l, alignof(acct_info_t),         (acct_depth+1UL)*sizeof(acct_info_t)            );
     602           9 :   FD_SCRATCH_ALLOC_FINI( l, fd_rdisp_align() );
     603             : 
     604           9 :   disp->pool       = pool_join( _pool );
     605           9 :   disp->unstaged   = (fd_rdisp_unstaged_t *)_unstaged;
     606           9 :   disp->blockmap   = block_map_join( _bmap );
     607           9 :   disp->block_pool = block_pool_join( _bpool );
     608             : 
     609      196683 :   for( ulong i=0UL; i<block_depth+1UL; i++ ) zombie_dlist_join( disp->block_pool[ i ].zombie_list );
     610             : 
     611          45 :   for( ulong i=0UL; i<4UL; i++ ) {
     612          36 :     disp->lanes[i].pending = pending_prq_join( _pending );
     613          36 :     _pending += pending_prq_footprint( depth );
     614          36 :   }
     615             : 
     616           9 :   disp->acct_map      = acct_map_join( _acct_map );
     617           9 :   disp->free_acct_map = acct_map_join( _freea_map );
     618           9 :   disp->acct_pool     = apool;
     619           9 :   free_dlist_join( disp->free_acct_dlist );
     620             : 
     621           9 :   return disp;
     622           9 : }
     623             : 
     624             : static inline void
     625             : free_lane( fd_rdisp_t * disp,
     626       12027 :            ulong        staging_lane ) {
     627       12027 :   disp->free_lanes |= 1<<staging_lane;
     628       12027 :   per_lane_info_t * l = disp->lanes+staging_lane;
     629       12027 :   FD_TEST( pending_prq_cnt( l->pending )==0UL );
     630       12027 :   l->linear_block_number = 0UL;
     631       12027 :   l->inserted_cnt   = 0UL;
     632       12027 :   l->dispatched_cnt = 0UL;
     633       12027 :   l->completed_cnt  = 0UL;
     634       12027 :   block_slist_delete( block_slist_leave( l->block_ll ) );
     635       12027 : }
     636             : 
     637             : static inline void
     638             : alloc_lane( fd_rdisp_t * disp,
     639       12030 :             ulong        staging_lane ) {
     640       12030 :   disp->free_lanes &= ~(1<<staging_lane);
     641       12030 :   block_slist_join( block_slist_new( disp->lanes[staging_lane].block_ll ) );
     642       12030 : }
     643             : 
     644             : ulong
     645             : fd_rdisp_suggest_staging_lane( fd_rdisp_t const *   disp,
     646             :                                FD_RDISP_BLOCK_TAG_T parent_block,
     647           0 :                                int                  duplicate ) {
     648             : 
     649             :   /* 1. If it's a duplicate, suggest FD_RDISP_UNSTAGED */
     650           0 :   if( FD_UNLIKELY( duplicate ) ) return FD_RDISP_UNSTAGED;
     651             : 
     652             :   /* 2. If parent is the last block in any existing staging lane, suggest
     653             :         that lane */
     654           0 :   fd_rdisp_blockinfo_t const * block_pool = disp->block_pool;
     655           0 :   fd_rdisp_blockinfo_t const * block = block_map_ele_query_const( disp->blockmap, &parent_block, NULL, block_pool );
     656           0 :   if( FD_LIKELY( block && block->insert_ready && block->staged ) ) return block->staging_lane;
     657             : 
     658             :   /* 3. If there is at least one free lane, suggest a free lane */
     659           0 :   if( FD_LIKELY( disp->free_lanes!=0 ) ) return (ulong)fd_uint_find_lsb( (uint)disp->free_lanes );
     660             : 
     661             :   /* 4. Else, suggest FD_RDISP_UNSTAGED */
     662           0 :   return FD_RDISP_UNSTAGED;
     663           0 : }
     664             : 
     665             : int
     666             : fd_rdisp_add_block( fd_rdisp_t          * disp,
     667             :                    FD_RDISP_BLOCK_TAG_T   new_block,
     668      405252 :                    ulong                  staging_lane ) {
     669      405252 :   fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
     670             : 
     671      405252 :   if( FD_UNLIKELY( !block_pool_free( block_pool )                                                             ) ) return -1;
     672      405252 :   if( FD_UNLIKELY(  ULONG_MAX!=block_map_idx_query_const( disp->blockmap, &new_block, ULONG_MAX, block_pool ) ) ) return -1;
     673      405246 :   fd_rdisp_blockinfo_t * block = block_pool_ele_acquire( block_pool );
     674      405246 :   block->block = new_block;
     675      405246 :   block_map_ele_insert( disp->blockmap, block, block_pool );
     676             : 
     677      405246 :   block->insert_ready = 1;
     678      405246 :   block->staged       = staging_lane!=FD_RDISP_UNSTAGED;
     679      405246 :   block->staging_lane = (uint)(staging_lane & 0x3UL);
     680      405246 :   block->last_insert_was_serializing = 0U;
     681             : 
     682      405246 :   block->inserted_cnt     = 0U;
     683      405246 :   block->dispatched_cnt   = 0U;
     684      405246 :   block->completed_cnt    = 0U;
     685      405246 :   block->last_serializing = 0U;
     686             : 
     687      405246 :   if( FD_UNLIKELY( staging_lane==FD_RDISP_UNSTAGED ) ) {
     688           3 :     block->schedule_ready = 1;
     689           3 :     block->linear_block_number = (uint)disp->unstaged_lblk_num++;
     690           3 :     unstaged_txn_ll_join( unstaged_txn_ll_new( block->ll ) );
     691      405243 :   } else {
     692      405243 :     block->linear_block_number = (uint)disp->lanes[staging_lane].linear_block_number++;
     693      405243 :     block->schedule_ready      = (uint)(1 & (disp->free_lanes >> staging_lane));
     694             : 
     695      405243 :     if( FD_LIKELY( disp->free_lanes & (1<<staging_lane) ) ) alloc_lane( disp, staging_lane );
     696             : 
     697      405243 :     block_slist_t * sl = disp->lanes[staging_lane].block_ll;
     698      405243 :     if( FD_LIKELY( !block_slist_is_empty( sl, block_pool ) ) )  block_slist_ele_peek_tail( sl, block_pool )->insert_ready = 0;
     699      405243 :     block_slist_ele_push_tail( sl, block, block_pool );
     700      405243 :   }
     701      405246 :   return 0;
     702      405252 : }
     703             : 
     704             : 
     705             : 
     706             : int
     707             : fd_rdisp_remove_block( fd_rdisp_t          * disp,
     708      405243 :                        FD_RDISP_BLOCK_TAG_T  block_tag ) {
     709      405243 :   fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
     710             : 
     711      405243 :   fd_rdisp_blockinfo_t * block   = block_map_ele_query( disp->blockmap, &block_tag, NULL, block_pool );
     712      405243 :   if( FD_UNLIKELY( block==NULL ) ) return -1;
     713             : 
     714      405240 :   FD_TEST( block->schedule_ready );
     715      405240 :   FD_TEST( block->completed_cnt==block->inserted_cnt );
     716      405240 :   FD_TEST( zombie_dlist_is_empty( block->zombie_list, disp->pool ) );
     717             : 
     718      405240 :   if( FD_LIKELY( block->staged ) ) {
     719      405240 :     ulong staging_lane = (ulong)block->staging_lane;
     720      405240 :     block_slist_t * sl = disp->lanes[staging_lane].block_ll;
     721             : 
     722      405240 :     FD_TEST( block==block_slist_ele_peek_head( sl, block_pool ) );
     723      405240 :     block_slist_idx_pop_head( sl, block_pool );
     724      405240 :     if( FD_LIKELY( !block_slist_is_empty( sl, block_pool ) ) ) block_slist_ele_peek_head( sl, block_pool )->schedule_ready = 1;
     725       12024 :     else                                                       free_lane( disp, staging_lane );
     726      405240 :   } else {
     727           0 :     unstaged_txn_ll_delete( unstaged_txn_ll_leave( block->ll ) );
     728           0 :   }
     729      405240 :   block_pool_idx_release( block_pool, block_map_idx_remove( disp->blockmap, &block_tag, ULONG_MAX, block_pool ) );
     730             : 
     731      405240 :   return 0;
     732      405240 : }
     733             : 
     734             : 
     735             : int
     736             : fd_rdisp_abandon_block( fd_rdisp_t          * disp,
     737           6 :                         FD_RDISP_BLOCK_TAG_T  block_tag ) {
     738           6 :   fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
     739             : 
     740           6 :   fd_rdisp_blockinfo_t * block   = block_map_ele_query( disp->blockmap, &block_tag, NULL, disp->block_pool );
     741           6 :   if( FD_UNLIKELY( block==NULL ) ) return -1;
     742             : 
     743           3 :   FD_TEST( block->schedule_ready );
     744           3 :   FD_TEST( block->dispatched_cnt==block->completed_cnt ); /* TODO: remove this when it can call complete properly */
     745          12 :   while( block->completed_cnt<block->inserted_cnt ) {
     746             :     /* because there is nothing DISPATCHED, there has to be something
     747             :        READY */
     748           9 :     ulong txn = fd_rdisp_get_next_ready( disp, block_tag );
     749           9 :     FD_TEST( txn );
     750           9 :     fd_rdisp_complete_txn( disp, txn, 1 );
     751           9 :   }
     752           3 :   while( !zombie_dlist_is_empty( block->zombie_list, disp->pool ) ) {
     753           0 :     fd_rdisp_complete_txn( disp, zombie_dlist_idx_pop_head( block->zombie_list, disp->pool ), 1 );
     754           0 :   }
     755             : 
     756           3 :   if( FD_LIKELY( block->staged ) ) {
     757           3 :     ulong staging_lane = (ulong)block->staging_lane;
     758           3 :     block_slist_t * sl = disp->lanes[staging_lane].block_ll;
     759             : 
     760           3 :     FD_TEST( block==block_slist_ele_peek_head( sl, block_pool ) );
     761           3 :     block_slist_idx_pop_head( sl, block_pool );
     762           3 :     if( FD_LIKELY( !block_slist_is_empty( sl, block_pool ) ) ) block_slist_ele_peek_head( sl, block_pool )->schedule_ready = 1;
     763           0 :     else                                                       free_lane( disp, staging_lane );
     764           3 :   } else {
     765           0 :     unstaged_txn_ll_delete( unstaged_txn_ll_leave( block->ll ) );
     766           0 :   }
     767           3 :   block_pool_idx_release( disp->block_pool, block_map_idx_remove( disp->blockmap, &block_tag, ULONG_MAX, disp->block_pool ) );
     768             : 
     769           3 :   return 0;
     770           3 : }
     771             : 
     772             : static void
     773             : add_edges( fd_rdisp_t           * disp,
     774             :            fd_rdisp_txn_t       * ele,
     775             :            fd_acct_addr_t const * addr,
     776             :            ulong                  addr_cnt,
     777             :            uint                   staging_lane,
     778             :            int                    writable,
     779             :            int                    update_score );
     780             : /* updates in_degree, edge_cnt_etc */
     781             : 
     782             : int
     783             : fd_rdisp_promote_block( fd_rdisp_t *          disp,
     784             :                         FD_RDISP_BLOCK_TAG_T  block_tag,
     785           6 :                         ulong                 staging_lane ) {
     786           6 :   fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
     787           6 :   per_lane_info_t * lane = disp->lanes + staging_lane;
     788           6 :   block_slist_t * sl = lane->block_ll;
     789             : 
     790           6 :   fd_rdisp_blockinfo_t * block   = block_map_ele_query( disp->blockmap, &block_tag, NULL, block_pool );
     791           6 :   if( FD_UNLIKELY( block==NULL   ) ) return -1;
     792           6 :   if( FD_UNLIKELY( block->staged ) ) return -1;
     793             : 
     794           6 :   block->staged = 1;
     795           6 :   block->staging_lane = (uint)(staging_lane & 0x3);
     796           6 :   block->insert_ready = 1;
     797           6 :   block->schedule_ready = (uint)(1 & (disp->free_lanes >> staging_lane));
     798             : 
     799           6 :   if( FD_LIKELY( disp->free_lanes & (1<<staging_lane) ) ) alloc_lane( disp, staging_lane );
     800             : 
     801           6 :   if( FD_LIKELY( !block_slist_is_empty( sl, block_pool ) ) )  block_slist_ele_peek_tail( sl, block_pool )->insert_ready = 0;
     802           6 :   block_slist_ele_push_tail( sl, block, block_pool );
     803           6 :   uint linear_block_number = (uint)lane->linear_block_number++;
     804           6 :   block->linear_block_number = linear_block_number;
     805             : 
     806           6 :   unstaged_txn_ll_iter_t next;
     807           6 :   for( unstaged_txn_ll_iter_t iter = unstaged_txn_ll_iter_init( block->ll, disp->pool );
     808          24 :       !unstaged_txn_ll_iter_done( iter, block->ll, disp->pool );
     809          18 :       iter = next ) {
     810          18 :       next = unstaged_txn_ll_iter_next( iter, block->ll, disp->pool );
     811             : 
     812          18 :     fd_rdisp_txn_t      * ele = unstaged_txn_ll_iter_ele( iter, block->ll, disp->pool );
     813          18 :     fd_rdisp_unstaged_t * uns = disp->unstaged + unstaged_txn_ll_iter_idx( iter, block->ll, disp->pool );
     814          18 :     FD_TEST( ele->in_degree==IN_DEGREE_UNSTAGED );
     815             : 
     816          18 :     ele->in_degree    = 0U;
     817          18 :     ele->edge_cnt_etc = 0U;
     818             : 
     819          18 :     add_edges( disp, ele, uns->keys,                   uns->writable_cnt, (uint)staging_lane, 1, 0 );
     820          18 :     add_edges( disp, ele, uns->keys+uns->writable_cnt, uns->readonly_cnt, (uint)staging_lane, 0, 0 );
     821             : 
     822          18 :     ele->edge_cnt_etc |= (uint)staging_lane<<14;
     823          18 :     ele->edge_cnt_etc |= linear_block_number<<16;
     824             : 
     825          18 :     if( FD_UNLIKELY( ele->in_degree==0U ) ) {
     826           3 :       pending_prq_ele_t temp[1] = {{ .score = ele->score, .linear_block_number = linear_block_number, .txn_idx = (uint)(ele-disp->pool)}};
     827           3 :       pending_prq_insert( lane->pending, temp );
     828           3 :     }
     829          18 :   }
     830           6 :   unstaged_txn_ll_delete( unstaged_txn_ll_leave( block->ll ) );
     831             : 
     832           6 :   lane->inserted_cnt   += block->inserted_cnt;
     833           6 :   lane->dispatched_cnt += block->dispatched_cnt;
     834           6 :   lane->completed_cnt  += block->completed_cnt;
     835             : 
     836           6 :   return 0;
     837           6 : }
     838             : 
     839             : int
     840             : fd_rdisp_demote_block( fd_rdisp_t *          disp,
     841           3 :                        FD_RDISP_BLOCK_TAG_T  block_tag ) {
     842           3 :   fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
     843             : 
     844           3 :   fd_rdisp_blockinfo_t * block   = block_map_ele_query( disp->blockmap, &block_tag, NULL, block_pool );
     845           3 :   if( FD_UNLIKELY(  block==NULL           ) ) return -1;
     846           3 :   if( FD_UNLIKELY( !block->staged         ) ) return -1;
     847           3 :   if( FD_UNLIKELY( !block->schedule_ready ) ) return -1;
     848           3 :   if( FD_UNLIKELY(  block->completed_cnt!=block->inserted_cnt ) ) FD_LOG_ERR(( "demote_block called with non-empty block" ));
     849           3 :   ulong staging_lane = block->staging_lane;
     850           3 :   block->staged = 0;
     851             : 
     852           3 :   per_lane_info_t * lane = disp->lanes + staging_lane;
     853           3 :   block_slist_t * sl = lane->block_ll;
     854             : 
     855           3 :   lane->inserted_cnt   -= block->inserted_cnt;
     856           3 :   lane->dispatched_cnt -= block->dispatched_cnt;
     857           3 :   lane->completed_cnt  -= block->completed_cnt;
     858             : 
     859           3 :   block->linear_block_number = (uint)disp->unstaged_lblk_num++;
     860             : 
     861             :   /* staged and schedule_ready means it must be the head of the staging lane */
     862           3 :   FD_TEST( block_slist_ele_peek_head( sl, block_pool )==block );
     863           3 :   block_slist_idx_pop_head( sl, block_pool );
     864             : 
     865           3 :   unstaged_txn_ll_join( unstaged_txn_ll_new( block->ll ) );
     866             : 
     867           3 :   if( FD_LIKELY( !block_slist_is_empty( sl, block_pool ) ) ) block_slist_ele_peek_head( sl, block_pool )->schedule_ready = 1;
     868           3 :   else                                                       free_lane( disp, staging_lane );
     869           3 :   return 0;
     870           3 : }
     871             : 
     872             : int
     873             : fd_rdisp_rekey_block( fd_rdisp_t *           disp,
     874             :                       FD_RDISP_BLOCK_TAG_T   new_tag,
     875           0 :                       FD_RDISP_BLOCK_TAG_T   old_tag ) {
     876           0 :   fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
     877             : 
     878           0 :   if( FD_UNLIKELY(        NULL!= block_map_ele_query_const( disp->blockmap, &new_tag, NULL, block_pool ) ) ) return -1;
     879           0 :   fd_rdisp_blockinfo_t * block = block_map_ele_query      ( disp->blockmap, &old_tag, NULL, block_pool );
     880           0 :   if( FD_UNLIKELY(        NULL== block ) )                                                                   return -1;
     881             : 
     882           0 :   block->block = new_tag;
     883           0 :   block_map_ele_insert( disp->blockmap, block, block_pool );
     884           0 :   return 0;
     885           0 : }
     886             : 
     887             : 
     888             : /* "Registers" a reference to the account in info at transaction
     889             :    global_insert_cnt.  Returns the value of the EMA, which is an
     890             :    estimate of the probability that the next transaction also references
     891             :    the account.  This value does not matter for correctness, which is
     892             :    why floating point arithmetic is okay. */
     893             : static inline float
     894             : update_ema( acct_info_t * info,
     895     9186144 :             ulong         global_insert_cnt ) {
     896             : #if FD_RDISP_DISABLE_EMA
     897             :   (void)info;
     898             :   (void)global_insert_cnt;
     899             :   return 0.0f;
     900             : #else
     901    18372288 : #define ALPHA 0.005f
     902             :   /* The normal EMA update equation is
     903             :                 e_i = (alpha) * x_i + (1-alpha)*e_{i-1},
     904             :      where alpha is a constant in (0,1).  Let L be the last reference of
     905             :      the account, and G be the current global_insert_cnt value.  We know
     906             :      that e_L = ema_refs, and that for i in [L+1, G), x_i = 0.
     907             :      That means
     908             :                e_{G-1} =         (1-alpha)^(G-L-1) * ema_refs
     909             :                e_G     = alpha + (1-alpha)^(G-L)   * ema_refs
     910             :    */
     911             :   /* last_ref only captures the low 24 bits, so we guess its the highest
     912             :      value for them that would still make it less than
     913             :      global_insert_cnt.  Turns out, we can calculate that with just an
     914             :      AND. */
     915     9186144 :   ulong last_ref = (ulong)info->last_ref;
     916     9186144 :   ulong delta = (global_insert_cnt - last_ref) & 0xFFFFFFUL;
     917     9186144 :   float ema_refs = ALPHA + powf( 1.0f-ALPHA, (float)delta ) * info->ema_refs;
     918             : 
     919     9186144 :   info->ema_refs = ema_refs;
     920     9186144 :   info->last_ref = (uint)(global_insert_cnt & 0xFFFFFFUL);
     921     9186144 : #undef ALPHA
     922             : 
     923     9186144 :   return ema_refs;
     924     9186144 : #endif
     925     9186144 : }
     926             : 
     927             : static void
     928             : add_edges( fd_rdisp_t           * disp,
     929             :            fd_rdisp_txn_t       * ele,
     930             :            fd_acct_addr_t const * addrs,
     931             :            ulong                  addr_cnt,
     932             :            uint                   lane,
     933             :            int                    writable,
     934     6998862 :            int                    update_score ) {
     935             : 
     936     6998862 :   ulong acct_idx = (ele->edge_cnt_etc & 0x7FU) +     ((ele->edge_cnt_etc>>7) & 0x7FU);
     937     6998862 :   ulong edge_idx = (ele->edge_cnt_etc & 0x7FU) + 3UL*((ele->edge_cnt_etc>>7) & 0x7FU);
     938             : 
     939    16184931 :   for( ulong i=0UL; i<addr_cnt; i++ ) {
     940     9186069 :     fd_acct_addr_t const * addr = addrs+i;
     941     9186069 :     acct_info_t * ai = NULL;
     942             : 
     943             :     /* Step 1: lookup the pubkey */
     944     9186069 :     ulong idx = acct_map_idx_query( disp->acct_map, addr, ULONG_MAX, disp->acct_pool );
     945     9186069 :     if( FD_UNLIKELY( idx==ULONG_MAX ) ) {
     946     1873431 :       idx = acct_map_idx_query( disp->free_acct_map, addr, ULONG_MAX, disp->acct_pool );
     947     1873431 :       if( FD_UNLIKELY( idx==ULONG_MAX ) ) {
     948             :         /* The acct pool is sized so that the list cannot be empty at this
     949             :            point.  However, the element at the head might be the free
     950             :            map with a different pubkey. */
     951      197907 :         idx = free_dlist_idx_peek_head( disp->free_acct_dlist, disp->acct_pool );
     952      197907 :         ai = disp->acct_pool+idx;
     953             : 
     954             :         /* CACHED -> FREE transition */
     955      197907 :         if( FD_LIKELY( ai->next!=0U ) ) {
     956      158157 :           acct_map_idx_remove_fast( disp->free_acct_map, idx, disp->acct_pool );
     957      158157 :         }
     958             : 
     959             :         /* FREE -> ACTIVE transition */
     960      197907 :         ai->key      = *addr;
     961      197907 :         ai->flags    = 0U;
     962      197907 :         ai->last_ref = 0U;
     963      197907 :         ai->ema_refs = 0.0f;
     964     1675524 :       } else {
     965             :         /* CACHED -> ACTIVE transition */
     966     1675524 :         ai = disp->acct_pool+idx;
     967     1675524 :         ai->flags    = 0U; /* FIXME: unnecessary */
     968     1675524 :         acct_map_idx_remove_fast( disp->free_acct_map, idx, disp->acct_pool );
     969     1675524 :       }
     970             :       /* In either case, at this point, the element is not in any map
     971             :          but is in free_acct_dlist.  It has the right key. last_ref, and
     972             :          ema_refs are valid. flags is 0. */
     973     1873431 :       free_dlist_idx_remove( disp->free_acct_dlist, idx, disp->acct_pool );
     974     1873431 :       memset( ai->last_reference, '\0', sizeof(ai->last_reference) );
     975     1873431 :       acct_map_idx_insert( disp->acct_map, idx, disp->acct_pool );
     976     1873431 :     }
     977     9186069 :     ai = disp->acct_pool+idx;
     978             :     /* At this point, in all cases, the acct_info is now in the ACTIVE
     979             :        state.  It's in acct_map, not in free_acct_map, and not in
     980             :        free_acct_dlist. */
     981             : 
     982             :     /* Assume that transactions are drawn randomly from some large
     983             :        distribution of potential transactions.  We want to estimate the
     984             :        expected value of the probability that this transaction conflicts
     985             :        with the next transaction that is sampled.  Two transactions
     986             :        conflict if they conflict on any account, and in general, they
     987             :        conflict if they both reference the same account, unless both
     988             :        this transaction and the next one only read it.  We don't have
     989             :        read/write info, so the best guess we have is that the next
     990             :        transaction does the same thing to the account that this one
     991             :        does.  That means we only really care about the accounts that
     992             :        this transaction writes to.  Label those a_1, a_2, ..., a_i, and
     993             :        suppose the probability that the next transaction references a_i
     994             :        is p_i (determined using the EMA).  Now then, assuming accounts
     995             :        are independent (which is false, but whatever), then the
     996             :        probability that the next transaction does not conflict with this
     997             :        account is:
     998             :                      (1-p_1) * (1-p_2) * (1 - p_i).
     999             : 
    1000             :        Since for the treap, a lower value means we'll schedule it
    1001             :        earlier, we'll use the probability of non-conflict as the
    1002             :        fractional part of the score. */
    1003     9186069 :     float score_change = 1.0f - update_ema( ai, disp->global_insert_cnt );
    1004     9186069 :     ele->score *= fd_float_if( writable & update_score, score_change, 1.0f );
    1005             : 
    1006             :     /* Step 2: add edge. There are 4 cases depending on whether this is
    1007             :        a writer or not and whether the previous reference was a writer
    1008             :        or not. */
    1009     9186069 :     int      _ignore;
    1010             : 
    1011     9186069 :     edge_t   ref_to_pa = ai->last_reference[ lane ];
    1012     9186069 :     edge_t   ref_to_me = (uint)((ulong)((ele - disp->pool)<<8) | acct_idx);
    1013     9186069 :     edge_t * pa        = FOLLOW_EDGE( disp->pool, ai->last_reference[ lane ], _ignore );
    1014     9186069 :     edge_t * me        = ele->edges + edge_idx;
    1015             : 
    1016             :     /* In the case that this is the first txn in the DAG, pa will point
    1017             :        to edges[0] of the sentinel element, pool[0].  We don't care
    1018             :        about what is stored there, so just set it up as a dummy element
    1019             :        to make the rest of the code work properly in this case too.  If
    1020             :        this is a read, we want me->sibli to ref_to_me in case 4; if this
    1021             :        is a write, we want to set me->sibli to 0 in case 2, but we need
    1022             :        to make sure pb==pa. */
    1023     9186069 :     disp->pool->edges[0] = (1U<<31) | (uint)idx;
    1024     9186069 :     disp->pool->edges[1] = fd_uint_if( writable, 0U, ref_to_me );
    1025     9186069 :     disp->pool->edges[2] = fd_uint_if( writable, 0U, ref_to_me );
    1026             : 
    1027     9186069 :     int flags = ai->flags;
    1028             : 
    1029     9186069 :     if( writable ) { /* also should be known at compile time */
    1030     3588582 :       if( flags & ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ) ) { /* unclear prob */
    1031             :         /* Case 1: w-w. The parent is the special last pointer.  Point
    1032             :            the parent to me, and set me to the last pointer. */
    1033     1171821 :         *me = *pa;
    1034     1171821 :         *pa = ref_to_me;
    1035     2416761 :       } else {
    1036             :         /* Case 2: r-w. This is the tricky case because there could be
    1037             :            multiple readers.  We need to set all the last readers' child
    1038             :            pointers to me. */
    1039     2416761 :         *me = *pa;
    1040     2416761 :         *pa = ref_to_me;
    1041     2416761 :         edge_t * pb = FOLLOW_EDGE( disp->pool, pa[1], _ignore );
    1042             :         /* Intentionally skip the first in_degree increment, because it
    1043             :            will be done later */
    1044     2810415 :         while( pb!=pa ) {
    1045      393654 :           *pb = ref_to_me;
    1046      393654 :           ele->in_degree++;
    1047      393654 :           pb = FOLLOW_EDGE( disp->pool, pb[1], _ignore );
    1048      393654 :         }
    1049     2416761 :         flags |= ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ) | ACCT_INFO_FLAG_ANY_WRITERS( lane );
    1050     2416761 :       }
    1051     5597487 :     } else {
    1052     5597487 :       if( flags & ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ) ) { /* unclear prob */
    1053             :         /* Case 3: w-r. Similar to case 1, but need to initialize my
    1054             :            next and prev sibling pointers too. */
    1055      423078 :         *me = *pa;
    1056      423078 :         *pa = ref_to_me;
    1057      423078 :         me[1] = ref_to_me; /* next */
    1058      423078 :         me[2] = ref_to_me; /* prev */
    1059      423078 :         flags &= ~(int)ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ); /* clear bit */
    1060     5174409 :       } else {
    1061             :         /* Case 4: r-r. Add myself as a sibling instead of a child */
    1062     5174409 :         *me = *pa;
    1063     5174409 :         FOLLOW_EDGE( disp->pool, pa[1], _ignore )[2] = ref_to_me;  /* prev->next->prev = me   */
    1064     5174409 :         me[1] = pa[1];                                             /* me->next   = prev->next */
    1065     5174409 :         me[2] = ref_to_pa;                                         /* me->prev   = prev       */
    1066     5174409 :         pa[1] = ref_to_me;                                         /* prev->next = me         */
    1067     5174409 :       }
    1068     5597487 :     }
    1069             : 
    1070             :     /* Step 3: Update the final values */
    1071             :     /* In general, we want to increment the in_degree unless this
    1072             :        transaction is the first to reference this account.  The
    1073             :        exception is that if this account has only readers, including
    1074             :        this transaction, we don't want to increment the in_degree
    1075             :        either.  At this point, we can tell if that is the case based on
    1076             :        ANY_WRITERS.  */
    1077     9186069 :     ele->in_degree += (uint)((ai->last_reference[ lane ]!=0U) & !!(flags & ACCT_INFO_FLAG_ANY_WRITERS( lane )));
    1078     9186069 :     ai->last_reference[ lane ] = ref_to_me;
    1079     9186069 :     ai->flags                  = (uchar)flags;
    1080     9186069 :     edge_idx += fd_uint_if( writable, 1U, 3U );
    1081     9186069 :     acct_idx++;
    1082     9186069 :   }
    1083     6998862 :   ele->edge_cnt_etc += (uint)addr_cnt<<fd_int_if( writable, 0, 7 ); /* Can't overflow by construction */
    1084     6998862 : }
    1085             : 
    1086             : 
    1087             : /* should be called with all writable accounts first */
    1088             : static void
    1089             : add_unstaged_edges( fd_rdisp_t * disp,
    1090             :                     fd_rdisp_txn_t       * ele,
    1091             :                     fd_rdisp_unstaged_t  * unstaged,
    1092             :                     fd_acct_addr_t const * addr,
    1093             :                     ulong                  addr_cnt,
    1094             :                     int                    writable,
    1095         216 :                     int                    update_score ) {
    1096         216 :   ulong base_idx = unstaged->writable_cnt+unstaged->readonly_cnt;
    1097         216 :   FD_TEST( !writable || unstaged->readonly_cnt==0U );
    1098         366 :   for( ulong i=0UL; i<addr_cnt; i++ ) {
    1099         150 :     unstaged->keys[ base_idx+i ] = addr[i];
    1100         150 :     if( FD_LIKELY( update_score ) ) {
    1101         150 :       ulong idx = acct_map_idx_query( disp->acct_map, addr+i, ULONG_MAX, disp->acct_pool );
    1102         150 :       if( FD_UNLIKELY( idx==ULONG_MAX ) ) idx = acct_map_idx_query( disp->free_acct_map, addr+i, ULONG_MAX, disp->acct_pool );
    1103             :       /* since these are unstaged, we don't bother moving accounts
    1104             :          around */
    1105         150 :       float score_change = 1.0f;
    1106         150 :       if( FD_LIKELY( idx!=ULONG_MAX ) ) score_change = 1.0f - update_ema( disp->acct_pool+idx, disp->global_insert_cnt );
    1107         150 :       ele->score *= fd_float_if( writable & update_score, score_change, 1.0f );
    1108         150 :     }
    1109         150 :   }
    1110         216 :   *(fd_ptr_if( writable, &(unstaged->writable_cnt), &(unstaged->readonly_cnt) ) ) += (uint)addr_cnt;
    1111         216 : }
    1112             : 
    1113             : ulong
    1114             : fd_rdisp_add_txn( fd_rdisp_t          *  disp,
    1115             :                   FD_RDISP_BLOCK_TAG_T   insert_block,
    1116             :                   fd_txn_t const       * txn,
    1117             :                   uchar const          * payload,
    1118             :                   fd_acct_addr_t const * alts,
    1119     1166511 :                   int                    serializing ) {
    1120             : 
    1121     1166511 :   fd_rdisp_blockinfo_t * block   = block_map_ele_query( disp->blockmap, &insert_block, NULL, disp->block_pool );
    1122     1166511 :   if( FD_UNLIKELY( !block || !block->insert_ready ) ) return 0UL;
    1123     1166508 :   if( FD_UNLIKELY( !pool_free( disp->pool       ) ) ) return 0UL;
    1124             : 
    1125     1166508 :   ulong idx = pool_idx_acquire( disp->pool );
    1126     1166508 :   fd_rdisp_txn_t * rtxn = disp->pool + idx;
    1127     1166508 :   if( FD_UNLIKELY( rtxn->in_degree!=IN_DEGREE_FREE ) ) FD_LOG_CRIT(( "pool[%lu].in_degree==%u but free", idx, rtxn->in_degree ));
    1128             : 
    1129     1166508 :   fd_acct_addr_t const * imm_addrs = fd_txn_get_acct_addrs( txn, payload );
    1130             : 
    1131     1166508 :   if( FD_UNLIKELY( !block->staged ) ) {
    1132          36 :     rtxn->in_degree = IN_DEGREE_UNSTAGED;
    1133          36 :     rtxn->score     = 0.999f;
    1134             : 
    1135          36 :     fd_rdisp_unstaged_t * unstaged = disp->unstaged + idx;
    1136          36 :     unstaged->block = insert_block;
    1137          36 :     unstaged->writable_cnt = 0U;
    1138          36 :     unstaged->readonly_cnt = 0U;
    1139             : 
    1140          36 :     add_unstaged_edges( disp, rtxn, unstaged, imm_addrs,
    1141          36 :                                                         fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_SIGNER        ), 1, 1 );
    1142          36 :     add_unstaged_edges( disp, rtxn, unstaged, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_SIGNER ),
    1143          36 :                                                         fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_NONSIGNER_IMM ), 1, 1 );
    1144          36 :     if( FD_LIKELY( alts ) )
    1145          36 :       add_unstaged_edges( disp, rtxn, unstaged, alts,
    1146          36 :                                                         fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_ALT ),           1, 1 );
    1147          36 :     add_unstaged_edges( disp, rtxn, unstaged, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_SIGNER ),
    1148          36 :                                                         fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_SIGNER        ), 0, 1 );
    1149          36 :     add_unstaged_edges( disp, rtxn, unstaged, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_SIGNER | FD_TXN_ACCT_CAT_WRITABLE_NONSIGNER_IMM ),
    1150          36 :                                                         fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_NONSIGNER_IMM ), 0, 1 );
    1151          36 :     if( FD_LIKELY( alts ) )
    1152          36 :       add_unstaged_edges( disp, rtxn, unstaged, alts   +fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_ALT ),
    1153          36 :                                                         fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_ALT ),           0, 1 );
    1154             : 
    1155          36 :     unstaged_txn_ll_ele_push_tail( block->ll, rtxn, disp->pool );
    1156     1166472 :   } else {
    1157     1166472 :     uint lane = block->staging_lane;
    1158             : 
    1159     1166472 :     rtxn->in_degree    = 0U;
    1160     1166472 :     rtxn->score        = 0.999f;
    1161     1166472 :     rtxn->edge_cnt_etc = (block->linear_block_number<<16) | (lane<<14);
    1162             : 
    1163     1166472 :     add_edges( disp, rtxn, imm_addrs,
    1164     1166472 :                                      fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_SIGNER        ), lane, 1, 1 );
    1165     1166472 :     add_edges( disp, rtxn, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_SIGNER ),
    1166     1166472 :                                      fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_NONSIGNER_IMM ), lane, 1, 1 );
    1167     1166472 :     if( FD_LIKELY( alts ) )
    1168     1166469 :       add_edges( disp, rtxn, alts,
    1169     1166469 :                                      fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_ALT ),           lane, 1, 1 );
    1170     1166472 :     add_edges( disp, rtxn, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_SIGNER ),
    1171     1166472 :                                      fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_SIGNER        ), lane, 0, 1 );
    1172     1166472 :     add_edges( disp, rtxn, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_SIGNER | FD_TXN_ACCT_CAT_WRITABLE_NONSIGNER_IMM ),
    1173     1166472 :                                      fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_NONSIGNER_IMM ), lane, 0, 1 );
    1174     1166472 :     if( FD_LIKELY( alts ) )
    1175     1166469 :       add_edges( disp, rtxn, alts   +fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_ALT ),
    1176     1166469 :                                      fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_ALT ),           lane, 0, 1 );
    1177     1166472 :   }
    1178             : 
    1179     1166508 :   if( FD_UNLIKELY( serializing | block->last_insert_was_serializing ) ) {
    1180           6 :     block->last_serializing = block->inserted_cnt;
    1181           6 :   }
    1182     1166508 :   block->last_insert_was_serializing = (uint)!!serializing;
    1183     1166508 :   rtxn->score += (float)block->last_serializing;
    1184             : 
    1185     1166508 :   block->inserted_cnt++;
    1186     1166508 :   disp->global_insert_cnt++;
    1187             : 
    1188     1166508 :   if( FD_LIKELY( (block->staged) & (rtxn->in_degree==0U) ) ) {
    1189      575862 :     pending_prq_ele_t temp[1] = {{ .score = rtxn->score, .linear_block_number = block->linear_block_number, .txn_idx = (uint)idx }};
    1190      575862 :     pending_prq_insert( disp->lanes[ block->staging_lane ].pending, temp );
    1191      575862 :   }
    1192             : 
    1193     1166508 :   return idx;
    1194     1166508 : }
    1195             : 
    1196             : ulong
    1197             : fd_rdisp_get_next_ready( fd_rdisp_t           * disp,
    1198     2023440 :                          FD_RDISP_BLOCK_TAG_T   schedule_block ) {
    1199     2023440 :   fd_rdisp_blockinfo_t * block   = block_map_ele_query( disp->blockmap, &schedule_block, NULL, disp->block_pool );
    1200     2023440 :   if( FD_UNLIKELY( !block || !block->schedule_ready ) ) return 0UL;
    1201             : 
    1202     2023425 :   ulong idx;
    1203     2023425 :   if( FD_LIKELY( block->staged ) ) {
    1204     2023398 :     ulong staging_lane = block->staging_lane;
    1205     2023398 :     per_lane_info_t * l = disp->lanes + staging_lane;
    1206             : 
    1207     2023398 :     if( FD_UNLIKELY( !pending_prq_cnt( l->pending )                                ) ) return 0UL;
    1208     1166496 :     if( FD_UNLIKELY( l->pending->linear_block_number != block->linear_block_number ) ) return 0UL;
    1209             :     /* e.g. when completed_cnt==0, we can accept any score below 1.0 */
    1210     1166490 :     if( FD_UNLIKELY( l->pending->score>=(float)(block->completed_cnt+1U)           ) ) return 0UL;
    1211     1166490 :     idx = l->pending->txn_idx;
    1212     1166490 :     pending_prq_remove_min( l->pending );
    1213     1166490 :     disp->pool[ idx ].in_degree = IN_DEGREE_DISPATCHED;
    1214     1166490 :   } else {
    1215          27 :     if( FD_UNLIKELY( block->dispatched_cnt!=block->completed_cnt       ) ) return 0UL;
    1216          24 :     if( FD_UNLIKELY( unstaged_txn_ll_is_empty( block->ll, disp->pool ) ) ) return 0UL;
    1217          18 :     idx = unstaged_txn_ll_idx_peek_head( block->ll, disp->pool );
    1218          18 :     disp->pool[ idx ].in_degree = IN_DEGREE_UNSTAGED_DISPATCHED;
    1219          18 :   }
    1220     1166508 :   block->dispatched_cnt++;
    1221             : 
    1222     1166508 :   return idx;
    1223     2023425 : }
    1224             : 
    1225             : void
    1226             : fd_rdisp_complete_txn( fd_rdisp_t * disp,
    1227             :                        ulong        txn_idx,
    1228     1166511 :                        int          reclaim ) {
    1229             : 
    1230     1166511 :   fd_rdisp_txn_t * rtxn = disp->pool + txn_idx;
    1231     1166511 :   fd_rdisp_blockinfo_t * block = NULL;
    1232             : 
    1233     1166511 :   if( FD_UNLIKELY( rtxn->in_degree==IN_DEGREE_UNSTAGED_DISPATCHED ) ) {
    1234             :     /* Unstaged */
    1235          18 :     block = block_map_ele_query( disp->blockmap, &disp->unstaged[ txn_idx ].block, NULL, disp->block_pool );
    1236          18 :     FD_TEST( rtxn==unstaged_txn_ll_ele_peek_head( block->ll, disp->pool ) );
    1237          18 :     unstaged_txn_ll_ele_pop_head( block->ll, disp->pool );
    1238          18 :     block->completed_cnt++;
    1239     1166493 :   } else if( FD_LIKELY( rtxn->in_degree==IN_DEGREE_DISPATCHED ) ) {
    1240             :     /* Staged */
    1241     1166490 :     ulong w_cnt = (rtxn->edge_cnt_etc    ) & 0x7FU;
    1242     1166490 :     ulong r_cnt = (rtxn->edge_cnt_etc>> 7) & 0x7FU;
    1243     1166490 :     ulong lane  = (rtxn->edge_cnt_etc>>14) & 0x3U;
    1244     1166490 :     uint  tail_linear_block_num = (uint)(disp->lanes[lane].linear_block_number);
    1245     1166490 :     ulong edge_idx = 0UL;
    1246    10352559 :     for( ulong i=0UL; i<w_cnt+r_cnt; i++ ) {
    1247     9186069 :       edge_t const * e = rtxn->edges+edge_idx;
    1248     9186069 :       edge_t const  e0 = *e;
    1249     9186069 :       edge_t ref_to_me = (uint)((txn_idx<<8) | i);
    1250             : 
    1251             :       /* To help with explanations, consider the following DAG:
    1252             : 
    1253             :            --> B --\   --> E --\
    1254             :           /         V /         V
    1255             :          A           D         (G)
    1256             :           \         ^ \         ^
    1257             :            --> C --/   --> F --/     */
    1258             : 
    1259     9186069 :       if( FD_UNLIKELY( EDGE_IS_LAST( e0 ) ) ) {
    1260     6682734 :         ulong acct_idx = e0 & 0x7FFFFFFFU;
    1261     6682734 :         acct_info_t * ai = disp->acct_pool + acct_idx;
    1262             :         /* If this is a writer, e.g. node G above, we know it's the last
    1263             :            one, so we can clear last_reference.
    1264             :            If this is a reader, e.g. node E and F above, if node G
    1265             :            didn't exist, we need to check if it's the last one
    1266             :            (me==me->next).  If so, we can clear last_reference.  If not,
    1267             :            we need to delete this node from the linked list */
    1268     6682734 :         if( edge_idx<w_cnt || e[1]==ref_to_me ) {
    1269     4103505 :           ai->last_reference[ lane ] = 0U;
    1270     4103505 :           ai->flags = (uchar)(ai->flags & (~(ACCT_INFO_FLAG_ANY_WRITERS( lane ) | ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ))));
    1271     4103505 :         } else {
    1272     2579229 :           int _ignore;
    1273     2579229 :           FOLLOW_EDGE( disp->pool, e[1], _ignore )[2] = e[2];  /* me->next->prev = me->prev */
    1274     2579229 :           FOLLOW_EDGE( disp->pool, e[2], _ignore )[1] = e[1];  /* me->prev->next = me->next */
    1275     2579229 :           ai->last_reference[ lane ]= fd_uint_if( ai->last_reference[ lane ]==ref_to_me, e[1], ai->last_reference[ lane ] );
    1276     2579229 :         }
    1277             : 
    1278             :         /* Potentially transition from ACTIVE -> CACHED */
    1279     6682734 :         if( FD_UNLIKELY( (ai->last_reference[ 0 ]==0U)&(ai->last_reference[ 1 ]==0U)&
    1280     6682734 :                          (ai->last_reference[ 2 ]==0U)&(ai->last_reference[ 3 ]==0U) ) ) {
    1281     1873431 :           ai->flags = 0;
    1282     1873431 :           acct_map_idx_remove_fast( disp->acct_map,        acct_idx, disp->acct_pool );
    1283     1873431 :           acct_map_idx_insert     ( disp->free_acct_map,   acct_idx, disp->acct_pool );
    1284     1873431 :           free_dlist_idx_push_tail( disp->free_acct_dlist, acct_idx, disp->acct_pool );
    1285     1873431 :         }
    1286     6682734 :       } else {
    1287     2503335 :         int child_is_writer;
    1288             : 
    1289     2503335 :         edge_t next_e = e0;
    1290     2503335 :         edge_t const * child_edge;
    1291     2713623 :         while( 1 ) {
    1292             :           /* This loop first traverses the me->child link, and then
    1293             :              traverses any sibling links.  For example, in the case that
    1294             :              we're completing node D above, the first child_txn is E,
    1295             :              and the second child_txn is F. */
    1296     2713623 :           /*            */ child_edge = FOLLOW_EDGE(     disp->pool, next_e, child_is_writer );
    1297     2713623 :           fd_rdisp_txn_t * child_txn  = FOLLOW_EDGE_TXN( disp->pool, next_e                  );
    1298             : 
    1299             :           /* Sanity test */
    1300     2713623 :           FD_TEST( child_txn->in_degree>0U                   );
    1301     2713623 :           FD_TEST( child_txn->in_degree<IN_DEGREE_DISPATCHED );
    1302             : 
    1303     2713623 :           if( FD_UNLIKELY( 0U==(--(child_txn->in_degree)) ) ) {
    1304             :             /* We need an operation something like
    1305             :                fd_frag_meta_ts_decomp. child_txn has the low 16 bits,
    1306             :                and tail_linear_block_num has the full 32 bits, except
    1307             :                for tail_linear_block_num refers to a block < block_depth
    1308             :                later.  Since block_depth<2^16, that means we can resolve
    1309             :                this unambiguously.  Basically, we copy the high 16 bits
    1310             :                frorm tail_linear_block_num unless that would make
    1311             :                linear_block_num larger than tail_linear_block_num, in
    1312             :                which case, we subtract 2^16. */
    1313      590625 :             uint low_16_bits = child_txn->edge_cnt_etc>>16;
    1314      590625 :             uint linear_block_num = ((tail_linear_block_num & ~0xFFFFU) | low_16_bits) - (uint)((low_16_bits>(tail_linear_block_num&0xFFFFU))<<16);
    1315      590625 :             pending_prq_ele_t temp[1] = {{ .score               = child_txn->score,
    1316      590625 :                                            .linear_block_number = linear_block_num,
    1317      590625 :                                            .txn_idx             = (uint)(child_txn-disp->pool) }};
    1318      590625 :             pending_prq_insert( disp->lanes[ lane ].pending, temp );
    1319      590625 :           }
    1320     2713623 :           if( child_is_writer || child_edge[1]==e0 ) break;
    1321      210288 :           next_e = child_edge[1];
    1322      210288 :         }
    1323             :         /* In the case that the completed transaction is a reader, say B
    1324             :            or C above, it seems like we should need to remove it from
    1325             :            the doubly linked list, but we actually don't.  The times
    1326             :            that we need to read the sibling pointers are:
    1327             :             1. Completing the writer before a reader (e.g. completing A)
    1328             :             2. Completing a reader that's the last in the DAG (e.g.
    1329             :                completing E/F if G didn't exist)
    1330             :             3. Adding another reader to the same set of readers (e.g. if
    1331             :                G were added as a reader instead of a writer)
    1332             :             4. Adding a writer after a set of readers (e.g. adding G).
    1333             : 
    1334             :            Supposing that the completed transaction is a reader, since
    1335             :            we checked EDGE_IS_LAST( e0 ), we know that there is at least
    1336             :            one writer that follows this reader, e.g. D above.
    1337             : 
    1338             :            And so none of these reason can apply to this group of readers:
    1339             :             1. Completing B or C implies that A has already completed,
    1340             :                so it can't complete again.
    1341             :             2. We know that we're not in that case because we checked
    1342             :                EDGE_IS_LAST( e0 ), and if we're not last now, we cannot
    1343             :                become last later, because the growth happens in the
    1344             :                other direction.
    1345             :             3. Similarly, because we checked EDGE_IS_LAST, any future
    1346             :                additions of readers won't be to this group of readers.
    1347             :             4. Similarly, we know that there already is a writer that
    1348             :                follows this group of readers, so a later writer would
    1349             :                not read this set of readers.
    1350             : 
    1351             :            So then, we don't need to deal with the sibling edges.  The
    1352             :            fact that we only need to do it in one case almost calls into
    1353             :            question whether we need to maintain the whole circular
    1354             :            system in the first place, and whether we could get away with
    1355             :            a reference count or something instead, but it's important in
    1356             :            one critical case: suppose we add a bunch of readers, then
    1357             :            some of them complete, and then we add a writer (case 4
    1358             :            above).  We need to be able to enumerate the nodes in the DAG
    1359             :            that have not yet completed, and we need to be able to remove
    1360             :            them from that set in O(1).  Those two requirements don't
    1361             :            leave us with many alternatives besides a doubly linked list. */
    1362             : 
    1363     2503335 :         if( FD_UNLIKELY( EDGE_IS_LAST( *child_edge ) ) ) {
    1364             :           /* For example, either:
    1365             :              1. completing D when if G didn't exist
    1366             :              2. completing E or F with G in the DAG
    1367             : 
    1368             :              After completing this transaction, there's either 0 writers
    1369             :              left (case 1) or 1 writer left (case 2) in the DAG. and if
    1370             :              there is one, it is the last reference.
    1371             : 
    1372             :              Either way, we want to set ANY_WRITERS to
    1373             :              LAST_REF_WAS_WRITE. */
    1374     1965492 :           acct_info_t * ai = disp->acct_pool + (*child_edge & 0x7FFFFFFFU);
    1375     1965492 :           ulong flags = ai->flags;
    1376     1965492 :           flags &= ~(ulong)ACCT_INFO_FLAG_ANY_WRITERS( lane );
    1377     1965492 :           flags |= (flags & (ulong)ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ))<<1;
    1378     1965492 :           ai->flags = (uchar)flags;
    1379     1965492 :         }
    1380     2503335 :       }
    1381     9186069 :       edge_idx += fd_ulong_if( i<w_cnt, 1UL, 3UL );
    1382     9186069 :     }
    1383     1166490 :     block = block_slist_ele_peek_head( disp->lanes[ lane ].block_ll, disp->block_pool );
    1384     1166490 :     block->completed_cnt++;
    1385     1166490 :   } else if( FD_LIKELY( rtxn->in_degree==IN_DEGREE_ZOMBIE ) ) {
    1386           3 :     FD_TEST( reclaim );
    1387           3 :     block = block_pool_ele( disp->block_pool, rtxn->block_idx );
    1388           3 :     zombie_dlist_ele_remove( block->zombie_list, rtxn, disp->pool );
    1389             :     /* Fall through to the pool release branch below. */
    1390           3 :   } else {
    1391           0 :     FD_LOG_CRIT(( "completed un-dispatched transaction %lu", txn_idx ));
    1392           0 :   }
    1393     1166511 :   if( reclaim ) {
    1394             :     /* For testing purposes, to make sure we don't read a completed
    1395             :        transaction, we can clobber the memory. */
    1396             :     /* memset( disp->pool+txn_idx, '\xCC', sizeof(fd_rdisp_txn_t) ); */
    1397     1166508 :     rtxn->in_degree = IN_DEGREE_FREE;
    1398     1166508 :     pool_idx_release( disp->pool, txn_idx );
    1399     1166508 :   } else {
    1400           3 :     rtxn->in_degree = IN_DEGREE_ZOMBIE;
    1401           3 :     zombie_dlist_ele_push_tail( block->zombie_list, rtxn, disp->pool );
    1402           3 :     rtxn->block_idx = (uint)block_pool_idx( disp->block_pool, block );
    1403           3 :   }
    1404     1166511 : }
    1405             : 
    1406             : 
    1407             : ulong
    1408             : fd_rdisp_staging_lane_info( fd_rdisp_t           const * disp,
    1409          36 :                             fd_rdisp_staging_lane_info_t out_sched[ static 4 ] ) {
    1410         180 :   for( ulong i=0UL; i<4UL; i++ ) {
    1411         144 :     if( !(disp->free_lanes & (1<<i) ) ) {
    1412          51 :       block_slist_t const * sl = disp->lanes[ i ].block_ll;
    1413          51 :       out_sched[ i ].insert_ready_block   = block_slist_ele_peek_tail( sl, disp->block_pool )->block;
    1414          51 :       out_sched[ i ].schedule_ready_block = block_slist_ele_peek_head( sl, disp->block_pool )->block;
    1415          51 :     }
    1416         144 :   }
    1417          36 :   return 0xFUL & ~(ulong)disp->free_lanes;
    1418          36 : }
    1419             : 
    1420             : void
    1421             : fd_rdisp_verify( fd_rdisp_t const * disp,
    1422      776976 :                  uint             * scratch ) {
    1423      776976 :   ulong acct_depth  = disp->depth*MAX_ACCT_PER_TXN;
    1424      776976 :   ulong block_depth = disp->block_depth;
    1425      776976 :   FD_TEST( 0==acct_map_verify ( disp->acct_map,      acct_depth+1UL,  disp->acct_pool ) );
    1426      776976 :   FD_TEST( 0==acct_map_verify ( disp->free_acct_map, acct_depth+1UL,  disp->acct_pool ) );
    1427      776976 :   FD_TEST( 0==block_map_verify( disp->blockmap,     block_depth+1UL, disp->block_pool ) );
    1428             : 
    1429             :   /* Check all the in degree counts are right */
    1430      776976 :   memset( scratch, '\0', sizeof(uint)*(disp->depth+1UL) );
    1431      776976 :   scratch[ 0 ] = UINT_MAX;
    1432   233866176 :   for( ulong j=1UL; j<disp->depth+1UL; j++ ) {
    1433   233089200 :     fd_rdisp_txn_t const * rtxn = disp->pool+j;
    1434   233089200 :     if( rtxn->in_degree==IN_DEGREE_FREE ) { scratch[ j ]=UINT_MAX; continue; }
    1435             : 
    1436    57500856 :     if( (rtxn->in_degree==IN_DEGREE_UNSTAGED_DISPATCHED) |
    1437    57500856 :         (rtxn->in_degree==IN_DEGREE_UNSTAGED)            |
    1438    57500856 :         (rtxn->in_degree==IN_DEGREE_ZOMBIE) ) continue;
    1439             : 
    1440    57500829 :     ulong w_cnt = (rtxn->edge_cnt_etc    ) & 0x7FU;
    1441    57500829 :     ulong r_cnt = (rtxn->edge_cnt_etc>> 7) & 0x7FU;
    1442    57500829 :     ulong edge_idx = 0UL;
    1443             : 
    1444   721543131 :     for( ulong i=0UL; i<w_cnt+r_cnt; i++ ) {
    1445   664042302 :       edge_t const * e = rtxn->edges+edge_idx;
    1446   664042302 :       edge_t const  e0 = *e;
    1447             : 
    1448   664042302 :       edge_idx += fd_ulong_if( i<w_cnt, 1UL, 3UL );
    1449             : 
    1450   664042302 :       if( FD_UNLIKELY( EDGE_IS_LAST( e0 ) ) ) continue;
    1451             : 
    1452   144900756 :       edge_t next_e = e0;
    1453   144900756 :       edge_t const * child_edge;
    1454   144900756 :       edge_t last_e = 0U;
    1455   151021773 :       while( 1 ) {
    1456   151021773 :         int child_is_writer;
    1457             :         /* This loop first traverses the me->child link, and then
    1458             :            traverses any sibling links.  For example, in the case that
    1459             :            we're completing node D above, the first child_txn is E,
    1460             :            and the second child_txn is F. */
    1461   151021773 :         /*            */ child_edge = FOLLOW_EDGE(     disp->pool, next_e, child_is_writer );
    1462   151021773 :         fd_rdisp_txn_t * child_txn  = FOLLOW_EDGE_TXN( disp->pool, next_e                  );
    1463   151021773 :         scratch[ child_txn - disp->pool ]++;
    1464   151021773 :         if( child_is_writer || child_edge[1]==e0 ) break;
    1465     6121017 :         if( last_e!=0U ) FD_TEST( child_edge[2]==last_e );
    1466     6121017 :         last_e = next_e;
    1467     6121017 :         next_e = child_edge[1];
    1468     6121017 :         FD_TEST( next_e>=0x100U );
    1469     6121017 :       }
    1470   144900756 :     }
    1471    57500829 :   }
    1472   233866176 :   for( ulong i=1UL; i<disp->depth+1UL; i++ ) {
    1473   233089200 :     FD_TEST( scratch[ i ]==UINT_MAX ||
    1474   233089200 :              disp->pool[ i ].in_degree==IN_DEGREE_DISPATCHED ||
    1475   233089200 :              disp->pool[ i ].in_degree==IN_DEGREE_UNSTAGED ||
    1476   233089200 :              disp->pool[ i ].in_degree==IN_DEGREE_UNSTAGED_DISPATCHED ||
    1477   233089200 :              disp->pool[ i ].in_degree==IN_DEGREE_ZOMBIE ||
    1478   233089200 :              disp->pool[ i ].in_degree==scratch[ i ] );
    1479   233089200 :   }
    1480      776976 : }
    1481             : 
    1482           9 : void * fd_rdisp_leave ( fd_rdisp_t * disp ) { return disp; }
    1483           9 : void * fd_rdisp_delete( void * mem        ) { return  mem; }

Generated by: LCOV version 1.14