LCOV - code coverage report
Current view: top level - discof/replay - fd_rdisp.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 637 651 97.8 %
Date: 2026-06-29 05:51:35 Functions: 22 23 95.7 %

          Line data    Source code
       1             : #include "fd_rdisp.h"
       2             : #include <math.h> /* for the EMA */
       3             : 
       4             : /* The conflict graph that this file builds is not a general DAG, but
       5             :    the union of several special account-conflict graphs.  Each
       6             :    account-conflict graph has special structure:
       7             :                            ---> 3 --
       8             :                           /          \
       9             :                          /            v
      10             :                1  ---> 2 -----> 4 --> 6 ----> 7
      11             :                         \             ^
      12             :                          \           /
      13             :                           ----> 5 --
      14             : 
      15             :    That is, the graph is almost a line, but may have fan-out and fan-in
      16             :    regions.  The tricky part about representing a graph like this
      17             :    without dynamic memory allocation is that nodes may have arbitrary
      18             :    in-degree and out-degree.  Thus, we use a pretty standard trick and
      19             :    use sibling pointers, denoted below with dotted lines.  Each node
      20             :    maintains at most one successor pointer and at most one sibling
      21             :    pointer.  Although not shown below, the sibling pointers are
      22             :    circularly linked, so 5's sibling is 3.  Additionally, though not
      23             :    shown below, the child of the last node (7) stores information about
      24             :    what account address all this is for, which facilitates deleting the
      25             :    last node in the graph.
      26             : 
      27             : 
      28             :                               --> 3 --
      29             :                              /    ^   \
      30             :                             /     :    \
      31             :                            /      V     v
      32             :                  1  ---> 2        4 --> 6 ----> 7
      33             :                                   ^     ^
      34             :                                   :    /
      35             :                                   V   /
      36             :                                   5 --
      37             : 
      38             :    The normal edge 2->3 along with the sibling edge 3..>4 implies a
      39             :    normal edge 2->4.  That then transitively implies an edge 2->5.
      40             : 
      41             :    We want each node to maintain a count of its in-degree so that we
      42             :    know when it can be executed.  The implied edges also count for the
      43             :    in-degree.  In this example, node 1 has in-degree 0, node 6 has
      44             :    in-degree 3, and the rest have in-degree 1.
      45             : 
      46             :    Maintaining each account-conflict graph is relatively easy given the
      47             :    operations we want to support.  Only the details about sibling edges
      48             :    are worth mentioning.  For example, when deleting node 2, we
      49             :    decrement the in-degree count for its successor, and then follow the
      50             :    sibling pointers, decrementing all the in-degree counts as we go to
      51             :    mark the deletion of the implied edges.  Sibling edges also need to
      52             :    be doubly linked, so that e.g. nodes 3 and 5 can be re-linked in O(1)
      53             :    if node 4 is deleted.  They're also circularly linked as well for
      54             :    convenience.
      55             : 
      56             :    When building the graph, we maintain a map of account to the last
      57             :    node that references it, whether that was a read or write, and
      58             :    whether there are any writers to that account in the graph right now.
      59             :    If the new node reads from an account that was last read, the new
      60             :    node becomes a sibling of the last read, with in-degree increased if
      61             :    there are any writers.  Otherwise, it becomes a successor of the node
      62             :    that last referenced the account. */
      63             : 
      64             : /* For a task like this with lots of graph traversal and pointer
      65             :    chasing, performance is typically limited by memory latency.  That
      66             :    means that the more that can fit in cache, the better the
      67             :    performance.  This implementation uses a lot of bit-packing to
      68             :    improve cache footprint. */
      69             : 
      70             : /* The following structs are all very local to this compilation unit,
      71             :    and so they don't have globally acceptable type names (e.g.
      72             :    fd_rdisp_edge_t). */
      73             : 
      74      155541 : #define MAX_ACCT_PER_TXN 128UL
      75             : 
      76             : /* edge_t: Fields typed edge_t represent an edge in one of the parallel
      77             :    account-conflict DAGs.  Each transaction stores a list of all its
      78             :    outgoing edges.  The type is actually a union of bitfield, but C
      79             :    bitfields are gross, so we just do it manually with macros.  If the
      80             :    high bit is set, that means the transaction storing this edge_t value
      81             :    is the last in this specific DAG, and the lower 31 bits of the
      82             :    value are an index in the map_pool for the account pubkey for this
      83             :    DAG.  See the comments about the hidden edge outgoing from node 7 in
      84             :    the DAG at the top of this file for an example.
      85             :    If the high bit is not set, then the next 23 bits store the
      86             :    destination of the edge, represented by its index in the pool of
      87             :    transactions.  Then the lowest 8 bits store the account index within
      88             :    that transaction of the edge that is part of the same
      89             :    account-specific DAG as this edge.  Because the max depth is 2^23-1,
      90             :    and each transaction can reference 128 accounts, the max accounts
      91             :    that can be referenced fits in 30 bits.
      92             :    The proper type would be something like
      93             :    typedef union {
      94             :      struct {
      95             :        uint is_last:1;
      96             :        uint map_pool_idx:31;
      97             :      } last;
      98             :      struct {
      99             :        uint is_last:1;
     100             :        uint txn_idx:23;
     101             :        uint acct_idx:8;
     102             :      } e;
     103             :    } edge_t;
     104             : 
     105             :    */
     106             : typedef uint edge_t;
     107             : 
     108             : 
     109             : /* fd_rdisp_txn is the representation of a transaction as a node in the
     110             :    DAG. */
     111             : struct fd_rdisp_txn {
     112             :   /* in_degree: The total number of edges summed across all account DAGs
     113             :      with this node as their destination.  In the worst case, all the
     114             :      other transactions in the pool read from each of the max number of
     115             :      accounts that this transaction writes to,  so there are
     116             :      MAX_ACCT_PER_TXN*depth edges that come into this node, which fits in
     117             :      about 30 bits, so we have some room for special values.  If
     118             :      in_degree is one of the following values, then
     119             :      the transaction is: */
     120    47385540 : #define IN_DEGREE_FREE                (UINT_MAX   )
     121    11538690 : #define IN_DEGREE_UNSTAGED            (UINT_MAX-1U)/* unstaged, not dispatched */
     122      552831 : #define IN_DEGREE_DISPATCHED          (UINT_MAX-2U)/* staged,   dispatched */
     123    11538372 : #define IN_DEGREE_UNSTAGED_DISPATCHED (UINT_MAX-3U)/* unstaged, dispatched */
     124    11538057 : #define IN_DEGREE_ZOMBIE              (UINT_MAX-4U)/* zombie */
     125             :   /* a transaction that is staged and dispatched is must have an
     126             :      in_degree of 0.  in_degree isn't a meaningful concept for unstaged
     127             :      transactions. */
     128             :   uint    in_degree;
     129             : 
     130             :   /* score: integer part stores how many transactions in the block must
     131             :      have completed before this transaction can be scheduled.  This is
     132             :      useful for transactions marked as serializing.  The fractional part
     133             :      gives some measure of how urgent the transaction is, where lower is
     134             :      more urgent.  This means we can't have more transactions in a block
     135             :      marked as serializing than the first integer that a float cannot
     136             :      represent.  That value is about 16M, which is much higher than the
     137             :      maximum number of transactions in a block, so this is not a
     138             :      problem.  If in the very rare case that there are more than just a
     139             :      few serializing transactions, we will lose a bit of precision for
     140             :      the fractional portion, which makes total sense; if there's not
     141             :      much room for parallel execution, having the optimal parallel
     142             :      execution is not very important. */
     143             :   float   score;
     144             : 
     145             :   /* edge_cnt_etc:
     146             :      0xFFFF0000 (16 bits) for linear block number,
     147             :      0x0000C000 (2 bits) for concurrency lane,
     148             :      0x00003F80 (7 bits) for r_cnt
     149             :      0x0000007F (7 bits) for w_cnt_1.  Unfortunately, transactions can
     150             :      have up to 128 writable accounts, but they have at least 1, so by
     151             :      storing w_cnt_1 = w_cnt - 1, we can still store it in 7 bits. */
     152             :   union {
     153             :     uint edge_cnt_etc;
     154             :     /* edge_cnt_etc is only used when the transaction is STAGED and
     155             :        PENDING, READY, or DISPATCHED.  If UNSTAGED or FREE, the next
     156             :        pointer is also here.  It can't be UNSTAGED and FREE at the same
     157             :        time, so there's no conflict with storage there either. */
     158             :     uint unstaged_next;
     159             :     uint free_next;
     160             :     /* If ZOMBIE, pointer back to block is here.  No storage conflict
     161             :        because ZOMBIE is an exclusive state. */
     162             :     uint block_idx;
     163             :   };
     164             : 
     165             : 
     166             :   /* When a transaction writes to an account, it only creates one
     167             :      link.  When a transaction reads from an account, we need the full
     168             :      doubly linked list with its siblings, so it creates 3 edges
     169             :      (child, next, prev).  All edges from writable accounts come first,
     170             :      and we keep track of how many there are.  In the worst case, all
     171             :      accounts are reads, so we size it appropriately. */
     172             :   edge_t edges[3UL*MAX_ACCT_PER_TXN]; /* addressed [0, w_cnt_1+1+3*r_cnt) */
     173             : };
     174             : typedef struct fd_rdisp_txn fd_rdisp_txn_t;
     175             : 
     176             : #define EDGE_IS_LAST(x) ((x)&0x80000000U)
     177             : 
     178             : /* Two more definitions:
     179             :    An edge index is an array position within the edges array.  An
     180             :    account index is a position within a transaction's account addresses,
     181             :    reordered so that the writable ones come first.  Since writable
     182             :    accounts use one position in the edges array per account address,
     183             :    these often coincide. */
     184             : 
     185             : 
     186             : /* FOLLOW_EDGE and FOLLOW_EDGE_TXN are helper macros for dealing with
     187             :    edges.  Given an edge_t x, and transaction pool base, FOLLOW_EDGE_TXN
     188             :    returns a pointer to transaction that the edge points to; FOLLOW_EDGE
     189             :    returns a pointer to the (first) edge_t within that transaction that
     190             :    is part of the same DAG as this edge.  FOLLOW_EDGE and
     191             :    FOLLOW_EDGE_TXN must not be called if EDGE_IS_LAST is non-zero.
     192             : 
     193             :    Then the edge index, i.e. the position in the edges array of the
     194             :    (first) edge for an account index is:
     195             :           acct_idx                              if acct_idx<=w_cnt_1
     196             :           w_cnt_1+1 + 3*(acct_idx-w_cnt_1-1)    else.
     197             :   Simplifying gives
     198             :          acct_idx + 2*signed_max( 0, acct_idx-w_cnt_1-1 ).
     199             :   In doing this calculation, we also basically get for free whether the
     200             :   edge is for a writable account or a readonly account, so we return
     201             :   that as well via the w parameter.  If the child transaction only reads
     202             :   the account address for this DAG, then the next and prev pointers can
     203             :   be accessed using the returned value +1 and +2, respectively. */
     204    38179194 : #define FOLLOW_EDGE(base, x, w) (__extension__({                                      \
     205    38179194 :         uint __e = (x);                                                               \
     206    38179194 :         fd_rdisp_txn_t * __txn = ((base)+(__e>>8));                                   \
     207    38179194 :         uint __w_cnt_1 = __txn->edge_cnt_etc & 0x7FU;                                 \
     208    38179194 :         uint __idx  = (__e & 0xFFU);                                                  \
     209    38179194 :         (w) = __idx<=__w_cnt_1;                                                       \
     210    38179194 :         (void)(w);  /* not robust... */                                               \
     211    38179194 :         __txn->edges + __idx + 2*fd_int_max( 0, (int)(__idx)-(int)(__w_cnt_1)-1 ); }))
     212    30971187 : #define FOLLOW_EDGE_TXN(base, x) ( (base)+((x)>>8) )
     213             : 
     214             : /* The pool and slist are almost the same, but they are used
     215             :    differently, so keep them as different structures for now. */
     216             : 
     217             : #define POOL_NAME     pool
     218          63 : #define POOL_T        fd_rdisp_txn_t
     219             : #define POOL_IDX_T    uint
     220     1310289 : #define POOL_NEXT     free_next
     221             : #define POOL_SENTINEL 1
     222             : #include "../../util/tmpl/fd_pool.c"
     223             : 
     224             : #define SLIST_NAME unstaged_txn_ll
     225             : #define SLIST_ELE_T fd_rdisp_txn_t
     226         318 : #define SLIST_IDX_T uint
     227        1272 : #define SLIST_NEXT  unstaged_next
     228             : #include "../../util/tmpl/fd_slist.c"
     229             : 
     230             : #define DLIST_IDX_T uint
     231           6 : #define DLIST_PREV  edges[0]
     232           6 : #define DLIST_NEXT  edges[1]
     233             : #define DLIST_NAME  zombie_dlist
     234             : #define DLIST_ELE_T fd_rdisp_txn_t
     235             : #include "../../util/tmpl/fd_dlist.c"
     236             : 
     237             : 
     238             : /* ACCT_INFO_FLAG: It's a bit unfortunate that we have to maintain these
     239             :    flags, but basically we need to be able to distinguish the case where
     240             :    there are only readers so that we don't increment in_degree when
     241             :    adding a new fd_rdisp_txn.  If we have any writers, the only way to
     242             :    transition into a state where there are only readers is to complete
     243             :    the last writer.  We know we are in this case when the completed
     244             :    node's child doesn't have a child, and the completed node's child is
     245             :    a reader, as indicated by the LAST_REF_WAS_WRITE bit.
     246             :    LAST_REF_WAS_WRITE also has the advantage of being easy to
     247             :    maintain. */
     248     6068064 : #define ACCT_INFO_FLAG_LAST_REF_WAS_WRITE(lane) (((uchar)1)<<(2*(lane)))
     249     5969337 : #define ACCT_INFO_FLAG_ANY_WRITERS(       lane) (((uchar)2)<<(2*(lane)))
     250             : 
     251             : /* acct_info_t is a node in a map_chain that contains the metadata for a
     252             :    single account address's conflict graph DAG.  In particular, it
     253             :    contains the information needed to know where to insert a node that
     254             :    reads from or writes to the account.  The objects of this type follow
     255             :    this state machine:
     256             : 
     257             :                  FREE  -----> ACTIVE ----> CACHED
     258             :                                 ^            |
     259             :                                 |-------------
     260             :    When FREE, it is in free_acct_dlist only.  When ACTIVE, it is in
     261             :    acct_map only.  When CACHED, it is in both free_acct_dlist and
     262             :    cached_acct_map.
     263             : */
     264             : struct acct_info {
     265             :   /* key, next, and prev are the map_chain fields. Used in the ACTIVE
     266             :      and CACHED states.  next and prev set to 0 when in the FREE state.
     267             :      Element 0 is a sentinel and isn't inserted to the free_acct_dlist,
     268             :      so this is unambiguous. */
     269             :   fd_acct_addr_t key;
     270             :   uint next;
     271             :   uint prev;
     272             : 
     273             :   union {
     274             :     struct {
     275             :       /* This is effectively a pointer to the last node in the DAG for
     276             :          this pubkey, one for each staging lane.
     277             :          EDGE_IS_LAST(FOLLOW_EDGE(base, last_reference[i])) is non-zero.
     278             :          */
     279             :       edge_t last_reference[4];
     280             : 
     281             :     }; /* When in the ACTIVE state */
     282             :     struct {
     283             :       uint free_ll_next;
     284             :       uint free_ll_prev;
     285             :       /* 8 bytes of padding here */
     286             :     }; /* When not in the ACTIVE state, used by the free_acct_dlist */
     287             :   };
     288             :   /* flags: a combination of ACCT_INFO_FLAG_* bitfields above.  Used
     289             :      when ACTIVE. */
     290             :   uint flags:8;
     291             : 
     292             : 
     293             :   /* We want to dispatch the READY transactions in an order that
     294             :      maximizes parallelism, but we also want to be able to start
     295             :      dispatching transactions decently well before we have the full
     296             :      conflict graph.  We can do that because we know that contentious
     297             :      accounts tend to stay contentious and uncontentious accounts tend
     298             :      to stay uncontentious.
     299             : 
     300             :      To accomplish this, we maintain a special EMA.  Let x_i be 1 if
     301             :      transaction i references it and 0 if not.  If we squint and assume
     302             :      the transactions are independent and all drawn from some
     303             :      distribution (which is not true, but that's why we squint), an EMA
     304             :      of x_i estimates the probability that the next transaction
     305             :      references this account.  How we use this value is detailed later.
     306             : 
     307             :      We can't update this value for every pubkey for each transaction,
     308             :      so we maintain it in a lazy way, by applying updates only when we
     309             :      need to read the value, which also happens to be every time we want
     310             :      to add a 1 value to the EMA.  We then just need to maintain the
     311             :      last index i at which x_i was updated and the current value.  We
     312             :      only have 24 bits for the index, which means that we can't maintain
     313             :      it in a fork-aware way, which doesn't seem like a problem.  Also,
     314             :      it's possible it can overflow, and that can result in an incorrect
     315             :      value, but that means the account is only referenced ~ 1/2^24
     316             :      transactions, which is also fine.
     317             : 
     318             :      last_ref is in the domain of global_inserted_txn_cnt.  ema_refs is
     319             :      in [0, 1].  Both fields are used in ACTIVE and CACHED.  Maintaining
     320             :      these fields is actually the main reason CACHED exists. */
     321             :   uint   last_ref:24;
     322             :   float  ema_refs;
     323             : };
     324             : typedef struct acct_info acct_info_t;
     325             : 
     326             : FD_STATIC_ASSERT( sizeof(acct_info_t)==64UL, acct_info_t );
     327             : 
     328             : /* For the acct_map and the free_acct_map */
     329             : #define MAP_NAME          acct_map
     330     1729506 : #define MAP_ELE_T         acct_info_t
     331   119902212 : #define MAP_IDX_T         uint
     332             : #define MAP_KEY_T         fd_acct_addr_t
     333   226145622 : #define MAP_KEY_HASH(k,s) fd_hash( (s), (k)->b, 32UL )
     334   113022618 : #define MAP_KEY_EQ(k0,k1) (!memcmp( (k0)->b, (k1)->b, 32UL ))
     335             : #define MAP_OPTIMIZE_RANDOM_ACCESS_REMOVAL 1
     336             : #include "../../util/tmpl/fd_map_chain.c"
     337             : 
     338             : 
     339             : #define DLIST_IDX_T uint
     340    28268700 : #define DLIST_PREV  free_ll_prev
     341    28268700 : #define DLIST_NEXT  free_ll_next
     342             : #define DLIST_NAME  free_dlist
     343             : #define DLIST_ELE_T acct_info_t
     344             : #include "../../util/tmpl/fd_dlist.c"
     345             : 
     346             : 
     347             : struct pending_prq_ele {
     348             :   /* lower score means should be scheduled sooner.  Integer part has a
     349             :      special meaning as explained above. */
     350             :   float score;
     351             : 
     352             :   uint linear_block_number;
     353             :   uint txn_idx;
     354             : 
     355             :   /* It seems like we should be able to get this whole struct in 8
     356             :      bytes, but we're a few bits over.
     357             : 
     358             :      23 bits for txn_idx
     359             :      16 bit for linear_block_number
     360             :      Then the integer part of the score needs to be at least 18 or 19
     361             :      bits, but we can't use a custom floating point number. */
     362             : };
     363             : 
     364             : typedef struct pending_prq_ele pending_prq_ele_t;
     365             : 
     366             : 
     367             : #define PRQ_NAME pending_prq
     368    13535220 : #define PRQ_T    pending_prq_ele_t
     369             : #define PRQ_EXPLICIT_TIMEOUT 0
     370             : /* returns 1 if x is strictly after y */
     371    12069504 : #define PRQ_AFTER(x,y) (__extension__( {                                            \
     372    12069504 :             int cmp0 = (int)(x).linear_block_number - (int)(y).linear_block_number; \
     373    12069504 :             fd_int_if( cmp0!=0, cmp0>0, (x).score>(y).score );                      \
     374    12069504 :             }))
     375             : #include "../../util/tmpl/fd_prq.c"
     376             : 
     377             : /* fd_rdisp_blockinfo_t maintains a little metadata about transactions for each
     378             :    slot.  */
     379             : struct fd_rdisp_blockinfo {
     380             :   FD_RDISP_BLOCK_TAG_T block;
     381             :   uint  linear_block_number;
     382             : 
     383             :   uint  insert_ready:1;
     384             :   uint  schedule_ready:1;
     385             :   uint  staged:1;
     386             :   uint  staging_lane:2; /* ignored if staged==0 */
     387             :   uint  last_insert_was_serializing:1;
     388             : 
     389             :   uint inserted_cnt;
     390             :   uint dispatched_cnt;
     391             :   uint completed_cnt;
     392             :   uint last_serializing;
     393             : 
     394             :   uint map_chain_next;
     395             :   uint ll_next;
     396             :   unstaged_txn_ll_t ll[ 1 ]; /* used only when unstaged */
     397             :   zombie_dlist_t    zombie_list[ 1 ];
     398             : };
     399             : typedef struct fd_rdisp_blockinfo fd_rdisp_blockinfo_t;
     400             : 
     401             : #define POOL_NAME     block_pool
     402          63 : #define POOL_T        fd_rdisp_blockinfo_t
     403             : #define POOL_IDX_T    uint
     404      988128 : #define POOL_NEXT     ll_next
     405             : #define POOL_SENTINEL 1
     406             : #include "../../util/tmpl/fd_pool.c"
     407             : 
     408             : #define MAP_NAME  block_map
     409             : #define MAP_ELE_T fd_rdisp_blockinfo_t
     410             : #define MAP_KEY_T FD_RDISP_BLOCK_TAG_T
     411      395694 : #define MAP_KEY   block
     412     4048962 : #define MAP_NEXT  map_chain_next
     413     5308065 : #define MAP_IDX_T uint
     414             : #include "../../util/tmpl/fd_map_chain.c"
     415             : 
     416             : #define SLIST_NAME  block_slist
     417             : #define SLIST_ELE_T fd_rdisp_blockinfo_t
     418      395679 : #define SLIST_IDX_T uint
     419      791361 : #define SLIST_NEXT  ll_next
     420             : #include "../../util/tmpl/fd_slist.c"
     421             : 
     422             : struct fd_rdisp_unstaged {
     423             :   FD_RDISP_BLOCK_TAG_T block;
     424             : 
     425             :   uint writable_cnt;
     426             :   uint readonly_cnt;
     427             :   fd_acct_addr_t keys[MAX_ACCT_PER_TXN];
     428             : };
     429             : typedef struct fd_rdisp_unstaged fd_rdisp_unstaged_t;
     430             : 
     431             : typedef struct {
     432             :   pending_prq_ele_t * pending;
     433             :   ulong               linear_block_number;
     434             :   block_slist_t       block_ll[1];
     435             :   ulong               inserted_cnt;
     436             :   ulong               dispatched_cnt;
     437             :   ulong               completed_cnt;
     438             : } per_lane_info_t;
     439             : 
     440             : /* We maintain two maps from pubkeys to acct_info_t.  The first one is
     441             :    the main acct_map, just called acct_map.  All pubkeys in this map
     442             :    have >0 references in the one of the staging lane DAGs.  When an
     443             :    account goes to 0 references, it gets removed from main map_chain and
     444             :    moved to free map_chain, called free_acct_map.  The free_acct_map
     445             :    exists to maintain the reference count EMA information lazily.
     446             :    Unless we need the acct_info_t for something in the DAG, we might as
     447             :    well maintain the EMA info.
     448             : 
     449             :    When we start up, all the acct_info_t structs are in the
     450             :    free_acct_dlist.  Whenever something is added to the free_acct_map,
     451             :    it's also added to the tail of the free_acct_dlist.  When we need an
     452             :    acct_info_t that's not in the free_acct_map, we pop the head of the
     453             :    free_acct_dlist.  In general, the free_acct_dlist contains everything
     454             :    in the free_acct_map, potentially plus some elements that have never
     455             :    been used; all acct_info_t objects are in exactly one of the main
     456             :    acct_map and the free_acct_dlist (not free_acct_map).  See
     457             :    acct_info_t for more information about this. */
     458             : 
     459             : struct fd_rdisp {
     460             :   ulong depth;
     461             :   ulong block_depth;
     462             : 
     463             :   ulong global_insert_cnt;
     464             :   ulong unstaged_lblk_num;
     465             : 
     466             :   /* pool: an fd_pool, indexed [0, depth+1), with 0 being a sentinel */
     467             :   fd_rdisp_txn_t       * pool;
     468             :   fd_rdisp_unstaged_t  * unstaged; /* parallel to pool with additional info */
     469             : 
     470             :   block_map_t          * blockmap; /* map chain */
     471             :   fd_rdisp_blockinfo_t * block_pool;
     472             : 
     473             :   int free_lanes; /* a bitmask */
     474             :   per_lane_info_t lanes[4];
     475             : 
     476             :   acct_map_t   * acct_map;
     477             :   acct_map_t   * free_acct_map;
     478             :   /* acct_pool is not an fd_pool, but is just a flat array, since we
     479             :      don't need to acquire and release from it because of the dlist. */
     480             :   acct_info_t  * acct_pool;
     481             :   free_dlist_t   free_acct_dlist[1];
     482             : };
     483             : 
     484             : typedef struct fd_rdisp fd_rdisp_t;
     485             : 
     486             : 
     487             :  #define ACCT_ITER_TO_PTR( iter ) (__extension__( {                                             \
     488             :        ulong __idx = fd_txn_acct_iter_idx( iter );                                              \
     489             :        fd_ptr_if( __idx<fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_IMM ), accts, alt_adj )+__idx; \
     490             :        }))
     491             : 
     492             : 
     493         507 : ulong fd_rdisp_align( void ) { return 128UL; }
     494             : 
     495             : ulong
     496             : fd_rdisp_footprint( ulong depth,
     497          45 :                     ulong block_depth ) {
     498          45 :   if( FD_UNLIKELY( (depth>FD_RDISP_MAX_DEPTH)             | (depth<2UL) |
     499          45 :                    (block_depth>FD_RDISP_MAX_BLOCK_DEPTH) | (block_depth<4UL) ) ) return 0UL;
     500             : 
     501          45 :   ulong chain_cnt      = block_map_chain_cnt_est( block_depth );
     502          45 :   ulong acct_depth     = depth*MAX_ACCT_PER_TXN;
     503          45 :   ulong acct_chain_cnt = acct_map_chain_cnt_est( acct_depth );
     504             : 
     505          45 :   ulong l = FD_LAYOUT_INIT;
     506          45 :   l = FD_LAYOUT_APPEND( l, fd_rdisp_align(),             sizeof(fd_rdisp_t)                              );
     507          45 :   l = FD_LAYOUT_APPEND( l, pool_align(),                 pool_footprint              ( depth+1UL       ) ); /* pool       */
     508          45 :   l = FD_LAYOUT_APPEND( l, alignof(fd_rdisp_unstaged_t), sizeof(fd_rdisp_unstaged_t)*( depth+1UL       ) ); /* unstaged   */
     509          45 :   l = FD_LAYOUT_APPEND( l, block_map_align(),            block_map_footprint         ( chain_cnt       ) ); /* blockmap   */
     510          45 :   l = FD_LAYOUT_APPEND( l, block_pool_align(),           block_pool_footprint        ( block_depth+1UL ) ); /* block_pool */
     511          45 :   l = FD_LAYOUT_APPEND( l, pending_prq_align(),          4UL*pending_prq_footprint   ( depth           ) ); /* pending    */
     512          45 :   l = FD_LAYOUT_APPEND( l, acct_map_align(),             acct_map_footprint          ( acct_chain_cnt  ) ); /* acct_map   */
     513          45 :   l = FD_LAYOUT_APPEND( l, acct_map_align(),             acct_map_footprint          ( acct_chain_cnt  ) ); /* free_acct_map */
     514          45 :   l = FD_LAYOUT_APPEND( l, alignof(acct_info_t),         (acct_depth+1UL)*sizeof(acct_info_t)            ); /* acct_pool  */
     515          45 :   return FD_LAYOUT_FINI( l, fd_rdisp_align() );
     516          45 : }
     517             : 
     518             : void *
     519             : fd_rdisp_new( void * mem,
     520             :               ulong  depth,
     521             :               ulong  block_depth,
     522          21 :               ulong  seed ) {
     523          21 :   if( FD_UNLIKELY( (depth>FD_RDISP_MAX_DEPTH)             | (depth<2UL) |
     524          21 :                    (block_depth>FD_RDISP_MAX_BLOCK_DEPTH) | (block_depth<4UL) ) ) return NULL;
     525             : 
     526          21 :   ulong chain_cnt      = block_map_chain_cnt_est( block_depth );
     527          21 :   ulong acct_depth     = depth*MAX_ACCT_PER_TXN;
     528          21 :   ulong acct_chain_cnt = acct_map_chain_cnt_est( acct_depth );
     529             : 
     530          21 :   FD_SCRATCH_ALLOC_INIT( l, mem );
     531          21 :   fd_rdisp_t * disp   = FD_SCRATCH_ALLOC_APPEND( l, fd_rdisp_align(),             sizeof(fd_rdisp_t)                              );
     532          21 :   void  * _pool       = FD_SCRATCH_ALLOC_APPEND( l, pool_align(),                 pool_footprint              ( depth+1UL       ) );
     533          21 :   void  * _unstaged   = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_rdisp_unstaged_t), sizeof(fd_rdisp_unstaged_t)*( depth+1UL       ) );
     534          21 :   void  * _bmap       = FD_SCRATCH_ALLOC_APPEND( l, block_map_align(),            block_map_footprint         ( chain_cnt       ) );
     535          21 :   void  * _bpool      = FD_SCRATCH_ALLOC_APPEND( l, block_pool_align(),           block_pool_footprint        ( block_depth+1UL ) );
     536          21 :   uchar * _pending    = FD_SCRATCH_ALLOC_APPEND( l, pending_prq_align(),          4UL*pending_prq_footprint   ( depth           ) );
     537          21 :   void  * _acct_map   = FD_SCRATCH_ALLOC_APPEND( l, acct_map_align(),             acct_map_footprint          ( acct_chain_cnt  ) );
     538          21 :   void  * _freea_map  = FD_SCRATCH_ALLOC_APPEND( l, acct_map_align(),             acct_map_footprint          ( acct_chain_cnt  ) );
     539          21 :   acct_info_t * apool = FD_SCRATCH_ALLOC_APPEND( l, alignof(acct_info_t),         (acct_depth+1UL)*sizeof(acct_info_t)            );
     540          21 :   FD_SCRATCH_ALLOC_FINI( l, fd_rdisp_align() );
     541             : 
     542          21 :   disp->depth             = depth;
     543          21 :   disp->block_depth       = block_depth;
     544          21 :   disp->global_insert_cnt = 0UL;
     545          21 :   disp->unstaged_lblk_num = 0UL;
     546             : 
     547          21 :   fd_rdisp_txn_t * temp_pool_join = pool_join( pool_new( _pool, depth+1UL ) );
     548      203991 :   for( ulong i=0UL; i<depth+1UL; i++ ) temp_pool_join[ i ].in_degree = IN_DEGREE_FREE;
     549          21 :   pool_leave( temp_pool_join );
     550             : 
     551          21 :   memset( _unstaged, '\0', sizeof(fd_rdisp_unstaged_t)*(depth+1UL) );
     552             : 
     553          21 :   block_map_new ( _bmap,  chain_cnt, seed );
     554          21 :   block_pool_new( _bpool, block_depth+1UL );
     555             : 
     556          21 :   fd_rdisp_blockinfo_t * bpool_temp_join = block_pool_join( _bpool );
     557      196767 :   for( ulong i=0UL; i<block_depth+1UL; i++ ) zombie_dlist_new( bpool_temp_join[ i ].zombie_list );
     558          21 :   block_pool_leave( bpool_temp_join );
     559             : 
     560          21 :   disp->free_lanes = 0xF;
     561         105 :   for( ulong i=0UL; i<4UL; i++ ) {
     562          84 :     pending_prq_new( _pending, depth );
     563          84 :     _pending += pending_prq_footprint( depth );
     564             : 
     565          84 :     disp->lanes[i].linear_block_number = 0UL;
     566          84 :     disp->lanes[i].inserted_cnt        = 0U;
     567          84 :     disp->lanes[i].dispatched_cnt      = 0U;
     568          84 :     disp->lanes[i].completed_cnt       = 0U;
     569          84 :   }
     570             : 
     571          21 :   acct_map_new( _acct_map,  acct_chain_cnt, fd_ulong_hash( seed+1UL ) );
     572          21 :   acct_map_new( _freea_map, acct_chain_cnt, fd_ulong_hash( seed+2UL ) );
     573             : 
     574          21 :   free_dlist_t * temp_join = free_dlist_join( free_dlist_new( disp->free_acct_dlist ) );
     575    26105493 :   for( ulong i=1UL; i<acct_depth+1UL; i++ ) {
     576    26105472 :     apool[ i ].next = apool[ i ].prev = 0U;
     577    26105472 :     free_dlist_idx_push_tail( disp->free_acct_dlist, i, apool );
     578    26105472 :   }
     579          21 :   free_dlist_leave( temp_join );
     580             : 
     581          21 :   return disp;
     582          21 : }
     583             : 
     584             : fd_rdisp_t *
     585          21 : fd_rdisp_join( void * mem ) {
     586          21 :   fd_rdisp_t * disp = (fd_rdisp_t *)mem;
     587             : 
     588          21 :   ulong depth          = disp->depth;
     589          21 :   ulong block_depth    = disp->block_depth;
     590          21 :   ulong chain_cnt      = block_map_chain_cnt_est( block_depth );
     591          21 :   ulong acct_depth     = depth*MAX_ACCT_PER_TXN;
     592          21 :   ulong acct_chain_cnt = acct_map_chain_cnt_est( acct_depth );
     593             : 
     594          21 :   FD_SCRATCH_ALLOC_INIT( l, mem );
     595          21 :   /*                 */ FD_SCRATCH_ALLOC_APPEND( l, fd_rdisp_align(),             sizeof(fd_rdisp_t)                              );
     596          21 :   void  * _pool       = FD_SCRATCH_ALLOC_APPEND( l, pool_align(),                 pool_footprint              ( depth+1UL       ) );
     597          21 :   void  * _unstaged   = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_rdisp_unstaged_t), sizeof(fd_rdisp_unstaged_t)*( depth+1UL       ) );
     598          21 :   void  * _bmap       = FD_SCRATCH_ALLOC_APPEND( l, block_map_align(),            block_map_footprint         ( chain_cnt       ) );
     599          21 :   void  * _bpool      = FD_SCRATCH_ALLOC_APPEND( l, block_pool_align(),           block_pool_footprint        ( block_depth+1UL ) );
     600          21 :   uchar * _pending    = FD_SCRATCH_ALLOC_APPEND( l, pending_prq_align(),          4UL*pending_prq_footprint   ( depth           ) );
     601          21 :   void  * _acct_map   = FD_SCRATCH_ALLOC_APPEND( l, acct_map_align(),             acct_map_footprint          ( acct_chain_cnt  ) );
     602          21 :   void  * _freea_map  = FD_SCRATCH_ALLOC_APPEND( l, acct_map_align(),             acct_map_footprint          ( acct_chain_cnt  ) );
     603          21 :   acct_info_t * apool = FD_SCRATCH_ALLOC_APPEND( l, alignof(acct_info_t),         (acct_depth+1UL)*sizeof(acct_info_t)            );
     604          21 :   FD_SCRATCH_ALLOC_FINI( l, fd_rdisp_align() );
     605             : 
     606          21 :   disp->pool       = pool_join( _pool );
     607          21 :   disp->unstaged   = (fd_rdisp_unstaged_t *)_unstaged;
     608          21 :   disp->blockmap   = block_map_join( _bmap );
     609          21 :   disp->block_pool = block_pool_join( _bpool );
     610             : 
     611      196767 :   for( ulong i=0UL; i<block_depth+1UL; i++ ) zombie_dlist_join( disp->block_pool[ i ].zombie_list );
     612             : 
     613         105 :   for( ulong i=0UL; i<4UL; i++ ) {
     614          84 :     disp->lanes[i].pending = pending_prq_join( _pending );
     615          84 :     _pending += pending_prq_footprint( depth );
     616          84 :   }
     617             : 
     618          21 :   disp->acct_map      = acct_map_join( _acct_map );
     619          21 :   disp->free_acct_map = acct_map_join( _freea_map );
     620          21 :   disp->acct_pool     = apool;
     621          21 :   free_dlist_join( disp->free_acct_dlist );
     622             : 
     623          21 :   return disp;
     624          21 : }
     625             : 
     626             : static inline void
     627             : free_lane( fd_rdisp_t * disp,
     628        2460 :            ulong        staging_lane ) {
     629        2460 :   disp->free_lanes |= 1<<staging_lane;
     630        2460 :   per_lane_info_t * l = disp->lanes+staging_lane;
     631        2460 :   FD_TEST( pending_prq_cnt( l->pending )==0UL );
     632        2460 :   l->linear_block_number = 0UL;
     633        2460 :   l->inserted_cnt   = 0UL;
     634        2460 :   l->dispatched_cnt = 0UL;
     635        2460 :   l->completed_cnt  = 0UL;
     636        2460 :   block_slist_delete( block_slist_leave( l->block_ll ) );
     637        2460 : }
     638             : 
     639             : static inline void
     640             : alloc_lane( fd_rdisp_t * disp,
     641        2463 :             ulong        staging_lane ) {
     642        2463 :   disp->free_lanes &= ~(1<<staging_lane);
     643        2463 :   block_slist_join( block_slist_new( disp->lanes[staging_lane].block_ll ) );
     644        2463 : }
     645             : 
     646             : ulong
     647             : fd_rdisp_suggest_staging_lane( fd_rdisp_t const *   disp,
     648             :                                FD_RDISP_BLOCK_TAG_T parent_block,
     649           0 :                                int                  duplicate ) {
     650             : 
     651             :   /* 1. If it's a duplicate, suggest FD_RDISP_UNSTAGED */
     652           0 :   if( FD_UNLIKELY( duplicate ) ) return FD_RDISP_UNSTAGED;
     653             : 
     654             :   /* 2. If parent is the last block in any existing staging lane, suggest
     655             :         that lane */
     656           0 :   fd_rdisp_blockinfo_t const * block_pool = disp->block_pool;
     657           0 :   fd_rdisp_blockinfo_t const * block = block_map_ele_query_const( disp->blockmap, &parent_block, NULL, block_pool );
     658           0 :   if( FD_LIKELY( block && block->insert_ready && block->staged ) ) return block->staging_lane;
     659             : 
     660             :   /* 3. If there is at least one free lane, suggest a free lane */
     661           0 :   if( FD_LIKELY( disp->free_lanes!=0 ) ) return (ulong)fd_uint_find_lsb( (uint)disp->free_lanes );
     662             : 
     663             :   /* 4. Else, suggest FD_RDISP_UNSTAGED */
     664           0 :   return FD_RDISP_UNSTAGED;
     665           0 : }
     666             : 
     667             : int
     668             : fd_rdisp_add_block( fd_rdisp_t          * disp,
     669             :                    FD_RDISP_BLOCK_TAG_T   new_block,
     670      395694 :                    ulong                  staging_lane ) {
     671      395694 :   fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
     672             : 
     673      395694 :   if( FD_UNLIKELY( !block_pool_free( block_pool )                                                             ) ) return -1;
     674      395694 :   if( FD_UNLIKELY(  ULONG_MAX!=block_map_idx_query_const( disp->blockmap, &new_block, ULONG_MAX, block_pool ) ) ) return -1;
     675      395688 :   fd_rdisp_blockinfo_t * block = block_pool_ele_acquire( block_pool );
     676      395688 :   block->block = new_block;
     677      395688 :   block_map_ele_insert( disp->blockmap, block, block_pool );
     678             : 
     679      395688 :   block->insert_ready = 1;
     680      395688 :   block->staged       = staging_lane!=FD_RDISP_UNSTAGED;
     681      395688 :   block->staging_lane = (uint)(staging_lane & 0x3UL);
     682      395688 :   block->last_insert_was_serializing = 0U;
     683             : 
     684      395688 :   block->inserted_cnt     = 0U;
     685      395688 :   block->dispatched_cnt   = 0U;
     686      395688 :   block->completed_cnt    = 0U;
     687      395688 :   block->last_serializing = 0U;
     688             : 
     689      395688 :   if( FD_UNLIKELY( staging_lane==FD_RDISP_UNSTAGED ) ) {
     690          18 :     block->schedule_ready = 1;
     691          18 :     block->linear_block_number = (uint)disp->unstaged_lblk_num++;
     692          18 :     unstaged_txn_ll_join( unstaged_txn_ll_new( block->ll ) );
     693      395670 :   } else {
     694      395670 :     block->linear_block_number = (uint)disp->lanes[staging_lane].linear_block_number++;
     695      395670 :     block->schedule_ready      = (uint)(1 & (disp->free_lanes >> staging_lane));
     696             : 
     697      395670 :     if( FD_LIKELY( disp->free_lanes & (1<<staging_lane) ) ) alloc_lane( disp, staging_lane );
     698             : 
     699      395670 :     block_slist_t * sl = disp->lanes[staging_lane].block_ll;
     700      395670 :     if( FD_LIKELY( !block_slist_is_empty( sl, block_pool ) ) )  block_slist_ele_peek_tail( sl, block_pool )->insert_ready = 0;
     701      395670 :     block_slist_ele_push_tail( sl, block, block_pool );
     702      395670 :   }
     703      395688 :   return 0;
     704      395694 : }
     705             : 
     706             : 
     707             : 
     708             : int
     709             : fd_rdisp_remove_block( fd_rdisp_t          * disp,
     710      395670 :                        FD_RDISP_BLOCK_TAG_T  block_tag ) {
     711      395670 :   fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
     712             : 
     713      395670 :   fd_rdisp_blockinfo_t * block   = block_map_ele_query( disp->blockmap, &block_tag, NULL, block_pool );
     714      395670 :   if( FD_UNLIKELY( block==NULL ) ) return -1;
     715             : 
     716      395661 :   FD_TEST( block->schedule_ready );
     717      395661 :   FD_TEST( block->completed_cnt==block->inserted_cnt );
     718      395661 :   FD_TEST( zombie_dlist_is_empty( block->zombie_list, disp->pool ) );
     719             : 
     720      395661 :   if( FD_LIKELY( block->staged ) ) {
     721      395652 :     ulong staging_lane = (ulong)block->staging_lane;
     722      395652 :     block_slist_t * sl = disp->lanes[staging_lane].block_ll;
     723             : 
     724      395652 :     FD_TEST( block==block_slist_ele_peek_head( sl, block_pool ) );
     725      395652 :     block_slist_idx_pop_head( sl, block_pool );
     726      395652 :     if( FD_LIKELY( !block_slist_is_empty( sl, block_pool ) ) ) block_slist_ele_peek_head( sl, block_pool )->schedule_ready = 1;
     727        2436 :     else                                                       free_lane( disp, staging_lane );
     728      395652 :   } else {
     729           9 :     unstaged_txn_ll_delete( unstaged_txn_ll_leave( block->ll ) );
     730           9 :   }
     731      395661 :   block_pool_idx_release( block_pool, block_map_idx_remove( disp->blockmap, &block_tag, ULONG_MAX, block_pool ) );
     732             : 
     733      395661 :   return 0;
     734      395661 : }
     735             : 
     736             : 
     737             : int
     738             : fd_rdisp_abandon_block( fd_rdisp_t          * disp,
     739          15 :                         FD_RDISP_BLOCK_TAG_T  block_tag ) {
     740          15 :   fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
     741             : 
     742          15 :   fd_rdisp_blockinfo_t * block   = block_map_ele_query( disp->blockmap, &block_tag, NULL, disp->block_pool );
     743          15 :   if( FD_UNLIKELY( block==NULL ) ) return -1;
     744             : 
     745          12 :   FD_TEST( block->schedule_ready );
     746          12 :   FD_TEST( block->dispatched_cnt==block->completed_cnt ); /* TODO: remove this when it can call complete properly */
     747          21 :   while( block->completed_cnt<block->inserted_cnt ) {
     748             :     /* because there is nothing DISPATCHED, there has to be something
     749             :        READY */
     750           9 :     ulong txn = fd_rdisp_get_next_ready( disp, block_tag );
     751           9 :     FD_TEST( txn );
     752           9 :     fd_rdisp_complete_txn( disp, txn, 1 );
     753           9 :   }
     754          12 :   while( !zombie_dlist_is_empty( block->zombie_list, disp->pool ) ) {
     755           0 :     fd_rdisp_complete_txn( disp, zombie_dlist_idx_pop_head( block->zombie_list, disp->pool ), 1 );
     756           0 :   }
     757             : 
     758          12 :   if( FD_LIKELY( block->staged ) ) {
     759          12 :     ulong staging_lane = (ulong)block->staging_lane;
     760          12 :     block_slist_t * sl = disp->lanes[staging_lane].block_ll;
     761             : 
     762          12 :     FD_TEST( block==block_slist_ele_peek_head( sl, block_pool ) );
     763          12 :     block_slist_idx_pop_head( sl, block_pool );
     764          12 :     if( FD_LIKELY( !block_slist_is_empty( sl, block_pool ) ) ) block_slist_ele_peek_head( sl, block_pool )->schedule_ready = 1;
     765           9 :     else                                                       free_lane( disp, staging_lane );
     766          12 :   } else {
     767           0 :     unstaged_txn_ll_delete( unstaged_txn_ll_leave( block->ll ) );
     768           0 :   }
     769          12 :   block_pool_idx_release( disp->block_pool, block_map_idx_remove( disp->blockmap, &block_tag, ULONG_MAX, disp->block_pool ) );
     770             : 
     771          12 :   return 0;
     772          12 : }
     773             : 
     774             : static void
     775             : add_edges( fd_rdisp_t           * disp,
     776             :            fd_rdisp_txn_t       * ele,
     777             :            fd_acct_addr_t const * addr,
     778             :            ulong                  addr_cnt,
     779             :            uint                   staging_lane,
     780             :            int                    writable,
     781             :            int                    update_score );
     782             : /* updates in_degree, edge_cnt_etc */
     783             : 
     784             : int
     785             : fd_rdisp_promote_block( fd_rdisp_t *          disp,
     786             :                         FD_RDISP_BLOCK_TAG_T  block_tag,
     787          12 :                         ulong                 staging_lane ) {
     788          12 :   fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
     789          12 :   per_lane_info_t * lane = disp->lanes + staging_lane;
     790          12 :   block_slist_t * sl = lane->block_ll;
     791             : 
     792          12 :   fd_rdisp_blockinfo_t * block   = block_map_ele_query( disp->blockmap, &block_tag, NULL, block_pool );
     793          12 :   if( FD_UNLIKELY( block==NULL   ) ) return -1;
     794          12 :   if( FD_UNLIKELY( block->staged ) ) return -1;
     795             : 
     796          12 :   block->staged = 1;
     797          12 :   block->staging_lane = (uint)(staging_lane & 0x3);
     798          12 :   block->insert_ready = 1;
     799          12 :   block->schedule_ready = (uint)(1 & (disp->free_lanes >> staging_lane));
     800             : 
     801          12 :   if( FD_LIKELY( disp->free_lanes & (1<<staging_lane) ) ) alloc_lane( disp, staging_lane );
     802             : 
     803          12 :   if( FD_LIKELY( !block_slist_is_empty( sl, block_pool ) ) )  block_slist_ele_peek_tail( sl, block_pool )->insert_ready = 0;
     804          12 :   block_slist_ele_push_tail( sl, block, block_pool );
     805          12 :   uint linear_block_number = (uint)lane->linear_block_number++;
     806          12 :   block->linear_block_number = linear_block_number;
     807             : 
     808          12 :   unstaged_txn_ll_iter_t next;
     809          12 :   for( unstaged_txn_ll_iter_t iter = unstaged_txn_ll_iter_init( block->ll, disp->pool );
     810         330 :       !unstaged_txn_ll_iter_done( iter, block->ll, disp->pool );
     811         318 :       iter = next ) {
     812         318 :       next = unstaged_txn_ll_iter_next( iter, block->ll, disp->pool );
     813             : 
     814         318 :     fd_rdisp_txn_t      * ele = unstaged_txn_ll_iter_ele( iter, block->ll, disp->pool );
     815         318 :     fd_rdisp_unstaged_t * uns = disp->unstaged + unstaged_txn_ll_iter_idx( iter, block->ll, disp->pool );
     816         318 :     FD_TEST( ele->in_degree==IN_DEGREE_UNSTAGED );
     817             : 
     818         318 :     ele->in_degree    = 0U;
     819         318 :     ele->edge_cnt_etc = (uint)(-1); /* set w_cnt_1 to -1 */
     820             : 
     821         318 :     add_edges( disp, ele, uns->keys,                   uns->writable_cnt, (uint)staging_lane, 1, 0 );
     822         318 :     add_edges( disp, ele, uns->keys+uns->writable_cnt, uns->readonly_cnt, (uint)staging_lane, 0, 0 );
     823             : 
     824         318 :     ele->edge_cnt_etc &= 0x3FFFU;
     825         318 :     ele->edge_cnt_etc |= (uint)staging_lane<<14;
     826         318 :     ele->edge_cnt_etc |= linear_block_number<<16;
     827             : 
     828         318 :     if( FD_UNLIKELY( ele->in_degree==0U ) ) {
     829           6 :       pending_prq_ele_t temp[1] = {{ .score = ele->score, .linear_block_number = linear_block_number, .txn_idx = (uint)(ele-disp->pool)}};
     830           6 :       pending_prq_insert( lane->pending, temp );
     831           6 :     }
     832         318 :   }
     833          12 :   unstaged_txn_ll_delete( unstaged_txn_ll_leave( block->ll ) );
     834             : 
     835          12 :   lane->inserted_cnt   += block->inserted_cnt;
     836          12 :   lane->dispatched_cnt += block->dispatched_cnt;
     837          12 :   lane->completed_cnt  += block->completed_cnt;
     838             : 
     839          12 :   return 0;
     840          12 : }
     841             : 
     842             : int
     843             : fd_rdisp_demote_block( fd_rdisp_t *          disp,
     844          15 :                        FD_RDISP_BLOCK_TAG_T  block_tag ) {
     845          15 :   fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
     846             : 
     847          15 :   fd_rdisp_blockinfo_t * block   = block_map_ele_query( disp->blockmap, &block_tag, NULL, block_pool );
     848          15 :   if( FD_UNLIKELY(  block==NULL           ) ) return -1;
     849          15 :   if( FD_UNLIKELY( !block->staged         ) ) return -1;
     850          15 :   if( FD_UNLIKELY( !block->schedule_ready ) ) return -1;
     851          15 :   if( FD_UNLIKELY(  block->completed_cnt!=block->inserted_cnt ) ) FD_LOG_ERR(( "demote_block called with non-empty block" ));
     852          15 :   ulong staging_lane = block->staging_lane;
     853          15 :   block->staged = 0;
     854             : 
     855          15 :   per_lane_info_t * lane = disp->lanes + staging_lane;
     856          15 :   block_slist_t * sl = lane->block_ll;
     857             : 
     858          15 :   lane->inserted_cnt   -= block->inserted_cnt;
     859          15 :   lane->dispatched_cnt -= block->dispatched_cnt;
     860          15 :   lane->completed_cnt  -= block->completed_cnt;
     861             : 
     862          15 :   block->linear_block_number = (uint)disp->unstaged_lblk_num++;
     863             : 
     864             :   /* staged and schedule_ready means it must be the head of the staging lane */
     865          15 :   FD_TEST( block_slist_ele_peek_head( sl, block_pool )==block );
     866          15 :   block_slist_idx_pop_head( sl, block_pool );
     867             : 
     868          15 :   unstaged_txn_ll_join( unstaged_txn_ll_new( block->ll ) );
     869             : 
     870          15 :   if( FD_LIKELY( !block_slist_is_empty( sl, block_pool ) ) ) block_slist_ele_peek_head( sl, block_pool )->schedule_ready = 1;
     871          15 :   else                                                       free_lane( disp, staging_lane );
     872          15 :   return 0;
     873          15 : }
     874             : 
     875             : int
     876             : fd_rdisp_rekey_block( fd_rdisp_t *           disp,
     877             :                       FD_RDISP_BLOCK_TAG_T   new_tag,
     878           6 :                       FD_RDISP_BLOCK_TAG_T   old_tag ) {
     879           6 :   fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
     880             : 
     881           6 :   if( FD_UNLIKELY(        NULL!= block_map_ele_query_const( disp->blockmap, &new_tag, NULL, block_pool ) ) ) return -1;
     882           6 :   fd_rdisp_blockinfo_t * block = block_map_ele_query      ( disp->blockmap, &old_tag, NULL, block_pool );
     883           6 :   if( FD_UNLIKELY(        NULL== block ) )                                                                   return -1;
     884             : 
     885           6 :   block_map_idx_remove( disp->blockmap, &old_tag, ULONG_MAX, block_pool );
     886           6 :   block->block = new_tag;
     887           6 :   block_map_ele_insert( disp->blockmap, block, block_pool );
     888           6 :   return 0;
     889           6 : }
     890             : 
     891             : 
     892             : /* "Registers" a reference to the account in info at transaction
     893             :    global_insert_cnt.  Returns the value of the EMA, which is an
     894             :    estimate of the probability that the next transaction also references
     895             :    the account.  This value does not matter for correctness, which is
     896             :    why floating point arithmetic is okay. */
     897             : static inline float
     898             : update_ema( acct_info_t * info,
     899     2960016 :             ulong         global_insert_cnt ) {
     900             : #if FD_RDISP_DISABLE_EMA
     901             :   (void)info;
     902             :   (void)global_insert_cnt;
     903             :   return 0.0f;
     904             : #else
     905     5920032 : #define ALPHA 0.005f
     906             :   /* The normal EMA update equation is
     907             :                 e_i = (alpha) * x_i + (1-alpha)*e_{i-1},
     908             :      where alpha is a constant in (0,1).  Let L be the last reference of
     909             :      the account, and G be the current global_insert_cnt value.  We know
     910             :      that e_L = ema_refs, and that for i in [L+1, G), x_i = 0.
     911             :      That means
     912             :                e_{G-1} =         (1-alpha)^(G-L-1) * ema_refs
     913             :                e_G     = alpha + (1-alpha)^(G-L)   * ema_refs
     914             :    */
     915             :   /* last_ref only captures the low 24 bits, so we guess its the highest
     916             :      value for them that would still make it less than
     917             :      global_insert_cnt.  Turns out, we can calculate that with just an
     918             :      AND.  We need to make sure that in the rare case it is actually
     919             :      2^24 away, we don't use a delta of 0.  We also want to make sure
     920             :      that even with inexact float math, we never get to 1.0, so we bias
     921             :      the exponent up slightly. */
     922     2960016 :   ulong last_ref = (ulong)info->last_ref;
     923     2960016 :   ulong delta = fd_ulong_max( (global_insert_cnt - last_ref) & 0xFFFFFFUL, 1UL );
     924     2960016 :   float ema_refs = ALPHA + powf( 1.0f-ALPHA, 0.05f+(float)delta ) * info->ema_refs;
     925             : 
     926     2960016 :   info->ema_refs = ema_refs;
     927     2960016 :   info->last_ref = (uint)(global_insert_cnt & 0xFFFFFFUL);
     928     2960016 : #undef ALPHA
     929             : 
     930     2960016 :   return ema_refs;
     931     2960016 : #endif
     932     2960016 : }
     933             : 
     934             : static void
     935             : add_edges( fd_rdisp_t           * disp,
     936             :            fd_rdisp_txn_t       * ele,
     937             :            fd_acct_addr_t const * addrs,
     938             :            ulong                  addr_cnt,
     939             :            uint                   lane,
     940             :            int                    writable,
     941     3315708 :            int                    update_score ) {
     942             :   /* When we first start, the low 14 bits are 0x3FFF, so that gives us
     943             :      w_cnt==0, r_cnt==0, as desired.  Then otherwise, the low 7 bits are
     944             :      w_cnt_1, so adding 1 gives w_cnt.  If we've actually added 128
     945             :      writable accounts, then r_cnt will be incorrect, but then we
     946             :      actually can't add any more readonly accounts, so the function call
     947             :      will be a no-op. */
     948             : 
     949     3315708 :   ulong w_cnt =  (ele->edge_cnt_etc + 1U)     & 0x7FU;
     950     3315708 :   ulong r_cnt = ((ele->edge_cnt_etc + 1U)>>7) & 0x7FU;
     951     3315708 :   ulong acct_idx = w_cnt +     r_cnt;
     952     3315708 :   ulong edge_idx = w_cnt + 3UL*r_cnt;
     953             : 
     954     6237978 :   for( ulong i=0UL; i<addr_cnt; i++ ) {
     955     2922270 :     fd_acct_addr_t const * addr = addrs+i;
     956     2922270 :     acct_info_t * ai = NULL;
     957             : 
     958             :     /* Step 1: lookup the pubkey */
     959     2922270 :     ulong idx = acct_map_idx_query( disp->acct_map, addr, ULONG_MAX, disp->acct_pool );
     960     2922270 :     if( FD_UNLIKELY( idx==ULONG_MAX ) ) {
     961     1081614 :       idx = acct_map_idx_query( disp->free_acct_map, addr, ULONG_MAX, disp->acct_pool );
     962     1081614 :       if( FD_UNLIKELY( idx==ULONG_MAX ) ) {
     963             :         /* The acct pool is sized so that the list cannot be empty at this
     964             :            point.  However, the element at the head might be the free
     965             :            map with a different pubkey. */
     966      597945 :         idx = free_dlist_idx_peek_head( disp->free_acct_dlist, disp->acct_pool );
     967      597945 :         ai = disp->acct_pool+idx;
     968             : 
     969             :         /* CACHED -> FREE transition */
     970      597945 :         if( FD_LIKELY( ai->next!=0U ) ) {
     971      164223 :           acct_map_idx_remove_fast( disp->free_acct_map, idx, disp->acct_pool );
     972      164223 :         }
     973             : 
     974             :         /* FREE -> ACTIVE transition */
     975      597945 :         ai->key      = *addr;
     976      597945 :         ai->flags    = 0U;
     977      597945 :         ai->last_ref = 0U;
     978      597945 :         ai->ema_refs = 0.0f;
     979      597945 :       } else {
     980             :         /* CACHED -> ACTIVE transition */
     981      483669 :         ai = disp->acct_pool+idx;
     982      483669 :         ai->flags    = 0U; /* FIXME: unnecessary */
     983      483669 :         acct_map_idx_remove_fast( disp->free_acct_map, idx, disp->acct_pool );
     984      483669 :       }
     985             :       /* In either case, at this point, the element is not in any map
     986             :          but is in free_acct_dlist.  It has the right key. last_ref, and
     987             :          ema_refs are valid. flags is 0. */
     988     1081614 :       free_dlist_idx_remove( disp->free_acct_dlist, idx, disp->acct_pool );
     989     1081614 :       memset( ai->last_reference, '\0', sizeof(ai->last_reference) );
     990     1081614 :       acct_map_idx_insert( disp->acct_map, idx, disp->acct_pool );
     991     1081614 :     }
     992     2922270 :     ai = disp->acct_pool+idx;
     993             :     /* At this point, in all cases, the acct_info is now in the ACTIVE
     994             :        state.  It's in acct_map, not in free_acct_map, and not in
     995             :        free_acct_dlist. */
     996             : 
     997             :     /* Assume that transactions are drawn randomly from some large
     998             :        distribution of potential transactions.  We want to estimate the
     999             :        expected value of the probability that this transaction conflicts
    1000             :        with the next transaction that is sampled.  Two transactions
    1001             :        conflict if they conflict on any account, and in general, they
    1002             :        conflict if they both reference the same account, unless both
    1003             :        this transaction and the next one only read it.  We don't have
    1004             :        read/write info, so the best guess we have is that the next
    1005             :        transaction does the same thing to the account that this one
    1006             :        does.  That means we only really care about the accounts that
    1007             :        this transaction writes to.  Label those a_1, a_2, ..., a_i, and
    1008             :        suppose the probability that the next transaction references a_i
    1009             :        is p_i (determined using the EMA).  Now then, assuming accounts
    1010             :        are independent (which is false, but whatever), then the
    1011             :        probability that the next transaction does not conflict with this
    1012             :        account is:
    1013             :                      (1-p_1) * (1-p_2) * (1 - p_i).
    1014             : 
    1015             :        Since for the treap, a lower value means we'll schedule it
    1016             :        earlier, we'll use the probability of non-conflict as the
    1017             :        fractional part of the score. */
    1018     2922270 :     if( FD_LIKELY( update_score ) ) {
    1019     2883777 :       float score_change = 1.0f - update_ema( ai, disp->global_insert_cnt );
    1020     2883777 :       ele->score *= fd_float_if( writable, score_change, 1.0f );
    1021     2883777 :     }
    1022             : 
    1023             :     /* Step 2: add edge. There are 4 cases depending on whether this is
    1024             :        a writer or not and whether the previous reference was a writer
    1025             :        or not. */
    1026     2922270 :     int      _ignore;
    1027             : 
    1028     2922270 :     edge_t   ref_to_pa = ai->last_reference[ lane ];
    1029     2922270 :     edge_t   ref_to_me = (uint)((ulong)((ele - disp->pool)<<8) | acct_idx);
    1030     2922270 :     edge_t * pa        = FOLLOW_EDGE( disp->pool, ai->last_reference[ lane ], _ignore );
    1031     2922270 :     edge_t * me        = ele->edges + edge_idx;
    1032             : 
    1033             :     /* In the case that this is the first txn in the DAG, pa will point
    1034             :        to edges[0] of the sentinel element, pool[0].  We don't care
    1035             :        about what is stored there, so just set it up as a dummy element
    1036             :        to make the rest of the code work properly in this case too.  If
    1037             :        this is a read, we want me->sibli to ref_to_me in case 4; if this
    1038             :        is a write, we want to set me->sibli to 0 in case 2, but we need
    1039             :        to make sure pb==pa. */
    1040     2922270 :     disp->pool->edges[0] = (1U<<31) | (uint)idx;
    1041     2922270 :     disp->pool->edges[1] = fd_uint_if( writable, 0U, ref_to_me );
    1042     2922270 :     disp->pool->edges[2] = fd_uint_if( writable, 0U, ref_to_me );
    1043             : 
    1044     2922270 :     int flags = ai->flags;
    1045             : 
    1046     2922270 :     if( writable ) { /* also should be known at compile time */
    1047     1399407 :       if( flags & ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ) ) { /* unclear prob */
    1048             :         /* Case 1: w-w. The parent is the special last pointer.  Point
    1049             :            the parent to me, and set me to the last pointer. */
    1050      271902 :         *me = *pa;
    1051      271902 :         *pa = ref_to_me;
    1052     1127505 :       } else {
    1053             :         /* Case 2: r-w. This is the tricky case because there could be
    1054             :            multiple readers.  We need to set all the last readers' child
    1055             :            pointers to me. */
    1056     1127505 :         *me = *pa;
    1057     1127505 :         *pa = ref_to_me;
    1058     1127505 :         edge_t * pb = FOLLOW_EDGE( disp->pool, pa[1], _ignore );
    1059             :         /* Intentionally skip the first in_degree increment, because it
    1060             :            will be done later */
    1061     1210065 :         while( pb!=pa ) {
    1062       82560 :           *pb = ref_to_me;
    1063       82560 :           ele->in_degree++;
    1064       82560 :           pb = FOLLOW_EDGE( disp->pool, pb[1], _ignore );
    1065       82560 :         }
    1066     1127505 :         flags |= ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ) | ACCT_INFO_FLAG_ANY_WRITERS( lane );
    1067     1127505 :       }
    1068     1522863 :     } else {
    1069     1522863 :       if( flags & ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ) ) { /* unclear prob */
    1070             :         /* Case 3: w-r. Similar to case 1, but need to initialize my
    1071             :            next and prev sibling pointers too. */
    1072       98727 :         *me = *pa;
    1073       98727 :         *pa = ref_to_me;
    1074       98727 :         me[1] = ref_to_me; /* next */
    1075       98727 :         me[2] = ref_to_me; /* prev */
    1076       98727 :         flags &= ~(int)ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ); /* clear bit */
    1077     1424136 :       } else {
    1078             :         /* Case 4: r-r. Add myself as a sibling instead of a child */
    1079     1424136 :         *me = *pa;
    1080     1424136 :         FOLLOW_EDGE( disp->pool, pa[1], _ignore )[2] = ref_to_me;  /* prev->next->prev = me   */
    1081     1424136 :         me[1] = pa[1];                                             /* me->next   = prev->next */
    1082     1424136 :         me[2] = ref_to_pa;                                         /* me->prev   = prev       */
    1083     1424136 :         pa[1] = ref_to_me;                                         /* prev->next = me         */
    1084     1424136 :       }
    1085     1522863 :     }
    1086             : 
    1087             :     /* Step 3: Update the final values */
    1088             :     /* In general, we want to increment the in_degree unless this
    1089             :        transaction is the first to reference this account.  The
    1090             :        exception is that if this account has only readers, including
    1091             :        this transaction, we don't want to increment the in_degree
    1092             :        either.  At this point, we can tell if that is the case based on
    1093             :        ANY_WRITERS.  */
    1094     2922270 :     ele->in_degree += (uint)((ai->last_reference[ lane ]!=0U) & !!(flags & ACCT_INFO_FLAG_ANY_WRITERS( lane )));
    1095     2922270 :     ai->last_reference[ lane ] = ref_to_me;
    1096     2922270 :     ai->flags                  = (uchar)flags;
    1097     2922270 :     edge_idx += fd_uint_if( writable, 1U, 3U );
    1098     2922270 :     acct_idx++;
    1099     2922270 :   }
    1100             :   /* Can't overflow by construction, except for the intentional overflow on the first time. */
    1101     3315708 :   ele->edge_cnt_etc += (uint)addr_cnt<<fd_int_if( writable, 0, 7 );
    1102     3315708 : }
    1103             : 
    1104             : 
    1105             : /* should be called with all writable accounts first */
    1106             : static void
    1107             : add_unstaged_edges( fd_rdisp_t * disp,
    1108             :                     fd_rdisp_txn_t       * ele,
    1109             :                     fd_rdisp_unstaged_t  * unstaged,
    1110             :                     fd_acct_addr_t const * addr,
    1111             :                     ulong                  addr_cnt,
    1112             :                     int                    writable,
    1113        3816 :                     int                    update_score ) {
    1114        3816 :   ulong base_idx = unstaged->writable_cnt+unstaged->readonly_cnt;
    1115        3816 :   FD_TEST( !writable || unstaged->readonly_cnt==0U );
    1116       80802 :   for( ulong i=0UL; i<addr_cnt; i++ ) {
    1117       76986 :     unstaged->keys[ base_idx+i ] = addr[i];
    1118       76986 :     if( FD_LIKELY( update_score ) ) {
    1119       76986 :       ulong idx = acct_map_idx_query( disp->acct_map, addr+i, ULONG_MAX, disp->acct_pool );
    1120       76986 :       if( FD_UNLIKELY( idx==ULONG_MAX ) ) idx = acct_map_idx_query( disp->free_acct_map, addr+i, ULONG_MAX, disp->acct_pool );
    1121             :       /* since these are unstaged, we don't bother moving accounts
    1122             :          around */
    1123       76986 :       float score_change = 1.0f;
    1124       76986 :       if( FD_LIKELY( idx!=ULONG_MAX ) ) score_change = 1.0f - update_ema( disp->acct_pool+idx, disp->global_insert_cnt );
    1125       76986 :       ele->score *= fd_float_if( writable & update_score, score_change, 1.0f );
    1126       76986 :     }
    1127       76986 :   }
    1128        3816 :   *(fd_ptr_if( writable, &(unstaged->writable_cnt), &(unstaged->readonly_cnt) ) ) += (uint)addr_cnt;
    1129        3816 : }
    1130             : 
    1131             : ulong
    1132             : fd_rdisp_add_txn( fd_rdisp_t          *  disp,
    1133             :                   FD_RDISP_BLOCK_TAG_T   insert_block,
    1134             :                   fd_txn_t const       * txn,
    1135             :                   uchar const          * payload,
    1136             :                   fd_acct_addr_t const * alts,
    1137      553152 :                   int                    serializing ) {
    1138             : 
    1139      553152 :   fd_rdisp_blockinfo_t * block   = block_map_ele_query( disp->blockmap, &insert_block, NULL, disp->block_pool );
    1140      553152 :   if( FD_UNLIKELY( !block || !block->insert_ready ) ) return 0UL;
    1141      553149 :   if( FD_UNLIKELY( !pool_free( disp->pool       ) ) ) return 0UL;
    1142             : 
    1143      553149 :   ulong idx = pool_idx_acquire( disp->pool );
    1144      553149 :   fd_rdisp_txn_t * rtxn = disp->pool + idx;
    1145      553149 :   if( FD_UNLIKELY( rtxn->in_degree!=IN_DEGREE_FREE ) ) FD_LOG_CRIT(( "pool[%lu].in_degree==%u but free", idx, rtxn->in_degree ));
    1146             : 
    1147      553149 :   fd_acct_addr_t const * imm_addrs = fd_txn_get_acct_addrs( txn, payload );
    1148             : 
    1149      553149 :   if( FD_UNLIKELY( !block->staged ) ) {
    1150         636 :     rtxn->in_degree = IN_DEGREE_UNSTAGED;
    1151         636 :     rtxn->score     = 0.999f;
    1152             : 
    1153         636 :     fd_rdisp_unstaged_t * unstaged = disp->unstaged + idx;
    1154         636 :     unstaged->block = insert_block;
    1155         636 :     unstaged->writable_cnt = 0U;
    1156         636 :     unstaged->readonly_cnt = 0U;
    1157             : 
    1158         636 :     add_unstaged_edges( disp, rtxn, unstaged, imm_addrs,
    1159         636 :                                                         fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_SIGNER        ), 1, 1 );
    1160         636 :     add_unstaged_edges( disp, rtxn, unstaged, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_SIGNER ),
    1161         636 :                                                         fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_NONSIGNER_IMM ), 1, 1 );
    1162         636 :     if( FD_LIKELY( alts ) )
    1163         636 :       add_unstaged_edges( disp, rtxn, unstaged, alts,
    1164         636 :                                                         fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_ALT ),           1, 1 );
    1165         636 :     add_unstaged_edges( disp, rtxn, unstaged, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_SIGNER ),
    1166         636 :                                                         fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_SIGNER        ), 0, 1 );
    1167         636 :     add_unstaged_edges( disp, rtxn, unstaged, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_SIGNER | FD_TXN_ACCT_CAT_WRITABLE_NONSIGNER_IMM ),
    1168         636 :                                                         fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_NONSIGNER_IMM ), 0, 1 );
    1169         636 :     if( FD_LIKELY( alts ) )
    1170         636 :       add_unstaged_edges( disp, rtxn, unstaged, alts   +fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_ALT ),
    1171         636 :                                                         fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_ALT ),           0, 1 );
    1172             : 
    1173         636 :     unstaged_txn_ll_ele_push_tail( block->ll, rtxn, disp->pool );
    1174      552513 :   } else {
    1175      552513 :     uint lane = block->staging_lane;
    1176             : 
    1177      552513 :     rtxn->in_degree    = 0U;
    1178      552513 :     rtxn->score        = 0.999f;
    1179             :     /* There's not a good way to initialize w_cnt_1 to -1, which is what
    1180             :        it should be at this point, but this is close.  We must be sure
    1181             :        to add at least one writer (which we are assured of because we
    1182             :        know the transaction passed fd_txn_parse) to correct this value. */
    1183      552513 :     rtxn->edge_cnt_etc = ((block->linear_block_number<<16) | (lane<<14)) - 1U;
    1184             : 
    1185      552513 :     add_edges( disp, rtxn, imm_addrs,
    1186      552513 :                                      fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_SIGNER        ), lane, 1, 1 );
    1187      552513 :     add_edges( disp, rtxn, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_SIGNER ),
    1188      552513 :                                      fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_NONSIGNER_IMM ), lane, 1, 1 );
    1189      552513 :     if( FD_LIKELY( alts ) )
    1190      552510 :       add_edges( disp, rtxn, alts,
    1191      552510 :                                      fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_ALT ),           lane, 1, 1 );
    1192      552513 :     add_edges( disp, rtxn, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_SIGNER ),
    1193      552513 :                                      fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_SIGNER        ), lane, 0, 1 );
    1194      552513 :     add_edges( disp, rtxn, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_SIGNER | FD_TXN_ACCT_CAT_WRITABLE_NONSIGNER_IMM ),
    1195      552513 :                                      fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_NONSIGNER_IMM ), lane, 0, 1 );
    1196      552513 :     if( FD_LIKELY( alts ) )
    1197      552510 :       add_edges( disp, rtxn, alts   +fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_ALT ),
    1198      552510 :                                      fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_ALT ),           lane, 0, 1 );
    1199      552513 :   }
    1200             : 
    1201      553149 :   if( FD_UNLIKELY( serializing | block->last_insert_was_serializing ) ) {
    1202           6 :     block->last_serializing = block->inserted_cnt;
    1203           6 :   }
    1204      553149 :   block->last_insert_was_serializing = (uint)!!serializing;
    1205      553149 :   rtxn->score += (float)block->last_serializing;
    1206             : 
    1207      553149 :   block->inserted_cnt++;
    1208      553149 :   disp->global_insert_cnt++;
    1209             : 
    1210      553149 :   if( FD_LIKELY( (block->staged) & (rtxn->in_degree==0U) ) ) {
    1211      434178 :     pending_prq_ele_t temp[1] = {{ .score = rtxn->score, .linear_block_number = block->linear_block_number, .txn_idx = (uint)idx }};
    1212      434178 :     pending_prq_insert( disp->lanes[ block->staging_lane ].pending, temp );
    1213      434178 :   }
    1214             : 
    1215      553149 :   return idx;
    1216      553149 : }
    1217             : 
    1218             : ulong
    1219             : fd_rdisp_get_next_ready( fd_rdisp_t           * disp,
    1220      725691 :                          FD_RDISP_BLOCK_TAG_T   schedule_block ) {
    1221      725691 :   fd_rdisp_blockinfo_t * block   = block_map_ele_query( disp->blockmap, &schedule_block, NULL, disp->block_pool );
    1222      725691 :   if( FD_UNLIKELY( !block || !block->schedule_ready ) ) return 0UL;
    1223             : 
    1224      725676 :   ulong idx;
    1225      725676 :   if( FD_LIKELY( block->staged ) ) {
    1226      725049 :     ulong staging_lane = block->staging_lane;
    1227      725049 :     per_lane_info_t * l = disp->lanes + staging_lane;
    1228             : 
    1229      725049 :     if( FD_UNLIKELY( !pending_prq_cnt( l->pending )                                ) ) return 0UL;
    1230      552837 :     if( FD_UNLIKELY( l->pending->linear_block_number != block->linear_block_number ) ) return 0UL;
    1231             :     /* e.g. when completed_cnt==0, we can accept any score below 1.0 */
    1232      552831 :     if( FD_UNLIKELY( l->pending->score>=(float)(block->completed_cnt+1U)           ) ) return 0UL;
    1233      552831 :     idx = l->pending->txn_idx;
    1234      552831 :     pending_prq_remove_min( l->pending );
    1235      552831 :     disp->pool[ idx ].in_degree = IN_DEGREE_DISPATCHED;
    1236      552831 :   } else {
    1237         627 :     if( FD_UNLIKELY( block->dispatched_cnt!=block->completed_cnt       ) ) return 0UL;
    1238         324 :     if( FD_UNLIKELY( unstaged_txn_ll_is_empty( block->ll, disp->pool ) ) ) return 0UL;
    1239         318 :     idx = unstaged_txn_ll_idx_peek_head( block->ll, disp->pool );
    1240         318 :     disp->pool[ idx ].in_degree = IN_DEGREE_UNSTAGED_DISPATCHED;
    1241         318 :   }
    1242      553149 :   block->dispatched_cnt++;
    1243             : 
    1244      553149 :   return idx;
    1245      725676 : }
    1246             : 
    1247             : void
    1248             : fd_rdisp_complete_txn( fd_rdisp_t * disp,
    1249             :                        ulong        txn_idx,
    1250      553152 :                        int          reclaim ) {
    1251             : 
    1252      553152 :   fd_rdisp_txn_t * rtxn = disp->pool + txn_idx;
    1253      553152 :   fd_rdisp_blockinfo_t * block = NULL;
    1254             : 
    1255      553152 :   if( FD_UNLIKELY( rtxn->in_degree==IN_DEGREE_UNSTAGED_DISPATCHED ) ) {
    1256             :     /* Unstaged */
    1257         318 :     block = block_map_ele_query( disp->blockmap, &disp->unstaged[ txn_idx ].block, NULL, disp->block_pool );
    1258         318 :     FD_TEST( rtxn==unstaged_txn_ll_ele_peek_head( block->ll, disp->pool ) );
    1259         318 :     unstaged_txn_ll_ele_pop_head( block->ll, disp->pool );
    1260         318 :     block->completed_cnt++;
    1261      552834 :   } else if( FD_LIKELY( rtxn->in_degree==IN_DEGREE_DISPATCHED ) ) {
    1262             :     /* Staged */
    1263      552831 :     ulong w_cnt_1 = (rtxn->edge_cnt_etc    ) & 0x7FU;
    1264      552831 :     ulong r_cnt   = (rtxn->edge_cnt_etc>> 7) & 0x7FU;
    1265      552831 :     ulong lane    = (rtxn->edge_cnt_etc>>14) & 0x3U;
    1266      552831 :     uint  tail_linear_block_num = (uint)(disp->lanes[lane].linear_block_number);
    1267      552831 :     ulong edge_idx = 0UL;
    1268     3475101 :     for( ulong i=0UL; i<=w_cnt_1+r_cnt; i++ ) {
    1269     2922270 :       edge_t const * e = rtxn->edges+edge_idx;
    1270     2922270 :       edge_t const  e0 = *e;
    1271     2922270 :       edge_t ref_to_me = (uint)((txn_idx<<8) | i);
    1272             : 
    1273             :       /* To help with explanations, consider the following DAG:
    1274             : 
    1275             :            --> B --\   --> E --\
    1276             :           /         V /         V
    1277             :          A           D         (G)
    1278             :           \         ^ \         ^
    1279             :            --> C --/   --> F --/     */
    1280             : 
    1281     2922270 :       if( FD_UNLIKELY( EDGE_IS_LAST( e0 ) ) ) {
    1282     2351685 :         ulong acct_idx = e0 & 0x7FFFFFFFU;
    1283     2351685 :         acct_info_t * ai = disp->acct_pool + acct_idx;
    1284             :         /* If this is a writer, e.g. node G above, we know it's the last
    1285             :            one, so we can clear last_reference.
    1286             :            If this is a reader, e.g. node E and F above, if node G
    1287             :            didn't exist, we need to check if it's the last one
    1288             :            (me==me->next).  If so, we can clear last_reference.  If not,
    1289             :            we need to delete this node from the linked list */
    1290     2351685 :         if( edge_idx<=w_cnt_1 || e[1]==ref_to_me ) {
    1291     1525917 :           ai->last_reference[ lane ] = 0U;
    1292     1525917 :           ai->flags = (uchar)(ai->flags & (~(ACCT_INFO_FLAG_ANY_WRITERS( lane ) | ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ))));
    1293     1525917 :         } else {
    1294      825768 :           int _ignore;
    1295      825768 :           FOLLOW_EDGE( disp->pool, e[1], _ignore )[2] = e[2];  /* me->next->prev = me->prev */
    1296      825768 :           FOLLOW_EDGE( disp->pool, e[2], _ignore )[1] = e[1];  /* me->prev->next = me->next */
    1297      825768 :           ai->last_reference[ lane ]= fd_uint_if( ai->last_reference[ lane ]==ref_to_me, e[1], ai->last_reference[ lane ] );
    1298      825768 :         }
    1299             : 
    1300             :         /* Potentially transition from ACTIVE -> CACHED */
    1301     2351685 :         if( FD_UNLIKELY( (ai->last_reference[ 0 ]==0U)&(ai->last_reference[ 1 ]==0U)&
    1302     2351685 :                          (ai->last_reference[ 2 ]==0U)&(ai->last_reference[ 3 ]==0U) ) ) {
    1303     1081614 :           ai->flags = 0;
    1304     1081614 :           acct_map_idx_remove_fast( disp->acct_map,        acct_idx, disp->acct_pool );
    1305     1081614 :           acct_map_idx_insert     ( disp->free_acct_map,   acct_idx, disp->acct_pool );
    1306     1081614 :           free_dlist_idx_push_tail( disp->free_acct_dlist, acct_idx, disp->acct_pool );
    1307     1081614 :         }
    1308     2351685 :       } else {
    1309      570585 :         int child_is_writer;
    1310             : 
    1311      570585 :         edge_t next_e = e0;
    1312      570585 :         edge_t const * child_edge;
    1313      616599 :         while( 1 ) {
    1314             :           /* This loop first traverses the me->child link, and then
    1315             :              traverses any sibling links.  For example, in the case that
    1316             :              we're completing node D above, the first child_txn is E,
    1317             :              and the second child_txn is F. */
    1318      616599 :           /*            */ child_edge = FOLLOW_EDGE(     disp->pool, next_e, child_is_writer );
    1319      616599 :           fd_rdisp_txn_t * child_txn  = FOLLOW_EDGE_TXN( disp->pool, next_e                  );
    1320             : 
    1321             :           /* Sanity test */
    1322      616599 :           FD_TEST( child_txn->in_degree>0U                   );
    1323      616599 :           FD_TEST( child_txn->in_degree<IN_DEGREE_DISPATCHED );
    1324             : 
    1325      616599 :           if( FD_UNLIKELY( 0U==(--(child_txn->in_degree)) ) ) {
    1326             :             /* We need an operation something like
    1327             :                fd_frag_meta_ts_decomp. child_txn has the low 16 bits,
    1328             :                and tail_linear_block_num has the full 32 bits, except
    1329             :                for tail_linear_block_num refers to a block < block_depth
    1330             :                later.  Since block_depth<2^16, that means we can resolve
    1331             :                this unambiguously.  Basically, we copy the high 16 bits
    1332             :                frorm tail_linear_block_num unless that would make
    1333             :                linear_block_num larger than tail_linear_block_num, in
    1334             :                which case, we subtract 2^16. */
    1335      118647 :             uint low_16_bits = child_txn->edge_cnt_etc>>16;
    1336      118647 :             uint linear_block_num = ((tail_linear_block_num & ~0xFFFFU) | low_16_bits) - (uint)((low_16_bits>(tail_linear_block_num&0xFFFFU))<<16);
    1337      118647 :             pending_prq_ele_t temp[1] = {{ .score               = child_txn->score,
    1338      118647 :                                            .linear_block_number = linear_block_num,
    1339      118647 :                                            .txn_idx             = (uint)(child_txn-disp->pool) }};
    1340      118647 :             pending_prq_insert( disp->lanes[ lane ].pending, temp );
    1341      118647 :           }
    1342      616599 :           if( child_is_writer || child_edge[1]==e0 ) break;
    1343       46014 :           next_e = child_edge[1];
    1344       46014 :         }
    1345             :         /* In the case that the completed transaction is a reader, say B
    1346             :            or C above, it seems like we should need to remove it from
    1347             :            the doubly linked list, but we actually don't.  The times
    1348             :            that we need to read the sibling pointers are:
    1349             :             1. Completing the writer before a reader (e.g. completing A)
    1350             :             2. Completing a reader that's the last in the DAG (e.g.
    1351             :                completing E/F if G didn't exist)
    1352             :             3. Adding another reader to the same set of readers (e.g. if
    1353             :                G were added as a reader instead of a writer)
    1354             :             4. Adding a writer after a set of readers (e.g. adding G).
    1355             : 
    1356             :            Supposing that the completed transaction is a reader, since
    1357             :            we checked EDGE_IS_LAST( e0 ), we know that there is at least
    1358             :            one writer that follows this reader, e.g. D above.
    1359             : 
    1360             :            And so none of these reason can apply to this group of readers:
    1361             :             1. Completing B or C implies that A has already completed,
    1362             :                so it can't complete again.
    1363             :             2. We know that we're not in that case because we checked
    1364             :                EDGE_IS_LAST( e0 ), and if we're not last now, we cannot
    1365             :                become last later, because the growth happens in the
    1366             :                other direction.
    1367             :             3. Similarly, because we checked EDGE_IS_LAST, any future
    1368             :                additions of readers won't be to this group of readers.
    1369             :             4. Similarly, we know that there already is a writer that
    1370             :                follows this group of readers, so a later writer would
    1371             :                not read this set of readers.
    1372             : 
    1373             :            So then, we don't need to deal with the sibling edges.  The
    1374             :            fact that we only need to do it in one case almost calls into
    1375             :            question whether we need to maintain the whole circular
    1376             :            system in the first place, and whether we could get away with
    1377             :            a reference count or something instead, but it's important in
    1378             :            one critical case: suppose we add a bunch of readers, then
    1379             :            some of them complete, and then we add a writer (case 4
    1380             :            above).  We need to be able to enumerate the nodes in the DAG
    1381             :            that have not yet completed, and we need to be able to remove
    1382             :            them from that set in O(1).  Those two requirements don't
    1383             :            leave us with many alternatives besides a doubly linked list. */
    1384             : 
    1385      570585 :         if( FD_UNLIKELY( EDGE_IS_LAST( *child_edge ) ) ) {
    1386             :           /* For example, either:
    1387             :              1. completing D when if G didn't exist
    1388             :              2. completing E or F with G in the DAG
    1389             : 
    1390             :              After completing this transaction, there's either 0 writers
    1391             :              left (case 1) or 1 writer left (case 2) in the DAG. and if
    1392             :              there is one, it is the last reference.
    1393             : 
    1394             :              Either way, we want to set ANY_WRITERS to
    1395             :              LAST_REF_WAS_WRITE. */
    1396      393645 :           acct_info_t * ai = disp->acct_pool + (*child_edge & 0x7FFFFFFFU);
    1397      393645 :           ulong flags = ai->flags;
    1398      393645 :           flags &= ~(ulong)ACCT_INFO_FLAG_ANY_WRITERS( lane );
    1399      393645 :           flags |= (flags & (ulong)ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ))<<1;
    1400      393645 :           ai->flags = (uchar)flags;
    1401      393645 :         }
    1402      570585 :       }
    1403     2922270 :       edge_idx += fd_ulong_if( i<=w_cnt_1, 1UL, 3UL );
    1404     2922270 :     }
    1405      552831 :     block = block_slist_ele_peek_head( disp->lanes[ lane ].block_ll, disp->block_pool );
    1406      552831 :     block->completed_cnt++;
    1407      552831 :   } else if( FD_LIKELY( rtxn->in_degree==IN_DEGREE_ZOMBIE ) ) {
    1408           3 :     FD_TEST( reclaim );
    1409           3 :     block = block_pool_ele( disp->block_pool, rtxn->block_idx );
    1410           3 :     zombie_dlist_ele_remove( block->zombie_list, rtxn, disp->pool );
    1411             :     /* Fall through to the pool release branch below. */
    1412           3 :   } else {
    1413           0 :     FD_LOG_CRIT(( "completed un-dispatched transaction %lu", txn_idx ));
    1414           0 :   }
    1415      553152 :   if( reclaim ) {
    1416             :     /* For testing purposes, to make sure we don't read a completed
    1417             :        transaction, we can clobber the memory. */
    1418             :     /* memset( disp->pool+txn_idx, '\xCC', sizeof(fd_rdisp_txn_t) ); */
    1419      553149 :     rtxn->in_degree = IN_DEGREE_FREE;
    1420      553149 :     pool_idx_release( disp->pool, txn_idx );
    1421      553149 :   } else {
    1422           3 :     rtxn->in_degree = IN_DEGREE_ZOMBIE;
    1423           3 :     zombie_dlist_ele_push_tail( block->zombie_list, rtxn, disp->pool );
    1424           3 :     rtxn->block_idx = (uint)block_pool_idx( disp->block_pool, block );
    1425           3 :   }
    1426      553152 : }
    1427             : 
    1428             : 
    1429             : ulong
    1430             : fd_rdisp_staging_lane_info( fd_rdisp_t           const * disp,
    1431          45 :                             fd_rdisp_staging_lane_info_t out_sched[ static 4 ] ) {
    1432         225 :   for( ulong i=0UL; i<4UL; i++ ) {
    1433         180 :     if( !(disp->free_lanes & (1<<i) ) ) {
    1434          66 :       block_slist_t const * sl = disp->lanes[ i ].block_ll;
    1435          66 :       out_sched[ i ].insert_ready_block   = block_slist_ele_peek_tail( sl, disp->block_pool )->block;
    1436          66 :       out_sched[ i ].schedule_ready_block = block_slist_ele_peek_head( sl, disp->block_pool )->block;
    1437          66 :     }
    1438         180 :   }
    1439          45 :   return 0xFUL & ~(ulong)disp->free_lanes;
    1440          45 : }
    1441             : 
    1442             : void
    1443             : fd_rdisp_verify( fd_rdisp_t const * disp,
    1444      155454 :                  uint             * scratch ) {
    1445      155454 :   ulong acct_depth  = disp->depth*MAX_ACCT_PER_TXN;
    1446      155454 :   ulong block_depth = disp->block_depth;
    1447      155454 :   FD_TEST( 0==acct_map_verify ( disp->acct_map,      acct_depth+1UL,  disp->acct_pool ) );
    1448      155454 :   FD_TEST( 0==acct_map_verify ( disp->free_acct_map, acct_depth+1UL,  disp->acct_pool ) );
    1449      155454 :   FD_TEST( 0==block_map_verify( disp->blockmap,     block_depth+1UL, disp->block_pool ) );
    1450             : 
    1451             :   /* Check all the in degree counts are right */
    1452      155454 :   memset( scratch, '\0', sizeof(uint)*(disp->depth+1UL) );
    1453      155454 :   scratch[ 0 ] = UINT_MAX;
    1454    46783854 :   for( ulong j=1UL; j<disp->depth+1UL; j++ ) {
    1455    46628400 :     fd_rdisp_txn_t const * rtxn = disp->pool+j;
    1456    46628400 :     if( rtxn->in_degree==IN_DEGREE_FREE ) { scratch[ j ]=UINT_MAX; continue; }
    1457             : 
    1458    11538054 :     if( (rtxn->in_degree==IN_DEGREE_UNSTAGED_DISPATCHED) |
    1459    11538054 :         (rtxn->in_degree==IN_DEGREE_UNSTAGED)            |
    1460    11538054 :         (rtxn->in_degree==IN_DEGREE_ZOMBIE) ) continue;
    1461             : 
    1462    11537427 :     ulong w_cnt_1 = (rtxn->edge_cnt_etc    ) & 0x7FU;
    1463    11537427 :     ulong r_cnt   = (rtxn->edge_cnt_etc>> 7) & 0x7FU;
    1464    11537427 :     ulong edge_idx = 0UL;
    1465             : 
    1466   155631774 :     for( ulong i=0UL; i<=w_cnt_1+r_cnt; i++ ) {
    1467   144094347 :       edge_t const * e = rtxn->edges+edge_idx;
    1468   144094347 :       edge_t const  e0 = *e;
    1469             : 
    1470   144094347 :       edge_idx += fd_ulong_if( i<=w_cnt_1, 1UL, 3UL );
    1471             : 
    1472   144094347 :       if( FD_UNLIKELY( EDGE_IS_LAST( e0 ) ) ) continue;
    1473             : 
    1474    29105160 :       edge_t next_e = e0;
    1475    29105160 :       edge_t const * child_edge;
    1476    29105160 :       edge_t last_e = 0U;
    1477    30354588 :       while( 1 ) {
    1478    30354588 :         int child_is_writer;
    1479             :         /* This loop first traverses the me->child link, and then
    1480             :            traverses any sibling links.  For example, in the case that
    1481             :            we're completing node D above, the first child_txn is E,
    1482             :            and the second child_txn is F. */
    1483    30354588 :         /*            */ child_edge = FOLLOW_EDGE(     disp->pool, next_e, child_is_writer );
    1484    30354588 :         fd_rdisp_txn_t * child_txn  = FOLLOW_EDGE_TXN( disp->pool, next_e                  );
    1485    30354588 :         scratch[ child_txn - disp->pool ]++;
    1486    30354588 :         if( child_is_writer || child_edge[1]==e0 ) break;
    1487     1249428 :         if( last_e!=0U ) FD_TEST( child_edge[2]==last_e );
    1488     1249428 :         last_e = next_e;
    1489     1249428 :         next_e = child_edge[1];
    1490     1249428 :         FD_TEST( next_e>=0x100U );
    1491     1249428 :       }
    1492    29105160 :     }
    1493    11537427 :   }
    1494    46783854 :   for( ulong i=1UL; i<disp->depth+1UL; i++ ) {
    1495    46628400 :     FD_TEST( scratch[ i ]==UINT_MAX ||
    1496    46628400 :              disp->pool[ i ].in_degree==IN_DEGREE_DISPATCHED ||
    1497    46628400 :              disp->pool[ i ].in_degree==IN_DEGREE_UNSTAGED ||
    1498    46628400 :              disp->pool[ i ].in_degree==IN_DEGREE_UNSTAGED_DISPATCHED ||
    1499    46628400 :              disp->pool[ i ].in_degree==IN_DEGREE_ZOMBIE ||
    1500    46628400 :              disp->pool[ i ].in_degree==scratch[ i ] );
    1501    46628400 :   }
    1502      155454 : }
    1503             : 
    1504           9 : void * fd_rdisp_leave ( fd_rdisp_t * disp ) { return disp; }
    1505           9 : void * fd_rdisp_delete( void * mem        ) { return  mem; }

Generated by: LCOV version 1.14