LCOV - code coverage report
Current view: top level - discof/replay - fd_rdisp.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 541 562 96.3 %
Date: 2025-09-19 04:41:14 Functions: 21 23 91.3 %

          Line data    Source code
       1             : #include "fd_rdisp.h"
       2             : #include <math.h> /* for the EMA */
       3             : 
       4             : /* The conflict graph that this file builds is not a general DAG, but
       5             :    the union of several special account-conflict graphs.  Each
       6             :    account-conflict graph has special structure:
       7             :                            ---> 3 --
       8             :                           /          \
       9             :                          /            v
      10             :                1  ---> 2 -----> 4 --> 6 ----> 7
      11             :                         \             ^
      12             :                          \           /
      13             :                           ----> 5 --
      14             : 
      15             :    That is, the graph is almost a line, but may have fan-out and fan-in
      16             :    regions.  The tricky part about representing a graph like this
      17             :    without dynamic memory allocation is that nodes may have arbitrary
      18             :    in-degree and out-degree.  Thus, we use a pretty standard trick and
      19             :    use sibling pointers, denoted below with dotted lines.  Each node
      20             :    maintains at most one successor pointer and at most one sibling
      21             :    pointer.  Although not shown below, the sibling pointers are
      22             :    circularly linked, so 5's sibling is 3.  Additionally, though not
      23             :    shown below, the child of the last node (7) stores information about
      24             :    what account address all this is for, which facilitates deleting the
      25             :    last node in the graph.
      26             : 
      27             : 
      28             :                               --> 3 --
      29             :                              /    ^   \
      30             :                             /     :    \
      31             :                            /      V     v
      32             :                  1  ---> 2        4 --> 6 ----> 7
      33             :                                   ^     ^
      34             :                                   :    /
      35             :                                   V   /
      36             :                                   5 --
      37             : 
      38             :    The normal edge 2->3 along with the sibling edge 3..>4 implies a
      39             :    normal edge 2->4.  That then transitively implies an edge 2->5.
      40             : 
      41             :    We want each node to maintain a count of its in-degree so that we
      42             :    know when it can be executed.  The implied edges also count for the
      43             :    in-degree.  In this example, node 1 has in-degree 0, node 6 has
      44             :    in-degree 3, and the rest have in-degree 1.
      45             : 
      46             :    Maintaining each account-conflict graph is relatively easy given the
      47             :    operations we want to support.  Only the details about sibling edges
      48             :    are worth mentioning.  For example, when deleting node 2, we
      49             :    decrement the in-degree count for its successor, and then follow the
      50             :    sibling pointers, decrementing all the in-degree counts as we go to
      51             :    mark the deletion of the implied edges.  Sibling edges also need to
      52             :    be doubly linked, so that e.g. nodes 3 and 5 can be re-linked in O(1)
      53             :    if node 4 is deleted.  They're also circularly linked as well for
      54             :    convenience.
      55             : 
      56             :    When building the graph, we maintain a map of account to the last
      57             :    node that references it, whether that was a read or write, and
      58             :    whether there are any writers to that account in the graph right now.
      59             :    If the new node reads from an account that was last read, the new
      60             :    node becomes a sibling of the last read, with in-degree increased if
      61             :    there are any writers.  Otherwise, it becomes a successor of the node
      62             :    that last referenced the account. */
      63             : 
      64             : /* For a task like this with lots of graph traversal and pointer
      65             :    chasing, performance is typically limited by memory latency.  That
      66             :    means that the more that can fit in cache, the better the
      67             :    performance.  This implementation uses a lot of bit-packing to
      68             :    improve cache footprint. */
      69             : 
      70             : /* The following structs are all very local to this compilation unit,
      71             :    and so they don't have globally acceptable type names (e.g.
      72             :    fd_rdisp_edge_t). */
      73             : 
      74             : /* Everything is set up to allow this to be 128, but we can save the
      75             :    space until it's necessary. */
      76      573531 : #define MAX_ACCT_PER_TXN 64UL
      77             : 
      78             : /* edge_t: Fields typed edge_t represent an edge in one of the parallel
      79             :    account-conflict DAGs.  Each transaction stores a list of all its
      80             :    outgoing edges.  The type is actually a union of bitfield, but C
      81             :    bitfields are gross, so we just do it manually with macros.  If the
      82             :    high bit is set, that means the transaction storing this edge_t value
      83             :    is it the last in this specific DAG, and the lower 31 bits of the
      84             :    value are an index in the map_pool for the account pubkey for this
      85             :    DAG.  See the comments about the hidden edge outgoing from node 7 in
      86             :    the DAG at the top of this file for an example.
      87             :    If the high bit is not set, then the next 23 bits store the
      88             :    destination of the edge, represented by its index in the pool of
      89             :    transactions.  Then the lowest 8 bits store the account index within
      90             :    that transaction of the edge that is part of the same
      91             :    account-specific DAG as this edge.  Because the max depth is 2^23-1,
      92             :    and each transaction can reference 128 accounts, the max accounts
      93             :    that can be referenced fits in 30 bits.
      94             :    The proper type would be something like
      95             :    typedef union {
      96             :      struct {
      97             :        uint is_last:1;
      98             :        uint map_pool_idx:31;
      99             :      } last;
     100             :      struct {
     101             :        uint is_last:1;
     102             :        uint txn_idx:23;
     103             :        uint acct_idx:8;
     104             :      } e;
     105             :    } edge_t;
     106             : 
     107             :    */
     108             : typedef uint edge_t;
     109             : 
     110             : 
     111             : /* txn_node_t is the representation of a transaction as a node in the
     112             :    DAG. */
     113             : struct fd_rdisp_txn {
     114             :   /* in_degree: in the worst case, all the other transactions in the
     115             :      pool read from each of the max number of accounts that this
     116             :      transaction writes to,  so there are MAX_ACCT_LOCKS*depth edges
     117             :      that come into this node, which is less than UINT_MAX.  If
     118             :      in_degree == UINT_MAX, it means the transaction is not staged. */
     119             :   uint    in_degree;
     120             : 
     121             :   /* score: integer part stores how many transactions in the block must
     122             :      have completed before this transaction can be scheduled.  This is
     123             :      useful for transactions marked as serializing.  The fractional part
     124             :      gives some measure of how urgent the transaction is, where lower is
     125             :      more urgent.  This means we can't have more transactions in a block
     126             :      marked as serializing than the first integer that a float cannot
     127             :      represent.  That value is about 16M, which is much higher than the
     128             :      maximum number of transactions in a block, so this is not a
     129             :      problem.  If in the very rare case that there are more than just a
     130             :      few serializing transactions, we will lose a bit of precision for
     131             :      the fractional portion, which makes total sense; if there's not
     132             :      much room for parallel execution, having the optimal parallel
     133             :      execution is not very important. */
     134             :   float   score;
     135             : 
     136             :   /* edge_cnt_etc:
     137             :      0xFFFF0000 (16 bits) for linear block number,
     138             :      0x0000C000 (2 bits) for concurrency lane,
     139             :      0x00003F80 (7 bits) for r_cnt
     140             :      0x0000007F (7 bits) for w_cnt */
     141             :   union {
     142             :     uint edge_cnt_etc;
     143             :     /* edge_cnt_etc is only used when the transaction is STAGED and
     144             :        PENDING, READY, or DISPATCHED.  If UNSTAGED or FREE, the next
     145             :        pointer is also here.  It can't be UNSTAGED and FREE at the same
     146             :        time, so there's no conflict with storage there either. */
     147             :     uint unstaged_next;
     148             :     uint free_next;
     149             :   };
     150             : 
     151             : 
     152             :   /* When a transaction writes to an account, it only creates one
     153             :      link.  When a transaction reads from an account, we need the full
     154             :      doubly linked list with its siblings, so it creates 3 edges
     155             :      (child, next, prev).  All edges from writable accounts come first,
     156             :      and we keep track of how many there are.  In the worst case, all
     157             :      accounts are reads, so we size it appropriately. */
     158             :   edge_t edges[3UL*MAX_ACCT_PER_TXN]; /* addressed [0, w_cnt+3*r_cnt) */
     159             : };
     160             : typedef struct fd_rdisp_txn fd_rdisp_txn_t;
     161             : 
     162             : #define EDGE_IS_LAST(x) ((x)&0x80000000U)
     163             : 
     164             : /* Two more definitions:
     165             :    An edge index is an array position within the edges array.  An
     166             :    account index is a position within a transaction's account addresses,
     167             :    reordered so that the writable ones come first.  Since writable
     168             :    accounts use one position in the edges array per account address,
     169             :    these often coincide. */
     170             : 
     171             : 
     172             : /* FOLLOW_EDGE and FOLLOW_EDGE_TXN are helper macros for dealing with
     173             :    edges.  Given an edge_t x, and transaction pool base, FOLLOW_EDGE_TXN
     174             :    returns a pointer to transaction that the edge points to; FOLLOW_EDGE
     175             :    returns a pointer to the (first) edge_t within that transaction that
     176             :    is part of the same DAG as this edge.  FOLLOW_EDGE and
     177             :    FOLLOW_EDGE_TXN must not be called if EDGE_IS_LAST is non-zero.
     178             : 
     179             :    Then the edge index, i.e. the position in the edges array of the
     180             :    (first) edge for an account index is:
     181             :           acct_idx                      if acct_idx<w_cnt
     182             :           w_cnt + 3*(acct_idx-w_cnt)    else.
     183             :   Simplifying gives
     184             :          acct_idx + 2*signed_max( 0, acct_idx-w_cnt ).
     185             :   In doing this calculation, we also basically get for free whether the
     186             :   edge is for a writable account or a readonly account, so we return
     187             :   that as well via the w parameter.  If the child transaction only reads
     188             :   the account address for this DAG, then the next and prev pointers can
     189             :   be accessed using the returned value +1 and +2, respectively. */
     190    19169121 : #define FOLLOW_EDGE(base, x, w) (__extension__({                                 \
     191    19169121 :         uint __e = (x);                                                          \
     192    19169121 :         fd_rdisp_txn_t * __txn = ((base)+(__e>>8));                              \
     193    19169121 :         uint __wcnt = __txn->edge_cnt_etc & 0x7FU;                               \
     194    19169121 :         uint __idx  = (__e & 0xFFU);                                             \
     195    19169121 :         (w) = __idx<__wcnt;                                                      \
     196    19169121 :         (void)(w);  /* not robust... */                                          \
     197    19169121 :         __txn->edges + __idx + 2*fd_int_max( 0, (int)(__idx)-(int)(__wcnt) ); }))
     198     3352758 : #define FOLLOW_EDGE_TXN(base, x) ( (base)+((x)>>8) )
     199             : 
     200             : /* The pool and slist are almost the same, but they are used
     201             :    differently, so keep them as different structures for now. */
     202             : 
     203             : #define POOL_NAME     pool
     204          12 : #define POOL_T        fd_rdisp_txn_t
     205             : #define POOL_IDX_T    uint
     206     1163178 : #define POOL_NEXT     free_next
     207             : #define POOL_SENTINEL 1
     208             : #include "../../util/tmpl/fd_pool.c"
     209             : 
     210             : #define SLIST_NAME unstaged_txn_ll
     211             : #define SLIST_ELE_T fd_rdisp_txn_t
     212          18 : #define SLIST_IDX_T uint
     213          72 : #define SLIST_NEXT  unstaged_next
     214             : #include "../../util/tmpl/fd_slist.c"
     215             : 
     216             : 
     217             : /* ACCT_INFO_FLAG: It's a bit unfortunate that we have to maintain these
     218             :    flags, but basically we need to be able to distinguish the case where
     219             :    there are only readers so that we don't increment in_degree when
     220             :    adding a new txn_node.  If we have any writers, the only way to
     221             :    transition into a state where there are only readers is to complete
     222             :    the last writer.  We know we are in this case when the completed
     223             :    node's child doesn't have a child, and the completed node's child is
     224             :    a reader, as indicated by the LAST_REF_WAS_WRITE bit.
     225             :    LAST_REFERENCE_WAS_WRITE also has the advantage of being easy to
     226             :    maintain. */
     227    10952166 : #define ACCT_INFO_FLAG_LAST_REF_WAS_WRITE(lane) (((uchar)1)<<(2*(lane)))
     228    10444404 : #define ACCT_INFO_FLAG_ANY_WRITERS(       lane) (((uchar)2)<<(2*(lane)))
     229             : 
     230             : /* acct_info_t is a node in a map_chain that contains the metadata for a
     231             :    single account address's conflict graph DAG.  In particular, it
     232             :    contains the information needed to know where to insert a node that
     233             :    reads from or writes to the account.  The objects of this type follow
     234             :    this state machine:
     235             : 
     236             :                  FREE  -----> ACTIVE ----> CACHED
     237             :                                 ^            |
     238             :                                 |-------------
     239             :    When FREE, it is in free_acct_dlist only.  When ACTIVE, it is in
     240             :    acct_map only.  When CACHED, it is in both free_acct_dlist and
     241             :    cached_acct_map.
     242             : */
     243             : struct acct_info {
     244             :   /* key, next, and prev are the map_chain fields. Used in the ACTIVE
     245             :      and CACHED states.  next and prev set to 0 when in the FREE state.
     246             :      Element 0 is a sentinel and isn't inserted to the free_acct_dlist,
     247             :      so this is unambiguous. */
     248             :   fd_acct_addr_t key;
     249             :   uint next;
     250             :   uint prev;
     251             : 
     252             :   union {
     253             :     struct {
     254             :       /* This is effectively a pointer to the last node in the DAG for
     255             :          this pubkey, one for each staging lane.
     256             :          EDGE_IS_LAST(FOLLOW_EDGE(base, last_reference[i])) is non-zero.
     257             :          */
     258             :       edge_t last_reference[4];
     259             : 
     260             :     }; /* When in the ACTIVE state */
     261             :     struct {
     262             :       uint free_ll_next;
     263             :       uint free_ll_prev;
     264             :       /* 8 bytes of padding here */
     265             :     }; /* When not in the ACTIVE state, used by the free_acct_dlist */
     266             :   };
     267             :   /* flags: a combination of ACCT_INFO_FLAG_* bitfields above.  Used
     268             :      when ACTIVE. */
     269             :   uint flags:8;
     270             : 
     271             : 
     272             :   /* We want to dispatch the READY transactions in an order that
     273             :      maximizes parallelism, but we also want to be able to start
     274             :      dispatching transactions decently well before we have the full
     275             :      conflict graph.  We can do that because we know that contentious
     276             :      accounts tend to stay contentious and uncontentious accounts tend
     277             :      to stay uncontentious.
     278             : 
     279             :      To accomplish this, we maintain a special EMA.  Let x_i be 1 if
     280             :      transaction i references it and 0 if not.  If we squint and assume
     281             :      the transactions are independent and all drawn from some
     282             :      distribution (which is not true, but that's why we squint), an EMA
     283             :      of x_i estimates the probability that the next transaction
     284             :      references this account.  How we use this value is detailed later.
     285             : 
     286             :      We can't update this value for every pubkey for each transaction,
     287             :      so we maintain it in a lazy way, by applying updates only when we
     288             :      need to read the value, which also happens to be every time we want
     289             :      to add a 1 value to the EMA.  We then just need to maintain the
     290             :      last index i at which x_i was updated and the current value.  We
     291             :      only have 24 bits for the index, which means that we can't maintain
     292             :      it in a fork-aware way, which doesn't seem like a problem.  Also,
     293             :      it's possible it can overflow, and that can result in an incorrect
     294             :      value, but that means the account is only referenced ~ 1/2^24
     295             :      transactions, which is also fine.
     296             : 
     297             :      last_ref is in the domain of global_inserted_txn_cnt.  ema_refs is
     298             :      in [0, 1].  Both fields are used in ACTIVE and CACHED.  Maintaining
     299             :      these fields is actually the main reason CACHED exists. */
     300             :   uint   last_ref:24;
     301             :   float  ema_refs;
     302             : };
     303             : typedef struct acct_info acct_info_t;
     304             : 
     305             : FD_STATIC_ASSERT( sizeof(acct_info_t)==64UL, acct_info_t );
     306             : 
     307             : /* For the acct_map and the free_acct_map */
     308             : #define MAP_NAME          acct_map
     309     4432641 : #define MAP_ELE_T         acct_info_t
     310   280755837 : #define MAP_IDX_T         uint
     311             : #define MAP_KEY_T         fd_acct_addr_t
     312   532245834 : #define MAP_KEY_HASH(k,s) fd_hash( (s), (k)->b, 32UL )
     313   275106585 : #define MAP_KEY_EQ(k0,k1) (!memcmp( (k0)->b, (k1)->b, 32UL ))
     314             : #define MAP_OPTIMIZE_RANDOM_ACCESS_REMOVAL 1
     315             : #include "../../util/tmpl/fd_map_chain.c"
     316             : 
     317             : 
     318             : #define DLIST_IDX_T uint
     319     4491588 : #define DLIST_PREV  free_ll_prev
     320     4491588 : #define DLIST_NEXT  free_ll_next
     321             : #define DLIST_NAME  free_dlist
     322             : #define DLIST_ELE_T acct_info_t
     323             : #include "../../util/tmpl/fd_dlist.c"
     324             : 
     325             : 
     326             : struct pending_prq_ele {
     327             :   /* lower score means should be scheduled sooner.  Integer part has a
     328             :      special meaning as explained above. */
     329             :   float score;
     330             : 
     331             :   uint linear_block_number;
     332             :   uint txn_idx;
     333             : 
     334             :   /* It seems like we should be able to get this whole struct in 8
     335             :      bytes, but we're a few bits over.
     336             : 
     337             :      23 bits for txn_idx
     338             :      16 bit for linear_block_number
     339             :      Then the integer part of the score needs to be at least 18 or 19
     340             :      bits, but we can't use a custom floating point number. */
     341             : };
     342             : 
     343             : typedef struct pending_prq_ele pending_prq_ele_t;
     344             : 
     345             : 
     346             : #define PRQ_NAME pending_prq
     347     2197725 : #define PRQ_T    pending_prq_ele_t
     348             : #define PRQ_EXPLICIT_TIMEOUT 0
     349             : /* returns 1 if x is strictly after y */
     350     1045926 : #define PRQ_AFTER(x,y) (__extension__( {                                            \
     351     1045926 :             int cmp0 = (int)(x).linear_block_number - (int)(y).linear_block_number; \
     352     1045926 :             fd_int_if( cmp0!=0, cmp0>0, (x).score>(y).score );                      \
     353     1045926 :             }))
     354             : #include "../../util/tmpl/fd_prq.c"
     355             : 
     356             : /* fd_rdisp_blockinfo_t maintains a little metadata about transactions for each
     357             :    slot.  */
     358             : struct fd_rdisp_blockinfo {
     359             :   FD_RDISP_BLOCK_TAG_T block;
     360             :   uint  linear_block_number;
     361             : 
     362             :   uint  insert_ready:1;
     363             :   uint  schedule_ready:1;
     364             :   uint  staged:1;
     365             :   uint  staging_lane:2; /* ignored if staged==0 */
     366             :   uint  last_insert_was_serializing:1;
     367             : 
     368             :   uint inserted_cnt;
     369             :   uint dispatched_cnt;
     370             :   uint completed_cnt;
     371             :   uint last_serializing;
     372             : 
     373             :   uint map_chain_next;
     374             :   uint ll_next;
     375             :   unstaged_txn_ll_t ll[ 1 ]; /* used only when unstaged */
     376             : };
     377             : typedef struct fd_rdisp_blockinfo fd_rdisp_blockinfo_t;
     378             : 
     379             : #define POOL_NAME     block_pool
     380          12 : #define POOL_T        fd_rdisp_blockinfo_t
     381             : #define POOL_IDX_T    uint
     382       18135 : #define POOL_NEXT     ll_next
     383             : #define POOL_SENTINEL 1
     384             : #include "../../util/tmpl/fd_pool.c"
     385             : 
     386             : #define MAP_NAME  block_map
     387             : #define MAP_ELE_T fd_rdisp_blockinfo_t
     388             : #define MAP_KEY_T FD_RDISP_BLOCK_TAG_T
     389        9033 : #define MAP_KEY   block
     390     1147191 : #define MAP_NEXT  map_chain_next
     391     4637484 : #define MAP_IDX_T uint
     392             : #include "../../util/tmpl/fd_map_chain.c"
     393             : 
     394             : #define SLIST_NAME  block_slist
     395             : #define SLIST_ELE_T fd_rdisp_blockinfo_t
     396        9033 : #define SLIST_IDX_T uint
     397       18069 : #define SLIST_NEXT  ll_next
     398             : #include "../../util/tmpl/fd_slist.c"
     399             : 
     400             : struct fd_rdisp_unstaged {
     401             :   FD_RDISP_BLOCK_TAG_T block;
     402             : 
     403             :   uint writable_cnt;
     404             :   uint readonly_cnt;
     405             :   fd_acct_addr_t keys[MAX_ACCT_PER_TXN];
     406             : };
     407             : typedef struct fd_rdisp_unstaged fd_rdisp_unstaged_t;
     408             : 
     409             : typedef struct {
     410             :   pending_prq_ele_t * pending;
     411             :   ulong               linear_block_number;
     412             :   block_slist_t       block_ll[1];
     413             :   ulong               inserted_cnt;
     414             :   ulong               dispatched_cnt;
     415             :   ulong               completed_cnt;
     416             : } per_lane_info_t;
     417             : 
     418             : /* We maintain two maps from pubkeys to acct_info_t.  The first one is
     419             :    the main acct_map, just called acct_map.  All pubkeys in this map
     420             :    have >0 references in the one of the staging lane DAGs.  When an
     421             :    account goes to 0 references, it gets removed from main map_chain and
     422             :    moved to free map_chain, called free_acct_map.  The free_acct_map
     423             :    exists to maintain the reference count EMA information lazily.
     424             :    Unless we need the acct_info_t for something in the DAG, we might as
     425             :    well maintain the EMA info.
     426             : 
     427             :    When we start up, all the acct_info_t structs are in the
     428             :    free_acct_dlist.  Whenever something is added to the free_acct_map,
     429             :    it's also added to the tail of the free_acct_dlist.  When we need an
     430             :    acct_info_t that's not in the free_acct_map, we pop the head of the
     431             :    free_acct_dlist.  In general, the free_acct_dlist contains everything
     432             :    in the free_acct_map, potentially plus some elements that have never
     433             :    been used; all acct_info_t objects are in exactly one of the main
     434             :    acct_map and the free_acct_dlist (not free_acct_map).  See
     435             :    acct_info_t for more information about this. */
     436             : 
     437             : struct fd_rdisp {
     438             :   ulong depth;
     439             :   ulong block_depth;
     440             : 
     441             :   ulong global_insert_cnt;
     442             :   ulong unstaged_lblk_num;
     443             : 
     444             :   /* pool: an fd_pool, indexed [0, depth+1), with 0 being a sentinel */
     445             :   fd_rdisp_txn_t       * pool;
     446             :   fd_rdisp_unstaged_t  * unstaged; /* parallel to pool with additional info */
     447             : 
     448             :   block_map_t          * blockmap; /* map chain */
     449             :   fd_rdisp_blockinfo_t * block_pool;
     450             : 
     451             :   int free_lanes; /* a bitmask */
     452             :   per_lane_info_t lanes[4];
     453             : 
     454             :   acct_map_t   * acct_map;
     455             :   acct_map_t   * free_acct_map;
     456             :   /* acct_pool is not an fd_pool, but is just a flat array, since we
     457             :      don't need to acquire and release from it because of the dlist. */
     458             :   acct_info_t  * acct_pool;
     459             :   free_dlist_t   free_acct_dlist[1];
     460             : };
     461             : 
     462             : typedef struct fd_rdisp fd_rdisp_t;
     463             : 
     464             : 
     465             :  #define ACCT_ITER_TO_PTR( iter ) (__extension__( {                                             \
     466             :        ulong __idx = fd_txn_acct_iter_idx( iter );                                              \
     467             :        fd_ptr_if( __idx<fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_IMM ), accts, alt_adj )+__idx; \
     468             :        }))
     469             : 
     470             : 
     471          78 : ulong fd_rdisp_align( void ) { return 128UL; }
     472             : 
     473             : ulong
     474             : fd_rdisp_footprint( ulong depth,
     475           6 :                     ulong block_depth ) {
     476           6 :   if( FD_UNLIKELY( (depth>FD_RDISP_MAX_DEPTH) | (block_depth>FD_RDISP_MAX_BLOCK_DEPTH) ) ) return 0UL;
     477             : 
     478           6 :   ulong chain_cnt      = block_map_chain_cnt_est( block_depth );
     479           6 :   ulong acct_depth     = depth*MAX_ACCT_PER_TXN;
     480           6 :   ulong acct_chain_cnt = acct_map_chain_cnt_est( acct_depth );
     481             : 
     482           6 :   ulong l = FD_LAYOUT_INIT;
     483           6 :   l = FD_LAYOUT_APPEND( l, fd_rdisp_align(),             sizeof(fd_rdisp_t)                              );
     484           6 :   l = FD_LAYOUT_APPEND( l, pool_align(),                 pool_footprint              ( depth+1UL       ) ); /* pool       */
     485           6 :   l = FD_LAYOUT_APPEND( l, alignof(fd_rdisp_unstaged_t), sizeof(fd_rdisp_unstaged_t)*( depth+1UL       ) ); /* unstaged   */
     486           6 :   l = FD_LAYOUT_APPEND( l, block_map_align(),            block_map_footprint         ( chain_cnt       ) ); /* blockmap   */
     487           6 :   l = FD_LAYOUT_APPEND( l, block_pool_align(),           block_pool_footprint        ( block_depth+1UL ) ); /* block_pool */
     488           6 :   l = FD_LAYOUT_APPEND( l, pending_prq_align(),          4UL*pending_prq_footprint   ( depth           ) ); /* pending    */
     489           6 :   l = FD_LAYOUT_APPEND( l, acct_map_align(),             acct_map_footprint          ( acct_chain_cnt  ) ); /* acct_map   */
     490           6 :   l = FD_LAYOUT_APPEND( l, acct_map_align(),             acct_map_footprint          ( acct_chain_cnt  ) ); /* free_acct_map */
     491           6 :   l = FD_LAYOUT_APPEND( l, alignof(acct_info_t),         (acct_depth+1UL)*sizeof(acct_info_t)            ); /* acct_pool  */
     492           6 :   return FD_LAYOUT_FINI( l, fd_rdisp_align() );
     493           6 : }
     494             : 
     495             : void *
     496             : fd_rdisp_new( void * mem,
     497             :               ulong  depth,
     498             :               ulong  block_depth,
     499           6 :               ulong  seed ) {
     500           6 :   if( FD_UNLIKELY( (depth>FD_RDISP_MAX_DEPTH) | (block_depth>FD_RDISP_MAX_BLOCK_DEPTH) ) ) return NULL;
     501             : 
     502           6 :   ulong chain_cnt      = block_map_chain_cnt_est( block_depth );
     503           6 :   ulong acct_depth     = depth*MAX_ACCT_PER_TXN;
     504           6 :   ulong acct_chain_cnt = acct_map_chain_cnt_est( acct_depth );
     505             : 
     506           6 :   FD_SCRATCH_ALLOC_INIT( l, mem );
     507           6 :   fd_rdisp_t * disp   = FD_SCRATCH_ALLOC_APPEND( l, fd_rdisp_align(),             sizeof(fd_rdisp_t)                              );
     508           6 :   void  * _pool       = FD_SCRATCH_ALLOC_APPEND( l, pool_align(),                 pool_footprint              ( depth+1UL       ) );
     509           6 :   void  * _unstaged   = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_rdisp_unstaged_t), sizeof(fd_rdisp_unstaged_t)*( depth+1UL       ) );
     510           6 :   void  * _bmap       = FD_SCRATCH_ALLOC_APPEND( l, block_map_align(),            block_map_footprint         ( chain_cnt       ) );
     511           6 :   void  * _bpool      = FD_SCRATCH_ALLOC_APPEND( l, block_pool_align(),           block_pool_footprint        ( block_depth+1UL ) );
     512           6 :   uchar * _pending    = FD_SCRATCH_ALLOC_APPEND( l, pending_prq_align(),          4UL*pending_prq_footprint   ( depth           ) );
     513           6 :   void  * _acct_map   = FD_SCRATCH_ALLOC_APPEND( l, acct_map_align(),             acct_map_footprint          ( acct_chain_cnt  ) );
     514           6 :   void  * _freea_map  = FD_SCRATCH_ALLOC_APPEND( l, acct_map_align(),             acct_map_footprint          ( acct_chain_cnt  ) );
     515           6 :   acct_info_t * apool = FD_SCRATCH_ALLOC_APPEND( l, alignof(acct_info_t),         (acct_depth+1UL)*sizeof(acct_info_t)            );
     516           6 :   FD_SCRATCH_ALLOC_FINI( l, fd_rdisp_align() );
     517             : 
     518           6 :   disp->depth             = depth;
     519           6 :   disp->block_depth       = block_depth;
     520           6 :   disp->global_insert_cnt = 0UL;
     521           6 :   disp->unstaged_lblk_num = 0UL;
     522             : 
     523           6 :   pool_new( _pool, depth+1UL );
     524           6 :   memset( _unstaged, '\0', sizeof(fd_rdisp_unstaged_t)*(depth+1UL) );
     525             : 
     526           6 :   block_map_new ( _bmap,  chain_cnt, seed );
     527           6 :   block_pool_new( _bpool, block_depth+1UL );
     528             : 
     529           6 :   disp->free_lanes = 0xF;
     530          30 :   for( ulong i=0UL; i<4UL; i++ ) {
     531          24 :     pending_prq_new( _pending, depth );
     532          24 :     _pending += pending_prq_footprint( depth );
     533             : 
     534          24 :     disp->lanes[i].linear_block_number = 0UL;
     535          24 :     disp->lanes[i].inserted_cnt        = 0U;
     536          24 :     disp->lanes[i].dispatched_cnt      = 0U;
     537          24 :     disp->lanes[i].completed_cnt       = 0U;
     538          24 :   }
     539             : 
     540           6 :   acct_map_new( _acct_map,  acct_chain_cnt, fd_ulong_hash( seed+1UL ) );
     541           6 :   acct_map_new( _freea_map, acct_chain_cnt, fd_ulong_hash( seed+2UL ) );
     542             : 
     543           6 :   free_dlist_t * temp_join = free_dlist_join( free_dlist_new( disp->free_acct_dlist ) );
     544       38406 :   for( ulong i=1UL; i<acct_depth+1UL; i++ ) {
     545       38400 :     apool[ i ].next = apool[ i ].prev = 0U;
     546       38400 :     free_dlist_idx_push_tail( disp->free_acct_dlist, i, apool );
     547       38400 :   }
     548           6 :   free_dlist_leave( temp_join );
     549             : 
     550           6 :   return disp;
     551           6 : }
     552             : 
     553             : fd_rdisp_t *
     554           6 : fd_rdisp_join( void * mem ) {
     555           6 :   fd_rdisp_t * disp = (fd_rdisp_t *)mem;
     556             : 
     557           6 :   ulong depth          = disp->depth;
     558           6 :   ulong block_depth    = disp->block_depth;
     559           6 :   ulong chain_cnt      = block_map_chain_cnt_est( block_depth );
     560           6 :   ulong acct_depth     = depth*MAX_ACCT_PER_TXN;
     561           6 :   ulong acct_chain_cnt = acct_map_chain_cnt_est( acct_depth );
     562             : 
     563           6 :   FD_SCRATCH_ALLOC_INIT( l, mem );
     564           6 :   /*                 */ FD_SCRATCH_ALLOC_APPEND( l, fd_rdisp_align(),             sizeof(fd_rdisp_t)                              );
     565           6 :   void  * _pool       = FD_SCRATCH_ALLOC_APPEND( l, pool_align(),                 pool_footprint              ( depth+1UL       ) );
     566           6 :   void  * _unstaged   = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_rdisp_unstaged_t), sizeof(fd_rdisp_unstaged_t)*( depth+1UL       ) );
     567           6 :   void  * _bmap       = FD_SCRATCH_ALLOC_APPEND( l, block_map_align(),            block_map_footprint         ( chain_cnt       ) );
     568           6 :   void  * _bpool      = FD_SCRATCH_ALLOC_APPEND( l, block_pool_align(),           block_pool_footprint        ( block_depth+1UL ) );
     569           6 :   uchar * _pending    = FD_SCRATCH_ALLOC_APPEND( l, pending_prq_align(),          4UL*pending_prq_footprint   ( depth           ) );
     570           6 :   void  * _acct_map   = FD_SCRATCH_ALLOC_APPEND( l, acct_map_align(),             acct_map_footprint          ( acct_chain_cnt  ) );
     571           6 :   void  * _freea_map  = FD_SCRATCH_ALLOC_APPEND( l, acct_map_align(),             acct_map_footprint          ( acct_chain_cnt  ) );
     572           6 :   acct_info_t * apool = FD_SCRATCH_ALLOC_APPEND( l, alignof(acct_info_t),         (acct_depth+1UL)*sizeof(acct_info_t)            );
     573           6 :   FD_SCRATCH_ALLOC_FINI( l, fd_rdisp_align() );
     574             : 
     575           6 :   disp->pool       = pool_join( _pool );
     576           6 :   disp->unstaged   = (fd_rdisp_unstaged_t *)_unstaged;
     577           6 :   disp->blockmap   = block_map_join( _bmap );
     578           6 :   disp->block_pool = block_pool_join( _bpool );
     579             : 
     580          30 :   for( ulong i=0UL; i<4UL; i++ ) {
     581          24 :     disp->lanes[i].pending = pending_prq_join( _pending );
     582          24 :     _pending += pending_prq_footprint( depth );
     583          24 :   }
     584             : 
     585           6 :   disp->acct_map      = acct_map_join( _acct_map );
     586           6 :   disp->free_acct_map = acct_map_join( _freea_map );
     587           6 :   disp->acct_pool     = apool;
     588           6 :   free_dlist_join( disp->free_acct_dlist );
     589             : 
     590           6 :   return disp;
     591           6 : }
     592             : 
     593             : static inline void
     594             : free_lane( fd_rdisp_t * disp,
     595        9021 :            ulong        staging_lane ) {
     596        9021 :   disp->free_lanes |= 1<<staging_lane;
     597        9021 :   per_lane_info_t * l = disp->lanes+staging_lane;
     598        9021 :   FD_TEST( pending_prq_cnt( l->pending )==0UL );
     599        9021 :   l->linear_block_number = 0UL;
     600        9021 :   l->inserted_cnt   = 0UL;
     601        9021 :   l->dispatched_cnt = 0UL;
     602        9021 :   l->completed_cnt  = 0UL;
     603        9021 :   block_slist_delete( block_slist_leave( l->block_ll ) );
     604        9021 : }
     605             : 
     606             : static inline void
     607             : alloc_lane( fd_rdisp_t * disp,
     608        9024 :             ulong        staging_lane ) {
     609        9024 :   disp->free_lanes &= ~(1<<staging_lane);
     610        9024 :   block_slist_join( block_slist_new( disp->lanes[staging_lane].block_ll ) );
     611        9024 : }
     612             : 
     613             : ulong
     614             : fd_rdisp_suggest_staging_lane( fd_rdisp_t const *   disp,
     615             :                                FD_RDISP_BLOCK_TAG_T parent_block,
     616           0 :                                int                  duplicate ) {
     617             : 
     618             :   /* 1. If it's a duplicate, suggest FD_RDISP_UNSTAGED */
     619           0 :   if( FD_UNLIKELY( duplicate ) ) return FD_RDISP_UNSTAGED;
     620             : 
     621             :   /* 2. If parent is the last block in any existing staging lane, suggest
     622             :         that lane */
     623           0 :   fd_rdisp_blockinfo_t const * block_pool = disp->block_pool;
     624           0 :   fd_rdisp_blockinfo_t const * block = block_map_ele_query_const( disp->blockmap, &parent_block, NULL, block_pool );
     625           0 :   if( FD_LIKELY( block && block->insert_ready && block->staged ) ) return block->staging_lane;
     626             : 
     627             :   /* 3. If there is at least one free lane, suggest a free lane */
     628           0 :   if( FD_LIKELY( disp->free_lanes!=0 ) ) return (ulong)fd_uint_find_lsb( (uint)disp->free_lanes );
     629             : 
     630             :   /* 4. Else, suggest FD_RDISP_UNSTAGED */
     631           0 :   return FD_RDISP_UNSTAGED;
     632           0 : }
     633             : 
     634             : int
     635             : fd_rdisp_add_block( fd_rdisp_t          * disp,
     636             :                    FD_RDISP_BLOCK_TAG_T   new_block,
     637        9039 :                    ulong                  staging_lane ) {
     638        9039 :   fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
     639             : 
     640        9039 :   if( FD_UNLIKELY( !block_pool_free( block_pool )                                                             ) ) return -1;
     641        9039 :   if( FD_UNLIKELY(  ULONG_MAX!=block_map_idx_query_const( disp->blockmap, &new_block, ULONG_MAX, block_pool ) ) ) return -1;
     642        9033 :   fd_rdisp_blockinfo_t * block = block_pool_ele_acquire( block_pool );
     643        9033 :   block->block = new_block;
     644        9033 :   block_map_ele_insert( disp->blockmap, block, block_pool );
     645             : 
     646        9033 :   block->insert_ready = 1;
     647        9033 :   block->staged       = staging_lane!=FD_RDISP_UNSTAGED;
     648        9033 :   block->staging_lane = (uint)(staging_lane & 0x3UL);
     649        9033 :   block->last_insert_was_serializing = 0U;
     650             : 
     651        9033 :   block->inserted_cnt     = 0U;
     652        9033 :   block->dispatched_cnt   = 0U;
     653        9033 :   block->completed_cnt    = 0U;
     654        9033 :   block->last_serializing = 0U;
     655             : 
     656        9033 :   if( FD_UNLIKELY( staging_lane==FD_RDISP_UNSTAGED ) ) {
     657           3 :     block->schedule_ready = 1;
     658           3 :     block->linear_block_number = (uint)disp->unstaged_lblk_num++;
     659           3 :     unstaged_txn_ll_join( unstaged_txn_ll_new( block->ll ) );
     660        9030 :   } else {
     661        9030 :     block->linear_block_number = (uint)disp->lanes[staging_lane].linear_block_number++;
     662        9030 :     block->schedule_ready      = (uint)(1 & (disp->free_lanes >> staging_lane));
     663             : 
     664        9030 :     if( FD_LIKELY( disp->free_lanes & (1<<staging_lane) ) ) alloc_lane( disp, staging_lane );
     665             : 
     666        9030 :     block_slist_t * sl = disp->lanes[staging_lane].block_ll;
     667        9030 :     if( FD_LIKELY( !block_slist_is_empty( sl, block_pool ) ) )  block_slist_ele_peek_tail( sl, block_pool )->insert_ready = 0;
     668        9030 :     block_slist_ele_push_tail( sl, block, block_pool );
     669        9030 :   }
     670        9033 :   return 0;
     671        9039 : }
     672             : 
     673             : 
     674             : 
     675             : int
     676             : fd_rdisp_remove_block( fd_rdisp_t          * disp,
     677          30 :                        FD_RDISP_BLOCK_TAG_T   block_tag ) {
     678          30 :   fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
     679             : 
     680          30 :   fd_rdisp_blockinfo_t * block   = block_map_ele_query( disp->blockmap, &block_tag, NULL, block_pool );
     681          30 :   if( FD_UNLIKELY( block==NULL ) ) return -1;
     682             : 
     683          27 :   FD_TEST( block->schedule_ready );
     684          27 :   FD_TEST( block->completed_cnt==block->inserted_cnt );
     685             : 
     686          27 :   if( FD_LIKELY( block->staged ) ) {
     687          27 :     ulong staging_lane = (ulong)block->staging_lane;
     688          27 :     block_slist_t * sl = disp->lanes[staging_lane].block_ll;
     689             : 
     690          27 :     FD_TEST( block==block_slist_ele_peek_head( sl, block_pool ) );
     691          27 :     block_slist_idx_pop_head( sl, block_pool );
     692          27 :     if( FD_LIKELY( !block_slist_is_empty( sl, block_pool ) ) ) block_slist_ele_peek_head( sl, block_pool )->schedule_ready = 1;
     693          18 :     else                                                       free_lane( disp, staging_lane );
     694          27 :   } else {
     695           0 :     unstaged_txn_ll_delete( unstaged_txn_ll_leave( block->ll ) );
     696           0 :   }
     697          27 :   block_pool_idx_release( block_pool, block_map_idx_remove( disp->blockmap, &block_tag, ULONG_MAX, block_pool ) );
     698             : 
     699          27 :   return 0;
     700          27 : }
     701             : 
     702             : 
     703             : int
     704             : fd_rdisp_abandon_block( fd_rdisp_t          * disp,
     705        9006 :                         FD_RDISP_BLOCK_TAG_T   block_tag ) {
     706        9006 :   fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
     707             : 
     708        9006 :   fd_rdisp_blockinfo_t * block   = block_map_ele_query( disp->blockmap, &block_tag, NULL, disp->block_pool );
     709        9006 :   if( FD_UNLIKELY( block==NULL ) ) return -1;
     710             : 
     711        9003 :   FD_TEST( block->schedule_ready );
     712        9003 :   FD_TEST( block->dispatched_cnt==block->completed_cnt ); /* TODO: remove this when it can call complete properly */
     713        9012 :   while( block->completed_cnt<block->inserted_cnt ) {
     714             :     /* because there is nothing DISPATCHED, there has to be something
     715             :        READY */
     716           9 :     ulong txn = fd_rdisp_get_next_ready( disp, block_tag );
     717           9 :     FD_TEST( txn );
     718           9 :     fd_rdisp_complete_txn( disp, txn );
     719           9 :   }
     720             : 
     721        9003 :   if( FD_LIKELY( block->staged ) ) {
     722        9003 :     ulong staging_lane = (ulong)block->staging_lane;
     723        9003 :     block_slist_t * sl = disp->lanes[staging_lane].block_ll;
     724             : 
     725        9003 :     FD_TEST( block==block_slist_ele_peek_head( sl, block_pool ) );
     726        9003 :     block_slist_idx_pop_head( sl, block_pool );
     727        9003 :     if( FD_LIKELY( !block_slist_is_empty( sl, block_pool ) ) ) block_slist_ele_peek_head( sl, block_pool )->schedule_ready = 1;
     728        9000 :     else                                                       free_lane( disp, staging_lane );
     729        9003 :   } else {
     730           0 :     unstaged_txn_ll_delete( unstaged_txn_ll_leave( block->ll ) );
     731           0 :   }
     732        9003 :   block_pool_idx_release( disp->block_pool, block_map_idx_remove( disp->blockmap, &block_tag, ULONG_MAX, disp->block_pool ) );
     733             : 
     734        9003 :   return 0;
     735        9003 : }
     736             : 
     737             : static void
     738             : add_edges( fd_rdisp_t           * disp,
     739             :            fd_rdisp_txn_t       * ele,
     740             :            fd_acct_addr_t const * addr,
     741             :            ulong                  addr_cnt,
     742             :            uint                   staging_lane,
     743             :            int                    writable,
     744             :            int                    update_score );
     745             : /* updates in_degree, edge_cnt_etc */
     746             : 
     747             : int
     748             : fd_rdisp_promote_block( fd_rdisp_t *          disp,
     749             :                         FD_RDISP_BLOCK_TAG_T  block_tag,
     750           6 :                         ulong                 staging_lane ) {
     751           6 :   fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
     752           6 :   per_lane_info_t * lane = disp->lanes + staging_lane;
     753           6 :   block_slist_t * sl = lane->block_ll;
     754             : 
     755           6 :   fd_rdisp_blockinfo_t * block   = block_map_ele_query( disp->blockmap, &block_tag, NULL, block_pool );
     756           6 :   if( FD_UNLIKELY( block==NULL   ) ) return -1;
     757           6 :   if( FD_UNLIKELY( block->staged ) ) return -1;
     758             : 
     759           6 :   block->staged = 1;
     760           6 :   block->staging_lane = (uint)(staging_lane & 0x3);
     761           6 :   block->insert_ready = 1;
     762           6 :   block->schedule_ready = (uint)(1 & (disp->free_lanes >> staging_lane));
     763             : 
     764           6 :   if( FD_LIKELY( disp->free_lanes & (1<<staging_lane) ) ) alloc_lane( disp, staging_lane );
     765             : 
     766           6 :   if( FD_LIKELY( !block_slist_is_empty( sl, block_pool ) ) )  block_slist_ele_peek_tail( sl, block_pool )->insert_ready = 0;
     767           6 :   block_slist_ele_push_tail( sl, block, block_pool );
     768           6 :   uint linear_block_number = (uint)lane->linear_block_number++;
     769           6 :   block->linear_block_number = linear_block_number;
     770             : 
     771           6 :   unstaged_txn_ll_iter_t next;
     772           6 :   for( unstaged_txn_ll_iter_t iter = unstaged_txn_ll_iter_init( block->ll, disp->pool );
     773          24 :       !unstaged_txn_ll_iter_done( iter, block->ll, disp->pool );
     774          18 :       iter = next ) {
     775          18 :       next = unstaged_txn_ll_iter_next( iter, block->ll, disp->pool );
     776             : 
     777          18 :     fd_rdisp_txn_t      * ele = unstaged_txn_ll_iter_ele( iter, block->ll, disp->pool );
     778          18 :     fd_rdisp_unstaged_t * uns = disp->unstaged + unstaged_txn_ll_iter_idx( iter, block->ll, disp->pool );
     779          18 :     FD_TEST( ele->in_degree==UINT_MAX );
     780             : 
     781          18 :     ele->in_degree    = 0U;
     782          18 :     ele->edge_cnt_etc = 0U;
     783             : 
     784          18 :     add_edges( disp, ele, uns->keys,                   uns->writable_cnt, (uint)staging_lane, 1, 0 );
     785          18 :     add_edges( disp, ele, uns->keys+uns->writable_cnt, uns->readonly_cnt, (uint)staging_lane, 0, 0 );
     786             : 
     787          18 :     ele->edge_cnt_etc |= (uint)staging_lane<<14;
     788          18 :     ele->edge_cnt_etc |= linear_block_number<<16;
     789             : 
     790          18 :     if( FD_UNLIKELY( ele->in_degree==0U ) ) {
     791           3 :       pending_prq_ele_t temp[1] = {{ .score = ele->score, .linear_block_number = linear_block_number, .txn_idx = (uint)(ele-disp->pool)}};
     792           3 :       pending_prq_insert( lane->pending, temp );
     793           3 :     }
     794          18 :   }
     795           6 :   unstaged_txn_ll_delete( unstaged_txn_ll_leave( block->ll ) );
     796             : 
     797           6 :   lane->inserted_cnt   += block->inserted_cnt;
     798           6 :   lane->dispatched_cnt += block->dispatched_cnt;
     799           6 :   lane->completed_cnt  += block->completed_cnt;
     800             : 
     801           6 :   return 0;
     802           6 : }
     803             : 
     804             : int
     805             : fd_rdisp_demote_block( fd_rdisp_t *          disp,
     806           3 :                        FD_RDISP_BLOCK_TAG_T  block_tag ) {
     807           3 :   fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
     808             : 
     809           3 :   fd_rdisp_blockinfo_t * block   = block_map_ele_query( disp->blockmap, &block_tag, NULL, block_pool );
     810           3 :   if( FD_UNLIKELY(  block==NULL           ) ) return -1;
     811           3 :   if( FD_UNLIKELY( !block->staged         ) ) return -1;
     812           3 :   if( FD_UNLIKELY( !block->schedule_ready ) ) return -1;
     813           3 :   if( FD_UNLIKELY(  block->completed_cnt!=block->inserted_cnt ) ) FD_LOG_ERR(( "demote_block called with non-empty block" ));
     814           3 :   ulong staging_lane = block->staging_lane;
     815           3 :   block->staged = 0;
     816             : 
     817           3 :   per_lane_info_t * lane = disp->lanes + staging_lane;
     818           3 :   block_slist_t * sl = lane->block_ll;
     819             : 
     820           3 :   lane->inserted_cnt   -= block->inserted_cnt;
     821           3 :   lane->dispatched_cnt -= block->dispatched_cnt;
     822           3 :   lane->completed_cnt  -= block->completed_cnt;
     823             : 
     824           3 :   block->linear_block_number = (uint)disp->unstaged_lblk_num++;
     825             : 
     826             :   /* staged and schedule_ready means it must be the head of the staging lane */
     827           3 :   FD_TEST( block_slist_ele_peek_head( sl, block_pool )==block );
     828           3 :   block_slist_idx_pop_head( sl, block_pool );
     829             : 
     830           3 :   unstaged_txn_ll_join( unstaged_txn_ll_new( block->ll ) );
     831             : 
     832           3 :   if( FD_LIKELY( !block_slist_is_empty( sl, block_pool ) ) ) block_slist_ele_peek_head( sl, block_pool )->schedule_ready = 1;
     833           3 :   else                                                       free_lane( disp, staging_lane );
     834           3 :   return 0;
     835           3 : }
     836             : 
     837             : int
     838             : fd_rdisp_rekey_block( fd_rdisp_t *           disp,
     839             :                       FD_RDISP_BLOCK_TAG_T   new_tag,
     840           0 :                       FD_RDISP_BLOCK_TAG_T   old_tag ) {
     841           0 :   fd_rdisp_blockinfo_t * block_pool = disp->block_pool;
     842             : 
     843           0 :   if( FD_UNLIKELY(        NULL!= block_map_ele_query_const( disp->blockmap, &new_tag, NULL, block_pool ) ) ) return -1;
     844           0 :   fd_rdisp_blockinfo_t * block = block_map_ele_query      ( disp->blockmap, &old_tag, NULL, block_pool );
     845           0 :   if( FD_UNLIKELY(        NULL== block ) )                                                                   return -1;
     846             : 
     847           0 :   block->block = new_tag;
     848           0 :   block_map_ele_insert( disp->blockmap, block, block_pool );
     849           0 :   return 0;
     850           0 : }
     851             : 
     852             : 
     853             : /* "Registers" a reference to the account in info at transaction
     854             :    global_insert_cnt.  Returns the value of the EMA, which is an
     855             :    estimate of the probability that the next transaction also references
     856             :    the account.  This value does not matter for correctness, which is
     857             :    why floating point arithmetic is okay. */
     858             : static inline float
     859             : update_ema( acct_info_t * info,
     860     6692466 :             ulong         global_insert_cnt ) {
     861             : #if FD_RDISP_DISABLE_EMA
     862             :   (void)info;
     863             :   (void)global_insert_cnt;
     864             :   return 0.0f;
     865             : #else
     866    13384932 : #define ALPHA 0.005f
     867             :   /* The normal EMA update equation is
     868             :                 e_i = (alpha) * x_i + (1-alpha)*e_{i-1},
     869             :      where alpha is a constant in (0,1).  Let L be the last reference of
     870             :      the account, and G be the current global_insert_cnt value.  We know
     871             :      that e_L = ema_refs, and that for i in [L+1, G), x_i = 0.
     872             :      That means
     873             :                e_{G-1} =         (1-alpha)^(G-L-1) * ema_refs
     874             :                e_G     = alpha + (1-alpha)^(G-L)   * ema_refs
     875             :    */
     876             :   /* last_ref only captures the low 24 bits, so we guess its the highest
     877             :      value for them that would still make it less than
     878             :      global_insert_cnt.  Turns out, we can calculate that with just an
     879             :      AND. */
     880     6692466 :   ulong last_ref = (ulong)info->last_ref;
     881     6692466 :   ulong delta = (global_insert_cnt - last_ref) & 0xFFFFFFUL;
     882     6692466 :   float ema_refs = ALPHA + powf( 1.0f-ALPHA, (float)delta ) * info->ema_refs;
     883             : 
     884     6692466 :   info->ema_refs = ema_refs;
     885     6692466 :   info->last_ref = (uint)(global_insert_cnt & 0xFFFFFFUL);
     886     6692466 : #undef ALPHA
     887             : 
     888     6692466 :   return ema_refs;
     889     6692466 : #endif
     890     6692466 : }
     891             : 
     892             : static void
     893             : add_edges( fd_rdisp_t           * disp,
     894             :            fd_rdisp_txn_t       * ele,
     895             :            fd_acct_addr_t const * addrs,
     896             :            ulong                  addr_cnt,
     897             :            uint                   lane,
     898             :            int                    writable,
     899     3487512 :            int                    update_score ) {
     900             : 
     901     3487512 :   ulong acct_idx = (ele->edge_cnt_etc & 0x7FU) +     ((ele->edge_cnt_etc>>7) & 0x7FU);
     902     3487512 :   ulong edge_idx = (ele->edge_cnt_etc & 0x7FU) + 3UL*((ele->edge_cnt_etc>>7) & 0x7FU);
     903             : 
     904    10179903 :   for( ulong i=0UL; i<addr_cnt; i++ ) {
     905     6692391 :     fd_acct_addr_t const * addr = addrs+i;
     906     6692391 :     acct_info_t * ai = NULL;
     907             : 
     908             :     /* Step 1: lookup the pubkey */
     909     6692391 :     ulong idx = acct_map_idx_query( disp->acct_map, addr, ULONG_MAX, disp->acct_pool );
     910     6692391 :     if( FD_UNLIKELY( idx==ULONG_MAX ) ) {
     911     2226594 :       idx = acct_map_idx_query( disp->free_acct_map, addr, ULONG_MAX, disp->acct_pool );
     912     2226594 :       if( FD_UNLIKELY( idx==ULONG_MAX ) ) {
     913             :         /* The acct pool is sized so that the list cannot be empty at this
     914             :            point.  However, the element at the head might be the free
     915             :            map with a different pubkey. */
     916      197904 :         idx = free_dlist_idx_peek_head( disp->free_acct_dlist, disp->acct_pool );
     917      197904 :         ai = disp->acct_pool+idx;
     918             : 
     919             :         /* CACHED -> FREE transition */
     920      197904 :         if( FD_LIKELY( ai->next!=0U ) ) {
     921      177357 :           acct_map_idx_remove_fast( disp->free_acct_map, idx, disp->acct_pool );
     922      177357 :         }
     923             : 
     924             :         /* FREE -> ACTIVE transition */
     925      197904 :         ai->key      = *addr;
     926      197904 :         ai->flags    = 0U;
     927      197904 :         ai->last_ref = 0U;
     928      197904 :         ai->ema_refs = 0.0f;
     929     2028690 :       } else {
     930             :         /* CACHED -> ACTIVE transition */
     931     2028690 :         ai = disp->acct_pool+idx;
     932     2028690 :         ai->flags    = 0U; /* FIXME: unnecessary */
     933     2028690 :         acct_map_idx_remove_fast( disp->free_acct_map, idx, disp->acct_pool );
     934     2028690 :       }
     935             :       /* In either case, at this point, the element is not in any map
     936             :          but is in free_acct_dlist.  It has the right key. last_ref, and
     937             :          ema_refs are valid. flags is 0. */
     938     2226594 :       free_dlist_idx_remove( disp->free_acct_dlist, idx, disp->acct_pool );
     939     2226594 :       memset( ai->last_reference, '\0', sizeof(ai->last_reference) );
     940     2226594 :       acct_map_idx_insert( disp->acct_map, idx, disp->acct_pool );
     941     2226594 :     }
     942     6692391 :     ai = disp->acct_pool+idx;
     943             :     /* At this point, in all cases, the acct_info is now in the ACTIVE
     944             :        state.  It's in acct_map, not in free_acct_map, and not in
     945             :        free_acct_dlist. */
     946             : 
     947             :     /* Assume that transactions are drawn randomly from some large
     948             :        distribution of potential transactions.  We want to estimate the
     949             :        expected value of the probability that this transaction conflicts
     950             :        with the next transaction that is sampled.  Two transactions
     951             :        conflict if they conflict on any account, and in general, they
     952             :        conflict if they both reference the same account, unless both
     953             :        this transaction and the next one only read it.  We don't have
     954             :        read/write info, so the best guess we have is that the next
     955             :        transaction does the same thing to the account that this one
     956             :        does.  That means we only really care about the accounts that
     957             :        this transaction writes to.  Label those a_1, a_2, ..., a_i, and
     958             :        suppose the probability that the next transaction references a_i
     959             :        is p_i (determined using the EMA).  Now then, assuming accounts
     960             :        are independent (which is false, but whatever), then the
     961             :        probability that the next transaction does not conflict with this
     962             :        account is:
     963             :                      (1-p_1) * (1-p_2) * (1 - p_i).
     964             : 
     965             :        Since for the treap, a lower value means we'll schedule it
     966             :        earlier, we'll use the probability of non-conflict as the
     967             :        fractional part of the score. */
     968     6692391 :     float score_change = 1.0f - update_ema( ai, disp->global_insert_cnt );
     969     6692391 :     ele->score *= fd_float_if( writable & update_score, score_change, 1.0f );
     970             : 
     971             :     /* Step 2: add edge. There are 4 cases depending on whether this is
     972             :        a writer or not and whether the previous reference was a writer
     973             :        or not. */
     974     6692391 :     int      _ignore;
     975             : 
     976     6692391 :     edge_t   ref_to_pa = ai->last_reference[ lane ];
     977     6692391 :     edge_t   ref_to_me = (uint)((ulong)((ele - disp->pool)<<8) | acct_idx);
     978     6692391 :     edge_t * pa        = FOLLOW_EDGE( disp->pool, ai->last_reference[ lane ], _ignore );
     979     6692391 :     edge_t * me        = ele->edges + edge_idx;
     980             : 
     981             :     /* In the case that this is the first txn in the DAG, pa will point
     982             :        to edges[0] of the sentinel element, pool[0].  We don't care
     983             :        about what is stored there, so just set it up as a dummy element
     984             :        to make the rest of the code work properly in this case too.  If
     985             :        this is a read, we want me->sibli to ref_to_me in case 4; if this
     986             :        is a write, we want to set me->sibli to 0 in case 2, but we need
     987             :        to make sure pb==pa. */
     988     6692391 :     disp->pool->edges[0] = (1U<<31) | (uint)idx;
     989     6692391 :     disp->pool->edges[1] = fd_uint_if( writable, 0U, ref_to_me );
     990     6692391 :     disp->pool->edges[2] = fd_uint_if( writable, 0U, ref_to_me );
     991             : 
     992     6692391 :     int flags = ai->flags;
     993             : 
     994     6692391 :     if( writable ) { /* also should be known at compile time */
     995     2738880 :       if( flags & ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ) ) { /* unclear prob */
     996             :         /* Case 1: w-w. The parent is the special last pointer.  Point
     997             :            the parent to me, and set me to the last pointer. */
     998     1117680 :         *me = *pa;
     999     1117680 :         *pa = ref_to_me;
    1000     1621200 :       } else {
    1001             :         /* Case 2: r-w. This is the tricky case because there could be
    1002             :            multiple readers.  We need to set all the last readers' child
    1003             :            pointers to me. */
    1004     1621200 :         *me = *pa;
    1005     1621200 :         *pa = ref_to_me;
    1006     1621200 :         edge_t * pb = FOLLOW_EDGE( disp->pool, pa[1], _ignore );
    1007             :         /* Intentionally skip the first in_degree increment, because it
    1008             :            will be done later */
    1009     2229093 :         while( pb!=pa ) {
    1010      607893 :           *pb = ref_to_me;
    1011      607893 :           ele->in_degree++;
    1012      607893 :           pb = FOLLOW_EDGE( disp->pool, pb[1], _ignore );
    1013      607893 :         }
    1014     1621200 :         flags |= ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ) | ACCT_INFO_FLAG_ANY_WRITERS( lane );
    1015     1621200 :       }
    1016     3953511 :     } else {
    1017     3953511 :       if( flags & ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ) ) { /* unclear prob */
    1018             :         /* Case 3: w-r. Similar to case 1, but need to initialize my
    1019             :            next and prev sibling pointers too. */
    1020      507762 :         *me = *pa;
    1021      507762 :         *pa = ref_to_me;
    1022      507762 :         me[1] = ref_to_me; /* next */
    1023      507762 :         me[2] = ref_to_me; /* prev */
    1024      507762 :         flags &= ~(int)ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ); /* clear bit */
    1025     3445749 :       } else {
    1026             :         /* Case 4: r-r. Add myself as a sibling instead of a child */
    1027     3445749 :         *me = *pa;
    1028     3445749 :         FOLLOW_EDGE( disp->pool, pa[1], _ignore )[2] = ref_to_me;  /* prev->next->prev = me   */
    1029     3445749 :         me[1] = pa[1];                                             /* me->next   = prev->next */
    1030     3445749 :         me[2] = ref_to_pa;                                         /* me->prev   = prev       */
    1031     3445749 :         pa[1] = ref_to_me;                                         /* prev->next = me         */
    1032     3445749 :       }
    1033     3953511 :     }
    1034             : 
    1035             :     /* Step 3: Update the final values */
    1036             :     /* In general, we want to increment the in_degree unless this
    1037             :        transaction is the first to reference this account.  The
    1038             :        exception is that if this account has only readers, including
    1039             :        this transaction, we don't want to increment the in_degree
    1040             :        either.  At this point, we can tell if that is the case based on
    1041             :        ANY_WRITERS.  */
    1042     6692391 :     ele->in_degree += (uint)((ai->last_reference[ lane ]!=0U) & !!(flags & ACCT_INFO_FLAG_ANY_WRITERS( lane )));
    1043     6692391 :     ai->last_reference[ lane ] = ref_to_me;
    1044     6692391 :     ai->flags                  = (uchar)flags;
    1045     6692391 :     edge_idx += fd_uint_if( writable, 1U, 3U );
    1046     6692391 :     acct_idx++;
    1047     6692391 :   }
    1048     3487512 :   ele->edge_cnt_etc += (uint)addr_cnt<<fd_int_if( writable, 0, 7 ); /* Can't overflow by construction */
    1049     3487512 : }
    1050             : 
    1051             : 
    1052             : /* should be called with all writable accounts first */
    1053             : static void
    1054             : add_unstaged_edges( fd_rdisp_t * disp,
    1055             :                     fd_rdisp_txn_t       * ele,
    1056             :                     fd_rdisp_unstaged_t  * unstaged,
    1057             :                     fd_acct_addr_t const * addr,
    1058             :                     ulong                  addr_cnt,
    1059             :                     int                    writable,
    1060         216 :                     int                    update_score ) {
    1061         216 :   ulong base_idx = unstaged->writable_cnt+unstaged->readonly_cnt;
    1062         216 :   FD_TEST( !writable || unstaged->readonly_cnt==0U );
    1063         366 :   for( ulong i=0UL; i<addr_cnt; i++ ) {
    1064         150 :     unstaged->keys[ base_idx+i ] = addr[i];
    1065         150 :     if( FD_LIKELY( update_score ) ) {
    1066         150 :       ulong idx = acct_map_idx_query( disp->acct_map, addr+i, ULONG_MAX, disp->acct_pool );
    1067         150 :       if( FD_UNLIKELY( idx==ULONG_MAX ) ) idx = acct_map_idx_query( disp->free_acct_map, addr+i, ULONG_MAX, disp->acct_pool );
    1068             :       /* since these are unstaged, we don't bother moving accounts
    1069             :          around */
    1070         150 :       float score_change = 1.0f;
    1071         150 :       if( FD_LIKELY( idx!=ULONG_MAX ) ) score_change = 1.0f - update_ema( disp->acct_pool+idx, disp->global_insert_cnt );
    1072         150 :       ele->score *= fd_float_if( writable & update_score, score_change, 1.0f );
    1073         150 :     }
    1074         150 :   }
    1075         216 :   *(fd_ptr_if( writable, &(unstaged->writable_cnt), &(unstaged->readonly_cnt) ) ) += (uint)addr_cnt;
    1076         216 : }
    1077             : 
    1078             : ulong
    1079             : fd_rdisp_add_txn( fd_rdisp_t          *  disp,
    1080             :                   FD_RDISP_BLOCK_TAG_T   insert_block,
    1081             :                   fd_txn_t const       * txn,
    1082             :                   uchar const          * payload,
    1083             :                   fd_acct_addr_t const * alts,
    1084      581286 :                   int                    serializing ) {
    1085             : 
    1086      581286 :   fd_rdisp_blockinfo_t * block   = block_map_ele_query( disp->blockmap, &insert_block, NULL, disp->block_pool );
    1087      581286 :   if( FD_UNLIKELY( !block || !block->insert_ready ) ) return 0UL;
    1088      581283 :   if( FD_UNLIKELY( !pool_free( disp->pool       ) ) ) return 0UL;
    1089             : 
    1090      581283 :   ulong idx = pool_idx_acquire( disp->pool );
    1091      581283 :   fd_rdisp_txn_t * rtxn = disp->pool + idx;
    1092             : 
    1093      581283 :   fd_acct_addr_t const * imm_addrs = fd_txn_get_acct_addrs( txn, payload );
    1094             : 
    1095      581283 :   if( FD_UNLIKELY( !block->staged ) ) {
    1096          36 :     rtxn->in_degree = UINT_MAX;
    1097          36 :     rtxn->score     = 0.999f;
    1098             : 
    1099          36 :     fd_rdisp_unstaged_t * unstaged = disp->unstaged + idx;
    1100          36 :     unstaged->block = insert_block;
    1101          36 :     unstaged->writable_cnt = 0U;
    1102          36 :     unstaged->readonly_cnt = 0U;
    1103             : 
    1104          36 :     add_unstaged_edges( disp, rtxn, unstaged, imm_addrs,
    1105          36 :                                                         fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_SIGNER        ), 1, 1 );
    1106          36 :     add_unstaged_edges( disp, rtxn, unstaged, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_SIGNER ),
    1107          36 :                                                         fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_NONSIGNER_IMM ), 1, 1 );
    1108          36 :     if( FD_LIKELY( alts ) )
    1109          36 :       add_unstaged_edges( disp, rtxn, unstaged, alts,
    1110          36 :                                                         fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_ALT ),           1, 1 );
    1111          36 :     add_unstaged_edges( disp, rtxn, unstaged, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_SIGNER ),
    1112          36 :                                                         fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_SIGNER        ), 0, 1 );
    1113          36 :     add_unstaged_edges( disp, rtxn, unstaged, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_SIGNER | FD_TXN_ACCT_CAT_WRITABLE_NONSIGNER_IMM ),
    1114          36 :                                                         fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_NONSIGNER_IMM ), 0, 1 );
    1115          36 :     if( FD_LIKELY( alts ) )
    1116          36 :       add_unstaged_edges( disp, rtxn, unstaged, alts   +fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_ALT ),
    1117          36 :                                                         fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_ALT ),           0, 1 );
    1118             : 
    1119          36 :     unstaged_txn_ll_ele_push_tail( block->ll, rtxn, disp->pool );
    1120      581247 :   } else {
    1121      581247 :     uint lane = block->staging_lane;
    1122             : 
    1123      581247 :     rtxn->in_degree    = 0U;
    1124      581247 :     rtxn->score        = 0.999f;
    1125      581247 :     rtxn->edge_cnt_etc = (block->linear_block_number<<16) | (lane<<14);
    1126             : 
    1127      581247 :     add_edges( disp, rtxn, imm_addrs,
    1128      581247 :                                      fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_SIGNER        ), lane, 1, 1 );
    1129      581247 :     add_edges( disp, rtxn, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_SIGNER ),
    1130      581247 :                                      fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_NONSIGNER_IMM ), lane, 1, 1 );
    1131      581247 :     if( FD_LIKELY( alts ) )
    1132      581244 :       add_edges( disp, rtxn, alts,
    1133      581244 :                                      fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_ALT ),           lane, 1, 1 );
    1134      581247 :     add_edges( disp, rtxn, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_SIGNER ),
    1135      581247 :                                      fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_SIGNER        ), lane, 0, 1 );
    1136      581247 :     add_edges( disp, rtxn, imm_addrs+fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_SIGNER | FD_TXN_ACCT_CAT_WRITABLE_NONSIGNER_IMM ),
    1137      581247 :                                      fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_NONSIGNER_IMM ), lane, 0, 1 );
    1138      581247 :     if( FD_LIKELY( alts ) )
    1139      581244 :       add_edges( disp, rtxn, alts   +fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_WRITABLE_ALT ),
    1140      581244 :                                      fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_READONLY_ALT ),           lane, 0, 1 );
    1141      581247 :   }
    1142             : 
    1143      581283 :   if( FD_UNLIKELY( serializing | block->last_insert_was_serializing ) ) {
    1144           6 :     block->last_serializing = block->inserted_cnt;
    1145           6 :   }
    1146      581283 :   block->last_insert_was_serializing = (uint)!!serializing;
    1147      581283 :   rtxn->score += (float)block->last_serializing;
    1148             : 
    1149      581283 :   block->inserted_cnt++;
    1150      581283 :   disp->global_insert_cnt++;
    1151             : 
    1152      581283 :   if( FD_LIKELY( (block->staged) & (rtxn->in_degree==0U) ) ) {
    1153      103434 :     pending_prq_ele_t temp[1] = {{ .score = rtxn->score, .linear_block_number = block->linear_block_number, .txn_idx = (uint)idx }};
    1154      103434 :     pending_prq_insert( disp->lanes[ block->staging_lane ].pending, temp );
    1155      103434 :   }
    1156             : 
    1157      581283 :   return idx;
    1158      581283 : }
    1159             : 
    1160             : ulong
    1161             : fd_rdisp_get_next_ready( fd_rdisp_t           * disp,
    1162     1145808 :                          FD_RDISP_BLOCK_TAG_T   schedule_block ) {
    1163     1145808 :   fd_rdisp_blockinfo_t * block   = block_map_ele_query( disp->blockmap, &schedule_block, NULL, disp->block_pool );
    1164     1145808 :   if( FD_UNLIKELY( !block || !block->schedule_ready ) ) return 0UL;
    1165             : 
    1166     1145793 :   ulong idx;
    1167     1145793 :   if( FD_LIKELY( block->staged ) ) {
    1168     1145766 :     ulong staging_lane = block->staging_lane;
    1169     1145766 :     per_lane_info_t * l = disp->lanes + staging_lane;
    1170             : 
    1171     1145766 :     if( FD_UNLIKELY( !pending_prq_cnt( l->pending )                                ) ) return 0UL;
    1172      581271 :     if( FD_UNLIKELY( l->pending->linear_block_number != block->linear_block_number ) ) return 0UL;
    1173             :     /* e.g. when completed_cnt==0, we can accept any score below 1.0 */
    1174      581265 :     if( FD_UNLIKELY( l->pending->score>=(float)(block->completed_cnt+1U)           ) ) return 0UL;
    1175      581265 :     idx = l->pending->txn_idx;
    1176      581265 :     pending_prq_remove_min( l->pending );
    1177      581265 :   } else {
    1178          27 :     if( FD_UNLIKELY( block->dispatched_cnt!=block->completed_cnt       ) ) return 0UL;
    1179          24 :     if( FD_UNLIKELY( unstaged_txn_ll_is_empty( block->ll, disp->pool ) ) ) return 0UL;
    1180          18 :     idx = unstaged_txn_ll_idx_peek_head( block->ll, disp->pool );
    1181          18 :   }
    1182      581283 :   block->dispatched_cnt++;
    1183             : 
    1184      581283 :   return idx;
    1185     1145793 : }
    1186             : 
    1187             : void
    1188             : fd_rdisp_complete_txn( fd_rdisp_t * disp,
    1189      581283 :                        ulong        txn_idx ) {
    1190             : 
    1191      581283 :   fd_rdisp_txn_t * rtxn = disp->pool + txn_idx;
    1192             : 
    1193      581283 :   if( FD_UNLIKELY( rtxn->in_degree==UINT_MAX ) ) {
    1194             :     /* Unstaged */
    1195          18 :     fd_rdisp_blockinfo_t * block = block_map_ele_query( disp->blockmap, &disp->unstaged[ txn_idx ].block, NULL, disp->block_pool );
    1196          18 :     FD_TEST( rtxn==unstaged_txn_ll_ele_peek_head( block->ll, disp->pool ) );
    1197          18 :     unstaged_txn_ll_ele_pop_head( block->ll, disp->pool );
    1198          18 :     block->completed_cnt++;
    1199      581265 :   } else {
    1200             :     /* Staged */
    1201      581265 :     ulong w_cnt = (rtxn->edge_cnt_etc    ) & 0x7FU;
    1202      581265 :     ulong r_cnt = (rtxn->edge_cnt_etc>> 7) & 0x7FU;
    1203      581265 :     ulong lane  = (rtxn->edge_cnt_etc>>14) & 0x3U;
    1204      581265 :     ulong edge_idx = 0UL;
    1205     7273656 :     for( ulong i=0UL; i<w_cnt+r_cnt; i++ ) {
    1206     6692391 :       edge_t const * e = rtxn->edges+edge_idx;
    1207     6692391 :       edge_t const  e0 = *e;
    1208     6692391 :       edge_t ref_to_me = (uint)((txn_idx<<8) | i);
    1209             : 
    1210             :       /* To help with explanations, consider the following DAG:
    1211             : 
    1212             :            --> B --\   --> E --\
    1213             :           /         V /         V
    1214             :          A           D         (G)
    1215             :           \         ^ \         ^
    1216             :            --> C --/   --> F --/     */
    1217             : 
    1218     6692391 :       if( FD_UNLIKELY( EDGE_IS_LAST( e0 ) ) ) {
    1219     3951159 :         ulong acct_idx = e0 & 0x7FFFFFFFU;
    1220     3951159 :         acct_info_t * ai = disp->acct_pool + acct_idx;
    1221             :         /* If this is a writer, e.g. node G above, we know it's the last
    1222             :            one, so we can clear last_reference.
    1223             :            If this is a reader, e.g. node E and F above, if node G
    1224             :            didn't exist, we need to check if it's the last one
    1225             :            (me==me->next).  If so, we can clear last_reference.  If not,
    1226             :            we need to delete this node from the linked list */
    1227     3951159 :         if( edge_idx<w_cnt || e[1]==ref_to_me ) ai->last_reference[ lane ] = 0U;
    1228     1724565 :         else {
    1229     1724565 :           int _ignore;
    1230     1724565 :           FOLLOW_EDGE( disp->pool, e[1], _ignore )[2] = e[2];  /* me->next->prev = me->prev */
    1231     1724565 :           FOLLOW_EDGE( disp->pool, e[2], _ignore )[1] = e[1];  /* me->prev->next = me->next */
    1232     1724565 :           ai->last_reference[ lane ]= fd_uint_if( ai->last_reference[ lane ]==ref_to_me, e[1], ai->last_reference[ lane ] );
    1233     1724565 :         }
    1234             : 
    1235             :         /* Potentially transition from ACTIVE -> CACHED */
    1236     3951159 :         if( FD_UNLIKELY( (ai->last_reference[ 0 ]==0U)&(ai->last_reference[ 1 ]==0U)&
    1237     3951159 :                          (ai->last_reference[ 2 ]==0U)&(ai->last_reference[ 3 ]==0U) ) ) {
    1238     2226594 :           ai->flags = 0;
    1239     2226594 :           acct_map_idx_remove_fast( disp->acct_map,        acct_idx, disp->acct_pool );
    1240     2226594 :           acct_map_idx_insert     ( disp->free_acct_map,   acct_idx, disp->acct_pool );
    1241     2226594 :           free_dlist_idx_push_tail( disp->free_acct_dlist, acct_idx, disp->acct_pool );
    1242     2226594 :         }
    1243     3951159 :       } else {
    1244     2741232 :         int child_is_writer;
    1245             : 
    1246     2741232 :         edge_t next_e = e0;
    1247     2741232 :         edge_t const * child_edge;
    1248     3352758 :         while( 1 ) {
    1249             :           /* This loop first traverses the me->child link, and then
    1250             :              traverses any sibling links.  For example, in the case that
    1251             :              we're completing node D above, the first child_txn is E,
    1252             :              and the second child_txn is F. */
    1253     3352758 :           /*            */ child_edge = FOLLOW_EDGE(     disp->pool, next_e, child_is_writer );
    1254     3352758 :           fd_rdisp_txn_t * child_txn  = FOLLOW_EDGE_TXN( disp->pool, next_e                  );
    1255     3352758 :           FD_TEST( child_txn->in_degree>0U );
    1256     3352758 :           if( FD_UNLIKELY( 0U==(--(child_txn->in_degree)) ) ) {
    1257      477828 :             pending_prq_ele_t temp[1] = {{ .score               = child_txn->score,
    1258      477828 :                                            .linear_block_number = child_txn->edge_cnt_etc>>16,
    1259      477828 :                                            .txn_idx             = (uint)(child_txn-disp->pool) }};
    1260      477828 :             pending_prq_insert( disp->lanes[ lane ].pending, temp );
    1261      477828 :           }
    1262     3352758 :           if( child_is_writer || child_edge[1]==e0 ) break;
    1263      611526 :           next_e = child_edge[1];
    1264      611526 :         }
    1265             :         /* In the case that the completed transaction is a reader, say B
    1266             :            or C above, it seems like we should need to remove it from
    1267             :            the doubly linked list, but we actually don't.  The times
    1268             :            that we need to read the sibling pointers are:
    1269             :             1. Completing the writer before a reader (e.g. completing A)
    1270             :             2. Completing a reader that's the last in the DAG (e.g.
    1271             :                completing E/F if G didn't exist)
    1272             :             3. Adding another reader to the same set of readers (e.g. if
    1273             :                G were added as a reader instead of a writer)
    1274             :             4. Adding a writer after a set of readers (e.g. adding G).
    1275             : 
    1276             :            Supposing that the completed transaction is a reader, since
    1277             :            we checked EDGE_IS_LAST( e0 ), we know that there is at least
    1278             :            one writer that follows this reader, e.g. D above.
    1279             : 
    1280             :            And so none of these reason can apply to this group of readers:
    1281             :             1. Completing B or C implies that A has already completed,
    1282             :                so it can't complete again.
    1283             :             2. We know that we're not in that case because we checked
    1284             :                EDGE_IS_LAST( e0 ), and if we're not last now, we cannot
    1285             :                become last later, because the growth happens in the
    1286             :                other direction.
    1287             :             3. Similarly, because we checked EDGE_IS_LAST, any future
    1288             :                additions of readers won't be to this group of readers.
    1289             :             4. Similarly, we know that there already is a writer that
    1290             :                follows this group of readers, so a later writer would
    1291             :                not read this set of readers.
    1292             : 
    1293             :            So then, we don't need to deal with the sibling edges.  The
    1294             :            fact that we only need to do it in one case almost calls into
    1295             :            question whether we need to maintain the whole circular
    1296             :            system in the first place, and whether we could get away with
    1297             :            a reference count or something instead, but it's important in
    1298             :            one critical case: suppose we add a bunch of readers, then
    1299             :            some of them complete, and then we add a writer (case 4
    1300             :            above).  We need to be able to enumerate the nodes in the DAG
    1301             :            that have not yet completed, and we need to be able to remove
    1302             :            them from that set in O(1).  Those two requirements don't
    1303             :            leave us with many alternatives besides a doubly linked list. */
    1304             : 
    1305     2741232 :         if( FD_UNLIKELY( EDGE_IS_LAST( *child_edge ) ) ) {
    1306             :           /* For example, either:
    1307             :              1. completing D when if G didn't exist
    1308             :              2. completing E or F with G in the DAG
    1309             : 
    1310             :              After completing this transaction, there's either 0 writers
    1311             :              left (case 1) or 1 writer left (case 2) in the DAG. and if
    1312             :              there is one, it is the last reference.
    1313             : 
    1314             :              Either way, we want to set ANY_WRITERS to
    1315             :              LAST_REF_WAS_WRITER. */
    1316     2130813 :           acct_info_t * ai = disp->acct_pool + (*child_edge & 0x7FFFFFFFU);
    1317     2130813 :           ulong flags = ai->flags;
    1318     2130813 :           flags &= ~(ulong)ACCT_INFO_FLAG_ANY_WRITERS( lane );
    1319     2130813 :           flags |= (flags & (ulong)ACCT_INFO_FLAG_LAST_REF_WAS_WRITE( lane ))<<1;
    1320     2130813 :           ai->flags = (uchar)flags;
    1321     2130813 :         }
    1322     2741232 :       }
    1323     6692391 :       edge_idx += fd_ulong_if( i<w_cnt, 1UL, 3UL );
    1324     6692391 :     }
    1325      581265 :     block_slist_ele_peek_head( disp->lanes[ lane ].block_ll, disp->block_pool )->completed_cnt++;
    1326      581265 :   }
    1327             :   /* For testing purposes, to make sure we don't read a completed
    1328             :      transaction, we can clobber the memory. */
    1329             :   /* memset( disp->pool+txn_idx, '\xCC', sizeof(fd_rdisp_txn_t) ); */
    1330      581283 :   pool_idx_release( disp->pool, txn_idx );
    1331      581283 : }
    1332             : 
    1333             : 
    1334             : ulong
    1335             : fd_rdisp_staging_lane_info( fd_rdisp_t           const * disp,
    1336          36 :                             fd_rdisp_staging_lane_info_t out_sched[ static 4 ] ) {
    1337         180 :   for( ulong i=0UL; i<4UL; i++ ) {
    1338         144 :     if( !(disp->free_lanes & (1<<i) ) ) {
    1339          51 :       block_slist_t const * sl = disp->lanes[ i ].block_ll;
    1340          51 :       out_sched[ i ].insert_ready_block   = block_slist_ele_peek_tail( sl, disp->block_pool )->block;
    1341          51 :       out_sched[ i ].schedule_ready_block = block_slist_ele_peek_head( sl, disp->block_pool )->block;
    1342          51 :     }
    1343         144 :   }
    1344          36 :   return 0xFUL & ~(ulong)disp->free_lanes;
    1345          36 : }
    1346             : 
    1347             : void
    1348      573513 : fd_rdisp_verify( fd_rdisp_t const * disp ) {
    1349      573513 :   ulong acct_depth  = disp->depth*MAX_ACCT_PER_TXN;
    1350      573513 :   ulong block_depth = disp->block_depth;
    1351      573513 :   FD_TEST( 0==acct_map_verify ( disp->acct_map,      acct_depth+1UL,  disp->acct_pool ) );
    1352      573513 :   FD_TEST( 0==acct_map_verify ( disp->free_acct_map, acct_depth+1UL,  disp->acct_pool ) );
    1353      573513 :   FD_TEST( 0==block_map_verify( disp->blockmap,     block_depth+1UL, disp->block_pool ) );
    1354      573513 : }
    1355             : 
    1356           6 : void * fd_rdisp_leave ( fd_rdisp_t * disp ) { return disp; }
    1357           6 : void * fd_rdisp_delete( void * mem        ) { return  mem; }

Generated by: LCOV version 1.14